Skip to content
This repository has been archived by the owner on Jan 13, 2025. It is now read-only.

Commit

Permalink
Tuned gemm complex for cpu
Browse files Browse the repository at this point in the history
  • Loading branch information
OuadiElfarouki committed Oct 4, 2023
1 parent 9eb3258 commit b53d0f6
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 20 deletions.
7 changes: 2 additions & 5 deletions cmake/CmakeFunctionHelper.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -623,13 +623,10 @@ else() # default cpu backend
foreach(data ${data_list_c})
add_gemm_configuration(
"${data}" 64 "false" "false" "false"
64 2 2 8 8 1 1 1 1 1 1 1 1 1 float float "no_local" "standard" "full" 1 "strided" "false" "false")
64 2 2 4 4 1 1 1 1 1 1 1 1 1 float float "no_local" "standard" "full" 1 "strided" "false" "false")
add_gemm_configuration(
"${data}" 64 "false" "false" "false"
64 8 8 8 8 1 1 1 1 1 1 1 1 1 float float "no_local" "standard" "partial" 1 "strided" "false" "false")
add_gemm_configuration(
"${data}" 64 "false" "false" "false"
64 2 2 8 8 1 1 1 1 1 1 1 1 1 float float "local" "standard" "full" 1 "strided" "false" "false")
64 8 8 4 4 1 1 1 1 1 1 1 1 1 float float "no_local" "standard" "partial" 1 "strided" "false" "false")
endforeach()
endif() # BLAS_ENABLE_COMPLEX
endif()
Expand Down
19 changes: 4 additions & 15 deletions src/interface/blas3/backend/default_cpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -116,39 +116,28 @@ _gemm(sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K,
container_2_t _c, index_t _ldc, index_t _stridec, index_t batch_size,
gemm_batch_type_t batch_type,
const typename sb_handle_t::event_t& _dependencies) {
if (_M <= 128 && _N <= 128 && _K <= 128 && !s_a && !s_b) {
if (_M <= 256 && _N <= 256 && _K <= 256) {
return blas::Gemm_Launcher<
container_0_t, container_1_t, container_2_t, 64, false, false, false,
64, Tile<2, 2, 8, 8>, _t_a, _t_b, s_a, s_b,
64, Tile<2, 2, 4, 4>, _t_a, _t_b, s_a, s_b,
static_cast<int>(gemm_memory_t::no_local),
static_cast<int>(gemm_algorithm_t::standard),
static_cast<int>(gemm_vectorization_t::full), is_beta_zero, 1,
static_cast<int>(gemm_batch_type_t::strided)>::
template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea,
_b, _ldb, _strideb, _beta, _c, _ldc, _stridec,
batch_size, _dependencies);
} else if (!s_a && !s_b) {
} else {
return blas::Gemm_Launcher<
container_0_t, container_1_t, container_2_t, 64, false, false, false,
64, Tile<8, 8, 8, 8>, _t_a, _t_b, s_a, s_b,
64, Tile<8, 8, 4, 4>, _t_a, _t_b, s_a, s_b,
static_cast<int>(gemm_memory_t::no_local),
static_cast<int>(gemm_algorithm_t::standard),
static_cast<int>(gemm_vectorization_t::partial), is_beta_zero, 1,
static_cast<int>(gemm_batch_type_t::strided)>::
template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea,
_b, _ldb, _strideb, _beta, _c, _ldc, _stridec,
batch_size, _dependencies);
} else {
return blas::Gemm_Launcher<
container_0_t, container_1_t, container_2_t, 64, false, false, false,
64, Tile<2, 2, 8, 8>, _t_a, _t_b, s_a, s_b,
static_cast<int>(gemm_memory_t::local),
static_cast<int>(gemm_algorithm_t::standard),
static_cast<int>(gemm_vectorization_t::full), is_beta_zero, 1,
static_cast<int>(gemm_batch_type_t::strided)>::
template _select_gemm(sb_handle, _M, _N, _K, _alpha, _a, _lda, _stridea,
_b, _ldb, _strideb, _beta, _c, _ldc, _stridec,
batch_size, _dependencies);
}
}
#endif
Expand Down

0 comments on commit b53d0f6

Please sign in to comment.