Skip to content
This repository has been archived by the owner on Jan 13, 2025. It is now read-only.

Commit

Permalink
Merge branch 'master' into matcopy_update
Browse files Browse the repository at this point in the history
  • Loading branch information
OuadiElfarouki committed Dec 26, 2023
2 parents c6f2d0f + 8c5224f commit b24e4ef
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 3 deletions.
3 changes: 3 additions & 0 deletions cmake/CmakeFunctionHelper.cmake
Original file line number | Diff line number | Diff line change
Expand Up @@ -592,6 +592,9 @@ elseif(${TUNING_TARGET} STREQUAL "NVIDIA_GPU")
add_gemm_configuration(
"${data}" 256 "false" "true" "true"
128 8 8 16 16 1 1 1 1 1 1 1 1 1 float float "local" "standard" "full" 1 "strided" "false")
+ add_gemm_configuration(
+ "${data}" 64 "false" "false" "true"
+ 64 8 8 8 8 1 1 2 2 1 1 1 1 1 float float "local" "standard" "full" 1 "strided" "false")
endforeach()
if(BLAS_ENABLE_COMPLEX)
# Extract list of complex<data> for each data in supported_types
Expand Down
6 changes: 3 additions & 3 deletions src/interface/blas3/backend/nvidia_gpu.hpp
Original file line number | Diff line number | Diff line change
Expand Up @@ -113,9 +113,9 @@ _gemm(sb_handle_t& sb_handle, index_t _M, index_t _N, index_t _K,

if (batch_size > 1) {
return blas::Gemm_Launcher<
- container_0_t, container_1_t, container_2_t, 256, false, true, true,
- 128, Tile<8, 8, 16, 16, 1, 1, 1, 1, 1, 1, 1, 1, 1, float, float>,
- _t_a, _t_b, s_a, s_b, static_cast<int>(gemm_memory_t::local),
+ container_0_t, container_1_t, container_2_t, 64, false, false, true,
+ 64, Tile<8, 8, 8, 8, 1, 1, 2, 2, 1, 1, 1, 1, 1, float, float>, _t_a,
+ _t_b, s_a, s_b, static_cast<int>(gemm_memory_t::local),
static_cast<int>(gemm_algorithm_t::standard),
static_cast<int>(gemm_vectorization_t::full), is_beta_zero, 1,
static_cast<int>(gemm_batch_type_t::strided),
Expand Down

0 comments on commit b24e4ef

Please sign in to comment.