From c6f2d0fb9ffb03ca134a25e2a96322d020af7c31 Mon Sep 17 00:00:00 2001 From: Ouadie EL FAROUKI Date: Thu, 21 Dec 2023 22:02:05 +0000 Subject: [PATCH] removed extra attributes from transpose kernels --- include/operations/extension/transpose.h | 16 ---------------- src/operations/extension/transpose.hpp | 22 ++++++++++++---------- 2 files changed, 12 insertions(+), 26 deletions(-) diff --git a/include/operations/extension/transpose.h b/include/operations/extension/transpose.h index 71921b539..b684f0b01 100644 --- a/include/operations/extension/transpose.h +++ b/include/operations/extension/transpose.h @@ -80,11 +80,6 @@ class Transpose { static constexpr const index_t inner_tile_size_ = wg_size / Tile_size; static constexpr const index_t inner_tile_count_ = Tile_size / inner_tile_size_; - // Minimum number of Tile-mutliple rows & columns to cover the matrices - index_t M_pad_; - index_t N_pad_; - // Total size of Tile-mutliple covering matrix - index_t size_pad_; // Batch size when using batched transpose index_t batch_size_; // Number of contiguous elements to be used in local memory to avoid bank @@ -115,9 +110,6 @@ class Transpose { stride_a_(stride_a), stride_at_(stride_at), inc_at_(inc_at), - M_pad_(tile_count_m_ * Tile_size), - N_pad_(tile_count_n_ * Tile_size), - size_pad_(M_pad_ * N_pad_), batch_size_(batch_size) {} index_t get_size() const; @@ -209,11 +201,6 @@ class TransposeAdd { static constexpr const index_t inner_tile_size_ = wg_size / Tile_size; static constexpr const index_t inner_tile_count_ = Tile_size / inner_tile_size_; - // Minimum number of Tile-mutliple rows & columns to cover the output matrix - index_t M_pad_; - index_t N_pad_; - // Total size of Tile-mutliple covering matrix - index_t size_pad_; // Batch size when using batched transpose index_t batch_size_; // Number of contiguous elements to be used in local memory to avoid bank @@ -246,9 +233,6 @@ class TransposeAdd { tile_count_m_((M_ - 1) / Tile_size + 1), tile_count_n_((N_ - 1) / Tile_size + 1), tile_count_total_(tile_count_m_ * tile_count_n_), - M_pad_(tile_count_m_ * Tile_size), - N_pad_(tile_count_n_ * Tile_size), - size_pad_(M_pad_ * N_pad_), batch_size_(batch_size) {} index_t get_size() const; diff --git a/src/operations/extension/transpose.hpp b/src/operations/extension/transpose.hpp index 98ecf8e03..87485660e 100644 --- a/src/operations/extension/transpose.hpp +++ b/src/operations/extension/transpose.hpp @@ -55,7 +55,7 @@ Transpose::get_size() const { // Smallest TileSize square-multiple containing input/output matrices times // batch_size - return (size_pad_ * batch_size_); + return (tile_count_total_ * Tile_size * Tile_size * batch_size_); } template ::get_size() const { // Smallest TileSize square-multiple containing input/output matrices - return (size_pad_ * batch_size_); + return (tile_count_total_ * Tile_size * Tile_size * batch_size_); } template [0,N_], B & - *C otherwise -> [0,M_]) - * @param j [output] the global col-index (A & B when both_trans -> [0,M_], B & - *C otherwise -> [0,N_]) + * @param i [output] the global row-index (A & B when both_trans -> [0,N_], B + *& C otherwise -> [0,M_]) + * @param j [output] the global col-index (A & B when both_trans -> [0,M_], B + *& C otherwise -> [0,N_]) */ template