diff --git a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp index 31725dba9ca7..d722334795d0 100644 --- a/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp +++ b/compiler/src/iree/compiler/Codegen/LLVMGPU/Passes.cpp @@ -289,13 +289,12 @@ void addGPUVectorizationPassPipeline(OpPassManager &funcPassManager) { //===---------------------------------------------------------------------===// void addGPUTileAndFusePassPipeline(OpPassManager &funcPassManager) { + tileAndDistributeToWorkgroup(funcPassManager); // Step 1. Promote matmul operands and pack to intrinsic shapes. funcPassManager.addPass(createGPUPromoteMatmulOperandsPass()); funcPassManager.addPass(IREE::GPU::createPackToIntrinsicsPass()); - tileAndDistributeToWorkgroup(funcPassManager); - // Step 2. Tile and fuse tileable ops to reduction loops. { GPUApplyTilingLevelPassOptions options;