From 907b2cdbafb3f69666695b4e66b737932b6eb71b Mon Sep 17 00:00:00 2001
From: Stanley Winata <68087699+raikonenfnu@users.noreply.github.com>
Date: Sun, 21 Jul 2024 13:25:02 -0700
Subject: [PATCH] [Codegen] Ensure hoisted extraction replaced by induction
 var. (#17975)

This commit teaches the compiler to replace the hoisted extraction of
the IV with the newly generated IV with the correct shape. Previously we
would hoist the extraction and replace the IV uses with the hoisted
extraction; however, this may not always be correct since the IV's value
may be updated in the loop.

The main motivation of this PR is to fix a numerical issue caused by such
a case in the attention-cpp pipeline. Although there it happens at the
vector level, whereas the test cases we have are at the tensor level, we
can re-use said test. A specific example of this case will be left in the
comment section of this PR.

---------

Signed-off-by: Stanley Winata <stanley.winata@amd.com>
---
 .../Codegen/Common/OptimizeTensorInsertExtractSlices.cpp     | 5 ++++-
 .../Common/test/optimize_tensor_insert_extract_slices.mlir   | 5 +++--
 2 files changed, 7 insertions(+), 3 deletions(-)
diff --git a/compiler/src/iree/compiler/Codegen/Common/OptimizeTensorInsertExtractSlices.cpp b/compiler/src/iree/compiler/Codegen/Common/OptimizeTensorInsertExtractSlices.cpp
index 3ee4b53a2593..2b608be094e8 100644
--- a/compiler/src/iree/compiler/Codegen/Common/OptimizeTensorInsertExtractSlices.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/OptimizeTensorInsertExtractSlices.cpp
@@ -108,10 +108,13 @@ hoistLoopInvariantSubsetAtIterArg(RewriterBase &rewriter,
             ArrayRef<BlockArgument> innerNewBBArgs) -> SmallVector<Value> {
       return {insertion.getSourceOperand().get()};
     };
+
+    // replaceInitOperandUsesInLoop is set to true so that we use the new IV
+    // instead of the hoisted-out extract.
     FailureOr<LoopLikeOpInterface> newLoop =
         loopLike.replaceWithAdditionalYields(
             rewriter, extraction.getResult(),
-            /*replaceInitOperandUsesInLoop=*/false, newYieldValuesFn);
+            /*replaceInitOperandUsesInLoop=*/true, newYieldValuesFn);
     if (failed(newLoop))
       return loopLike;
     loopLike = *newLoop;
diff --git a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir
index 8a35b4bffa61..2cf5726e84ba 100644
--- a/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/test/optimize_tensor_insert_extract_slices.mlir
@@ -158,9 +158,10 @@ func.func @subset_hoisting_invariant_tensor(%init: tensor<64x64xf32>, %t: tensor
 
 // CHECK-LABEL: @subset_hoisting_invariant_tensor
 // CHECK:   tensor.extract_slice
-// CHECK:   scf.for
-// CHECK:     tensor.extract_slice
+// CHECK:   scf.for {{.*}} iter_args(%[[IV:.+]] = {{.*}})
+// CHECK:     %[[SLICE:.+]] = tensor.extract_slice
 // CHECK-NOT: tensor.extract_slice
+// CHECK:     linalg.add ins(%[[IV]], %[[SLICE]] : {{.*}})
 // CHECK:   scf.yield
 // CHECK:   tensor.insert_slice