Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[InstCombine] Improve bitfield addition #77184

Open
wants to merge 4 commits into
base: main
Choose a base branch
from

Conversation

ParkHanbum
Copy link
Contributor

@ParkHanbum ParkHanbum commented Jan 6, 2024

Fixes #33874.

@llvmbot
Copy link
Member

llvmbot commented Jan 6, 2024

@llvm/pr-subscribers-llvm-transforms

Author: hanbeom (ParkHanbum)

Changes

Patch is 21.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77184.diff

2 Files Affected:

  • (modified) llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp (+146)
  • (modified) llvm/test/Transforms/InstCombine/or.ll (+398)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c03f50d75814d8..b25d4fd9605788 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3314,6 +3314,149 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
   return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
 }
 
+struct BitFieldAddBitMask { // Mask pair recorded when the plain (non-"opt") bitfield-add pattern matches.
+  const APInt *Lower; // Mask of the lower field (LoMask in the matcher).
+  const APInt *Upper; // Mask of the upper field (UpMask in the matcher).
+};
+struct BitFieldOptBitMask { // Masks recorded when an existing add/xor form is or'ed with one more field.
+  const APInt *Lower; // OptLoMask: mask applied to the operand(s) of the existing add.
+  const APInt *Upper; // OptUpMask: mask applied to the value xor'ed with that add.
+  const APInt *New; // UpMask of the additional upper field being merged in.
+};
+struct BitFieldAddInfo { // Everything the rewrite needs after a successful match.
+  Value *X; // Common operand that every masked add in the pattern reads.
+  Value *Y; // Single addend produced by merging the per-field addends.
+  bool opt; // true: read OptMask; false: read AddMask.
+  union { // Both members start with the same two APInt pointers (Lower, Upper).
+    BitFieldAddBitMask AddMask;
+    BitFieldOptBitMask OptMask;
+  };
+};
+
+static Value *foldBitFieldArithmetic(BinaryOperator &I, // Rewrites an 'or disjoint' of per-field masked adds as one add plus an xor fix-up; returns nullptr when nothing matches.
+                                     InstCombiner::BuilderTy &Builder) { // Builder emits the replacement instructions.
+  auto *Disjoint = dyn_cast<PossiblyDisjointInst>(&I);
+  if (!Disjoint || !Disjoint->isDisjoint()) // Only instructions carrying the disjoint flag are handled.
+    return nullptr;
+
+  unsigned BitWidth = I.getType()->getScalarSizeInBits();
+  auto AccumulateY = [&](Value *LoY, Value *UpY, APInt LoMask, // Merges the two per-field addends into one value; nullptr means "cannot merge".
+                         APInt UpMask) -> Value * {
+    Value *Y = nullptr;
+    auto CLoY = dyn_cast_or_null<Constant>(LoY);
+    auto CUpY = dyn_cast_or_null<Constant>(UpY);
+    if ((CLoY == nullptr) ^ (CUpY == nullptr)) // Exactly one addend is a constant: give up.
+      return nullptr;
+
+    if (CLoY && CUpY) {
+      APInt IUpY = CUpY->getUniqueInteger();
+      APInt ILoY = CLoY->getUniqueInteger();
+      if (!(IUpY.isSubsetOf(UpMask) && ILoY.isSubsetOf(LoMask))) // Each constant must lie entirely inside its own field's mask.
+        return nullptr;
+      Y = ConstantInt::get(CLoY->getType(), ILoY + IUpY); // Both call sites reject overlapping masks first, so the sum simply combines the two constants.
+    } else if (LoY == UpY) { // Non-constant addends are accepted only if they are the same Value.
+      Y = LoY;
+    }
+
+    return Y; // Still nullptr when the non-constant addends differ.
+  };
+
+  auto MatchBitFieldAdd = // Recognises one of the supported shapes and returns its operands and masks.
+      [&](BinaryOperator &I) -> std::optional<BitFieldAddInfo> {
+    const APInt *OptLoMask, *OptUpMask, *LoMask, *UpMask, *UpMask2 = nullptr; // Only UpMask2 is initialised; the others are meaningful only after a matcher binds them.
+    Value *X, *Y, *UpY;
+    auto BitFieldAddUpper = m_CombineOr( // ((X & UpMask) + UpY), optionally re-masked with UpMask2.
+        m_And(m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)),
+              m_APInt(UpMask2)),
+        m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)));
+    auto BitFieldAdd = // Upper-field pattern or'ed with ((X + Y) & LoMask).
+        m_c_Or(BitFieldAddUpper,
+               m_And(m_c_Add(m_Deferred(X), m_Value(Y)), m_APInt(LoMask)));
+    auto BitFieldAddIC = // Variant where both adds read X unmasked: ((X + Y) & LoMask) | ((X + UpY) & UpMask).
+        m_c_Or(m_And(m_c_Add(m_Value(X), m_Value(Y)), m_APInt(LoMask)),
+               m_And(m_c_Add(m_Deferred(X), m_Value(UpY)), m_APInt(UpMask)));
+    auto OptBitFieldAdd = m_c_Or( // (masked add) ^ (masked X or masked X^UpY), or'ed with one more upper field.
+        m_c_Xor(m_CombineOr(
+                    m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)),
+                            m_And(m_Value(Y), m_APInt(OptLoMask))), // NOTE(review): both m_APInt(OptLoMask) bind the same pointer; equality of the two masks is not checked here - confirm.
+                    m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)), m_Value(Y))),
+                m_CombineOr(m_And(m_Deferred(X), m_APInt(OptUpMask)),
+                            m_And(m_c_Xor(m_Deferred(X), m_Value(UpY)),
+                                  m_APInt(OptUpMask)))),
+        BitFieldAddUpper);
+
+    if (match(&I, BitFieldAdd) || match(&I, BitFieldAddIC)) {
+      APInt Mask = APInt::getBitsSet(BitWidth, BitWidth - UpMask->countl_zero(),
+                                     BitWidth); // All bits strictly above UpMask's highest set bit.
+      if (!((UpMask2 == nullptr || *UpMask == *UpMask2) && // NOTE(review): UpMask2 is bound only by one alternative; confirm it cannot hold a pointer left by an earlier failed match attempt.
+            (LoMask->popcount() >= 2 && UpMask->popcount() >= 2) && // Each field must be at least two bits wide.
+            (LoMask->isShiftedMask() && UpMask->isShiftedMask()) && // Each mask must be one contiguous run of ones.
+            ((*LoMask & *UpMask) == 0) && // The two fields must not overlap.
+            ((Mask ^ *LoMask ^ *UpMask).isAllOnes()))) // Lower field, upper field and the bits above it must together cover every bit.
+        return std::nullopt;
+
+      if (!(Y = AccumulateY(Y, UpY, *LoMask, *UpMask))) // Collapse the two addends into one; bail out if that is impossible.
+        return std::nullopt;
+
+      return {{X, Y, false, {{LoMask, UpMask}}}}; // opt=false; the mask pair initialises the union's first member (AddMask).
+    }
+
+    if (match(&I, OptBitFieldAdd)) {
+      APInt Mask = APInt::getBitsSet(
+          BitWidth, BitWidth - OptUpMask->countl_zero(), BitWidth); // Bits strictly above OptUpMask's highest set bit.
+      APInt Mask2 = APInt::getBitsSet(
+          BitWidth, BitWidth - UpMask->countl_zero(), BitWidth); // Bits strictly above UpMask's highest set bit.
+      if (!((UpMask2 == nullptr || *UpMask == *UpMask2) && // When the outer re-mask was matched it must equal UpMask.
+            (UpMask->isShiftedMask() && UpMask->popcount() >= 2) && // New field: contiguous and at least two bits wide.
+            ((*UpMask & (*OptLoMask | *OptUpMask)) == 0) && // New field must not overlap the bits already handled.
+            ((~*OptLoMask ^ Mask) == *OptUpMask) && // Ties OptUpMask to the complement of OptLoMask below OptUpMask's top bit.
+            (Mask2 ^ *UpMask ^ (*OptLoMask ^ *OptUpMask)).isAllOnes())) // Existing masks, new field and the bits above it must cover every bit.
+        return std::nullopt;
+
+      if (!(Y = AccumulateY(Y, UpY, (*OptLoMask + *OptUpMask), *UpMask))) // The lower-side constant is checked against the sum of both existing masks.
+        return std::nullopt;
+
+      struct BitFieldAddInfo Info = {X, Y, true, {{OptLoMask, OptUpMask}}}; // NOTE(review): brace-init fills the union's first member (AddMask); OptMask.Lower/Upper are later read through the identical leading layout - confirm this is intended.
+      Info.OptMask.New = UpMask;
+      return {Info};
+    }
+
+    return std::nullopt; // None of the supported shapes matched.
+  };
+
+  auto Info = MatchBitFieldAdd(I);
+  if (Info) {
+    Value *X = Info->X;
+    Value *Y = Info->Y;
+    APInt BitLoMask, BitUpMask; // BitLoMask: bits fed to the add; BitUpMask: bits produced by xor instead.
+    if (Info->opt) {
+      unsigned NewHiBit = BitWidth - (Info->OptMask.New->countl_zero() + 1); // Index of the new field's highest set bit.
+      BitLoMask = *Info->OptMask.Lower | *Info->OptMask.New; // Extend the add mask with the new field...
+      BitLoMask.clearBit(NewHiBit); // ...except for that field's top bit.
+      BitUpMask = *Info->OptMask.Upper;
+      BitUpMask.setBit(NewHiBit); // The new field's top bit joins the xor mask.
+    } else {
+      unsigned LowerHiBit = BitWidth - (Info->AddMask.Lower->countl_zero() + 1); // Index of the lower field's highest set bit.
+      unsigned UpperHiBit = BitWidth - (Info->AddMask.Upper->countl_zero() + 1); // Index of the upper field's highest set bit.
+      BitLoMask = *Info->AddMask.Lower | *Info->AddMask.Upper; // Both fields...
+      BitLoMask.clearBit(LowerHiBit); // ...minus the top bit of each one.
+      BitLoMask.clearBit(UpperHiBit);
+      BitUpMask = APInt::getOneBitSet(BitWidth, LowerHiBit); // Xor mask is exactly the two top bits.
+      BitUpMask.setBit(UpperHiBit);
+    }
+
+    auto AndXLower = Builder.CreateAnd(X, BitLoMask); // Result: ((X & BitLoMask) +nuw (Y & BitLoMask)) ^ ((X ^ Y) & BitUpMask).
+    auto AndYLower = Builder.CreateAnd(Y, BitLoMask);
+    auto Add = Builder.CreateNUWAdd(AndXLower, AndYLower); // Top bits of every field are cleared in both operands; presumably this is what keeps a carry from crossing into the next field - confirm.
+    auto Xor1 = Builder.CreateXor(X, Y);
+    auto AndUpper = Builder.CreateAnd(Xor1, BitUpMask);
+    auto Xor = Builder.CreateXor(Add, AndUpper); // Each field's top bit is completed by xor rather than by the add.
+    return Xor;
+  }
+
+  return nullptr;
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here. We should standardize that construct where it is needed or choose some
 // other way to ensure that commutated variants of patterns are not missed.
@@ -3884,6 +4027,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
       return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
   }
 
+  if (Value *Res = foldBitFieldArithmetic(I, Builder))
+    return replaceInstUsesWith(I, Res);
+
   return nullptr;
 }
 
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 573a11599141a7..8eafd45466b994 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -1777,3 +1777,401 @@ if.then:
 if.else:
   ret i32 0
 }
+
+; Test `or disjoint` patterns that are used for bitfield arithmetic.
+; Positive
+define i8 @src_2_bitfield_op(i8 %x, i8 %y) { ; Two adjacent fields (masks 7 and 24), variable addend %y: folds to one masked add plus xor.
+; CHECK-LABEL: @src_2_bitfield_op(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 11
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 20
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    ret i8 [[BF_SET20]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1228 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1228, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  ret i8 %bf.set20
+}
+
+define i8 @src_2_bitfield_const(i8 %x) { ; Same two fields with constant addends 1 and 8, merged into the single constant 9.
+; CHECK-LABEL: @src_2_bitfield_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw nsw i8 [[TMP0]], 9
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[X]], 20
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i8 [[BF_SET20]]
+;
+entry:
+  %narrow = add i8 %x, 1
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1228 = add i8 %bf.lshr, 8
+  %bf.shl = and i8 %bf.lshr1228, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  ret i8 %bf.set20
+}
+
+define i8 @src_3_bitfield_op(i8 %x, i8 %y) { ; Three fields (masks 7, 24 and -32), variable addend %y: the whole chain folds.
+; CHECK-LABEL: @src_3_bitfield_op(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 107
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 107
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], -108
+; CHECK-NEXT:    [[BF_SET33:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_3_bitfield_const(i8 %x) { ; Three fields with constant addends 1, 8 and 32, merged into the single constant 41.
+; CHECK-LABEL: @src_3_bitfield_const(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 107
+; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i8 [[TMP0]], 41
+; CHECK-NEXT:    [[TMP2:%.*]] = and i8 [[X]], -108
+; CHECK-NEXT:    [[BF_SET33:%.*]] = xor i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %x, 1
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, 8
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, 32
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+; Test `or disjoint` patterns that are used for bitfield arithmetic.
+; Negative
+define i8 @src_bit_arithmetic_bitsize_1_low(i8 %x, i8 %y) { ; Lowest field is a single bit (mask 1): nothing folds.
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_low(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 1
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 30
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 30
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 1
+  %bf.lshr = and i8 %x, 30
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 30
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitsize_1_mid(i8 %x, i8 %y) { ; Middle field is a single bit (mask 16): nothing folds.
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_mid(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 15
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 16
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 15
+  %bf.lshr = and i8 %x, 16
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 16
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitsize_1_high(i8 %x, i8 %y) { ; Top field is a single bit (mask -128): only the lower pair (masks 7 and 120) folds.
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_high(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 59
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 59
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 68
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -128
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -128
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 120
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 120
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -128
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -128
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_low_over_mid(i8 %x, i8 %y) { ; Low mask 17 is not contiguous and overlaps the middle mask 24: nothing folds.
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_low_over_mid(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 17
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 17
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_mid_over_high(i8 %x, i8 %y) { ; Middle mask 56 overlaps the high mask -32: only the lower pair (masks 7 and 56) folds.
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_over_high(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 27
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 27
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 36
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 56
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 56
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_mid_under_lower(i8 %x, i8 %y) { ; Middle field uses mask 28 before the add (overlapping the low mask 7) but 24 after it: nothing folds.
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_under_lower(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 28
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 28
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_high_under_mid(i8 %x, i8 %y) { ; High mask -16 overlaps the middle mask 24: only the lower pair (masks 7 and 24) folds.
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_high_under_mid(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT:    [[TMP1:%.*]] = and i8 [[Y:%.*]], 11
+; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT:    [[TMP4:%.*]] = and i8 [[TMP3]], 20
+; CHECK-NEXT:    [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT:    [[BF_LSHR22:%.*]] = and i8 [[X]], -16
+; CHECK-NEXT:    [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -16
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %y, %x
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, %y
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -16
+  %bf.lshr2547 = add i8 %bf.lshr22, %y
+  %bf.value30 = and i8 %bf.lshr2547, -16
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_over_bitmask_low(i8 %x) { ; Low addend 8 lies outside the low mask 7: both `or disjoint` instructions remain.
+; CHECK-LABEL: @src_bit_arithmetic_addition_over_bitmask_low(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[X:%.*]], 7
+; CHECK-NEXT:    [[BF_LSHR1244:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT:    [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %x, 8
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, 8
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, 32
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_over_bitmask_mid(i8 %x) { ; Middle addend 32 lies outside the middle mask 24: both `or disjoint` instructions remain.
+; CHECK-LABEL: @src_bit_arithmetic_addition_over_bitmask_mid(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[NARROW:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT:    [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT:    [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT:    [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_LSHR]]
+; CHECK-NEXT:    [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT:    [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT:    [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT:    ret i8 [[BF_SET33]]
+;
+entry:
+  %narrow = add i8 %x, 1
+  %bf.value = and i8 %narrow, 7
+  %bf.lshr = and i8 %x, 24
+  %bf.lshr1244 = add i8 %bf.lshr, 32
+  %bf.shl = and i8 %bf.lshr1244, 24
+  %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+  %bf.lshr22 = and i8 %x, -32
+  %bf.lshr2547 = add i8 %bf.lshr22, 32
+  %bf.value30 = and i8 %bf.lshr2547, -32
+  %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+  ret i8 %bf.set33
+}
+
+define i8 @...
[truncated]

@dtcxzyw dtcxzyw changed the title Issue 33874 [InstCombine] Improve bitfield addition Jan 6, 2024
@RKSimon RKSimon self-requested a review January 8, 2024 12:50
@ParkHanbum
Copy link
Contributor Author

@RKSimon does this need to be updated to resolve the conflicts?

@RKSimon
Copy link
Collaborator

RKSimon commented Jan 23, 2024

Yes, the patch doesn't currently merge cleanly with trunk

@RKSimon
Copy link
Collaborator

RKSimon commented Jan 23, 2024

@ParkHanbum reopen this? It still needs quite a bit of work tbh

@ParkHanbum
Copy link
Contributor Author

@RKSimon sorry, a problem occurred during the rebase. I'll recover this ASAP

@RKSimon
Copy link
Collaborator

RKSimon commented Jan 23, 2024

@nikic can hopefully advise here but I think we can refactor this in more generic terms than exact bitfield patterns.

@ParkHanbum ParkHanbum reopened this Jan 23, 2024
@ParkHanbum ParkHanbum force-pushed the issue_33874 branch 2 times, most recently from cb2b2b6 to 1db7e0c Compare January 23, 2024 12:54
@ParkHanbum
Copy link
Contributor Author

I think it is ok now.

@RKSimon
Copy link
Collaborator

RKSimon commented Feb 2, 2024

@ParkHanbum Sorry for slow response - please can you fix the merge conflicts?

@ParkHanbum
Copy link
Contributor Author

@ParkHanbum Sorry for slow response - please can you fix the merge conflicts?

sure, I'll do it ASAP!!

@ParkHanbum ParkHanbum force-pushed the issue_33874 branch 2 times, most recently from d5ae641 to a10b733 Compare February 3, 2024 08:56
@ParkHanbum
Copy link
Contributor Author

This seems weird. I see that only 2 commits were pushed, but a great many changes show up when comparing changes. If this is a problem, let me know.

dtcxzyw added a commit to dtcxzyw/llvm-opt-benchmark that referenced this pull request Feb 4, 2024
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Outdated Show resolved Hide resolved
llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp Outdated Show resolved Hide resolved
};
};

static Value *foldBitFieldArithmetic(BinaryOperator &I,
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To address the reported issues, this has been written in terms of improving bitfield math, but I'm not certain if we should be addressing this in terms of more generic canonicalizations or not - are we likely to hit sub-parts of this in other places do you think?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Honestly, I have no idea. I focused on how to implement your optimization idea within the limits of my LLVM knowledge. Could you give me some advice so that I can think about it? What does "in terms of more generic canonicalizations" mean?

@ParkHanbum ParkHanbum force-pushed the issue_33874 branch 2 times, most recently from ea35412 to ba6f4a8 Compare February 25, 2024 07:37
// OptUpMask is its result.
m_CombineOr(m_And(m_Deferred(X), m_APInt(OptUpMask)),
m_And(m_c_Xor(m_Deferred(X), m_Value(UpY)),
m_APInt(OptUpMask)))),
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you mean to be overwriting OptUpMask here and OptLoMask above?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but I made a mistake and fixed it

Copy link

github-actions bot commented Jul 21, 2024

✅ With the latest revision this PR passed the C/C++ code formatter.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Improve bitfield arithmetic
5 participants