-
Notifications
You must be signed in to change notification settings - Fork 12.4k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[InstCombine] Improve bitfield addition #77184
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-llvm-transforms Author: hanbeom (ParkHanbum) Changes: Patch is 21.95 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/77184.diff 2 Files Affected:
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index c03f50d75814d8..b25d4fd9605788 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -3314,6 +3314,149 @@ Value *InstCombinerImpl::foldAndOrOfICmps(ICmpInst *LHS, ICmpInst *RHS,
return foldAndOrOfICmpsUsingRanges(LHS, RHS, IsAnd);
}
+struct BitFieldAddBitMask { // Mask pair stored for the plain (opt == false) bitfield-add match.
+ const APInt *Lower; // Set from LoMask, the mask applied to the lower field's add.
+ const APInt *Upper; // Set from UpMask, the mask applied around the upper field's add.
+};
+struct BitFieldOptBitMask { // Masks stored for the opt == true match (add/xor form or'ed with one more field).
+ const APInt *Lower; // Set from OptLoMask, the mask on the operands of the matched add.
+ const APInt *Upper; // Set from OptUpMask, the mask on the matched xor side.
+ const APInt *New; // Set from UpMask, the mask of the additional field being merged in.
+};
+struct BitFieldAddInfo { // Result of a successful pattern match in foldBitFieldArithmetic.
+ Value *X; // Value whose bitfields are being added to.
+ Value *Y; // Combined addend (a merged constant or the shared non-constant value).
+ bool opt; // Selects the live union member: true -> OptMask, false -> AddMask.
+ union {
+ BitFieldAddBitMask AddMask; // Read when opt is false.
+ BitFieldOptBitMask OptMask; // Read when opt is true.
+ };
+};
+
+static Value *foldBitFieldArithmetic(BinaryOperator &I, // Rewrites an 'or disjoint' of per-bitfield adds as one masked add plus an xor fix-up.
+ InstCombiner::BuilderTy &Builder) { // Returns the replacement value, or nullptr when I does not match.
+ auto *Disjoint = dyn_cast<PossiblyDisjointInst>(&I);
+ if (!Disjoint || !Disjoint->isDisjoint()) // Only an 'or' carrying the disjoint flag is considered.
+ return nullptr;
+
+ unsigned BitWidth = I.getType()->getScalarSizeInBits();
+ auto AccumulateY = [&](Value *LoY, Value *UpY, APInt LoMask, // Merges the addends of the two fields into a single Y.
+ APInt UpMask) -> Value * { // Yields nullptr when the addends cannot be merged.
+ Value *Y = nullptr;
+ auto CLoY = dyn_cast_or_null<Constant>(LoY);
+ auto CUpY = dyn_cast_or_null<Constant>(UpY);
+ if ((CLoY == nullptr) ^ (CUpY == nullptr)) // Exactly one constant addend: not handled.
+ return nullptr;
+
+ if (CLoY && CUpY) {
+ APInt IUpY = CUpY->getUniqueInteger(); // NOTE(review): presumably requires an integer/splat constant - confirm behaviour for ConstantExpr or undef.
+ APInt ILoY = CLoY->getUniqueInteger();
+ if (!(IUpY.isSubsetOf(UpMask) && ILoY.isSubsetOf(LoMask))) // Each constant must lie entirely inside its own field mask.
+ return nullptr;
+ Y = ConstantInt::get(CLoY->getType(), ILoY + IUpY); // The two constants are summed into one addend.
+ } else if (LoY == UpY) { // Non-constant addends must be the very same value.
+ Y = LoY;
+ }
+
+ return Y; // Still nullptr if the non-constant addends differ.
+ };
+
+ auto MatchBitFieldAdd = // Matches I against the supported shapes and returns operands plus masks.
+ [&](BinaryOperator &I) -> std::optional<BitFieldAddInfo> {
+ const APInt *OptLoMask, *OptUpMask, *LoMask, *UpMask, *UpMask2 = nullptr; // Only UpMask2 is initialised; it stays null unless the outer-'and' form binds it.
+ Value *X, *Y, *UpY;
+ auto BitFieldAddUpper = m_CombineOr( // Upper field: (X & UpMask) + UpY, with or without an outer 'and'.
+ m_And(m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)),
+ m_APInt(UpMask2)),
+ m_c_Add(m_And(m_Value(X), m_APInt(UpMask)), m_Value(UpY)));
+ auto BitFieldAdd = // Upper field or'ed with ((X + Y) & LoMask).
+ m_c_Or(BitFieldAddUpper,
+ m_And(m_c_Add(m_Deferred(X), m_Value(Y)), m_APInt(LoMask)));
+ auto BitFieldAddIC = // Variant where both fields have the form (X + addend) & mask.
+ m_c_Or(m_And(m_c_Add(m_Value(X), m_Value(Y)), m_APInt(LoMask)),
+ m_And(m_c_Add(m_Deferred(X), m_Value(UpY)), m_APInt(UpMask)));
+ auto OptBitFieldAdd = m_c_Or( // Same add/xor shape this function emits below, or'ed with one more upper field.
+ m_c_Xor(m_CombineOr(
+ m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)),
+ m_And(m_Value(Y), m_APInt(OptLoMask))), // NOTE(review): this second m_APInt(OptLoMask) appears to rebind the mask rather than check equality - confirm.
+ m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)), m_Value(Y))),
+ m_CombineOr(m_And(m_Deferred(X), m_APInt(OptUpMask)),
+ m_And(m_c_Xor(m_Deferred(X), m_Value(UpY)),
+ m_APInt(OptUpMask)))),
+ BitFieldAddUpper); // NOTE(review): BitFieldAddUpper binds X again via m_Value(X); it does not look like X is compared with the xor side - confirm.
+
+ if (match(&I, BitFieldAdd) || match(&I, BitFieldAddIC)) {
+ APInt Mask = APInt::getBitsSet(BitWidth, BitWidth - UpMask->countl_zero(), // Mask = every bit above the highest set bit of UpMask.
+ BitWidth);
+ if (!((UpMask2 == nullptr || *UpMask == *UpMask2) && // An outer mask, when present, must equal UpMask.
+ (LoMask->popcount() >= 2 && UpMask->popcount() >= 2) && // Both fields are at least two bits wide.
+ (LoMask->isShiftedMask() && UpMask->isShiftedMask()) && // Both masks are a single contiguous run of ones.
+ ((*LoMask & *UpMask) == 0) && // The fields do not overlap.
+ ((Mask ^ *LoMask ^ *UpMask).isAllOnes()))) // LoMask, UpMask and the bits above UpMask xor to all-ones.
+ return std::nullopt;
+
+ if (!(Y = AccumulateY(Y, UpY, *LoMask, *UpMask)))
+ return std::nullopt;
+
+ return {{X, Y, false, {{LoMask, UpMask}}}}; // opt = false; masks are later read through AddMask.
+ }
+
+ if (match(&I, OptBitFieldAdd)) {
+ APInt Mask = APInt::getBitsSet( // Bits above the highest set bit of OptUpMask.
+ BitWidth, BitWidth - OptUpMask->countl_zero(), BitWidth);
+ APInt Mask2 = APInt::getBitsSet( // Bits above the highest set bit of the new field's UpMask.
+ BitWidth, BitWidth - UpMask->countl_zero(), BitWidth);
+ if (!((UpMask2 == nullptr || *UpMask == *UpMask2) && // An outer mask, when present, must equal UpMask.
+ (UpMask->isShiftedMask() && UpMask->popcount() >= 2) && // New field is contiguous and at least two bits wide.
+ ((*UpMask & (*OptLoMask | *OptUpMask)) == 0) && // New field overlaps neither existing mask.
+ ((~*OptLoMask ^ Mask) == *OptUpMask) && // Consistency check between the two masks of the existing add/xor.
+ (Mask2 ^ *UpMask ^ (*OptLoMask ^ *OptUpMask)).isAllOnes())) // All masks plus the bits above UpMask xor to all-ones.
+ return std::nullopt;
+
+ if (!(Y = AccumulateY(Y, UpY, (*OptLoMask + *OptUpMask), *UpMask))) // The sum of both existing masks serves as the "lower" mask here.
+ return std::nullopt;
+
+ struct BitFieldAddInfo Info = {X, Y, true, {{OptLoMask, OptUpMask}}}; // opt = true; New is assigned on the next line.
+ Info.OptMask.New = UpMask;
+ return {Info};
+ }
+
+ return std::nullopt;
+ };
+
+ auto Info = MatchBitFieldAdd(I);
+ if (Info) {
+ Value *X = Info->X;
+ Value *Y = Info->Y;
+ APInt BitLoMask, BitUpMask; // BitLoMask feeds the add, BitUpMask feeds the xor fix-up.
+ if (Info->opt) {
+ unsigned NewHiBit = BitWidth - (Info->OptMask.New->countl_zero() + 1); // Index of the top set bit of the new field's mask.
+ BitLoMask = *Info->OptMask.Lower | *Info->OptMask.New;
+ BitLoMask.clearBit(NewHiBit); // The new field's top bit is left out of the add...
+ BitUpMask = *Info->OptMask.Upper;
+ BitUpMask.setBit(NewHiBit); // ...and handled by the xor instead.
+ } else {
+ unsigned LowerHiBit = BitWidth - (Info->AddMask.Lower->countl_zero() + 1); // Top set bit of the lower field.
+ unsigned UpperHiBit = BitWidth - (Info->AddMask.Upper->countl_zero() + 1); // Top set bit of the upper field.
+ BitLoMask = *Info->AddMask.Lower | *Info->AddMask.Upper;
+ BitLoMask.clearBit(LowerHiBit); // Presumably so a carry stops at each field's cleared top bit - TODO confirm.
+ BitLoMask.clearBit(UpperHiBit);
+ BitUpMask = APInt::getOneBitSet(BitWidth, LowerHiBit); // The xor mask holds exactly the two top bits.
+ BitUpMask.setBit(UpperHiBit);
+ }
+
+ auto AndXLower = Builder.CreateAnd(X, BitLoMask);
+ auto AndYLower = Builder.CreateAnd(Y, BitLoMask);
+ auto Add = Builder.CreateNUWAdd(AndXLower, AndYLower); // (X & BitLoMask) + (Y & BitLoMask), emitted with the nuw flag.
+ auto Xor1 = Builder.CreateXor(X, Y);
+ auto AndUpper = Builder.CreateAnd(Xor1, BitUpMask); // (X ^ Y) & BitUpMask.
+ auto Xor = Builder.CreateXor(Add, AndUpper); // Final value: Add ^ ((X ^ Y) & BitUpMask).
+ return Xor;
+ }
+
+ return nullptr;
+}
+
// FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
// here. We should standardize that construct where it is needed or choose some
// other way to ensure that commutated variants of patterns are not missed.
@@ -3884,6 +4027,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) {
return BinaryOperator::CreateAnd(X, ConstantInt::get(Ty, *C1 | *C2));
}
+ if (Value *Res = foldBitFieldArithmetic(I, Builder))
+ return replaceInstUsesWith(I, Res);
+
return nullptr;
}
diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll
index 573a11599141a7..8eafd45466b994 100644
--- a/llvm/test/Transforms/InstCombine/or.ll
+++ b/llvm/test/Transforms/InstCombine/or.ll
@@ -1777,3 +1777,401 @@ if.then:
if.else:
ret i32 0
}
+
+; Test `or disjoint` as used for bitfield arithmetic.
+; Positive
+define i8 @src_2_bitfield_op(i8 %x, i8 %y) { ; Two fields (masks 7 and 24) with %y added to each; expected output is one masked add plus an xor.
+; CHECK-LABEL: @src_2_bitfield_op(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 11
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 20
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i8 [[BF_SET20]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7 ; low field: (x + y) & 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1228 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1228, 24 ; upper field: ((x & 24) + y) & 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ ret i8 %bf.set20
+}
+
+define i8 @src_2_bitfield_const(i8 %x) { ; Two fields (masks 7 and 24) with constants 1 and 8 added; expected output adds the merged constant 9.
+; CHECK-LABEL: @src_2_bitfield_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[TMP0]], 9
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[X]], 20
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i8 [[BF_SET20]]
+;
+entry:
+ %narrow = add i8 %x, 1
+ %bf.value = and i8 %narrow, 7 ; low field: (x + 1) & 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1228 = add i8 %bf.lshr, 8
+ %bf.shl = and i8 %bf.lshr1228, 24 ; upper field: ((x & 24) + 8) & 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ ret i8 %bf.set20
+}
+
+define i8 @src_3_bitfield_op(i8 %x, i8 %y) { ; Three fields (masks 7, 24 and -32) with %y added to each; expected output is a single add/xor pair.
+; CHECK-LABEL: @src_3_bitfield_op(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 107
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 107
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], -108
+; CHECK-NEXT: [[BF_SET33:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7 ; low field
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 24 ; middle field
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32 ; high field
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_3_bitfield_const(i8 %x) { ; Three fields with constants 1, 8 and 32 added; expected output adds the merged constant 41.
+; CHECK-LABEL: @src_3_bitfield_const(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 107
+; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[TMP0]], 41
+; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[X]], -108
+; CHECK-NEXT: [[BF_SET33:%.*]] = xor i8 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %x, 1
+ %bf.value = and i8 %narrow, 7 ; low field
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, 8
+ %bf.shl = and i8 %bf.lshr1244, 24 ; middle field
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, 32
+ %bf.value30 = and i8 %bf.lshr2547, -32 ; high field
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+; Test `or disjoint` as used for bitfield arithmetic.
+; Negative: the full three-field fold must not fire (the lower two fields may still fold).
+define i8 @src_bit_arithmetic_bitsize_1_low(i8 %x, i8 %y) { ; Low field is a single bit (mask 1); expected output is unchanged.
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_low(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 1
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 30
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 30
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 1 ; one-bit low field
+ %bf.lshr = and i8 %x, 30
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 30
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitsize_1_mid(i8 %x, i8 %y) { ; Middle field is a single bit (mask 16); expected output is unchanged.
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_mid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 15
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 16
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 16
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 15
+ %bf.lshr = and i8 %x, 16
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 16 ; one-bit middle field
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitsize_1_high(i8 %x, i8 %y) { ; High field is a single bit (mask -128); expected output folds only the lower two fields.
+; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_high(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 59
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 59
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 68
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -128
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -128
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 120
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 120
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -128
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -128 ; one-bit high field
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_low_over_mid(i8 %x, i8 %y) { ; Low mask 17 shares bit 4 with the middle mask 24; expected output is unchanged.
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_low_over_mid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 17
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 17 ; low mask is not one contiguous run of bits
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_mid_over_high(i8 %x, i8 %y) { ; Middle mask 56 shares bit 5 with the high mask -32; expected output folds only the lower two fields.
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_over_high(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 27
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 27
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 36
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 56
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 56 ; middle field reaches into bit 5
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_mid_under_lower(i8 %x, i8 %y) { ; Middle field is masked with 28 before the add but 24 after it; expected output is unchanged.
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_under_lower(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]]
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 28
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]]
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 28 ; inner mask 28 includes bit 2 of the low field
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 24 ; outer mask differs from the inner one
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_bitmask_high_under_mid(i8 %x, i8 %y) { ; High mask -16 shares bit 4 with the middle mask 24; expected output folds only the lower two fields.
+; CHECK-LABEL: @src_bit_arithmetic_bitmask_high_under_mid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 11
+; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]]
+; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 20
+; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]]
+; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -16
+; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]]
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -16
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %y, %x
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, %y
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -16
+ %bf.lshr2547 = add i8 %bf.lshr22, %y
+ %bf.value30 = and i8 %bf.lshr2547, -16 ; high field reaches down into bit 4
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_over_bitmask_low(i8 %x) { ; Low-field addend 8 lies outside the low mask 7; expected output keeps both 'or disjoint' instructions.
+; CHECK-LABEL: @src_bit_arithmetic_addition_over_bitmask_low(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[X:%.*]], 7
+; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8
+; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %x, 8 ; addend does not fit in the 3-bit low field
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, 8
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, 32
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @src_bit_arithmetic_addition_over_bitmask_mid(i8 %x) { ; Middle-field addend 32 lies outside the middle mask 24; expected output keeps both 'or disjoint' instructions.
+; CHECK-LABEL: @src_bit_arithmetic_addition_over_bitmask_mid(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1
+; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7
+; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24
+; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_LSHR]]
+; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32
+; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32
+; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]]
+; CHECK-NEXT: ret i8 [[BF_SET33]]
+;
+entry:
+ %narrow = add i8 %x, 1
+ %bf.value = and i8 %narrow, 7
+ %bf.lshr = and i8 %x, 24
+ %bf.lshr1244 = add i8 %bf.lshr, 32 ; addend falls outside the middle field's mask
+ %bf.shl = and i8 %bf.lshr1244, 24
+ %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+ %bf.lshr22 = and i8 %x, -32
+ %bf.lshr2547 = add i8 %bf.lshr22, 32
+ %bf.value30 = and i8 %bf.lshr2547, -32
+ %bf.set33 = or disjoint i8 %bf.set20, %bf.value30
+ ret i8 %bf.set33
+}
+
+define i8 @...
[truncated]
|
cb2b2b6
to
4730bb0
Compare
@RKSimon does this need to be updated to resolve conflicts? |
Yes, the patch doesn't currently merge cleanly with trunk |
4730bb0
to
7d5c40b
Compare
@ParkHanbum reopen this? It still needs quite a bit of work tbh |
@RKSimon sorry, a problem occurred while rebasing. I'll recover this ASAP |
@nikic can hopefully advise here but I think we can refactor this in more generic terms than exact bitfield patterns. |
cb2b2b6
to
1db7e0c
Compare
I think it is ok now. |
@ParkHanbum Sorry for slow response - please can you fix the merge conflicts? |
sure, I'll do it ASAP!! |
d5ae641
to
a10b733
Compare
This seems weird: I see only 2 commits pushed, but the comparison shows a great many changes. If this is a problem, let me know. |
}; | ||
}; | ||
|
||
static Value *foldBitFieldArithmetic(BinaryOperator &I, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
To address the reported issues, this has been written in terms of improving bitfield math, but I'm not certain if we should be addressing this in terms of more generic canonicalizations or not - are we likely to hit sub-parts of this in other places do you think?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Honestly, I have no idea. I focused on how to implement your optimization idea within the limits of my LLVM knowledge. Please give me some advice so that I can think about it. What do you mean by "in terms of more generic canonicalizations or not"?
ea35412
to
ba6f4a8
Compare
// OptUpMask is its result. | ||
m_CombineOr(m_And(m_Deferred(X), m_APInt(OptUpMask)), | ||
m_And(m_c_Xor(m_Deferred(X), m_Value(UpY)), | ||
m_APInt(OptUpMask)))), |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do you mean to be overwriting `OptUpMask` here and `OptLoMask` above?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, but I made a mistake and fixed it
✅ With the latest revision this PR passed the C/C++ code formatter. |
Fixes #33874.