From 97e293fae64385886ea0c296e4ccb9b3d8a49a74 Mon Sep 17 00:00:00 2001 From: Hanbum Park Date: Sat, 6 Jan 2024 16:44:19 +0900 Subject: [PATCH 1/4] [InstCombine] Add test for improving bitfield addition (#33874) Proof: https://alive2.llvm.org/ce/z/RUL3YU Fixes #33874 --- llvm/test/Transforms/InstCombine/or.ll | 408 +++++++++++++++++++++++++ 1 file changed, 408 insertions(+) diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 6e2085a8bb6c7d..5e1a4aa895f61c 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -2029,3 +2029,411 @@ define i32 @or_xor_and_commuted3(i32 %x, i32 %y, i32 %z) { %or1 = or i32 %xor, %yy ret i32 %or1 } + +; test or disjoint which used for BitField Arithmetic. +; Positive +define i8 @src_2_bitfield_op(i8 %x, i8 %y) { +; CHECK-LABEL: @src_2_bitfield_op( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24 +; CHECK-NEXT: [[BF_LSHR1228:%.*]] = add i8 [[BF_LSHR]], [[Y]] +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1228]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: ret i8 [[BF_SET20]] +; +entry: + %narrow = add i8 %y, %x + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 24 + %bf.lshr1228 = add i8 %bf.lshr, %y + %bf.shl = and i8 %bf.lshr1228, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + ret i8 %bf.set20 +} + +define i8 @src_2_bitfield_const(i8 %x) { +; CHECK-LABEL: @src_2_bitfield_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1 +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR1228:%.*]] = add i8 [[X]], 8 +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1228]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: ret i8 [[BF_SET20]] +; +entry: + %narrow = add i8 
%x, 1 + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 24 + %bf.lshr1228 = add i8 %bf.lshr, 8 + %bf.shl = and i8 %bf.lshr1228, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + ret i8 %bf.set20 +} + +define i8 @src_3_bitfield_op(i8 %x, i8 %y) { +; CHECK-LABEL: @src_3_bitfield_op( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] +; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %y, %x + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 24 + %bf.lshr1244 = add i8 %bf.lshr, %y + %bf.shl = and i8 %bf.lshr1244, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, %y + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_3_bitfield_const(i8 %x) { +; CHECK-LABEL: @src_3_bitfield_const( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1 +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8 +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + 
%narrow = add i8 %x, 1 + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 24 + %bf.lshr1244 = add i8 %bf.lshr, 8 + %bf.shl = and i8 %bf.lshr1244, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, 32 + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +; test or disjoint which used for BitField Arithmetic. +; Negative +define i8 @src_bit_arithmetic_bitsize_1_low(i8 %x, i8 %y) { +; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_low( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 1 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 30 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 30 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] +; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %y, %x + %bf.value = and i8 %narrow, 1 + %bf.lshr = and i8 %x, 30 + %bf.lshr1244 = add i8 %bf.lshr, %y + %bf.shl = and i8 %bf.lshr1244, 30 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, %y + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_bitsize_1_mid(i8 %x, i8 %y) { +; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_mid( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 15 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 16 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 
[[BF_LSHR]], [[Y]] +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 16 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] +; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %y, %x + %bf.value = and i8 %narrow, 15 + %bf.lshr = and i8 %x, 16 + %bf.lshr1244 = add i8 %bf.lshr, %y + %bf.shl = and i8 %bf.lshr1244, 16 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, %y + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_bitsize_1_high(i8 %x, i8 %y) { +; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_high( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 120 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 120 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -128 +; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] +; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -128 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %y, %x + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 120 + %bf.lshr1244 = add i8 %bf.lshr, %y + %bf.shl = and i8 %bf.lshr1244, 120 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -128 + %bf.lshr2547 = add i8 %bf.lshr22, %y + %bf.value30 = and i8 %bf.lshr2547, -128 + %bf.set33 = or 
disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_bitmask_low_over_mid(i8 %x, i8 %y) { +; CHECK-LABEL: @src_bit_arithmetic_bitmask_low_over_mid( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 17 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] +; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %y, %x + %bf.value = and i8 %narrow, 17 + %bf.lshr = and i8 %x, 24 + %bf.lshr1244 = add i8 %bf.lshr, %y + %bf.shl = and i8 %bf.lshr1244, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, %y + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_bitmask_mid_over_high(i8 %x, i8 %y) { +; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_over_high( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 56 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 56 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] +; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or 
disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %y, %x + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 56 + %bf.lshr1244 = add i8 %bf.lshr, %y + %bf.shl = and i8 %bf.lshr1244, 56 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, %y + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_bitmask_mid_under_lower(i8 %x, i8 %y) { +; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_under_lower( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 28 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] +; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %y, %x + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 28 + %bf.lshr1244 = add i8 %bf.lshr, %y + %bf.shl = and i8 %bf.lshr1244, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, %y + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_bitmask_high_under_mid(i8 %x, i8 %y) { +; CHECK-LABEL: @src_bit_arithmetic_bitmask_high_under_mid( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 
[[X]], 24 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -16 +; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] +; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -16 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %y, %x + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 24 + %bf.lshr1244 = add i8 %bf.lshr, %y + %bf.shl = and i8 %bf.lshr1244, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -16 + %bf.lshr2547 = add i8 %bf.lshr22, %y + %bf.value30 = and i8 %bf.lshr2547, -16 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_addition_over_bitmask_low(i8 %x) { +; CHECK-LABEL: @src_bit_arithmetic_addition_over_bitmask_low( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[X:%.*]], 7 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8 +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %x, 8 + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 24 + %bf.lshr1244 = add i8 %bf.lshr, 8 + %bf.shl = and i8 %bf.lshr1244, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, 32 + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_addition_over_bitmask_mid(i8 %x) { +; CHECK-LABEL: 
@src_bit_arithmetic_addition_over_bitmask_mid( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1 +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_LSHR]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %x, 1 + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 24 + %bf.lshr1244 = add i8 %bf.lshr, 32 + %bf.shl = and i8 %bf.lshr1244, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, 32 + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_addition_under_bitmask_mid(i8 %x) { +; CHECK-LABEL: @src_bit_arithmetic_addition_under_bitmask_mid( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1 +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_LSHR]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %x, 1 + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 24 + %bf.lshr1244 = add i8 %bf.lshr, 4 + %bf.shl = and i8 %bf.lshr1244, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, 32 + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} + +define i8 @src_bit_arithmetic_addition_under_bitmask_high(i8 %x) { +; 
CHECK-LABEL: @src_bit_arithmetic_addition_under_bitmask_high( +; CHECK-NEXT: entry: +; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1 +; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 +; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8 +; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 +; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 +; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_LSHR22]] +; CHECK-NEXT: ret i8 [[BF_SET33]] +; +entry: + %narrow = add i8 %x, 1 + %bf.value = and i8 %narrow, 7 + %bf.lshr = and i8 %x, 24 + %bf.lshr1244 = add i8 %bf.lshr, 8 + %bf.shl = and i8 %bf.lshr1244, 24 + %bf.set20 = or disjoint i8 %bf.value, %bf.shl + %bf.lshr22 = and i8 %x, -32 + %bf.lshr2547 = add i8 %bf.lshr22, 16 + %bf.value30 = and i8 %bf.lshr2547, -32 + %bf.set33 = or disjoint i8 %bf.set20, %bf.value30 + ret i8 %bf.set33 +} From 9c5c9a19f4b0dcf34ac4b5a6ee5edc67995096e1 Mon Sep 17 00:00:00 2001 From: Hanbum Park Date: Sat, 6 Jan 2024 16:44:52 +0900 Subject: [PATCH 2/4] [InstCombine] Improve bitfield addition (#33874) Proof: https://alive2.llvm.org/ce/z/RUL3YU Fixes #33874 --- .../InstCombine/InstCombineAndOrXor.cpp | 249 ++++++++++++++++++ llvm/test/Transforms/InstCombine/or.ll | 94 +++---- 2 files changed, 291 insertions(+), 52 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index f9caa4da44931a..2fe94c3aa5ec73 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3485,6 +3485,252 @@ static Value *foldOrOfInversions(BinaryOperator &I, return nullptr; } +struct BitFieldAddBitMask { + const APInt *Lower; + const APInt *Upper; +}; +struct BitFieldOptBitMask { + const APInt *Lower; + const APInt *Upper; + const APInt *New; +}; +struct BitFieldAddInfo { + Value *X; + Value *Y; + bool opt; + union 
{
+    BitFieldAddBitMask AddMask;
+    BitFieldOptBitMask OptMask;
+  };
+};
+
+/// Fold additions that are performed separately on every member of a packed
+/// bitfield into a single addition over all members.
+///
+/// A bitfield update consists of up to three steps:
+/// 1. extracting the bits of the member,
+/// 2. performing the operation,
+/// 3. eliminating the bits beyond the range of the member.
+///
+/// Depending on the location of the member on which we want to perform the
+/// operation, all or only some of these steps are performed.
+///
+/// Consider:
+///   %narrow = add i8 %y, %x
+///   %bf.value = and i8 %narrow, 7
+///   %bf.lshr = and i8 %x, 24
+///   %bf.lshr1244 = add i8 %bf.lshr, %y
+///   %bf.shl = and i8 %bf.lshr1244, 24
+///   %bf.set20 = or disjoint i8 %bf.value, %bf.shl
+///
+/// This example updates the member in bits 0-2 (mask 7) first and the member
+/// in bits 3-4 (mask 24) second. The first update skips step 1: the member
+/// sits at the bottom of the storage unit, so no carry can enter it. The
+/// second update performs all three steps.
+///
+/// After the operation for each member is completed, all bits are collected
+/// through the `or disjoint` operation and the result is returned.
+///
+/// The optimization opportunity is to avoid repeating these steps for every
+/// member. The example below uses constants because it is easier to follow.
+///
+/// Consider:
+///     (first)        (second)       (final)
+///     ????????(x)    ????????(x)    00000???
+///   + 00000001     & 00011000     | 000??000
+///   ----------     ----------     ----------
+///     0000????       000??000     = 000?????
+///   & 00000111     + 00001000
+///   = 00000???     ----------
+///                    00???000
+///                  & 00011000
+///                  ----------
+///                  = 000??000
+///
+/// Optimized:
+///     (first)                (second)          (final)
+///     000????? (x)           000????? (x)      000????? (x&11) + 9
+///   & 00001011             & 00010100        ^ 000?0?00 (x&20)
+///   ----------             ----------        ----------
+///     0000?0?? (x & 11)    = 000?0?00        = 000?????
+///   + 00001001
+///   ----------
+///   = 000????? (x&11) + 9
+///
+/// 1. Extract each member excluding its highest bit.
+/// 2. Add the sum of all values to be added to each member.
+/// 3. Extract the highest bit of each member (of X ^ Y in the general case).
+/// 4. Perform an exclusive OR of 2 and 3.
+///
+/// The most important step is 4. The highest bit of every member is cleared
+/// in both addends of step 2, so a carry out of a member is absorbed at that
+/// position instead of reaching the next member. The exclusive OR with the
+/// pre-extracted highest bits then produces the correct highest bit of each
+/// sum, so the overflowed bit never has to be removed explicitly.
+///
+/// \param I       the instruction to inspect; only `or disjoint` is folded.
+/// \param Builder used to create the replacement instructions.
+/// \returns the replacement value, or nullptr if \p I is not a recognized
+///          bitfield addition.
+///
+/// The fold is only performed when every matched part provably is a
+/// per-member addition of the same value X:
+///  - all parts must use the same X (and, for variable addends, the same Y),
+///  - a part without a trailing mask is accepted only for constant addends
+///    that lie inside the member, because only then neither addend bits nor
+///    carries can leave or enter the member.
+static Value *foldBitFieldArithmetic(BinaryOperator &I,
+                                     InstCombiner::BuilderTy &Builder) {
+  auto *Disjoint = dyn_cast<PossiblyDisjointInst>(&I);
+  if (!Disjoint || !Disjoint->isDisjoint())
+    return nullptr;
+
+  unsigned BitWidth = I.getType()->getScalarSizeInBits();
+
+  // Mask of all bits strictly above the highest set bit of M.
+  auto BitsAbove = [&](const APInt &M) {
+    return APInt::getBitsSet(BitWidth, BitWidth - M.countl_zero(), BitWidth);
+  };
+
+  // True if Lo and Up are two adjacent members of at least two bits each:
+  // Lo starts at bit 0 and Up starts right above Lo.
+  auto IsFieldPair = [&](const APInt &Lo, const APInt &Up) {
+    return Lo.popcount() >= 2 && Up.popcount() >= 2 && Lo.isShiftedMask() &&
+           Up.isShiftedMask() && !Lo.intersects(Up) &&
+           // Together with the bits above Up the masks must cover every bit.
+           (BitsAbove(Up) ^ Lo ^ Up).isAllOnes();
+  };
+
+  // True if (OptLo, OptUp) are the masks of an already folded group of
+  // members and New is one more member located right above that group.
+  // OptLo : all bits of the folded members except their highest bits.
+  // OptUp : only the highest bit of each folded member.
+  auto IsFieldAboveFolded = [&](const APInt &OptLo, const APInt &OptUp,
+                                const APInt &New) {
+    APInt AboveOpt = BitsAbove(OptUp);
+    APInt AboveNew = BitsAbove(New);
+    return New.isShiftedMask() && New.popcount() >= 2 &&
+           // The new member must not share bits with the folded members.
+           !New.intersects(OptLo | OptUp) &&
+           // Below its highest bit OptUp must be the complement of OptLo.
+           (~OptLo ^ AboveOpt) == OptUp &&
+           // All masks together must cover every bit.
+           (AboveNew ^ New ^ OptLo ^ OptUp).isAllOnes();
+  };
+
+  // If the operands of the bitfield operation are constants, the sum of the
+  // constants is returned. If they are not constants, both must be the same
+  // value, which is returned. Otherwise null is returned.
+  auto AccumulateY = [](Value *LoY, Value *UpY, const APInt &LoMask,
+                        const APInt &UpMask) -> Value * {
+    const APInt *LoC, *UpC;
+    bool LoIsConst = match(LoY, m_APInt(LoC));
+    bool UpIsConst = match(UpY, m_APInt(UpC));
+    // If one operand is a constant, the other one must be a constant too.
+    if (LoIsConst != UpIsConst)
+      return nullptr;
+
+    if (LoIsConst) {
+      // The bits of each constant must be in the range of its own member.
+      if (!LoC->isSubsetOf(LoMask) || !UpC->isSubsetOf(UpMask))
+        return nullptr;
+      // The masks are disjoint, so the addition cannot carry.
+      return ConstantInt::get(LoY->getType(), *LoC + *UpC);
+    }
+
+    // Constants without a single integer value (undef, poison, constant
+    // expressions, non-splat vectors) are not handled.
+    if (isa<Constant>(LoY) || isa<Constant>(UpY))
+      return nullptr;
+
+    return LoY == UpY ? LoY : nullptr;
+  };
+
+  // Match the update of the top-most member handled by this `or`:
+  //   ((X & UpMask) + UpY) & UpMask
+  //       Any addend. The inner mask keeps carries from entering the member,
+  //       the outer mask removes everything that is not part of the member.
+  //   (X & UpMask) + UpY
+  //       Only if UpY is a constant inside the member and the member ends at
+  //       the sign bit, so that no bit outside the member can be set.
+  auto MatchUpperField = [](Value *V, Value *&X, Value *&UpY,
+                            const APInt *&UpMask) {
+    const APInt *OuterMask, *UpC;
+    if (match(V, m_And(m_c_Add(m_And(m_Value(X), m_APInt(UpMask)),
+                               m_Value(UpY)),
+                       m_APInt(OuterMask))))
+      return *OuterMask == *UpMask;
+    return match(V, m_c_Add(m_And(m_Value(X), m_APInt(UpMask)),
+                            m_Value(UpY))) &&
+           match(UpY, m_APInt(UpC)) && UpC->isSubsetOf(*UpMask) &&
+           UpMask->isNegative();
+  };
+
+  // Match a value that this fold has already produced for the members below
+  // the top-most one. X and UpY are the values found by MatchUpperField; the
+  // folded part must use exactly the same values.
+  //   ((X & OptLoMask) + (Y & OptLoMask)) ^ ((X ^ Y) & OptUpMask)
+  //       Variable addend, Y must be UpY.
+  //   ((X & OptLoMask) + C) ^ (X & OptUpMask)
+  //       Constant addend. C must not touch OptUpMask: such a bit could
+  //       carry into the next member.
+  auto MatchFoldedFields = [](Value *V, Value *X, Value *UpY, Value *&Y,
+                              const APInt *&OptLoMask,
+                              const APInt *&OptUpMask) {
+    const APInt *YMask, *C;
+    if (match(V, m_c_Xor(m_c_Add(m_And(m_Specific(X), m_APInt(OptLoMask)),
+                                 m_And(m_Specific(UpY), m_APInt(YMask))),
+                         m_And(m_c_Xor(m_Specific(X), m_Specific(UpY)),
+                               m_APInt(OptUpMask))))) {
+      Y = UpY;
+      return *YMask == *OptLoMask;
+    }
+    return match(V, m_c_Xor(m_Add(m_And(m_Specific(X), m_APInt(OptLoMask)),
+                                  m_Value(Y)),
+                            m_And(m_Specific(X), m_APInt(OptUpMask)))) &&
+           match(Y, m_APInt(C)) && C->isSubsetOf(*OptLoMask);
+  };
+
+  // Describe a plain two-member addition.
+  auto MakeAddInfo = [](Value *X, Value *Y, const APInt *Lo, const APInt *Up) {
+    BitFieldAddInfo Info;
+    Info.X = X;
+    Info.Y = Y;
+    Info.opt = false;
+    // Assigning the whole member makes AddMask the active union member.
+    Info.AddMask = BitFieldAddBitMask{Lo, Up};
+    return Info;
+  };
+
+  // Describe an already folded group of members plus one new member.
+  auto MakeOptInfo = [](Value *X, Value *Y, const APInt *OptLo,
+                        const APInt *OptUp, const APInt *New) {
+    BitFieldAddInfo Info;
+    Info.X = X;
+    Info.Y = Y;
+    Info.opt = true;
+    // Assigning the whole member makes OptMask the active union member.
+    Info.OptMask = BitFieldOptBitMask{OptLo, OptUp, New};
+    return Info;
+  };
+
+  // Check whether this `or disjoint` instruction is a bitfield addition.
+  // If it is, the information necessary to optimize it is returned as
+  // BitFieldAddInfo.
+  auto MatchBitFieldAdd =
+      [&](BinaryOperator &Or) -> std::optional<BitFieldAddInfo> {
+    // The top-most member may be either operand of the `or`.
+    for (unsigned UpIdx = 0; UpIdx != 2; ++UpIdx) {
+      Value *Upper = Or.getOperand(UpIdx);
+      Value *Lower = Or.getOperand(1 - UpIdx);
+      Value *X, *Y, *UpY;
+      const APInt *LoMask, *UpMask, *OptLoMask, *OptUpMask;
+
+      if (!MatchUpperField(Upper, X, UpY, UpMask))
+        continue;
+
+      // The other operand is the bottom member: (X + Y) & LoMask.
+      // No inner mask is needed because nothing can carry into bit 0.
+      if (match(Lower, m_And(m_c_Add(m_Specific(X), m_Value(Y)),
+                             m_APInt(LoMask)))) {
+        if (!IsFieldPair(*LoMask, *UpMask))
+          continue;
+        if (Value *Sum = AccumulateY(Y, UpY, *LoMask, *UpMask))
+          return MakeAddInfo(X, Sum, LoMask, UpMask);
+        continue;
+      }
+
+      // The other operand is an already optimized group of lower members.
+      if (MatchFoldedFields(Lower, X, UpY, Y, OptLoMask, OptUpMask)) {
+        if (!IsFieldAboveFolded(*OptLoMask, *OptUpMask, *UpMask))
+          continue;
+        if (Value *Sum = AccumulateY(Y, UpY, *OptLoMask, *UpMask))
+          return MakeOptInfo(X, Sum, OptLoMask, OptUpMask, UpMask);
+      }
+    }
+
+    // The bitfield has only two members and both addends are constants:
+    //   (X + LoC) & LoMask | (X + UpC) & UpMask
+    // The upper part is a per-member addition only because UpC has no bits
+    // below its member, so no carry can be produced there. This does not
+    // hold for variable addends, which are therefore rejected.
+    Value *X, *LoY, *UpY;
+    const APInt *LoMask, *UpMask, *LoC;
+    if (match(&Or,
+              m_Or(m_And(m_Add(m_Value(X), m_Value(LoY)), m_APInt(LoMask)),
+                   m_And(m_Add(m_Deferred(X), m_Value(UpY)),
+                         m_APInt(UpMask)))) &&
+        match(LoY, m_APInt(LoC))) {
+      // The operand with the smaller mask is the bottom member.
+      if (LoMask->ugt(*UpMask)) {
+        std::swap(LoMask, UpMask);
+        std::swap(LoY, UpY);
+      }
+      // AccumulateY also verifies that the second addend is a constant.
+      if (IsFieldPair(*LoMask, *UpMask))
+        if (Value *Sum = AccumulateY(LoY, UpY, *LoMask, *UpMask))
+          return MakeAddInfo(X, Sum, LoMask, UpMask);
+    }
+
+    return std::nullopt;
+  };
+
+  std::optional<BitFieldAddInfo> Info = MatchBitFieldAdd(I);
+  if (!Info)
+    return nullptr;
+
+  Value *X = Info->X;
+  Value *Y = Info->Y;
+  APInt BitLoMask, BitUpMask;
+  if (Info->opt) {
+    unsigned NewHiBit = BitWidth - (Info->OptMask.New->countl_zero() + 1);
+    // BitLoMask additionally includes the bits of OptMask.New except its
+    // highest bit.
+    BitLoMask = *Info->OptMask.Lower | *Info->OptMask.New;
+    BitLoMask.clearBit(NewHiBit);
+    // BitUpMask additionally includes the highest bit of OptMask.New.
+    BitUpMask = *Info->OptMask.Upper;
+    BitUpMask.setBit(NewHiBit);
+  } else {
+    // For a plain bitfield operation the optimized masks are created here.
+    unsigned LowerHiBit = BitWidth - (Info->AddMask.Lower->countl_zero() + 1);
+    unsigned UpperHiBit = BitWidth - (Info->AddMask.Upper->countl_zero() + 1);
+    // BitLoMask includes all bits of each member except its highest bit.
+    BitLoMask = *Info->AddMask.Lower | *Info->AddMask.Upper;
+    BitLoMask.clearBit(LowerHiBit);
+    BitLoMask.clearBit(UpperHiBit);
+    // BitUpMask includes only the highest bit of each member.
+    BitUpMask = APInt::getOneBitSet(BitWidth, LowerHiBit);
+    BitUpMask.setBit(UpperHiBit);
+  }
+
+  // Create the optimized bitfield operation using the created bitmasks.
+  // The highest bit of the top-most member is clear in both addends, so the
+  // addition cannot wrap.
+  Value *AndXLower = Builder.CreateAnd(X, BitLoMask);
+  Value *AndYLower = Builder.CreateAnd(Y, BitLoMask);
+  Value *Add = Builder.CreateNUWAdd(AndXLower, AndYLower);
+  Value *XorXY = Builder.CreateXor(X, Y);
+  Value *AndUpper = Builder.CreateAnd(XorXY, BitUpMask);
+  return Builder.CreateXor(Add, AndUpper);
+}
+
 // FIXME: We use commutative matchers (m_c_*) for some, but not all, matches
 // here.
We should standardize that construct where it is needed or choose some // other way to ensure that commutated variants of patterns are not missed. @@ -4034,6 +4280,9 @@ Instruction *InstCombinerImpl::visitOr(BinaryOperator &I) { if (Value *V = SimplifyAddWithRemainder(I)) return replaceInstUsesWith(I, V); + if (Value *Res = foldBitFieldArithmetic(I, Builder)) + return replaceInstUsesWith(I, Res); + return nullptr; } diff --git a/llvm/test/Transforms/InstCombine/or.ll b/llvm/test/Transforms/InstCombine/or.ll index 5e1a4aa895f61c..8272650ddf53de 100644 --- a/llvm/test/Transforms/InstCombine/or.ll +++ b/llvm/test/Transforms/InstCombine/or.ll @@ -2035,12 +2035,12 @@ define i32 @or_xor_and_commuted3(i32 %x, i32 %y, i32 %z) { define i8 @src_2_bitfield_op(i8 %x, i8 %y) { ; CHECK-LABEL: @src_2_bitfield_op( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 -; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24 -; CHECK-NEXT: [[BF_LSHR1228:%.*]] = add i8 [[BF_LSHR]], [[Y]] -; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1228]], 24 -; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 11 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 20 +; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]] ; CHECK-NEXT: ret i8 [[BF_SET20]] ; entry: @@ -2056,11 +2056,10 @@ entry: define i8 @src_2_bitfield_const(i8 %x) { ; CHECK-LABEL: @src_2_bitfield_const( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1 -; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 -; CHECK-NEXT: [[BF_LSHR1228:%.*]] = add i8 [[X]], 8 -; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1228]], 24 -; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: 
[[TMP0:%.*]] = and i8 [[X:%.*]], 11 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[TMP0]], 9 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[X]], 20 +; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i8 [[BF_SET20]] ; entry: @@ -2076,16 +2075,12 @@ entry: define i8 @src_3_bitfield_op(i8 %x, i8 %y) { ; CHECK-LABEL: @src_3_bitfield_op( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 -; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24 -; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] -; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 -; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] -; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 -; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] -; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32 -; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 107 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 107 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], -108 +; CHECK-NEXT: [[BF_SET33:%.*]] = xor i8 [[TMP2]], [[TMP4]] ; CHECK-NEXT: ret i8 [[BF_SET33]] ; entry: @@ -2105,14 +2100,10 @@ entry: define i8 @src_3_bitfield_const(i8 %x) { ; CHECK-LABEL: @src_3_bitfield_const( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1 -; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 -; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8 -; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 -; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] -; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X]], -32 -; CHECK-NEXT: [[BF_VALUE30:%.*]] = add i8 [[TMP0]], 32 -; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_VALUE30]] +; CHECK-NEXT: [[TMP0:%.*]] 
= and i8 [[X:%.*]], 107 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw i8 [[TMP0]], 41 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[X]], -108 +; CHECK-NEXT: [[BF_SET33:%.*]] = xor i8 [[TMP1]], [[TMP2]] ; CHECK-NEXT: ret i8 [[BF_SET33]] ; entry: @@ -2192,12 +2183,12 @@ entry: define i8 @src_bit_arithmetic_bitsize_1_high(i8 %x, i8 %y) { ; CHECK-LABEL: @src_bit_arithmetic_bitsize_1_high( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 -; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 120 -; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] -; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 120 -; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 59 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 59 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 68 +; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -128 ; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] ; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -128 @@ -2250,12 +2241,12 @@ entry: define i8 @src_bit_arithmetic_bitmask_mid_over_high(i8 %x, i8 %y) { ; CHECK-LABEL: @src_bit_arithmetic_bitmask_mid_over_high( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 -; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 56 -; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] -; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 56 -; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 27 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 27 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = 
xor i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 36 +; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 ; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] ; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -32 @@ -2308,12 +2299,12 @@ entry: define i8 @src_bit_arithmetic_bitmask_high_under_mid(i8 %x, i8 %y) { ; CHECK-LABEL: @src_bit_arithmetic_bitmask_high_under_mid( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[Y:%.*]], [[X:%.*]] -; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 -; CHECK-NEXT: [[BF_LSHR:%.*]] = and i8 [[X]], 24 -; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[BF_LSHR]], [[Y]] -; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 -; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[Y:%.*]], 11 +; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i8 [[TMP0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = xor i8 [[X]], [[Y]] +; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[TMP3]], 20 +; CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP2]], [[TMP4]] ; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -16 ; CHECK-NEXT: [[BF_LSHR2547:%.*]] = add i8 [[BF_LSHR22]], [[Y]] ; CHECK-NEXT: [[BF_VALUE30:%.*]] = and i8 [[BF_LSHR2547]], -16 @@ -2415,11 +2406,10 @@ entry: define i8 @src_bit_arithmetic_addition_under_bitmask_high(i8 %x) { ; CHECK-LABEL: @src_bit_arithmetic_addition_under_bitmask_high( ; CHECK-NEXT: entry: -; CHECK-NEXT: [[NARROW:%.*]] = add i8 [[X:%.*]], 1 -; CHECK-NEXT: [[BF_VALUE:%.*]] = and i8 [[NARROW]], 7 -; CHECK-NEXT: [[BF_LSHR1244:%.*]] = add i8 [[X]], 8 -; CHECK-NEXT: [[BF_SHL:%.*]] = and i8 [[BF_LSHR1244]], 24 -; CHECK-NEXT: [[BF_SET20:%.*]] = or disjoint i8 [[BF_VALUE]], [[BF_SHL]] +; CHECK-NEXT: [[TMP0:%.*]] = and i8 [[X:%.*]], 11 +; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i8 [[TMP0]], 9 +; CHECK-NEXT: [[TMP2:%.*]] = and i8 [[X]], 20 +; 
CHECK-NEXT: [[BF_SET20:%.*]] = xor i8 [[TMP1]], [[TMP2]] ; CHECK-NEXT: [[BF_LSHR22:%.*]] = and i8 [[X]], -32 ; CHECK-NEXT: [[BF_SET33:%.*]] = or disjoint i8 [[BF_SET20]], [[BF_LSHR22]] ; CHECK-NEXT: ret i8 [[BF_SET33]] From 11baae5d3ba38a4d8d39e127a3cecb7f4fd342e2 Mon Sep 17 00:00:00 2001 From: hanbeom Date: Thu, 18 Jul 2024 12:03:09 +0900 Subject: [PATCH 3/4] Change APInt type of the arguments of AccumulateY to const APInt --- llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 2fe94c3aa5ec73..528250a9b2e17c 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3575,8 +3575,8 @@ static Value *foldBitFieldArithmetic(BinaryOperator &I, // If operand of bitfield operation is a constant, sum of the constants is // computed and returned. if operand is not a constant, operand is // returned. if this operation is not a bitfield operation, null is returned. 
- auto AccumulateY = [&](Value *LoY, Value *UpY, APInt LoMask, - APInt UpMask) -> Value * { + auto AccumulateY = [&](Value *LoY, Value *UpY, const APInt LoMask, + const APInt UpMask) -> Value * { Value *Y = nullptr; auto *CLoY = dyn_cast_or_null(LoY); auto *CUpY = dyn_cast_or_null(UpY); From 77ed1ab462a78cf47191fd3b1d0ba3b813b7839f Mon Sep 17 00:00:00 2001 From: hanbeom Date: Sun, 21 Jul 2024 21:33:53 +0900 Subject: [PATCH 4/4] Fix overwriting the same variable due to incorrect using --- llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 528250a9b2e17c..3e9bc75a2955f3 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -3604,7 +3604,8 @@ static Value *foldBitFieldArithmetic(BinaryOperator &I, // BitFieldAddInfo. auto MatchBitFieldAdd = [&](BinaryOperator &I) -> std::optional { - const APInt *OptLoMask, *OptUpMask, *LoMask, *UpMask, *UpMask2 = nullptr; + const APInt *OptLoMask, *OptLoMask2 = nullptr, *OptUpMask, *LoMask, *UpMask, + *UpMask2 = nullptr; Value *X, *Y, *UpY; // Bitfield has more than 2 member. @@ -3631,7 +3632,7 @@ static Value *foldBitFieldArithmetic(BinaryOperator &I, m_c_Xor(m_CombineOr( // When Y is not the constant. m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)), - m_And(m_Value(Y), m_APInt(OptLoMask))), + m_And(m_Value(Y), m_APInt(OptLoMask2))), // When Y is Constant, it can be accumulated. m_c_Add(m_And(m_Value(X), m_APInt(OptLoMask)), m_Value(Y))), // If Y is a constant, X^Y&OptUpMask can be pre-computed and @@ -3662,7 +3663,8 @@ static Value *foldBitFieldArithmetic(BinaryOperator &I, } // Match already optimized bitfield operation. 
- if (match(&I, OptBitFieldAdd)) { + if (match(&I, OptBitFieldAdd) && + (OptLoMask2 == OptLoMask || OptLoMask2 == nullptr)) { APInt Mask = APInt::getBitsSet( BitWidth, BitWidth - OptUpMask->countl_zero(), BitWidth); APInt Mask2 = APInt::getBitsSet(