From 4abab5c5ca7b562b80fdb5fb6279e6d2104dae16 Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Sat, 25 Apr 2020 11:25:03 -0400 Subject: [PATCH] [InstCombine] generalize canonicalization of masked equality comparisons (X | MaskC) == C --> (X & ~MaskC) == C ^ MaskC (X | MaskC) != C --> (X & ~MaskC) != C ^ MaskC We have more analysis for 'and' patterns and already lean this way in the existing code, so this should be neutral or better in IR. If this does not do as well in codegen, the problem already exists and we should fix that based on target costs/heuristics. http://volta.cs.utah.edu:8080/z/oP3ecL define void @src(i8 %x, i8 %OrC, i8 %C, i1* %p0, i1* %p1) { %or = or i8 %x, %OrC %eq = icmp eq i8 %or, %C store i1 %eq, i1* %p0 %ne = icmp ne i8 %or, %C store i1 %ne, i1* %p1 ret void } define void @tgt(i8 %x, i8 %OrC, i8 %C, i1* %p0, i1* %p1) { %NotOrC = xor i8 %OrC, -1 %a = and i8 %x, %NotOrC %NewC = xor i8 %C, %OrC %eq = icmp eq i8 %a, %NewC store i1 %eq, i1* %p0 %ne = icmp ne i8 %a, %NewC store i1 %ne, i1* %p1 ret void } --- .../Transforms/InstCombine/InstCombineCompares.cpp | 24 +++++++++++-------- llvm/test/Transforms/InstCombine/and-or-icmps.ll | 28 +++++++++++----------- llvm/test/Transforms/InstCombine/assume2.ll | 8 +++---- llvm/test/Transforms/InstCombine/icmp-or.ll | 8 +++---- llvm/test/Transforms/InstCombine/icmp.ll | 16 ++++++------- llvm/test/Transforms/InstCombine/load-cmp.ll | 4 ++-- 6 files changed, 46 insertions(+), 42 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp index 37ed6a0..3a9a46f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCompares.cpp @@ -1886,20 +1886,24 @@ Instruction *InstCombiner::foldICmpOrConstant(ICmpInst &Cmp, BinaryOperator *Or, } Value *OrOp0 = Or->getOperand(0), *OrOp1 = Or->getOperand(1); - if (Cmp.isEquality() && Cmp.getOperand(1) == OrOp1) { - // X | C == C --> X 
<=u C - // X | C != C --> X >u C - // iff C+1 is a power of 2 (C is a bitmask of the low bits) - if ((C + 1).isPowerOf2()) { + const APInt *MaskC; + if (match(OrOp1, m_APInt(MaskC)) && Cmp.isEquality()) { + if (*MaskC == C && (C + 1).isPowerOf2()) { + // X | C == C --> X <=u C + // X | C != C --> X >u C + // iff C+1 is a power of 2 (C is a bitmask of the low bits) Pred = (Pred == CmpInst::ICMP_EQ) ? CmpInst::ICMP_ULE : CmpInst::ICMP_UGT; return new ICmpInst(Pred, OrOp0, OrOp1); } - // More general: are all bits outside of a mask constant set or not set? - // X | C == C --> (X & ~C) == 0 - // X | C != C --> (X & ~C) != 0 + + // More general: canonicalize 'equality with set bits mask' to + // 'equality with clear bits mask'. + // (X | MaskC) == C --> (X & ~MaskC) == C ^ MaskC + // (X | MaskC) != C --> (X & ~MaskC) != C ^ MaskC if (Or->hasOneUse()) { - Value *A = Builder.CreateAnd(OrOp0, ~C); - return new ICmpInst(Pred, A, ConstantInt::getNullValue(OrOp0->getType())); + Value *And = Builder.CreateAnd(OrOp0, ~(*MaskC)); + Constant *NewC = ConstantInt::get(Or->getType(), C ^ (*MaskC)); + return new ICmpInst(Pred, And, NewC); } } diff --git a/llvm/test/Transforms/InstCombine/and-or-icmps.ll b/llvm/test/Transforms/InstCombine/and-or-icmps.ll index cf82275..56d5b04 100644 --- a/llvm/test/Transforms/InstCombine/and-or-icmps.ll +++ b/llvm/test/Transforms/InstCombine/and-or-icmps.ll @@ -43,8 +43,8 @@ define i1 @PR2330(i32 %a, i32 %b) { define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) { ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants1( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 50 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp eq i32 %x, 50 @@ -57,8 +57,8 @@ define i1 @or_eq_with_one_bit_diff_constants1(i32 %x) { define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) { ; CHECK-LABEL: 
@and_ne_with_one_bit_diff_constants1( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 51 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 50 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp ne i32 %x, 51 @@ -71,8 +71,8 @@ define i1 @and_ne_with_one_bit_diff_constants1(i32 %x) { define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) { ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 32 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -33 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 65 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp eq i32 %x, 97 @@ -83,8 +83,8 @@ define i1 @or_eq_with_one_bit_diff_constants2(i32 %x) { define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) { ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants2( -; CHECK-NEXT: [[TMP1:%.*]] = or i19 [[X:%.*]], 128 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 193 +; CHECK-NEXT: [[TMP1:%.*]] = and i19 [[X:%.*]], -129 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i19 [[TMP1]], 65 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp ne i19 %x, 65 @@ -97,8 +97,8 @@ define i1 @and_ne_with_one_bit_diff_constants2(i19 %x) { define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) { ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants3( -; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], -2 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], 127 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i8 [[TMP1]], 126 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp eq i8 %x, 254 @@ -109,8 +109,8 @@ define i1 @or_eq_with_one_bit_diff_constants3(i8 %x) { define i1 @and_ne_with_one_bit_diff_constants3(i8 %x) { ; CHECK-LABEL: @and_ne_with_one_bit_diff_constants3( -; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[X:%.*]], -128 -; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], -63 +; CHECK-NEXT: [[TMP1:%.*]] = 
and i8 [[X:%.*]], 127 +; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i8 [[TMP1]], 65 ; CHECK-NEXT: ret i1 [[TMP2]] ; %cmp1 = icmp ne i8 %x, 65 @@ -179,8 +179,8 @@ define i1 @and_ne_with_diff_one_signed(i64 %x) { define <2 x i1> @or_eq_with_one_bit_diff_constants2_splatvec(<2 x i32> %x) { ; CHECK-LABEL: @or_eq_with_one_bit_diff_constants2_splatvec( -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[TMP2]] ; %cmp1 = icmp eq <2 x i32> %x, diff --git a/llvm/test/Transforms/InstCombine/assume2.ll b/llvm/test/Transforms/InstCombine/assume2.ll index 8dc8831..964b91a 100644 --- a/llvm/test/Transforms/InstCombine/assume2.ll +++ b/llvm/test/Transforms/InstCombine/assume2.ll @@ -36,8 +36,8 @@ define i32 @test2(i32 %a) #0 { define i32 @test3(i32 %a) #0 { ; CHECK-LABEL: @test3( -; CHECK-NEXT: [[V:%.*]] = or i32 [[A:%.*]], -16 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], -11 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 5 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret i32 5 ; @@ -50,8 +50,8 @@ define i32 @test3(i32 %a) #0 { define i32 @test4(i32 %a) #0 { ; CHECK-LABEL: @test4( -; CHECK-NEXT: [[V:%.*]] = or i32 [[A:%.*]], -16 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[V]], -6 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[A:%.*]], 15 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 10 ; CHECK-NEXT: tail call void @llvm.assume(i1 [[CMP]]) ; CHECK-NEXT: ret i32 2 ; diff --git a/llvm/test/Transforms/InstCombine/icmp-or.ll b/llvm/test/Transforms/InstCombine/icmp-or.ll index 5fd19ed..ac2274e 100644 --- a/llvm/test/Transforms/InstCombine/icmp-or.ll +++ b/llvm/test/Transforms/InstCombine/icmp-or.ll @@ -3,8 +3,8 @@ define i1 @set_low_bit_mask_eq(i8 %x) { ; CHECK-LABEL: @set_low_bit_mask_eq( -; CHECK-NEXT: [[SUB:%.*]] = 
or i8 [[X:%.*]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[SUB]], 19 +; CHECK-NEXT: [[TMP1:%.*]] = and i8 [[X:%.*]], -2 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[TMP1]], 18 ; CHECK-NEXT: ret i1 [[CMP]] ; %sub = or i8 %x, 1 @@ -14,8 +14,8 @@ define i1 @set_low_bit_mask_eq(i8 %x) { define <2 x i1> @set_low_bit_mask_ne(<2 x i8> %x) { ; CHECK-LABEL: @set_low_bit_mask_ne( -; CHECK-NEXT: [[SUB:%.*]] = or <2 x i8> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[SUB]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i8> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i8> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %sub = or <2 x i8> %x, diff --git a/llvm/test/Transforms/InstCombine/icmp.ll b/llvm/test/Transforms/InstCombine/icmp.ll index 4786c67..67d254a 100644 --- a/llvm/test/Transforms/InstCombine/icmp.ll +++ b/llvm/test/Transforms/InstCombine/icmp.ll @@ -2163,8 +2163,8 @@ define <2 x i1> @icmp_add_X_-14_ult_2_vec(<2 x i32> %X) { define i1 @icmp_sub_3_X_ult_2(i32 %X) { ; CHECK-LABEL: @icmp_sub_3_X_ult_2( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 3 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; %add = sub i32 3, %X @@ -2174,8 +2174,8 @@ define i1 @icmp_sub_3_X_ult_2(i32 %X) { define <2 x i1> @icmp_sub_3_X_ult_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_sub_3_X_ult_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp eq <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = sub <2 x i32> , %X @@ -2207,8 +2207,8 @@ define <2 x i1> @icmp_add_X_-14_uge_2_vec(<2 x i32> %X) { define i1 @icmp_sub_3_X_uge_2(i32 %X) { ; CHECK-LABEL: @icmp_sub_3_X_uge_2( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1 -; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 3 +; 
CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2 +; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 2 ; CHECK-NEXT: ret i1 [[CMP]] ; %add = sub i32 3, %X @@ -2218,8 +2218,8 @@ define i1 @icmp_sub_3_X_uge_2(i32 %X) { define <2 x i1> @icmp_sub_3_X_uge_2_vec(<2 x i32> %X) { ; CHECK-LABEL: @icmp_sub_3_X_uge_2_vec( -; CHECK-NEXT: [[TMP1:%.*]] = or <2 x i32> [[X:%.*]], -; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], +; CHECK-NEXT: [[TMP1:%.*]] = and <2 x i32> [[X:%.*]], +; CHECK-NEXT: [[CMP:%.*]] = icmp ne <2 x i32> [[TMP1]], ; CHECK-NEXT: ret <2 x i1> [[CMP]] ; %add = sub <2 x i32> , %X diff --git a/llvm/test/Transforms/InstCombine/load-cmp.ll b/llvm/test/Transforms/InstCombine/load-cmp.ll index 7b24ce3..84f692d 100644 --- a/llvm/test/Transforms/InstCombine/load-cmp.ll +++ b/llvm/test/Transforms/InstCombine/load-cmp.ll @@ -156,8 +156,8 @@ define i1 @test7(i32 %X) { define i1 @test8(i32 %X) { ; CHECK-LABEL: @test8( -; CHECK-NEXT: [[TMP1:%.*]] = or i32 [[X:%.*]], 1 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9 +; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[X:%.*]], -2 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 8 ; CHECK-NEXT: ret i1 [[TMP2]] ; %P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X -- 2.7.4