From a8f7dee1dffbf8c18acbcc7b1f6d659bf808798e Mon Sep 17 00:00:00 2001 From: Nikita Popov Date: Tue, 11 May 2021 23:01:29 +0200 Subject: [PATCH] [InstCombine] Support one-hot merge for logical and/or If a logical and/or is used, we need to insert a freeze instruction so that a potential poison value from the RHS is not propagated. Otherwise it works the same way as bitwise and/or. This is intended to address the regression reported at https://reviews.llvm.org/D101191#2751002. Differential Revision: https://reviews.llvm.org/D102279 --- .../Transforms/InstCombine/InstCombineAndOrXor.cpp | 7 +- .../Transforms/InstCombine/InstCombineInternal.h | 3 +- .../Transforms/InstCombine/InstCombineSelect.cpp | 6 + llvm/test/Transforms/InstCombine/onehot_merge.ll | 130 ++++++++++----------- 4 files changed, 76 insertions(+), 70 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 86056a4..c9e713d 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -799,7 +799,8 @@ foldAndOrOfEqualityCmpsWithConstants(ICmpInst *LHS, ICmpInst *RHS, Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, Instruction *CxtI, - bool IsAnd) { + bool IsAnd, + bool IsLogical) { CmpInst::Predicate Pred = IsAnd ? CmpInst::ICMP_NE : CmpInst::ICMP_EQ; if (LHS->getPredicate() != Pred || RHS->getPredicate() != Pred) return nullptr; @@ -819,6 +820,10 @@ Value *InstCombinerImpl::foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, if (L1 == R1 && isKnownToBeAPowerOfTwo(L2, false, 0, CxtI) && isKnownToBeAPowerOfTwo(R2, false, 0, CxtI)) { + // If this is a logical and/or, then we must prevent propagation of a + // poison value from the RHS by inserting freeze. + if (IsLogical) + R2 = Builder.CreateFreeze(R2); Value *Mask = Builder.CreateOr(L2, R2); Value *Masked = Builder.CreateAnd(L1, Mask); auto NewPred = IsAnd ?
CmpInst::ICMP_EQ : CmpInst::ICMP_NE; diff --git a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h index a3eabfd..896fa9f 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineInternal.h +++ b/llvm/lib/Transforms/InstCombine/InstCombineInternal.h @@ -350,7 +350,8 @@ private: Value *foldLogicOfFCmps(FCmpInst *LHS, FCmpInst *RHS, bool IsAnd); Value *foldAndOrOfICmpsOfAndWithPow2(ICmpInst *LHS, ICmpInst *RHS, - Instruction *CxtI, bool IsAnd); + Instruction *CxtI, bool IsAnd, + bool IsLogical = false); Value *matchSelectFromAndOr(Value *A, Value *B, Value *C, Value *D); Value *getSelectCondition(Value *A, Value *B); diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index e203efb..50dc1a7 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -2733,6 +2733,12 @@ Instruction *InstCombinerImpl::visitSelectInst(SelectInst &SI) { if (auto *I = foldAndOrOfSelectUsingImpliedCond(CondVal, *Op1SI, /* IsAnd */ IsAnd)) return I; + + if (auto *ICmp0 = dyn_cast<ICmpInst>(CondVal)) + if (auto *ICmp1 = dyn_cast<ICmpInst>(Op1)) + if (auto *V = foldAndOrOfICmpsOfAndWithPow2(ICmp0, ICmp1, &SI, IsAnd, + /* IsLogical */ true)) + return replaceInstUsesWith(SI, V); } // select (select a, true, b), c, false -> select a, c, false diff --git a/llvm/test/Transforms/InstCombine/onehot_merge.ll b/llvm/test/Transforms/InstCombine/onehot_merge.ll index 5b31aa0..8cde517 100644 --- a/llvm/test/Transforms/InstCombine/onehot_merge.ll +++ b/llvm/test/Transforms/InstCombine/onehot_merge.ll @@ -66,12 +66,11 @@ define i1 @foo1_and_logical(i32 %k, i32 %c1, i32 %c2) { ; CHECK-LABEL: @foo1_and_logical( ; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] ; CHECK-NEXT: [[T4:%.*]] = shl i32 1, [[C2:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[T]], [[K:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 -; CHECK-NEXT:
[[T5:%.*]] = and i32 [[T4]], [[K]] -; CHECK-NEXT: [[T6:%.*]] = icmp eq i32 [[T5]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T2]], i1 true, i1 [[T6]] -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t = shl i32 1, %c1 %t4 = shl i32 1, %c2 @@ -129,12 +128,11 @@ define i1 @foo1_and_commuted_logical(i32 %k, i32 %c1, i32 %c2) { ; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]] ; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] ; CHECK-NEXT: [[T4:%.*]] = shl i32 1, [[C2:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[K2]], [[T]] -; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 -; CHECK-NEXT: [[T5:%.*]] = and i32 [[T4]], [[K2]] -; CHECK-NEXT: [[T6:%.*]] = icmp eq i32 [[T5]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T2]], i1 true, i1 [[T6]] -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[K2]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %k2 = mul i32 %k, %k ; to trick the complexity sorting %t = shl i32 1, %c1 @@ -233,12 +231,11 @@ define i1 @foo1_or_logical(i32 %k, i32 %c1, i32 %c2) { ; CHECK-LABEL: @foo1_or_logical( ; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] ; CHECK-NEXT: [[T4:%.*]] = shl i32 1, [[C2:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[T]], [[K:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 -; CHECK-NEXT: [[T5:%.*]] = and i32 [[T4]], [[K]] -; CHECK-NEXT: [[T6:%.*]] = icmp ne i32 [[T5]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T2]], i1 [[T6]], i1 false -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K:%.*]] +; 
CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t = shl i32 1, %c1 %t4 = shl i32 1, %c2 @@ -296,12 +293,11 @@ define i1 @foo1_or_commuted_logical(i32 %k, i32 %c1, i32 %c2) { ; CHECK-NEXT: [[K2:%.*]] = mul i32 [[K:%.*]], [[K]] ; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] ; CHECK-NEXT: [[T4:%.*]] = shl i32 1, [[C2:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[K2]], [[T]] -; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 -; CHECK-NEXT: [[T5:%.*]] = and i32 [[T4]], [[K2]] -; CHECK-NEXT: [[T6:%.*]] = icmp ne i32 [[T5]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T2]], i1 [[T6]], i1 false -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[K2]], [[TMP2]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %k2 = mul i32 %k, %k ; to trick the complexity sorting %t = shl i32 1, %c1 @@ -358,12 +354,11 @@ define i1 @foo1_and_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) { ; CHECK-LABEL: @foo1_and_signbit_lshr_logical( ; CHECK-NEXT: [[T:%.*]] = shl i32 1, [[C1:%.*]] ; CHECK-NEXT: [[T4:%.*]] = lshr i32 -2147483648, [[C2:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[T]], [[K:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp eq i32 [[T1]], 0 -; CHECK-NEXT: [[T5:%.*]] = and i32 [[T4]], [[K]] -; CHECK-NEXT: [[T6:%.*]] = icmp eq i32 [[T5]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T2]], i1 true, i1 [[T6]] -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t = shl i32 1, %c1 %t4 = lshr i32 -2147483648, %c2 @@ -417,12 +412,11 @@ define i1 @foo1_or_signbit_lshr_logical(i32 %k, i32 %c1, i32 %c2) { ; CHECK-LABEL: @foo1_or_signbit_lshr_logical( ; CHECK-NEXT: [[T:%.*]] = 
shl i32 1, [[C1:%.*]] ; CHECK-NEXT: [[T4:%.*]] = lshr i32 -2147483648, [[C2:%.*]] -; CHECK-NEXT: [[T1:%.*]] = and i32 [[T]], [[K:%.*]] -; CHECK-NEXT: [[T2:%.*]] = icmp ne i32 [[T1]], 0 -; CHECK-NEXT: [[T5:%.*]] = and i32 [[T4]], [[K]] -; CHECK-NEXT: [[T6:%.*]] = icmp ne i32 [[T5]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T2]], i1 [[T6]], i1 false -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T4]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t = shl i32 1, %c1 %t4 = lshr i32 -2147483648, %c2 @@ -628,12 +622,11 @@ define i1 @foo1_and_extra_use_shl_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { ; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] ; CHECK-NEXT: store i32 [[T0]], i32* [[P:%.*]], align 4 ; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] -; CHECK-NEXT: [[T2:%.*]] = and i32 [[T0]], [[K:%.*]] -; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T2]], 0 -; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K]] -; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T3]], i1 true, i1 [[T5]] -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t0 = shl i32 1, %c1 store i32 %t0, i32* %p ; extra use of shl @@ -675,11 +668,11 @@ define i1 @foo1_and_extra_use_and_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { ; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] ; CHECK-NEXT: [[T2:%.*]] = and i32 [[T0]], [[K:%.*]] ; CHECK-NEXT: store i32 [[T2]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T2]], 0 -; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K]] -; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], 0 -; CHECK-NEXT: [[OR:%.*]] = select 
i1 [[T3]], i1 true, i1 [[T5]] -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t0 = shl i32 1, %c1 %t1 = shl i32 1, %c2 @@ -723,10 +716,11 @@ define i1 @foo1_and_extra_use_cmp_logical(i32 %k, i32 %c1, i32 %c2, i1* %p) { ; CHECK-NEXT: [[T2:%.*]] = and i32 [[T0]], [[K:%.*]] ; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T2]], 0 ; CHECK-NEXT: store i1 [[T3]], i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K]] -; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T3]], i1 true, i1 [[T5]] -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t0 = shl i32 1, %c1 %t1 = shl i32 1, %c2 @@ -766,12 +760,11 @@ define i1 @foo1_and_extra_use_shl2_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { ; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] ; CHECK-NEXT: store i32 [[T1]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[T2:%.*]] = and i32 [[T0]], [[K:%.*]] -; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T2]], 0 -; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K]] -; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T3]], i1 true, i1 [[T5]] -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K:%.*]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t0 = shl i32 1, %c1 %t1 = shl i32 1, %c2 @@ -811,13 +804,13 @@ define i1 
@foo1_and_extra_use_and2_logical(i32 %k, i32 %c1, i32 %c2, i32* %p) { ; CHECK-LABEL: @foo1_and_extra_use_and2_logical( ; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] -; CHECK-NEXT: [[T2:%.*]] = and i32 [[T0]], [[K:%.*]] -; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T2]], 0 -; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K]] +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K:%.*]] ; CHECK-NEXT: store i32 [[T4]], i32* [[P:%.*]], align 4 -; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], 0 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T3]], i1 true, i1 [[T5]] -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t0 = shl i32 1, %c1 %t1 = shl i32 1, %c2 @@ -858,13 +851,14 @@ define i1 @foo1_and_extra_use_cmp2_logical(i32 %k, i32 %c1, i32 %c2, i1* %p) { ; CHECK-LABEL: @foo1_and_extra_use_cmp2_logical( ; CHECK-NEXT: [[T0:%.*]] = shl i32 1, [[C1:%.*]] ; CHECK-NEXT: [[T1:%.*]] = shl i32 1, [[C2:%.*]] -; CHECK-NEXT: [[T2:%.*]] = and i32 [[T0]], [[K:%.*]] -; CHECK-NEXT: [[T3:%.*]] = icmp eq i32 [[T2]], 0 -; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K]] +; CHECK-NEXT: [[T4:%.*]] = and i32 [[T1]], [[K:%.*]] ; CHECK-NEXT: [[T5:%.*]] = icmp eq i32 [[T4]], 0 ; CHECK-NEXT: store i1 [[T5]], i1* [[P:%.*]], align 1 -; CHECK-NEXT: [[OR:%.*]] = select i1 [[T3]], i1 true, i1 [[T5]] -; CHECK-NEXT: ret i1 [[OR]] +; CHECK-NEXT: [[TMP1:%.*]] = freeze i32 [[T1]] +; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[T0]], [[TMP1]] +; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], [[K]] +; CHECK-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], [[TMP2]] +; CHECK-NEXT: ret i1 [[TMP4]] ; %t0 = shl i32 1, %c1 %t1 = shl i32 1, %c2 -- 2.7.4