From a7d992c0f2d235c67f04160405c5c5606408d4b1 Mon Sep 17 00:00:00 2001 From: Johannes Doerfert Date: Sun, 8 Dec 2019 22:11:16 -0600 Subject: [PATCH] [ValueTracking] Allow context-sensitive nullness check for non-pointers Summary: Same as D60846 and D69571 but with a fix for the problem encountered after them. Both times it was a missing context adjustment in the handling of PHI nodes. The reproducers created from the bugs that caused the old commits to be reverted are included. Reviewers: nikic, nlopes, mkazantsev, spatel, dlrobertson, uabelho, hakzsam, hans Subscribers: hiraditya, bollu, asbirlea, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D71181 --- llvm/lib/Analysis/InstructionSimplify.cpp | 10 +++- llvm/lib/Analysis/ValueTracking.cpp | 36 ++++++++++---- llvm/test/Transforms/Attributor/nonnull.ll | 2 +- llvm/test/Transforms/InstCombine/known-non-zero.ll | 4 +- .../test/Transforms/InstSimplify/known-non-zero.ll | 57 +++++++++++++++++++--- llvm/test/Transforms/LICM/hoist-mustexec.ll | 4 +- 6 files changed, 89 insertions(+), 24 deletions(-) diff --git a/llvm/lib/Analysis/InstructionSimplify.cpp b/llvm/lib/Analysis/InstructionSimplify.cpp index 22a76a0..c27e256 100644 --- a/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/llvm/lib/Analysis/InstructionSimplify.cpp @@ -543,10 +543,16 @@ static Value *ThreadCmpOverPHI(CmpInst::Predicate Pred, Value *LHS, Value *RHS, // Evaluate the BinOp on the incoming phi values. Value *CommonValue = nullptr; - for (Value *Incoming : PI->incoming_values()) { + for (unsigned u = 0, e = PI->getNumIncomingValues(); u < e; ++u) { + Value *Incoming = PI->getIncomingValue(u); + Instruction *InTI = PI->getIncomingBlock(u)->getTerminator(); // If the incoming value is the phi node itself, it can safely be skipped. if (Incoming == PI) continue; - Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q, MaxRecurse); + // Change the context instruction to the "edge" that flows into the phi. + // This is important because that is where incoming is actually "evaluated" + // even though it is used later somewhere else. + Value *V = SimplifyCmpInst(Pred, Incoming, RHS, Q.getWithInstruction(InTI), + MaxRecurse); // If the operation failed to simplify, or simplified to a different value // to previously, then give up. if (!V || (CommonValue && V != CommonValue)) diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp index f46bae7..e35b235 100644 --- a/llvm/lib/Analysis/ValueTracking.cpp +++ b/llvm/lib/Analysis/ValueTracking.cpp @@ -1353,6 +1353,8 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, for (unsigned i = 0; i != 2; ++i) { Value *L = P->getIncomingValue(i); Value *R = P->getIncomingValue(!i); + Instruction *RInst = P->getIncomingBlock(!i)->getTerminator(); + Instruction *LInst = P->getIncomingBlock(i)->getTerminator(); Operator *LU = dyn_cast(L); if (!LU) continue; @@ -1374,13 +1376,22 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, L = LL; else continue; // Check for recurrence with L and R flipped. + + // Change the context instruction to the "edge" that flows into the + // phi. This is important because that is where the value is actually + // "evaluated" even though it is used later somewhere else. (see also + // D69571). + Query RecQ = Q; + // Ok, we have a PHI of the form L op= R. Check for low // zero bits. - computeKnownBits(R, Known2, Depth + 1, Q); + RecQ.CxtI = RInst; + computeKnownBits(R, Known2, Depth + 1, RecQ); // We need to take the minimum number of known bits KnownBits Known3(Known); - computeKnownBits(L, Known3, Depth + 1, Q); + RecQ.CxtI = LInst; + computeKnownBits(L, Known3, Depth + 1, RecQ); Known.Zero.setLowBits(std::min(Known2.countMinTrailingZeros(), Known3.countMinTrailingZeros())); @@ -1436,14 +1447,22 @@ static void computeKnownBitsFromOperator(const Operator *I, KnownBits &Known, Known.Zero.setAllBits(); Known.One.setAllBits(); - for (Value *IncValue : P->incoming_values()) { + for (unsigned u = 0, e = P->getNumIncomingValues(); u < e; ++u) { + Value *IncValue = P->getIncomingValue(u); // Skip direct self references. if (IncValue == P) continue; + // Change the context instruction to the "edge" that flows into the + // phi. This is important because that is where the value is actually + // "evaluated" even though it is used later somewhere else. (see also + // D69571). + Query RecQ = Q; + RecQ.CxtI = P->getIncomingBlock(u)->getTerminator(); + Known2 = KnownBits(BitWidth); // Recurse, but cap the recursion to one level, because we don't // want to waste time spinning around in loops. - computeKnownBits(IncValue, Known2, MaxDepth - 1, Q); + computeKnownBits(IncValue, Known2, MaxDepth - 1, RecQ); Known.Zero &= Known2.Zero; Known.One &= Known2.One; // If all bits have been ruled out, there's no need to check @@ -1902,8 +1921,8 @@ static bool isGEPKnownNonNull(const GEPOperator *GEP, unsigned Depth, static bool isKnownNonNullFromDominatingCondition(const Value *V, const Instruction *CtxI, const DominatorTree *DT) { - assert(V->getType()->isPointerTy() && "V must be pointer type"); - assert(!isa(V) && "Did not expect ConstantPointerNull"); + if (isa(V)) + return false; if (!CtxI || !DT) return false; @@ -2078,12 +2097,11 @@ bool isKnownNonZero(const Value *V, unsigned Depth, const Query &Q) { } } + if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) + return true; // Check for recursive pointer simplifications. if (V->getType()->isPointerTy()) { - if (isKnownNonNullFromDominatingCondition(V, Q.CxtI, Q.DT)) - return true; - // Look through bitcast operations, GEPs, and int2ptr instructions as they // do not alter the value, or at least not the nullness property of the // value, e.g., int2ptr is allowed to zero/sign extend the value. diff --git a/llvm/test/Transforms/Attributor/nonnull.ll b/llvm/test/Transforms/Attributor/nonnull.ll index 260f04c..97485b76 100644 --- a/llvm/test/Transforms/Attributor/nonnull.ll +++ b/llvm/test/Transforms/Attributor/nonnull.ll @@ -789,7 +789,7 @@ define void @PR43833_simple(i32* %0, i32 %1) { ; ATTRIBUTOR_NPM-NEXT: ret void ; ATTRIBUTOR_NPM: 8: ; ATTRIBUTOR_NPM-NEXT: [[TMP9:%.*]] = phi i32 [ 1, [[TMP4]] ], [ [[TMP10:%.*]], [[TMP8]] ] -; ATTRIBUTOR_NPM-NEXT: tail call void @sink(i32* [[TMP6]]) +; ATTRIBUTOR_NPM-NEXT: tail call void @sink(i32* nonnull [[TMP6]]) ; ATTRIBUTOR_NPM-NEXT: [[TMP10]] = add nuw nsw i32 [[TMP9]], 1 ; ATTRIBUTOR_NPM-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], [[TMP1]] ; ATTRIBUTOR_NPM-NEXT: br i1 [[TMP11]], label [[TMP7]], label [[TMP8]] diff --git a/llvm/test/Transforms/InstCombine/known-non-zero.ll b/llvm/test/Transforms/InstCombine/known-non-zero.ll index 57980e0..5467db5 100644 --- a/llvm/test/Transforms/InstCombine/known-non-zero.ll +++ b/llvm/test/Transforms/InstCombine/known-non-zero.ll @@ -13,7 +13,7 @@ define i32 @test0(i64 %x) { ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[C]], label [[EXIT:%.*]], label [[NON_ZERO:%.*]] ; CHECK: non_zero: -; CHECK-NEXT: [[CTZ:%.*]] = call i64 @llvm.cttz.i64(i64 [[X]], i1 false), !range !0 +; CHECK-NEXT: [[CTZ:%.*]] = call i64 @llvm.cttz.i64(i64 [[X]], i1 true), !range !0 ; CHECK-NEXT: [[CTZ32:%.*]] = trunc i64 [[CTZ]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -40,7 +40,7 @@ define i32 @test1(i64 %x) { ; CHECK-NEXT: [[C:%.*]] = icmp eq i64 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[C]], label [[EXIT:%.*]], label [[NON_ZERO:%.*]] ; CHECK: non_zero: -; CHECK-NEXT: [[CTZ:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X]], i1 false), !range !0 +; CHECK-NEXT: [[CTZ:%.*]] = call i64 @llvm.ctlz.i64(i64 [[X]], i1 true), !range !0 ; CHECK-NEXT: [[CTZ32:%.*]] = trunc i64 [[CTZ]] to i32 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: diff --git a/llvm/test/Transforms/InstSimplify/known-non-zero.ll b/llvm/test/Transforms/InstSimplify/known-non-zero.ll index 7e819e8..524e51b 100644 --- a/llvm/test/Transforms/InstSimplify/known-non-zero.ll +++ b/llvm/test/Transforms/InstSimplify/known-non-zero.ll @@ -7,8 +7,7 @@ define i64 @test0(i64 %x) { ; CHECK-NEXT: [[A:%.*]] = icmp eq i64 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[A]], label [[EXIT:%.*]], label [[NON_ZERO:%.*]] ; CHECK: non_zero: -; CHECK-NEXT: [[B:%.*]] = icmp eq i64 [[X]], 0 -; CHECK-NEXT: br i1 [[B]], label [[UNREACHABLE:%.*]], label [[EXIT]] +; CHECK-NEXT: br i1 false, label [[UNREACHABLE:%.*]], label [[EXIT]] ; CHECK: unreachable: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -37,8 +36,7 @@ define i64 @test1(i64 %x) { ; CHECK-NEXT: [[A:%.*]] = icmp eq i64 [[X:%.*]], 0 ; CHECK-NEXT: br i1 [[A]], label [[EXIT:%.*]], label [[NON_ZERO:%.*]] ; CHECK: non_zero: -; CHECK-NEXT: [[B:%.*]] = icmp ugt i64 [[X]], 0 -; CHECK-NEXT: br i1 [[B]], label [[EXIT]], label [[UNREACHABLE:%.*]] +; CHECK-NEXT: br i1 true, label [[EXIT]], label [[UNREACHABLE:%.*]] ; CHECK: unreachable: ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: @@ -73,11 +71,9 @@ define i1 @test2(i64 %x, i1 %y) { ; CHECK: two: ; CHECK-NEXT: br label [[MAINBLOCK]] ; CHECK: mainblock: -; CHECK-NEXT: [[P:%.*]] = phi i64 [ [[X]], [[ONE]] ], [ 42, [[TWO]] ] -; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 [[P]], 0 ; CHECK-NEXT: br label [[EXIT]] ; CHECK: exit: -; CHECK-NEXT: [[RES:%.*]] = phi i1 [ [[CMP]], [[MAINBLOCK]] ], [ true, [[START:%.*]] ] +; CHECK-NEXT: [[RES:%.*]] = phi i1 [ false, [[MAINBLOCK]] ], [ true, [[START:%.*]] ] ; CHECK-NEXT: ret i1 [[RES]] ; start: @@ -102,3 +98,50 @@ exit: %res = phi i1 [ %cmp, %mainblock ], [ 1, %start ] ret i1 %res } + + +; The code below exposed a bug similar to the one exposed by D60846, see the commit 6ea477590085. +; In a nutshell, we should not replace %result.0 with 0 here. + +define zeroext i8 @update_phi_query_loc_in_recursive_call(i8* nocapture readonly %p){ +; CHECK-LABEL: @update_phi_query_loc_in_recursive_call( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[FOR_COND:%.*]] +; CHECK: for.cond: +; CHECK-NEXT: [[RESULT_0:%.*]] = phi i8 [ 0, [[ENTRY:%.*]] ], [ [[CONV2:%.*]], [[FOR_BODY:%.*]] ] +; CHECK-NEXT: [[SHIFT_0:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ 1, [[FOR_BODY]] ] +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[SHIFT_0]], 0 +; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]] +; CHECK: for.cond.cleanup: +; CHECK-NEXT: ret i8 [[RESULT_0]] +; CHECK: for.body: +; CHECK-NEXT: [[TMP0:%.*]] = load i8, i8* [[P:%.*]], align 1 +; CHECK-NEXT: [[CONV:%.*]] = zext i8 [[TMP0]] to i32 +; CHECK-NEXT: [[MUL:%.*]] = shl nuw nsw i32 [[SHIFT_0]], 3 +; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw i32 [[CONV]], [[MUL]] +; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[SHL]] to i8 +; CHECK-NEXT: [[CONV2]] = or i8 [[RESULT_0]], [[TMP1]] +; CHECK-NEXT: br label [[FOR_COND]] +; +entry: + br label %for.cond + +for.cond: ; preds = %for.body, %entry + %result.0 = phi i8 [ 0, %entry ], [ %conv2, %for.body ] + %shift.0 = phi i32 [ 0, %entry ], [ 1, %for.body ] + %cmp = icmp eq i32 %shift.0, 0 + br i1 %cmp, label %for.body, label %for.cond.cleanup + +for.cond.cleanup: ; preds = %for.cond + ret i8 %result.0 + +for.body: ; preds = %for.cond + %0 = load i8, i8* %p, align 1 + %conv = zext i8 %0 to i32 + %mul = shl nuw nsw i32 %shift.0, 3 + %shl = shl nuw nsw i32 %conv, %mul + %1 = trunc i32 %shl to i8 + %conv2 = or i8 %result.0, %1 + %inc = add nuw nsw i32 %shift.0, 1 + br label %for.cond +} diff --git a/llvm/test/Transforms/LICM/hoist-mustexec.ll b/llvm/test/Transforms/LICM/hoist-mustexec.ll index 521d352..59184eb 100644 --- a/llvm/test/Transforms/LICM/hoist-mustexec.ll +++ b/llvm/test/Transforms/LICM/hoist-mustexec.ll @@ -129,8 +129,6 @@ fail: } ; requires fact length is non-zero -; TODO: IsKnownNonNullFromDominatingConditions is currently only be done for -; pointers; should handle integers too define i32 @test4(i32* noalias nocapture readonly %a) nounwind uwtable { ; CHECK-LABEL: @test4( ; CHECK-NEXT: entry: @@ -138,6 +136,7 @@ define i32 @test4(i32* noalias nocapture readonly %a) nounwind uwtable { ; CHECK-NEXT: [[IS_ZERO:%.*]] = icmp eq i32 [[LEN]], 0 ; CHECK-NEXT: br i1 [[IS_ZERO]], label [[FAIL:%.*]], label [[PREHEADER:%.*]] ; CHECK: preheader: +; CHECK-NEXT: [[I1:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: br label [[FOR_BODY:%.*]] ; CHECK: for.body: ; CHECK-NEXT: [[IV:%.*]] = phi i32 [ 0, [[PREHEADER]] ], [ [[INC:%.*]], [[CONTINUE:%.*]] ] @@ -145,7 +144,6 @@ define i32 @test4(i32* noalias nocapture readonly %a) nounwind uwtable { ; CHECK-NEXT: [[R_CHK:%.*]] = icmp ult i32 [[IV]], [[LEN]] ; CHECK-NEXT: br i1 [[R_CHK]], label [[CONTINUE]], label [[FAIL_LOOPEXIT:%.*]] ; CHECK: continue: -; CHECK-NEXT: [[I1:%.*]] = load i32, i32* [[A]], align 4 ; CHECK-NEXT: [[ADD]] = add nsw i32 [[I1]], [[ACC]] ; CHECK-NEXT: [[INC]] = add nuw nsw i32 [[IV]], 1 ; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 1000 -- 2.7.4