const unsigned MaxDepth = 6;
-/// Enable an experimental feature to leverage information about dominating
-/// conditions to compute known bits. The individual options below control how
-/// hard we search. The defaults are chosen to be fairly aggressive. If you
-/// run into compile time problems when testing, scale them back and report
-/// your findings.
-static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
- cl::Hidden, cl::init(false));
-
-// This is expensive, so we only do it for the top level query value.
-// (TODO: evaluate cost vs profit, consider higher thresholds)
-static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
- cl::Hidden, cl::init(1));
-
-/// How many dominating blocks should be scanned looking for dominating
-/// conditions?
-static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
- cl::Hidden,
- cl::init(20));
-
// Controls the number of uses of the value searched for possible
// dominating comparisons.
static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
}
-/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
-/// true (at the context instruction.) This is mostly a utility function for
-/// the prototype dominating conditions reasoning below.
-static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
- APInt &KnownZero,
- APInt &KnownOne,
- unsigned Depth, const Query &Q) {
- Value *LHS = Cmp->getOperand(0);
- Value *RHS = Cmp->getOperand(1);
- // TODO: We could potentially be more aggressive here. This would be worth
- // evaluating. If we can, explore commoning this code with the assume
- // handling logic.
- if (LHS != V && RHS != V)
- return;
-
- const unsigned BitWidth = KnownZero.getBitWidth();
-
- switch (Cmp->getPredicate()) {
- default:
- // We know nothing from this condition
- break;
- // TODO: implement unsigned bound from below (known one bits)
- // TODO: common condition check implementations with assumes
- // TODO: implement other patterns from assume (e.g. V & B == A)
- case ICmpInst::ICMP_SGT:
- if (LHS == V) {
- APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
- computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
- if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
- // We know that the sign bit is zero.
- KnownZero |= APInt::getSignBit(BitWidth);
- }
- }
- break;
- case ICmpInst::ICMP_EQ:
- {
- APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
- if (LHS == V)
- computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
- else if (RHS == V)
- computeKnownBits(LHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
- else
- llvm_unreachable("missing use?");
- KnownZero |= KnownZeroTemp;
- KnownOne |= KnownOneTemp;
- }
- break;
- case ICmpInst::ICMP_ULE:
- if (LHS == V) {
- APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
- computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
- // The known zero bits carry over
- unsigned SignBits = KnownZeroTemp.countLeadingOnes();
- KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
- }
- break;
- case ICmpInst::ICMP_ULT:
- if (LHS == V) {
- APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
- computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
- // Whatever high bits in rhs are zero are known to be zero (if rhs is a
- // power of 2, then one more).
- unsigned SignBits = KnownZeroTemp.countLeadingOnes();
- if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp)))
- SignBits++;
- KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
- }
- break;
- };
-}
-
-/// Compute known bits in 'V' from conditions which are known to be true along
-/// all paths leading to the context instruction. In particular, look for
-/// cases where one branch of an interesting condition dominates the context
-/// instruction. This does not do general dataflow.
-/// NOTE: This code is EXPERIMENTAL and currently off by default.
-static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
- APInt &KnownOne,
- unsigned Depth,
- const Query &Q) {
- // Need both the dominator tree and the query location to do anything useful
- if (!Q.DT || !Q.CxtI)
- return;
- Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
- // The context instruction might be in a statically unreachable block. If
- // so, asking dominator queries may yield suprising results. (e.g. the block
- // may not have a dom tree node)
- if (!Q.DT->isReachableFromEntry(Cxt->getParent()))
- return;
-
- // Avoid useless work
- if (auto VI = dyn_cast<Instruction>(V))
- if (VI->getParent() == Cxt->getParent())
- return;
-
- // Note: We currently implement two options. It's not clear which of these
- // will survive long term, we need data for that.
- // Option 1 - Try walking the dominator tree looking for conditions which
- // might apply. This works well for local conditions (loop guards, etc..),
- // but not as well for things far from the context instruction (presuming a
- // low max blocks explored). If we can set an high enough limit, this would
- // be all we need.
- // Option 2 - We restrict out search to those conditions which are uses of
- // the value we're interested in. This is independent of dom structure,
- // but is slightly less powerful without looking through lots of use chains.
- // It does handle conditions far from the context instruction (e.g. early
- // function exits on entry) really well though.
-
- // Option 1 - Search the dom tree
- unsigned NumBlocksExplored = 0;
- BasicBlock *Current = Cxt->getParent();
- while (true) {
- // Stop searching if we've gone too far up the chain
- if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
- break;
- NumBlocksExplored++;
-
- if (!Q.DT->getNode(Current)->getIDom())
- break;
- Current = Q.DT->getNode(Current)->getIDom()->getBlock();
- if (!Current)
- // found function entry
- break;
-
- BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
- if (!BI || BI->isUnconditional())
- continue;
- ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
- if (!Cmp)
- continue;
-
- // We're looking for conditions that are guaranteed to hold at the context
- // instruction. Finding a condition where one path dominates the context
- // isn't enough because both the true and false cases could merge before
- // the context instruction we're actually interested in. Instead, we need
- // to ensure that the taken *edge* dominates the context instruction. We
- // know that the edge must be reachable since we started from a reachable
- // block.
- BasicBlock *BB0 = BI->getSuccessor(0);
- BasicBlockEdge Edge(BI->getParent(), BB0);
- if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
- continue;
-
- computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, Depth, Q);
- }
-
- // Option 2 - Search the other uses of V
- unsigned NumUsesExplored = 0;
- for (auto U : V->users()) {
- // Avoid massive lists
- if (NumUsesExplored >= DomConditionsMaxUses)
- break;
- NumUsesExplored++;
- // Consider only compare instructions uniquely controlling a branch
- ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
- if (!Cmp)
- continue;
-
- if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
- continue;
-
- for (auto *CmpU : Cmp->users()) {
- BranchInst *BI = dyn_cast<BranchInst>(CmpU);
- if (!BI || BI->isUnconditional())
- continue;
- // We're looking for conditions that are guaranteed to hold at the
- // context instruction. Finding a condition where one path dominates
- // the context isn't enough because both the true and false cases could
- // merge before the context instruction we're actually interested in.
- // Instead, we need to ensure that the taken *edge* dominates the context
- // instruction.
- BasicBlock *BB0 = BI->getSuccessor(0);
- BasicBlockEdge Edge(BI->getParent(), BB0);
- if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
- continue;
-
- computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, Depth, Q);
- }
- }
-}
-
static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
APInt &KnownOne, unsigned Depth,
const Query &Q) {
KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
}
- // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition
- // strictly refines KnownZero and KnownOne. Therefore, we run them after
- // computeKnownBitsFromOperator.
+ // computeKnownBitsFromAssume strictly refines KnownZero and
+ // KnownOne. Therefore, we run it after computeKnownBitsFromOperator.
// Check whether a nearby assume intrinsic can determine some known bits.
computeKnownBitsFromAssume(V, KnownZero, KnownOne, Depth, Q);
- // Check whether there's a dominating condition which implies something about
- // this value at the given context.
- if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
- computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, Depth, Q);
-
assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
}
+++ /dev/null
-; RUN: opt -S %s -value-tracking-dom-conditions -licm -load-combine | FileCheck %s
-; In pr24866.ll, we saw a crash when accessing a nullptr returned when
-; asking for a dominator tree Node. This reproducer is really fragile,
-; but it's currently the best we have.
-
-%struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183 = type { [256 x i32], [256 x i8] }
-
-
-; Function Attrs: nounwind uwtable
-define void @encode_one_blockX2(%struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* nocapture readonly %actbl) #0 {
-; CHECK-LABEL: @encode_one_blockX2
-entry:
- br i1 false, label %L_KLOOP_01, label %L_KLOOP.preheader
-
-L_KLOOP_01: ; preds = %while.end, %entry
- br label %L_KLOOP.preheader
-
-L_KLOOP_08: ; preds = %while.end
- br label %L_KLOOP.preheader
-
-L_KLOOP.preheader: ; preds = %L_KLOOP_08, %L_KLOOP_01, %entry
- %r.2.ph = phi i32 [ undef, %L_KLOOP_08 ], [ 0, %entry ], [ undef, %L_KLOOP_01 ]
- br label %L_KLOOP
-
-L_KLOOP: ; preds = %while.end, %L_KLOOP.preheader
- %r.2 = phi i32 [ 0, %while.end ], [ %r.2.ph, %L_KLOOP.preheader ]
- br i1 true, label %while.body, label %while.end
-
-while.body: ; preds = %while.body, %L_KLOOP
- br label %while.body
-
-while.end: ; preds = %L_KLOOP
- %shl105 = shl i32 %r.2, 4
- %add106 = add nsw i32 %shl105, undef
- %idxprom107 = sext i32 %add106 to i64
- %arrayidx108 = getelementptr inbounds %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183, %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* %actbl, i64 0, i32 0, i64 %idxprom107
- %0 = load i32, i32* %arrayidx108, align 4
- %arrayidx110 = getelementptr inbounds %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183, %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* %actbl, i64 0, i32 1, i64 %idxprom107
- %1 = load i8, i8* %arrayidx110, align 1
- indirectbr i8* undef, [label %L_KLOOP_DONE, label %L_KLOOP_01, label %L_KLOOP_08, label %L_KLOOP]
-
-L_KLOOP_DONE: ; preds = %while.end
- ret void
-}
+++ /dev/null
-; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-define i1 @test_cmp_ult(i64 %A) {
-; CHECK-LABEL: @test_cmp_ult
-entry:
- %cmp = icmp ult i64 %A, 64
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i1 false
- %cmp2 = icmp ugt i64 %A, 64
- ret i1 %cmp2
-untaken:
- ret i1 true
-}
-
-define i1 @test_cmp_ule(i64 %A) {
-; CHECK-LABEL: @test_cmp_ule
-entry:
- %cmp = icmp ule i64 %A, 64
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i1 false
- %cmp2 = icmp ugt i64 %A, 128
- ret i1 %cmp2
-untaken:
- ret i1 true
-}
-
-define i1 @test_cmp_sgt(i32 %A) {
-; CHECK-LABEL: @test_cmp_sgt
-entry:
- %cmp = icmp sgt i32 %A, 10
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i1 true
- %cmp2 = icmp sgt i32 %A, -1
- ret i1 %cmp2
-untaken:
- ret i1 true
-}
-
-define i64 @test_add_zero_bits(i64 %A) {
-; CHECK-LABEL: @test_add_zero_bits
-entry:
- %cmp = icmp eq i64 %A, 2
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i64 3
- %add = add i64 %A, 1
- ret i64 %add
-untaken:
- ret i64 %A
-}
-
-define i64 @test_add_nsw(i64 %A) {
-; CHECK-LABEL: @test_add_nsw
-entry:
- %cmp = icmp ult i64 %A, 20
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: %add = add nuw nsw i64 %A, 1
-; CHECK-NEXT: ret i64 %add
- %add = add i64 %A, 1
- ret i64 %add
-untaken:
- ret i64 %A
-}
-
-; After sinking the instructions into the if block, check that we
-; can simplify some of them using dominating conditions.
-define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp {
-; CHECK-LABEL: @test_add_zero_bits_sink(
-; CHECK-NOT: sdiv i32
-entry:
- %a = add nsw i32 %x, 16
- %b = sdiv i32 %a, %x
- %cmp = icmp ult i32 %x, 7
- br i1 %cmp, label %bb1, label %bb2
-
-bb1:
-; CHECK-LABEL: bb1:
-; CHECK-NEXT: or i32 %x, 16
-; CHECK-NEXT: udiv i32
- ret i32 %b
-
-bb2:
- ret i32 %x
-}
-
-; A condition in the same block gives no information
-define i32 @test_neg1(i32 %x) nounwind ssp {
-; CHECK-LABEL: @test_neg1
-; CHECK: add
-; CHECK: sdiv
-; CHECK: icmp
-; CHECK: select
-entry:
- %a = add nsw i32 %x, 16
- %b = sdiv i32 %a, %x
- %cmp = icmp ult i32 %x, 7
- %ret = select i1 %cmp, i32 %a, i32 %b
- ret i32 %ret
-}
-
-; A non-dominating edge gives no information
-define i32 @test_neg2(i32 %x) {
-; CHECK-LABEL: @test_neg2
-entry:
- %cmp = icmp ult i32 %x, 7
- br i1 %cmp, label %bb1, label %merge
-
-bb1:
- br label %merge
-
-merge:
-; CHECK-LABEL: merge:
-; CHECK: icmp
-; CHECK: select
- %cmp2 = icmp ult i32 %x, 7
- %ret = select i1 %cmp2, i32 %x, i32 0
- ret i32 %ret
-}
-
-; A unconditional branch expressed as a condition one gives no
-; information (and shouldn't trip any asserts.)
-define i32 @test_neg3(i32 %x) {
-; CHECK-LABEL: @test_neg3
-entry:
- %cmp = icmp ult i32 %x, 7
- br i1 %cmp, label %merge, label %merge
-merge:
-; CHECK-LABEL: merge:
-; CHECK: icmp
-; CHECK: select
- %cmp2 = icmp ult i32 %x, 7
- %ret = select i1 %cmp2, i32 %x, i32 0
- ret i32 %ret
-}
-
-declare i32 @bar()