From 146307eb52f1fa03cf04a81e218807173d6e08f9 Mon Sep 17 00:00:00 2001
From: Philip Reames <listmail@philipreames.com>
Date: Thu, 3 Mar 2016 19:44:06 +0000
Subject: [PATCH] [ValueTracking] Remove dead code from an old experiment

This experiment was originally about trying to use facts implied dominating conditions to infer more precise known bits.  While the compile time was found to be acceptable on several large code bases, we never found sufficiently profitable examples to justify turning on the code by default.  Given this, it's time to abandon the experiment.

Several folks have commented that they've found this useful for experimentation, but nothing has come of those experiments.  Given how easy the patch is to apply, there's no reason to leave the code in tree.

For anyone interested in further investigation in this area, I recommend finding the summary email I sent on one of the original review threads.  In particular, I now believe the use-list based approach is strictly worse than the dom-tree-walking approach.

llvm-svn: 262646
---
 llvm/lib/Analysis/ValueTracking.cpp                | 210 +--------------------
 llvm/test/Analysis/ValueTracking/dom-cond.ll       |  18 --
 llvm/test/Analysis/ValueTracking/pr24866.ll        |  44 -----
 llvm/test/Transforms/InstCombine/dom-conditions.ll | 152 ---------------
 .../NVPTX/value-tracking-domtree.ll                |  33 ----
 5 files changed, 2 insertions(+), 455 deletions(-)
 delete mode 100644 llvm/test/Analysis/ValueTracking/dom-cond.ll
 delete mode 100644 llvm/test/Analysis/ValueTracking/pr24866.ll
 delete mode 100644 llvm/test/Transforms/InstCombine/dom-conditions.ll
 delete mode 100644 llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index a6211ec..5c26bc8 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -45,25 +45,6 @@ using namespace llvm::PatternMatch;
 
 const unsigned MaxDepth = 6;
 
-/// Enable an experimental feature to leverage information about dominating
-/// conditions to compute known bits.  The individual options below control how
-/// hard we search.  The defaults are chosen to be fairly aggressive.  If you
-/// run into compile time problems when testing, scale them back and report
-/// your findings.
-static cl::opt<bool> EnableDomConditions("value-tracking-dom-conditions",
-                                         cl::Hidden, cl::init(false));
-
-// This is expensive, so we only do it for the top level query value.
-// (TODO: evaluate cost vs profit, consider higher thresholds)
-static cl::opt<unsigned> DomConditionsMaxDepth("dom-conditions-max-depth",
-                                               cl::Hidden, cl::init(1));
-
-/// How many dominating blocks should be scanned looking for dominating
-/// conditions?
-static cl::opt<unsigned> DomConditionsMaxDomBlocks("dom-conditions-dom-blocks",
-                                                   cl::Hidden,
-                                                   cl::init(20));
-
 // Controls the number of uses of the value searched for possible
 // dominating comparisons.
 static cl::opt<unsigned> DomConditionsMaxUses("dom-conditions-max-uses",
@@ -546,187 +527,6 @@ m_c_Xor(const LHS &L, const RHS &R) {
   return m_CombineOr(m_Xor(L, R), m_Xor(R, L));
 }
 
-/// Compute known bits in 'V' under the assumption that the condition 'Cmp' is
-/// true (at the context instruction.)  This is mostly a utility function for
-/// the prototype dominating conditions reasoning below.
-static void computeKnownBitsFromTrueCondition(Value *V, ICmpInst *Cmp,
-                                              APInt &KnownZero,
-                                              APInt &KnownOne,
-                                              unsigned Depth, const Query &Q) {
-  Value *LHS = Cmp->getOperand(0);
-  Value *RHS = Cmp->getOperand(1);
-  // TODO: We could potentially be more aggressive here.  This would be worth
-  // evaluating.  If we can, explore commoning this code with the assume
-  // handling logic.
-  if (LHS != V && RHS != V)
-    return;
-
-  const unsigned BitWidth = KnownZero.getBitWidth();
-
-  switch (Cmp->getPredicate()) {
-  default:
-    // We know nothing from this condition
-    break;
-  // TODO: implement unsigned bound from below (known one bits)
-  // TODO: common condition check implementations with assumes
-  // TODO: implement other patterns from assume (e.g. V & B == A)
-  case ICmpInst::ICMP_SGT:
-    if (LHS == V) {
-      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
-      computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
-      if (KnownOneTemp.isAllOnesValue() || KnownZeroTemp.isNegative()) {
-        // We know that the sign bit is zero.
-        KnownZero |= APInt::getSignBit(BitWidth);
-      }
-    }
-    break;
-  case ICmpInst::ICMP_EQ:
-    {
-      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
-      if (LHS == V)
-        computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
-      else if (RHS == V)
-        computeKnownBits(LHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
-      else
-        llvm_unreachable("missing use?");
-      KnownZero |= KnownZeroTemp;
-      KnownOne |= KnownOneTemp;
-    }
-    break;
-  case ICmpInst::ICMP_ULE:
-    if (LHS == V) {
-      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
-      computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
-      // The known zero bits carry over
-      unsigned SignBits = KnownZeroTemp.countLeadingOnes();
-      KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
-    }
-    break;
-  case ICmpInst::ICMP_ULT:
-    if (LHS == V) {
-      APInt KnownZeroTemp(BitWidth, 0), KnownOneTemp(BitWidth, 0);
-      computeKnownBits(RHS, KnownZeroTemp, KnownOneTemp, Depth + 1, Q);
-      // Whatever high bits in rhs are zero are known to be zero (if rhs is a
-      // power of 2, then one more).
-      unsigned SignBits = KnownZeroTemp.countLeadingOnes();
-      if (isKnownToBeAPowerOfTwo(RHS, false, Depth + 1, Query(Q, Cmp)))
-        SignBits++;
-      KnownZero |= APInt::getHighBitsSet(BitWidth, SignBits);
-    }
-    break;
-  };
-}
-
-/// Compute known bits in 'V' from conditions which are known to be true along
-/// all paths leading to the context instruction.  In particular, look for
-/// cases where one branch of an interesting condition dominates the context
-/// instruction.  This does not do general dataflow.
-/// NOTE: This code is EXPERIMENTAL and currently off by default.
-static void computeKnownBitsFromDominatingCondition(Value *V, APInt &KnownZero,
-                                                    APInt &KnownOne,
-                                                    unsigned Depth,
-                                                    const Query &Q) {
-  // Need both the dominator tree and the query location to do anything useful
-  if (!Q.DT || !Q.CxtI)
-    return;
-  Instruction *Cxt = const_cast<Instruction *>(Q.CxtI);
-  // The context instruction might be in a statically unreachable block.  If
-  // so, asking dominator queries may yield suprising results.  (e.g. the block
-  // may not have a dom tree node)
-  if (!Q.DT->isReachableFromEntry(Cxt->getParent()))
-    return;
-
-  // Avoid useless work
-  if (auto VI = dyn_cast<Instruction>(V))
-    if (VI->getParent() == Cxt->getParent())
-      return;
-
-  // Note: We currently implement two options.  It's not clear which of these
-  // will survive long term, we need data for that.
-  // Option 1 - Try walking the dominator tree looking for conditions which
-  // might apply.  This works well for local conditions (loop guards, etc..),
-  // but not as well for things far from the context instruction (presuming a
-  // low max blocks explored).  If we can set an high enough limit, this would
-  // be all we need.
-  // Option 2 - We restrict out search to those conditions which are uses of
-  // the value we're interested in.  This is independent of dom structure,
-  // but is slightly less powerful without looking through lots of use chains.
-  // It does handle conditions far from the context instruction (e.g. early
-  // function exits on entry) really well though.
-
-  // Option 1 - Search the dom tree
-  unsigned NumBlocksExplored = 0;
-  BasicBlock *Current = Cxt->getParent();
-  while (true) {
-    // Stop searching if we've gone too far up the chain
-    if (NumBlocksExplored >= DomConditionsMaxDomBlocks)
-      break;
-    NumBlocksExplored++;
-
-    if (!Q.DT->getNode(Current)->getIDom())
-      break;
-    Current = Q.DT->getNode(Current)->getIDom()->getBlock();
-    if (!Current)
-      // found function entry
-      break;
-
-    BranchInst *BI = dyn_cast<BranchInst>(Current->getTerminator());
-    if (!BI || BI->isUnconditional())
-      continue;
-    ICmpInst *Cmp = dyn_cast<ICmpInst>(BI->getCondition());
-    if (!Cmp)
-      continue;
-
-    // We're looking for conditions that are guaranteed to hold at the context
-    // instruction.  Finding a condition where one path dominates the context
-    // isn't enough because both the true and false cases could merge before
-    // the context instruction we're actually interested in.  Instead, we need
-    // to ensure that the taken *edge* dominates the context instruction.  We
-    // know that the edge must be reachable since we started from a reachable
-    // block.
-    BasicBlock *BB0 = BI->getSuccessor(0);
-    BasicBlockEdge Edge(BI->getParent(), BB0);
-    if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
-      continue;
-
-    computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, Depth, Q);
-  }
-
-  // Option 2 - Search the other uses of V
-  unsigned NumUsesExplored = 0;
-  for (auto U : V->users()) {
-    // Avoid massive lists
-    if (NumUsesExplored >= DomConditionsMaxUses)
-      break;
-    NumUsesExplored++;
-    // Consider only compare instructions uniquely controlling a branch
-    ICmpInst *Cmp = dyn_cast<ICmpInst>(U);
-    if (!Cmp)
-      continue;
-
-    if (DomConditionsSingleCmpUse && !Cmp->hasOneUse())
-      continue;
-
-    for (auto *CmpU : Cmp->users()) {
-      BranchInst *BI = dyn_cast<BranchInst>(CmpU);
-      if (!BI || BI->isUnconditional())
-        continue;
-      // We're looking for conditions that are guaranteed to hold at the
-      // context instruction.  Finding a condition where one path dominates
-      // the context isn't enough because both the true and false cases could
-      // merge before the context instruction we're actually interested in.
-      // Instead, we need to ensure that the taken *edge* dominates the context
-      // instruction. 
-      BasicBlock *BB0 = BI->getSuccessor(0);
-      BasicBlockEdge Edge(BI->getParent(), BB0);
-      if (!Edge.isSingleEdge() || !Q.DT->dominates(Edge, Q.CxtI->getParent()))
-        continue;
-
-      computeKnownBitsFromTrueCondition(V, Cmp, KnownZero, KnownOne, Depth, Q);
-    }
-  }
-}
-
 static void computeKnownBitsFromAssume(Value *V, APInt &KnownZero,
                                        APInt &KnownOne, unsigned Depth,
                                        const Query &Q) {
@@ -1657,18 +1457,12 @@ void computeKnownBits(Value *V, APInt &KnownZero, APInt &KnownOne,
       KnownZero |= APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align));
   }
 
-  // computeKnownBitsFromAssume and computeKnownBitsFromDominatingCondition
-  // strictly refines KnownZero and KnownOne. Therefore, we run them after
-  // computeKnownBitsFromOperator.
+  // computeKnownBitsFromAssume strictly refines KnownZero and
+  // KnownOne. Therefore, we run them after computeKnownBitsFromOperator.
 
   // Check whether a nearby assume intrinsic can determine some known bits.
   computeKnownBitsFromAssume(V, KnownZero, KnownOne, Depth, Q);
 
-  // Check whether there's a dominating condition which implies something about
-  // this value at the given context.
-  if (EnableDomConditions && Depth <= DomConditionsMaxDepth)
-    computeKnownBitsFromDominatingCondition(V, KnownZero, KnownOne, Depth, Q);
-
   assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?");
 }
 
diff --git a/llvm/test/Analysis/ValueTracking/dom-cond.ll b/llvm/test/Analysis/ValueTracking/dom-cond.ll
deleted file mode 100644
index c0cafdd..0000000
--- a/llvm/test/Analysis/ValueTracking/dom-cond.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -instcombine -value-tracking-dom-conditions -S | FileCheck %s
-
-define i32 @dom_cond(i32 %a, i32 %b) {
-; CHECK-LABEL: @dom_cond(
-entry:
-  %v = add i32 %a, %b
-  %cond = icmp ule i32 %v, 7
-  br i1 %cond, label %then, label %exit
-
-then:
-  %v2 = add i32 %v, 8
-; CHECK: or i32 %v, 8
-  br label %exit
-
-exit:
-  %v3 = phi i32 [ %v, %entry ], [ %v2, %then ]
-  ret i32 %v3
-}
diff --git a/llvm/test/Analysis/ValueTracking/pr24866.ll b/llvm/test/Analysis/ValueTracking/pr24866.ll
deleted file mode 100644
index b146b4a..0000000
--- a/llvm/test/Analysis/ValueTracking/pr24866.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: opt -S %s -value-tracking-dom-conditions -licm -load-combine | FileCheck %s
-; In pr24866.ll, we saw a crash when accessing a nullptr returned when
-; asking for a dominator tree Node.  This reproducer is really fragile,
-; but it's currently the best we have.
-
-%struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183 = type { [256 x i32], [256 x i8] }
-
-
-; Function Attrs: nounwind uwtable
-define void @encode_one_blockX2(%struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* nocapture readonly %actbl) #0 {
-; CHECK-LABEL: @encode_one_blockX2
-entry:
-  br i1 false, label %L_KLOOP_01, label %L_KLOOP.preheader
-
-L_KLOOP_01:                                       ; preds = %while.end, %entry
-  br label %L_KLOOP.preheader
-
-L_KLOOP_08:                                       ; preds = %while.end
-  br label %L_KLOOP.preheader
-
-L_KLOOP.preheader:                                ; preds = %L_KLOOP_08, %L_KLOOP_01, %entry
-  %r.2.ph = phi i32 [ undef, %L_KLOOP_08 ], [ 0, %entry ], [ undef, %L_KLOOP_01 ]
-  br label %L_KLOOP
-
-L_KLOOP:                                          ; preds = %while.end, %L_KLOOP.preheader
-  %r.2 = phi i32 [ 0, %while.end ], [ %r.2.ph, %L_KLOOP.preheader ]
-  br i1 true, label %while.body, label %while.end
-
-while.body:                                       ; preds = %while.body, %L_KLOOP
-  br label %while.body
-
-while.end:                                        ; preds = %L_KLOOP
-  %shl105 = shl i32 %r.2, 4
-  %add106 = add nsw i32 %shl105, undef
-  %idxprom107 = sext i32 %add106 to i64
-  %arrayidx108 = getelementptr inbounds %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183, %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* %actbl, i64 0, i32 0, i64 %idxprom107
-  %0 = load i32, i32* %arrayidx108, align 4
-  %arrayidx110 = getelementptr inbounds %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183, %struct.c_derived_tbl.2.5.8.11.14.17.23.38.59.80.92.98.104.107.155.183* %actbl, i64 0, i32 1, i64 %idxprom107
-  %1 = load i8, i8* %arrayidx110, align 1
-  indirectbr i8* undef, [label %L_KLOOP_DONE, label %L_KLOOP_01, label %L_KLOOP_08, label %L_KLOOP]
-
-L_KLOOP_DONE:                                     ; preds = %while.end
-  ret void
-}
diff --git a/llvm/test/Transforms/InstCombine/dom-conditions.ll b/llvm/test/Transforms/InstCombine/dom-conditions.ll
deleted file mode 100644
index 4264043..0000000
--- a/llvm/test/Transforms/InstCombine/dom-conditions.ll
+++ /dev/null
@@ -1,152 +0,0 @@
-; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-define i1 @test_cmp_ult(i64 %A) {
-; CHECK-LABEL: @test_cmp_ult
-entry:
-  %cmp = icmp ult i64 %A, 64
-  br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i1 false
-  %cmp2 = icmp ugt i64 %A, 64
-  ret i1 %cmp2
-untaken:
-  ret i1 true
-}
-
-define i1 @test_cmp_ule(i64 %A) {
-; CHECK-LABEL: @test_cmp_ule
-entry:
-  %cmp = icmp ule i64 %A, 64
-  br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i1 false
-  %cmp2 = icmp ugt i64 %A, 128
-  ret i1 %cmp2
-untaken:
-  ret i1 true
-}
-
-define i1 @test_cmp_sgt(i32 %A) {
-; CHECK-LABEL: @test_cmp_sgt
-entry:
-  %cmp = icmp sgt i32 %A, 10
-  br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i1 true
-  %cmp2 = icmp sgt i32 %A, -1
-  ret i1 %cmp2
-untaken:
-  ret i1 true
-}
-
-define i64 @test_add_zero_bits(i64 %A) {
-; CHECK-LABEL: @test_add_zero_bits
-entry:
-  %cmp = icmp eq i64 %A, 2
-  br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i64 3
-  %add = add i64 %A, 1
-  ret i64 %add
-untaken:
-  ret i64 %A
-}
-
-define i64 @test_add_nsw(i64 %A) {
-; CHECK-LABEL: @test_add_nsw
-entry:
-  %cmp = icmp ult i64 %A, 20
-  br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: %add = add nuw nsw i64 %A, 1
-; CHECK-NEXT: ret i64 %add
-  %add = add i64 %A, 1
-  ret i64 %add
-untaken:
-  ret i64 %A
-}
-
-; After sinking the instructions into the if block, check that we 
-; can simplify some of them using dominating conditions.
-define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp {
-; CHECK-LABEL: @test_add_zero_bits_sink(
-; CHECK-NOT: sdiv i32
-entry:
-  %a = add nsw i32 %x, 16
-  %b = sdiv i32 %a, %x
-  %cmp = icmp ult i32 %x, 7
-  br i1 %cmp, label %bb1, label %bb2
-
-bb1:
-; CHECK-LABEL: bb1:
-; CHECK-NEXT: or i32 %x, 16
-; CHECK-NEXT: udiv i32
-  ret i32 %b
-
-bb2:
-  ret i32 %x
-}
-
-; A condition in the same block gives no information
-define i32 @test_neg1(i32 %x) nounwind ssp {
-; CHECK-LABEL: @test_neg1
-; CHECK: add
-; CHECK: sdiv
-; CHECK: icmp
-; CHECK: select
-entry:
-  %a = add nsw i32 %x, 16
-  %b = sdiv i32 %a, %x
-  %cmp = icmp ult i32 %x, 7
-  %ret = select i1 %cmp, i32 %a, i32 %b
-  ret i32 %ret
-}
-
-; A non-dominating edge gives no information 
-define i32 @test_neg2(i32 %x) {
-; CHECK-LABEL: @test_neg2
-entry:
-  %cmp = icmp ult i32 %x, 7
-  br i1 %cmp, label %bb1, label %merge
-
-bb1:
-  br label %merge
-
-merge:
-; CHECK-LABEL: merge:
-; CHECK: icmp
-; CHECK: select
-  %cmp2 = icmp ult i32 %x, 7
-  %ret = select i1 %cmp2, i32 %x, i32 0
-  ret i32 %ret
-}
-
-; A unconditional branch expressed as a condition one gives no
-; information (and shouldn't trip any asserts.)
-define i32 @test_neg3(i32 %x) {
-; CHECK-LABEL: @test_neg3
-entry:
-  %cmp = icmp ult i32 %x, 7
-  br i1 %cmp, label %merge, label %merge
-merge:
-; CHECK-LABEL: merge:
-; CHECK: icmp
-; CHECK: select
-  %cmp2 = icmp ult i32 %x, 7
-  %ret = select i1 %cmp2, i32 %x, i32 0
-  ret i32 %ret
-}
-
-declare i32 @bar()
diff --git a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll b/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
deleted file mode 100644
index 601ca52..0000000
--- a/llvm/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -separate-const-offset-from-gep -value-tracking-dom-conditions -reassociate-geps-verify-no-dead-code -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "nvptx64-unknown-unknown"
-
-; if (i == 4)
-;   p = &input[i | 3];
-;
-; =>
-;
-; if (i == 4) {
-;   base = &input[i];
-;   p = &base[3];
-; }
-;
-; We should treat (i | 3) as (i + 3) because i is guaranteed to be 4, which
-; does not share any set bits with 3.
-define float* @guarded_or(float* %input, i64 %i) {
-; CHECK-LABEL: @guarded_or(
-entry:
-  %is4 = icmp eq i64 %i, 4
-  br i1 %is4, label %then, label %exit
-
-then:
-  %or = or i64 %i, 3
-  %p = getelementptr inbounds float, float* %input, i64 %or
-; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i
-; CHECK: getelementptr inbounds float, float* [[base]], i64 3
-  ret float* %p
-
-exit:
-  ret float* null
-}
-- 
2.7.4