From 4f772b095525059521f2f88112d29dcfaa178101 Mon Sep 17 00:00:00 2001
From: Nikita Popov <npopov@redhat.com>
Date: Wed, 11 Jan 2023 11:58:00 +0100
Subject: [PATCH] [LVI][CVP] Make use of condition known at use

When an instruction is only used in a select or phi operand, we might
be able to make use of additional information from the select/branch
condition. For example in

  %sub = call i16 @llvm.usub.sat.i16(i16 %x, i16 10)
  %cmp = icmp uge i16 %x, 10
  %sel = select i1 %cmp, i16 %sub, i16 42

the usub.sat is only used in a select where %x uge 10 is known to
hold, so we can fold it based on that knowledge.

This addresses the regression reported at
https://reviews.llvm.org/D140798#4039748, but also provides a
solution to a recurring problem we've had, where we fail to make
use of range information after a branch+phi has been converted
into a select. Our current solution to this is to hope that IPSCCP
can perform the fold before that happens, but handling this in LVI
is a somewhat more general solution.

Currently we only make use of this for the willNotOverflow() fold,
but I plan to adjust other folds to use the new API as well.

Differential Revision: https://reviews.llvm.org/D141482
---
 llvm/include/llvm/Analysis/LazyValueInfo.h         |  4 +++
 llvm/lib/Analysis/LazyValueInfo.cpp                | 37 ++++++++++++++++++++++
 .../Scalar/CorrelatedValuePropagation.cpp          |  4 +--
 .../CorrelatedValuePropagation/cond-at-use.ll      | 26 +++++++++------
 4 files changed, 59 insertions(+), 12 deletions(-)
diff --git a/llvm/include/llvm/Analysis/LazyValueInfo.h b/llvm/include/llvm/Analysis/LazyValueInfo.h
index 24c2bfc..b109b7f 100644
--- a/llvm/include/llvm/Analysis/LazyValueInfo.h
+++ b/llvm/include/llvm/Analysis/LazyValueInfo.h
@@ -95,6 +95,10 @@ public:
   ConstantRange getConstantRange(Value *V, Instruction *CxtI,
                                  bool UndefAllowed = true);
 
+  /// Return the ConstantRange constraint that is known to hold for the value
+  /// at a specific use-site.
+  ConstantRange getConstantRangeAtUse(const Use &U, bool UndefAllowed = true);
+
   /// Determine whether the specified value is known to be a
   /// constant on the specified edge.  Return null if not.
   Constant *getConstantOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB,
diff --git a/llvm/lib/Analysis/LazyValueInfo.cpp b/llvm/lib/Analysis/LazyValueInfo.cpp
index 1832c84..d49e18a 100644
--- a/llvm/lib/Analysis/LazyValueInfo.cpp
+++ b/llvm/lib/Analysis/LazyValueInfo.cpp
@@ -1650,6 +1650,43 @@ ConstantRange LazyValueInfo::getConstantRange(Value *V, Instruction *CxtI,
   return ConstantRange::getFull(Width);
 }
 
+ConstantRange LazyValueInfo::getConstantRangeAtUse(const Use &U,
+                                                   bool UndefAllowed) {
+  Value *V = U.get();
+  ConstantRange CR =
+      getConstantRange(V, cast<Instruction>(U.getUser()), UndefAllowed);
+
+  // Check whether the only (possibly transitive) use of the value is in a
+  // position where V can be constrained by a select or branch condition.
+  const Use *CurrU = &U;
+  // TODO: Increase limit?
+  const unsigned MaxUsesToInspect = 2;
+  for (unsigned I = 0; I < MaxUsesToInspect; ++I) {
+    std::optional<ValueLatticeElement> CondVal;
+    auto *CurrI = cast<Instruction>(CurrU->getUser());
+    if (auto *SI = dyn_cast<SelectInst>(CurrI)) {
+      if (CurrU->getOperandNo() == 1)
+        CondVal = getValueFromCondition(V, SI->getCondition(), true);
+      else if (CurrU->getOperandNo() == 2)
+        CondVal = getValueFromCondition(V, SI->getCondition(), false);
+    } else if (auto *PHI = dyn_cast<PHINode>(CurrI)) {
+      // TODO: Use non-local query?
+      CondVal =
+          getEdgeValueLocal(V, PHI->getIncomingBlock(*CurrU), PHI->getParent());
+    }
+    if (CondVal && CondVal->isConstantRange())
+      CR = CR.intersectWith(CondVal->getConstantRange());
+
+    // Only follow one-use chain, to allow direct intersection of conditions.
+    // If there are multiple uses, we would have to intersect with the union of
+    // all conditions at different uses.
+    if (!CurrI->hasOneUse())
+      break;
+    CurrU = &*CurrI->use_begin();
+  }
+  return CR;
+}
+
 /// Determine whether the specified value is known to be a
 /// constant on the specified edge. Return null if not.
 Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
diff --git a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index c50ba30..258db62 100644
--- a/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/llvm/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -438,8 +438,8 @@ static bool processSwitch(SwitchInst *I, LazyValueInfo *LVI,
 
 // See if we can prove that the given binary op intrinsic will not overflow.
 static bool willNotOverflow(BinaryOpIntrinsic *BO, LazyValueInfo *LVI) {
-  ConstantRange LRange = LVI->getConstantRange(BO->getLHS(), BO);
-  ConstantRange RRange = LVI->getConstantRange(BO->getRHS(), BO);
+  ConstantRange LRange = LVI->getConstantRangeAtUse(BO->getOperandUse(0));
+  ConstantRange RRange = LVI->getConstantRangeAtUse(BO->getOperandUse(1));
   ConstantRange NWRegion = ConstantRange::makeGuaranteedNoWrapRegion(
       BO->getBinaryOp(), RRange, BO->getNoWrapKind());
   return NWRegion.contains(LRange);
diff --git a/llvm/test/Transforms/CorrelatedValuePropagation/cond-at-use.ll b/llvm/test/Transforms/CorrelatedValuePropagation/cond-at-use.ll
index 370f41f..2dc736e 100644
--- a/llvm/test/Transforms/CorrelatedValuePropagation/cond-at-use.ll
+++ b/llvm/test/Transforms/CorrelatedValuePropagation/cond-at-use.ll
@@ -7,9 +7,9 @@ declare {i16, i1} @llvm.usub.with.overflow.i16(i16, i16)
 
 define i16 @sel_true_cond(i16 %x) {
 ; CHECK-LABEL: @sel_true_cond(
-; CHECK-NEXT:    [[SUB:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[X:%.*]], i16 10)
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nuw i16 [[X:%.*]], 10
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i16 [[X]], 10
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[SUB]], i16 42
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 [[SUB1]], i16 42
 ; CHECK-NEXT:    ret i16 [[SEL]]
 ;
   %sub = call i16 @llvm.usub.sat.i16(i16 %x, i16 10)
@@ -72,6 +72,8 @@ define i16 @sel_true_cond_extra_use(i16 %x) {
   ret i16 %sel
 }
 
+; TODO: We could handle this case by raising the limit on the number of
+; instructions we look through.
 define i16 @sel_true_cond_longer_chain(i16 %x) {
 ; CHECK-LABEL: @sel_true_cond_longer_chain(
 ; CHECK-NEXT:    [[SUB:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[X:%.*]], i16 10)
@@ -89,9 +91,9 @@ define i16 @sel_true_cond_longer_chain(i16 %x) {
 
 define i16 @sel_false_cond(i16 %x) {
 ; CHECK-LABEL: @sel_false_cond(
-; CHECK-NEXT:    [[SUB:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[X:%.*]], i16 10)
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nuw i16 [[X:%.*]], 10
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i16 [[X]], 10
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 42, i16 [[SUB]]
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[CMP]], i16 42, i16 [[SUB1]]
 ; CHECK-NEXT:    ret i16 [[SEL]]
 ;
   %sub = call i16 @llvm.usub.sat.i16(i16 %x, i16 10)
@@ -116,13 +118,13 @@ define i16 @sel_false_cond_insufficient(i16 %x) {
 define i16 @phi_true_cond(i16 %x) {
 ; CHECK-LABEL: @phi_true_cond(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SUB:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[X:%.*]], i16 10)
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nuw i16 [[X:%.*]], 10
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp uge i16 [[X]], 10
 ; CHECK-NEXT:    br i1 [[CMP]], label [[JOIN:%.*]], label [[SPLIT:%.*]]
 ; CHECK:       split:
 ; CHECK-NEXT:    br label [[JOIN]]
 ; CHECK:       join:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i16 [ [[SUB]], [[ENTRY:%.*]] ], [ 42, [[SPLIT]] ]
+; CHECK-NEXT:    [[PHI:%.*]] = phi i16 [ [[SUB1]], [[ENTRY:%.*]] ], [ 42, [[SPLIT]] ]
 ; CHECK-NEXT:    ret i16 [[PHI]]
 ;
 entry:
@@ -163,6 +165,8 @@ join:
   ret i16 %phi
 }
 
+; TODO: We could handle this by using conditions that are not directly on the
+; phi edge.
 define i16 @phi_true_cond_non_local(i16 %x) {
 ; CHECK-LABEL: @phi_true_cond_non_local(
 ; CHECK-NEXT:  entry:
@@ -191,13 +195,13 @@ join:
 define i16 @phi_false_cond(i16 %x) {
 ; CHECK-LABEL: @phi_false_cond(
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SUB:%.*]] = call i16 @llvm.usub.sat.i16(i16 [[X:%.*]], i16 10)
+; CHECK-NEXT:    [[SUB1:%.*]] = sub nuw i16 [[X:%.*]], 10
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp ult i16 [[X]], 10
 ; CHECK-NEXT:    br i1 [[CMP]], label [[SPLIT:%.*]], label [[JOIN:%.*]]
 ; CHECK:       split:
 ; CHECK-NEXT:    br label [[JOIN]]
 ; CHECK:       join:
-; CHECK-NEXT:    [[PHI:%.*]] = phi i16 [ [[SUB]], [[ENTRY:%.*]] ], [ 42, [[SPLIT]] ]
+; CHECK-NEXT:    [[PHI:%.*]] = phi i16 [ [[SUB1]], [[ENTRY:%.*]] ], [ 42, [[SPLIT]] ]
 ; CHECK-NEXT:    ret i16 [[PHI]]
 ;
 entry:
@@ -238,6 +242,8 @@ join:
   ret i16 %phi
 }
 
+; TODO: We could handle this by using conditions that are not directly on the
+; phi edge.
 define i16 @phi_false_cond_non_local(i16 %x) {
 ; CHECK-LABEL: @phi_false_cond_non_local(
 ; CHECK-NEXT:  entry:
@@ -268,10 +274,10 @@ define i16 @loop_cond() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
-; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ 1000, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ 1000, [[ENTRY:%.*]] ], [ [[IV_NEXT1:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[COUNT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[COUNT_NEXT:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i16 [[IV]], 0
-; CHECK-NEXT:    [[IV_NEXT]] = call i16 @llvm.usub.sat.i16(i16 [[IV]], i16 1)
+; CHECK-NEXT:    [[IV_NEXT1]] = sub nuw i16 [[IV]], 1
 ; CHECK-NEXT:    [[COUNT_NEXT]] = add i16 [[COUNT]], 1
 ; CHECK-NEXT:    br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
 ; CHECK:       exit:
-- 
2.7.4