From 3bc09c7da50a7bb2df51b1829f837f017da5ee34 Mon Sep 17 00:00:00 2001
From: Philip Reames
Date: Thu, 14 Jul 2022 08:50:44 -0700
Subject: [PATCH] [SCEVExpander] Allow udiv with isKnownNonZero(RHS) + add
 vscale case

The motivation here is to unblock LSR's ability to use ICmpZero uses - the
major effect of which is to enable count-down IVs. The test changes reflect
this goal, but the potential impact is much broader since this isn't a change
in LSR at all.

SCEVExpander needs(*) to prove that expanding the expression is safe anywhere
the SCEV expression is valid. In general, we can't expand any node which might
fault (or exhibit UB) unless we can either a) prove it won't fault, or b)
guard the faulting case. We'd been allowing only non-zero constants here; this
change extends that to any value provably non-zero.

vscale is never zero. This is already implemented in ValueTracking, and this
change just adds the same logic to SCEV's range computation (which in turn
drives isKnownNonZero). We should common up some of this logic, but let's do
that in separate changes.

(*) As an aside, "needs" is such an interesting word here. First, we don't
actually need to guard this at all; we could choose to emit a select for the
RHS of every udiv and remove this code entirely. Second, the property being
checked here is far too strong. What the client actually needs is to expand
the SCEV at some particular point in some particular loop. In the examples,
the original urem dominates that loop, and yet we completely ignore that
information when analyzing legality. I don't plan to actively pursue either
direction, just noting it for future reference.

Differential Revision: https://reviews.llvm.org/D129710
---
 llvm/lib/Analysis/ScalarEvolution.cpp                      |  7 +++++++
 llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp      |  7 ++-----
 llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll | 12 ++++++------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index f61806b..d04eb35 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -6651,6 +6651,13 @@ ScalarEvolution::getRangeRef(const SCEV *S,
       }
     }
 
+    // vscale can't be equal to zero
+    if (const auto *II = dyn_cast<IntrinsicInst>(U->getValue()))
+      if (II->getIntrinsicID() == Intrinsic::vscale) {
+        ConstantRange Disallowed = APInt::getZero(BitWidth);
+        ConservativeResult = ConservativeResult.difference(Disallowed);
+      }
+
     return setRange(U, SignHint, std::move(ConservativeResult));
   }
 
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index 67ce982..8f53787 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -2572,9 +2572,7 @@ namespace {
 // only needed when the expression includes some subexpression that is not IV
 // derived.
 //
-// Currently, we only allow division by a nonzero constant here. If this is
-// inadequate, we could easily allow division by SCEVUnknown by using
-// ValueTracking to check isKnownNonZero().
+// Currently, we only allow division by a value provably non-zero here.
 //
 // We cannot generally expand recurrences unless the step dominates the loop
 // header. The expander handles the special case of affine recurrences by
@@ -2592,8 +2590,7 @@ struct SCEVFindUnsafe {
 
   bool follow(const SCEV *S) {
     if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
-      const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
-      if (!SC || SC->getValue()->isZero()) {
+      if (!SE.isKnownNonZero(D->getRHS())) {
         IsUnsafe = true;
         return false;
       }
diff --git a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
index 10994ac..58cc10c 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll
@@ -129,10 +129,10 @@ define void @icmp_zero_urem_nonzero(i64 %N, i64 %M, ptr %p) {
 ; CHECK-NEXT:    [[UREM:%.*]] = urem i64 [[N:%.*]], [[NONZERO]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    store i64 0, ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 2
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
 ; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
@@ -161,10 +161,10 @@ define void @icmp_zero_urem_vscale(i64 %N, ptr %p) {
 ; CHECK-NEXT:    [[UREM:%.*]] = urem i64 [[N:%.*]], [[VSCALE]]
 ; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
 ; CHECK:       vector.body:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[VECTOR_BODY]] ], [ [[UREM]], [[ENTRY:%.*]] ]
 ; CHECK-NEXT:    store i64 0, ptr [[P:%.*]], align 8
-; CHECK-NEXT:    [[IV_NEXT]] = add i64 [[IV]], 2
-; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[IV_NEXT]], [[UREM]]
+; CHECK-NEXT:    [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -2
+; CHECK-NEXT:    [[DONE:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
 ; CHECK-NEXT:    br i1 [[DONE]], label [[EXIT:%.*]], label [[VECTOR_BODY]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void
-- 
2.7.4
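
For illustration, below is a minimal sketch of the kind of input the vscale
test above exercises. The body is reconstructed from the test's CHECK lines,
so treat it as an approximation of the real @icmp_zero_urem_vscale in
icmp-zero.ll rather than a verbatim copy. SCEV models the urem as
N - (N /u vscale) * vscale, so the trip count contains a udiv whose divisor
is a SCEVUnknown; before this patch, SCEVFindUnsafe rejected that udiv
because the divisor was not a non-zero constant, so LSR could not use the
ICmpZero formula:

    ; approximate test input, reconstructed from the CHECK lines above
    define void @icmp_zero_urem_vscale(i64 %N, ptr %p) {
    entry:
      ; divisor is a SCEVUnknown, but the new range logic proves it non-zero
      %vscale = call i64 @llvm.vscale.i64()
      %urem = urem i64 %N, %vscale
      br label %vector.body

    vector.body:
      ; before LSR: IV counts up by 2 and compares against %urem
      %iv = phi i64 [ 0, %entry ], [ %iv.next, %vector.body ]
      store i64 0, ptr %p
      %iv.next = add i64 %iv, 2
      %done = icmp eq i64 %iv.next, %urem
      br i1 %done, label %exit, label %vector.body

    exit:
      ret void
    }

    declare i64 @llvm.vscale.i64()

With vscale's range now known to exclude zero, isKnownNonZero holds for the
divisor, SCEVFindUnsafe accepts the udiv, and LSR rewrites the exit test into
the count-down form checked above: the IV starts at %urem, steps by -2, and
compares against zero.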