From 77efb73c672762ce28cfff793db414ce05e3c46a Mon Sep 17 00:00:00 2001
From: Max Kazantsev <mkazantsev@azul.com>
Date: Fri, 13 Nov 2020 12:05:14 +0700
Subject: [PATCH] [IndVars] Replace checks with invariants if we cannot remove
 them

If we cannot prove that the check is trivially true, but can prove that it either
fails on the 1st iteration or never fails, we can replace it with first iteration check.

Differential Revision: https://reviews.llvm.org/D88527
Reviewed By: skatkov
---
 llvm/lib/Transforms/Scalar/IndVarSimplify.cpp      | 46 ++++++++++++++++------
 .../Transforms/IndVarSimplify/predicated_ranges.ll |  5 ++-
 2 files changed, 38 insertions(+), 13 deletions(-)
diff --git a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
index f0b5d79..2965349 100644
--- a/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/llvm/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -1292,16 +1292,21 @@ bool IndVarSimplify::sinkUnusedInvariants(Loop *L) {
 
 enum ExitCondAnalysisResult {
   CanBeRemoved,
+  CanBeReplacedWithInvariant,
   CannotOptimize
 };
 
 /// If the condition of BI is trivially true during at least first MaxIter
 /// iterations, return CanBeRemoved.
+/// If the condition is equivalent to loop-invariant condition expressed as
+/// 'InvariantLHS `InvariantPred` InvariantRHS', fill them into respective
+/// output parameters and return CanBeReplacedWithInvariant.
 /// Otherwise, return CannotOptimize.
-static ExitCondAnalysisResult analyzeCond(const Loop *L, BranchInst *BI,
-                                          ScalarEvolution *SE,
-                                          bool ProvingLoopExit,
-                                          const SCEV *MaxIter) {
+static ExitCondAnalysisResult
+analyzeCond(const Loop *L, BranchInst *BI, ScalarEvolution *SE,
+            bool ProvingLoopExit, const SCEV *MaxIter,
+            ICmpInst::Predicate &InvariantPred, const SCEV *&InvariantLHS,
+            const SCEV *&InvariantRHS) {
   ICmpInst::Predicate Pred;
   Value *LHS, *RHS;
   using namespace PatternMatch;
@@ -1330,9 +1335,6 @@ static ExitCondAnalysisResult analyzeCond(const Loop *L, BranchInst *BI,
   if (ProvingLoopExit)
     return CannotOptimize;
 
-  ICmpInst::Predicate InvariantPred;
-  const SCEV *InvariantLHS, *InvariantRHS;
-
   // Check if there is a loop-invariant predicate equivalent to our check.
   if (!SE->isLoopInvariantExitCondDuringFirstIterations(
            Pred, LHSS, RHSS, L, BI, MaxIter, InvariantPred, InvariantLHS,
@@ -1342,7 +1344,7 @@ static ExitCondAnalysisResult analyzeCond(const Loop *L, BranchInst *BI,
   // Can we prove it to be trivially true?
   if (SE->isKnownPredicateAt(InvariantPred, InvariantLHS, InvariantRHS, BI))
     return CanBeRemoved;
-  return CannotOptimize;
+  return CanBeReplacedWithInvariant;
 }
 
 bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
@@ -1420,6 +1422,19 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
     ReplaceExitCond(BI, NewCond);
   };
 
+  auto ReplaceWithInvariantCond = [&](
+      BasicBlock *ExitingBB, ICmpInst::Predicate InvariantPred,
+      const SCEV *InvariantLHS, const SCEV *InvariantRHS) {
+    BranchInst *BI = cast<BranchInst>(ExitingBB->getTerminator());
+    Rewriter.setInsertPoint(BI);
+    auto *LHSV = Rewriter.expandCodeFor(InvariantLHS);
+    auto *RHSV = Rewriter.expandCodeFor(InvariantRHS);
+    IRBuilder<> Builder(BI);
+    auto *NewCond = Builder.CreateICmp(InvariantPred, LHSV, RHSV,
+                                       BI->getCondition()->getName());
+    ReplaceExitCond(BI, NewCond);
+  };
+
   bool Changed = false;
   bool SkipLastIter = false;
   SmallSet<const SCEV*, 8> DominatingExitCounts;
@@ -1429,17 +1444,26 @@ bool IndVarSimplify::optimizeLoopExits(Loop *L, SCEVExpander &Rewriter) {
       // Okay, we do not know the exit count here. Can we at least prove that it
       // will remain the same within iteration space?
       auto *BI = cast<BranchInst>(ExitingBB->getTerminator());
-      auto OptimizeCond = [this, L, BI, ExitingBB, MaxExitCount, &FoldExit](
-          bool Inverted, bool SkipLastIter) {
+      auto OptimizeCond = [this, L, BI, ExitingBB, MaxExitCount, &FoldExit,
+                           &ReplaceWithInvariantCond](bool Inverted,
+                                                      bool SkipLastIter) {
         const SCEV *MaxIter = MaxExitCount;
         if (SkipLastIter) {
           const SCEV *One = SE->getOne(MaxIter->getType());
           MaxIter = SE->getMinusSCEV(MaxIter, One);
         }
-        switch (analyzeCond(L, BI, SE, Inverted, MaxIter)) {
+        ICmpInst::Predicate InvariantPred;
+        const SCEV *InvariantLHS, *InvariantRHS;
+        switch (analyzeCond(L, BI, SE, Inverted, MaxIter, InvariantPred,
+                            InvariantLHS, InvariantRHS)) {
         case CanBeRemoved:
           FoldExit(ExitingBB, Inverted);
           return true;
+        case CanBeReplacedWithInvariant: {
+          ReplaceWithInvariantCond(ExitingBB, InvariantPred, InvariantLHS,
+                                   InvariantRHS);
+          return true;
+        }
         case CannotOptimize:
           return false;
         }
diff --git a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
index a01f12d..a730f74 100644
--- a/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
+++ b/llvm/test/Transforms/IndVarSimplify/predicated_ranges.ll
@@ -470,6 +470,7 @@ define void @test_can_predicate_simple_unsigned(i32* %p, i32* %arr) {
 ; CHECK-LABEL: @test_can_predicate_simple_unsigned(
 ; CHECK-NEXT:  preheader:
 ; CHECK-NEXT:    [[LEN:%.*]] = load i32, i32* [[P:%.*]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LEN]], -1
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ [[LEN]], [[PREHEADER:%.*]] ], [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ]
@@ -477,8 +478,8 @@ define void @test_can_predicate_simple_unsigned(i32* %p, i32* %arr) {
 ; CHECK-NEXT:    br i1 [[ZERO_COND]], label [[EXIT:%.*]], label [[RANGE_CHECK_BLOCK:%.*]]
 ; CHECK:       range_check_block:
 ; CHECK-NEXT:    [[IV_NEXT]] = sub i32 [[IV]], 1
-; CHECK-NEXT:    [[RANGE_CHECK:%.*]] = icmp ult i32 [[IV_NEXT]], [[LEN]]
-; CHECK-NEXT:    br i1 [[RANGE_CHECK]], label [[BACKEDGE]], label [[FAIL:%.*]]
+; CHECK-NEXT:    [[RANGE_CHECK1:%.*]] = icmp ult i32 [[TMP0]], [[LEN]]
+; CHECK-NEXT:    br i1 [[RANGE_CHECK1]], label [[BACKEDGE]], label [[FAIL:%.*]]
 ; CHECK:       backedge:
 ; CHECK-NEXT:    [[EL_PTR:%.*]] = getelementptr i32, i32* [[P]], i32 [[IV]]
 ; CHECK-NEXT:    [[EL:%.*]] = load i32, i32* [[EL_PTR]], align 4
-- 
2.7.4