From d01828096f1660ae3641d2a345fe0b026acda2c9 Mon Sep 17 00:00:00 2001
From: Geoff Berry
Date: Thu, 11 Aug 2016 21:05:17 +0000
Subject: [PATCH] [SCEV] Update interface to handle SCEVExpander insert point
 motion.

Summary:
This is an extension of the fix in r271424. That fix dealt with builder
insert points being moved by SCEV expansion, but only for the lifetime
of the expand call. This change modifies the interface so that LSR can
safely call expand multiple times at the same insert point and do the
right thing if one of the expansions decides to move the original
insert point.

This is a fix for PR28719.

Reviewers: sanjoy

Subscribers: llvm-commits, mcrosier, mzolotukhin

Differential Revision: https://reviews.llvm.org/D23342

llvm-svn: 278413
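To make the new contract concrete, here is a minimal sketch of the calling
pattern this interface is meant to support. SCEVExpander::setInsertPoint and
the two-argument expandCodeFor are the members this patch adds/exposes; the
helper expandAll and its parameters are hypothetical stand-ins for caller
code such as LSRInstance::Expand below.

  // Sketch only; assumes the expressions, result type and tentative insert
  // point are supplied by the caller and that Rewriter was constructed with
  // an up-to-date ScalarEvolution.
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/SmallVector.h"
  #include "llvm/Analysis/ScalarEvolutionExpander.h"

  using namespace llvm;

  static void expandAll(SCEVExpander &Rewriter, ArrayRef<const SCEV *> Exprs,
                        Type *Ty, Instruction *InsertPt,
                        SmallVectorImpl<Value *> &Results) {
    // Seat the insert point once.  If expanding one expression moves the
    // builder (e.g. because InsertPt is not a block terminator), later
    // expandCodeFor calls keep using the updated position instead of being
    // re-anchored at the possibly stale InsertPt.
    Rewriter.setInsertPoint(InsertPt);
    for (const SCEV *S : Exprs)
      Results.push_back(Rewriter.expandCodeFor(S, Ty));
  }

Keeping the insertion point as expander state, rather than passing an
Instruction * to every call, is what lets a sequence of expansions tolerate
motion of the original insert point.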
---
 .../llvm/Analysis/ScalarEvolutionExpander.h        | 22 +++++++---
 llvm/lib/Analysis/ScalarEvolutionExpander.cpp      |  3 +-
 llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp  | 17 ++++----
 .../Transforms/LoopStrengthReduce/X86/pr28719.ll   | 47 ++++++++++++++++++++++
 4 files changed, 73 insertions(+), 16 deletions(-)
 create mode 100644 llvm/test/Transforms/LoopStrengthReduce/X86/pr28719.ll

diff --git a/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h b/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
index e2e2c02..337a11b 100644
--- a/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/llvm/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -197,6 +197,13 @@ namespace llvm {
     /// block.
     Value *expandCodeFor(const SCEV *SH, Type *Ty, Instruction *I);
 
+    /// \brief Insert code to directly compute the specified SCEV expression
+    /// into the program. The inserted code is inserted into the SCEVExpander's
+    /// current insertion point. If a type is specified, the result will be
+    /// expanded to have that type, with a cast if necessary.
+    Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
+
+
     /// \brief Generates a code sequence that evaluates this predicate.
     /// The inserted instructions will be at position \p Loc.
     /// The result will be of type i1 and will have a value of 0 when the
@@ -254,6 +261,15 @@ namespace llvm {
 
     void enableLSRMode() { LSRMode = true; }
 
+    /// \brief Set the current insertion point. This is useful if multiple calls
+    /// to expandCodeFor() are going to be made with the same insert point and
+    /// the insert point may be moved during one of the expansions (e.g. if the
+    /// insert point is not a block terminator).
+    void setInsertPoint(Instruction *IP) {
+      assert(IP);
+      Builder.SetInsertPoint(IP);
+    }
+
     /// \brief Clear the current insertion point. This is useful if the
     /// instruction that had been serving as the insertion point may have been
     /// deleted.
@@ -325,12 +341,6 @@ namespace llvm {
 
     Value *expand(const SCEV *S);
 
-    /// \brief Insert code to directly compute the specified SCEV expression
-    /// into the program. The inserted code is inserted into the SCEVExpander's
-    /// current insertion point. If a type is specified, the result will be
-    /// expanded to have that type, with a cast if necessary.
-    Value *expandCodeFor(const SCEV *SH, Type *Ty = nullptr);
-
     /// \brief Determine the most "relevant" loop for the given SCEV.
     const Loop *getRelevantLoop(const SCEV *);
 
diff --git a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
index 341a230..83c803d 100644
--- a/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -1610,8 +1610,7 @@ Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
 
 Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
                                    Instruction *IP) {
-  assert(IP);
-  Builder.SetInsertPoint(IP);
+  setInsertPoint(IP);
   return expandCodeFor(SH, Ty);
 }
 
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index c2a8f15..2542193 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4444,6 +4444,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   // Determine an input position which will be dominated by the operands and
   // which will dominate the result.
   IP = AdjustInsertPositionForExpand(IP, LF, LU, Rewriter);
+  Rewriter.setInsertPoint(&*IP);
 
   // Inform the Rewriter if we have a post-increment use, so that it can
   // perform an advantageous expansion.
@@ -4475,7 +4476,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
                                  LF.UserInst, LF.OperandValToReplace,
                                  Loops, SE, DT);
 
-    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr, &*IP)));
+    Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
   }
 
   // Expand the ScaledReg portion.
@@ -4493,14 +4494,14 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
       // Expand ScaleReg as if it was part of the base regs.
       if (F.Scale == 1)
         Ops.push_back(
-            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP)));
+            SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
       else {
         // An interesting way of "folding" with an icmp is to use a negated
         // scale, which we'll implement by inserting it into the other operand
         // of the icmp.
         assert(F.Scale == -1 &&
               "The only scale supported by ICmpZero uses is -1!");
-        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr, &*IP);
+        ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
       }
     } else {
       // Otherwise just expand the scaled register and an explicit scale,
@@ -4510,11 +4511,11 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
       // Unless the addressing mode will not be folded.
      if (!Ops.empty() && LU.Kind == LSRUse::Address &&
          isAMCompletelyFolded(TTI, LU, F)) {
-        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+        Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
        Ops.clear();
        Ops.push_back(SE.getUnknown(FullV));
      }
-      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr, &*IP));
+      ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
       if (F.Scale != 1)
         ScaledS = SE.getMulExpr(ScaledS,
                                 SE.getConstant(ScaledS->getType(), F.Scale));
@@ -4526,7 +4527,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   if (F.BaseGV) {
     // Flush the operand list to suppress SCEVExpander hoisting.
     if (!Ops.empty()) {
-      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
       Ops.clear();
       Ops.push_back(SE.getUnknown(FullV));
     }
@@ -4536,7 +4537,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   // Flush the operand list to suppress SCEVExpander hoisting of both folded and
   // unfolded offsets. LSR assumes they both live next to their uses.
  if (!Ops.empty()) {
-    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, &*IP);
+    Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
     Ops.clear();
     Ops.push_back(SE.getUnknown(FullV));
   }
@@ -4572,7 +4573,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
   const SCEV *FullS = Ops.empty() ?
                       SE.getConstant(IntTy, 0) :
                       SE.getAddExpr(Ops);
-  Value *FullV = Rewriter.expandCodeFor(FullS, Ty, &*IP);
+  Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
 
   // We're done expanding now, so reset the rewriter.
   Rewriter.clearPostInc();
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr28719.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr28719.ll
new file mode 100644
index 0000000..0e74ff2
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr28719.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global i32 0, align 4
+@b = global i8 0, align 1
+@c = global [4 x i8] zeroinitializer, align 1
+
+; Just make sure we don't generate code with uses not dominated by defs.
+; CHECK-LABEL: @main(
+define i32 @main() {
+entry:
+  %a0 = load i32, i32* @a, align 4
+  %cmpa = icmp slt i32 %a0, 4
+  br i1 %cmpa, label %preheader, label %for.end
+
+preheader:
+  %b0 = load i8, i8* @b, align 1
+  %b0sext = sext i8 %b0 to i64
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %preheader ], [ %iv.next, %lor.false ]
+  %mul = mul nsw i64 %b0sext, %iv
+  %multrunc = trunc i64 %mul to i32
+  %cmp = icmp eq i32 %multrunc, 0
+  br i1 %cmp, label %lor.false, label %if.then
+
+lor.false:
+  %cgep = getelementptr inbounds [4 x i8], [4 x i8]* @c, i64 0, i64 %iv
+  %ci = load i8, i8* %cgep, align 1
+  %cisext = sext i8 %ci to i32
+  %ivtrunc = trunc i64 %iv to i32
+  %cmp2 = icmp eq i32 %cisext, %ivtrunc
+  %iv.next = add i64 %iv, 1
+  br i1 %cmp2, label %for.body, label %if.then
+
+if.then:
+  tail call void @abort()
+  unreachable
+
+for.end:
+  ret i32 0
+}
+
+declare void @abort()
-- 
2.7.4
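As a reading aid for the LoopStrengthReduce.cpp hunks, the before/after
calling pattern can be sketched as follows; Rewriter, Ty and IP mirror the
locals of LSRInstance::Expand, while S0 and S1 are hypothetical SCEV
expressions standing in for two registers being expanded at the same
position.

  // Old pattern (the "-" lines above): every call re-seats the builder at IP.
  // If expanding S0 moves the original insert point, the code emitted for S1
  // can contain uses that are not dominated by their defs, which is what
  // pr28719.ll checks against.
  Value *V0 = Rewriter.expandCodeFor(S0, Ty, &*IP);
  Value *V1 = Rewriter.expandCodeFor(S1, Ty, &*IP);

  // New pattern (the "+" lines above): the insert point is expander state,
  // so expanding S1 continues from wherever expanding S0 left the builder.
  Rewriter.setInsertPoint(&*IP);
  Value *W0 = Rewriter.expandCodeFor(S0, Ty);
  Value *W1 = Rewriter.expandCodeFor(S1, Ty);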