/// specified loop.
bool isLoopInvariant(const SCEV *S, const Loop *L);
+ /// Determine if the SCEV can be evaluated at loop's entry. It is true if it
+ /// doesn't depend on a SCEVUnknown of an instruction which is dominated by
+ /// the header of loop L.
+ bool isAvailableAtLoopEntry(const SCEV *S, const Loop *L, DominatorTree &DT,
+ LoopInfo &LI);
+
/// Return true if the given SCEV changes value in a known way in the
/// specified loop. This property being true implies that the value is
/// variant in the loop AND that we can emit an expression to compute the
return Flags;
}
+bool ScalarEvolution::isAvailableAtLoopEntry(const SCEV *S, const Loop *L,
+ DominatorTree &DT, LoopInfo &LI) {
+ // A SCEV that is not invariant in L certainly cannot be evaluated at L's
+ // entry.
+ if (!isLoopInvariant(S, L))
+ return false;
+ // If a value depends on a SCEVUnknown which is defined after the loop, we
+ // conservatively assume that we cannot calculate it at the loop's entry.
+ struct FindDominatedSCEVUnknown {
+ bool Found = false;
+ const Loop *L;
+ DominatorTree &DT;
+ // NOTE(review): LI is captured but never read in this visitor; it looks
+ // like a removal candidate — confirm against the call sites first.
+ LoopInfo &LI;
+
+ FindDominatedSCEVUnknown(const Loop *L, DominatorTree &DT, LoopInfo &LI)
+ : L(L), DT(DT), LI(LI) {}
+
+ // Set Found if SU wraps an instruction whose block is dominated by L's
+ // header (i.e. it is defined inside or after the loop). Always returns
+ // false: a SCEVUnknown is a leaf with no operands to descend into.
+ bool checkSCEVUnknown(const SCEVUnknown *SU) {
+ if (auto *I = dyn_cast<Instruction>(SU->getValue())) {
+ if (DT.dominates(L->getHeader(), I->getParent()))
+ Found = true;
+ else
+ assert(DT.dominates(I->getParent(), L->getHeader()) &&
+ "No dominance relationship between SCEV and loop?");
+ }
+ return false;
+ }
+
+ // SCEVTraversal callback: return true to visit S's operands as well.
+ bool follow(const SCEV *S) {
+ switch (static_cast<SCEVTypes>(S->getSCEVType())) {
+ case scConstant:
+ return false;
+ case scAddRecExpr:
+ case scTruncate:
+ case scZeroExtend:
+ case scSignExtend:
+ case scAddExpr:
+ case scMulExpr:
+ case scUMaxExpr:
+ case scSMaxExpr:
+ case scUDivExpr:
+ return true;
+ case scUnknown:
+ return checkSCEVUnknown(cast<SCEVUnknown>(S));
+ case scCouldNotCompute:
+ llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
+ }
+ return false;
+ }
+
+ // Stop traversing as soon as one dominated SCEVUnknown has been found.
+ bool isDone() { return Found; }
+ };
+
+ FindDominatedSCEVUnknown FSU(L, DT, LI);
+ SCEVTraversal<FindDominatedSCEVUnknown> ST(FSU);
+ ST.visitAll(S);
+ return !FSU.Found;
+}
+
/// Get a canonical add expression, or something simpler if possible.
const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
SCEV::NoWrapFlags Flags,
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ops[Idx]);
const Loop *AddRecLoop = AddRec->getLoop();
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
- if (isLoopInvariant(Ops[i], AddRecLoop)) {
+ if (isAvailableAtLoopEntry(Ops[i], AddRecLoop, DT, LI)) {
LIOps.push_back(Ops[i]);
Ops.erase(Ops.begin()+i);
--i; --e;
ret i64 %r
}
+; PR15470: LSR miscompile. The test1 function should return '1'.
+; It is valid to fold SCEVUnknown into the recurrence because it
+; was defined before the loop.
+;
+; SCEV does not know how to denormalize chained recurrences, so make
+; sure they aren't marked as post-inc users.
+;
+; CHECK-LABEL: IV Users for loop %test1.loop
+; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us))<nuw><nsw>,+,33554432}<%test1.loop> (post-inc with loop %test1.loop) in %f = ashr i32 %sext.us, 24
+define i32 @test1(i1 %cond) {
+entry:
+; %sub.us is always 0 (both select arms are 0), but SCEV treats the select
+; as an opaque SCEVUnknown. It is defined before the loop, so folding it
+; into the recurrence's start value is legal here.
+ %sub.us = select i1 %cond, i32 0, i32 0
+ br label %test1.loop
+
+test1.loop:
+ %inc1115.us = phi i32 [ 0, %entry ], [ %inc11.us, %test1.loop ]
+ %inc11.us = add nsw i32 %inc1115.us, 1
+ %cmp.us = icmp slt i32 %inc11.us, 2
+ br i1 %cmp.us, label %test1.loop, label %for.end
+
+for.end:
+; Users of the IV outside the loop; the final expression %f should fold to 1.
+ %tobool.us = icmp eq i32 %inc1115.us, 0
+ %mul.us = shl i32 %inc1115.us, 24
+ %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
+ %sext.us = mul i32 %mul.us, %sub.cond.us
+ %f = ashr i32 %sext.us, 24
+ br label %exit
+
+exit:
+ ret i32 %f
+}
+
; PR15470: LSR miscompile. The test2 function should return '1'.
+; It is illegal to fold SCEVUnknown (sext.us) into the recurrence
+; because it is defined after the loop where this recurrence belongs.
;
; SCEV does not know how to denormalize chained recurrences, so make
; sure they aren't marked as post-inc users.
;
; CHECK-LABEL: IV Users for loop %test2.loop
-; CHECK-NO-LCSSA: %sext.us = {0,+,(16777216 + (-16777216 * %sub.us))<nuw><nsw>,+,33554432}<%test2.loop> (post-inc with loop %test2.loop) in %f = ashr i32 %sext.us, 24
+; CHECK-NO-LCSSA: %sub.cond.us = ((-1 * %sub.us)<nsw> + {0,+,1}<nuw><nsw><%test2.loop>) (post-inc with loop %test2.loop) in %sext.us = mul i32 %mul.us, %sub.cond.us
define i32 @test2() {
entry:
br label %test2.loop
; Mix of previous use cases that demonstrates %s3 can be incorrectly treated as
; a recurrence of loop1 because of operands order if we pick recurrencies in an
-; incorrect order.
+; incorrect order. It also shows that we cannot safely fold v1 (a SCEVUnknown)
+; because we cannot prove that it does not use the Phis of loop 2.
define void @test_03(i32 %a, i32 %b, i32 %c, i32* %p) {
; CHECK: %v1 = load i32, i32* %p
; CHECK-NEXT: --> %v1
; CHECK: %s1 = add i32 %phi1, %v1
-; CHECK-NEXT: --> {(%a + %v1),+,1}<%loop1>
+; CHECK-NEXT: --> ({%a,+,1}<%loop1> + %v1)
; CHECK: %s2 = add i32 %s1, %b
-; CHECK-NEXT: --> {(%a + %b + %v1),+,1}<%loop1>
+; CHECK-NEXT: --> ({(%a + %b),+,1}<%loop1> + %v1)
; CHECK: %s3 = add i32 %s2, %phi2
; CHECK-NEXT: --> ({{{{}}((2 * %a) + %b),+,1}<%loop1>,+,2}<%loop2> + %v1)
%s6 = add i32 %phi3, %phi2
ret void
}
+
+; Make sure that a complicated Phi does not get folded with rec's start value
+; of a loop which is above.
+define void @test_08() {
+
+; CHECK-LABEL: Classifying expressions for: @test_08
+; CHECK: %tmp11 = add i64 %iv.2.2, %iv.2.1
+; CHECK-NEXT: --> ({0,+,-1}<nsw><%loop_2> + %iv.2.1)
+; CHECK: %tmp12 = trunc i64 %tmp11 to i32
+; CHECK-NEXT: --> (trunc i64 ({0,+,-1}<nsw><%loop_2> + %iv.2.1) to i32)
+; CHECK: %tmp14 = mul i32 %tmp12, %tmp7
+; CHECK-NEXT: --> ((trunc i64 ({0,+,-1}<nsw><%loop_2> + %iv.2.1) to i32) * {-1,+,-1}<%loop_1>)
+; CHECK: %tmp16 = mul i64 %iv.2.1, %iv.1.1
+; CHECK-NEXT: --> ({2,+,1}<nuw><nsw><%loop_1> * %iv.2.1)
+
+entry:
+ br label %loop_1
+
+loop_1:
+ %iv.1.1 = phi i64 [ 2, %entry ], [ %iv.1.1.next, %loop_1_back_branch ]
+ %iv.1.2 = phi i32 [ -1, %entry ], [ %iv.1.2.next, %loop_1_back_branch ]
+ br label %loop_1_exit
+
+dead:
+; NOTE(review): this block is unreachable; presumably it exists so that
+; loop_1_exit has multiple predecessors — confirm intent before removing.
+ br label %loop_1_exit
+
+loop_1_exit:
+ %tmp5 = icmp sgt i64 %iv.1.1, 2
+ br i1 %tmp5, label %loop_2_preheader, label %loop_1_back_branch
+
+loop_1_back_branch:
+ %iv.1.1.next = add nuw nsw i64 %iv.1.1, 1
+ %iv.1.2.next = add nsw i32 %iv.1.2, 1
+ br label %loop_1
+
+loop_2_preheader:
+; %tmp7 is defined after loop_1 but before loop_2, so it may not be folded
+; into the start of a loop_2 recurrence computed relative to loop_1.
+ %tmp6 = sub i64 1, %iv.1.1
+ %tmp7 = trunc i64 %tmp6 to i32
+ br label %loop_2
+
+loop_2:
+ %iv.2.1 = phi i64 [ 0, %loop_2_preheader ], [ %tmp16, %loop_2 ]
+ %iv.2.2 = phi i64 [ 0, %loop_2_preheader ], [ %iv.2.2.next, %loop_2 ]
+ %iv.2.3 = phi i64 [ 2, %loop_2_preheader ], [ %iv.2.3.next, %loop_2 ]
+ %tmp11 = add i64 %iv.2.2, %iv.2.1
+ %tmp12 = trunc i64 %tmp11 to i32
+ %tmp14 = mul i32 %tmp12, %tmp7
+ %tmp16 = mul i64 %iv.2.1, %iv.1.1
+ %iv.2.3.next = add nuw nsw i64 %iv.2.3, 1
+ %iv.2.2.next = add nsw i64 %iv.2.2, -1
+ %tmp17 = icmp slt i64 %iv.2.3.next, %iv.1.1
+ br i1 %tmp17, label %loop_2, label %exit
+
+exit:
+ %tmp10 = add i32 %iv.1.2, 3
+ ret void
+}
if6: ; preds = %idxend.8
%r2 = add i64 %0, -1
%r3 = load i64, i64* %1, align 8
-; CHECK-NOT: %r2
+; CHECK: %r2 = add i64 %0, -1
; CHECK: %r3 = load i64
br label %ib
%r4 = mul i64 %r3, %r0
%r5 = add i64 %r2, %r4
%r6 = icmp ult i64 %r5, undef
-; CHECK: [[MUL1:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK: [[ADD1:%[0-9]+]] = add i64 [[MUL1]], -1
-; CHECK: add i64 %{{.}}, [[ADD1]]
-; CHECK: %r6
+; CHECK: %r4 = mul i64 %r3, %lsr.iv
+; CHECK: %r5 = add i64 %r2, %r4
+; CHECK: %r6 = icmp ult i64 %r5, undef
+; CHECK: %r7 = getelementptr i64, i64* undef, i64 %r5
%r7 = getelementptr i64, i64* undef, i64 %r5
store i64 1, i64* %r7, align 8
-; CHECK: [[MUL2:%[0-9]+]] = mul i64 %lsr.iv, %r3
-; CHECK: [[ADD2:%[0-9]+]] = add i64 [[MUL2]], -1
br label %L
}
+; REQUIRES: x86
; RUN: opt -loop-reduce -S < %s | FileCheck %s
+; Strength reduction analysis here relies on IV Users analysis, which
+; only finds users among instructions whose types are treated as legal
+; by the data layout. When this test runs in a build without the x86
+; backend (for example, an AArch64-only build), the target triple is
+; not recognized and a default data layout is used instead. That
+; default layout has no legal types (not even i32), so the
+; transformation does not happen.
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx"
;
; SCEV expander cannot expand quadratic recurrences outside of the
; loop. This recurrence depends on %sub.us, so can't be expanded.
+; We cannot fold SCEVUnknown (sub.us) with recurrences since it is
+; declared after the loop.
;
; CHECK-LABEL: @test2
; CHECK-LABEL: test2.loop:
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -16777216, %entry ]
-; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 16777216
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %test2.loop ], [ -16777216, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %test2.loop ], [ -1, %entry ]
+; CHECK: %lsr.iv.next = add nsw i32 %lsr.iv, 1
+; CHECK: %lsr.iv.next2 = add nsw i32 %lsr.iv1, 16777216
;
; CHECK-LABEL: for.end:
-; CHECK: %sub.cond.us = sub nsw i32 %inc1115.us, %sub.us
-; CHECK: %sext.us = mul i32 %lsr.iv.next, %sub.cond.us
-; CHECK: %f = ashr i32 %sext.us, 24
+; CHECK: %tobool.us = icmp eq i32 %lsr.iv.next2, 0
+; CHECK: %sub.us = select i1 %tobool.us, i32 0, i32 0
+; CHECK: %1 = sub i32 0, %sub.us
+; CHECK: %2 = add i32 %1, %lsr.iv.next
+; CHECK: %sext.us = mul i32 %lsr.iv.next2, %2
+; CHECK: %f = ashr i32 %sext.us, 24
; CHECK: ret i32 %f
define i32 @test2() {
entry:
entry:
%buffer = alloca [33 x i16], align 16
%add.ptr = getelementptr inbounds [33 x i16], [33 x i16]* %buffer, i64 0, i64 33
+ %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
+ %sub.ptr.rhs.cast = ptrtoint i16* %add.ptr to i64
br label %do.body
do.body: ; preds = %do.body, %entry
do.end: ; preds = %do.body
%xap.0 = inttoptr i64 %0 to i1*
%cap.0 = ptrtoint i1* %xap.0 to i64
- %sub.ptr.lhs.cast = ptrtoint i16* %add.ptr to i64
- %sub.ptr.rhs.cast = ptrtoint i16* %incdec.ptr to i64
%sub.ptr.sub = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
%sub.ptr.div39 = lshr exact i64 %sub.ptr.sub, 1
%conv11 = trunc i64 %sub.ptr.div39 to i32