Teach ScalarEvolution to make use of no-overflow flags when
author Dan Gohman <gohman@apple.com>
Sat, 25 Jul 2009 01:22:26 +0000 (01:22 +0000)
committer Dan Gohman <gohman@apple.com>
Sat, 25 Jul 2009 01:22:26 +0000 (01:22 +0000)
analyzing add recurrences.

llvm-svn: 77034

llvm/lib/Analysis/ScalarEvolution.cpp
llvm/test/Analysis/ScalarEvolution/nsw.ll [new file with mode: 0644]

index c77c7c40ef1a4d366197c33d1333e03808190679..49af579366297a6161e36dc7100bd3774365de92 100644 (file)
@@ -734,6 +734,13 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
       unsigned BitWidth = getTypeSizeInBits(AR->getType());
       const Loop *L = AR->getLoop();
 
+      // If we have special knowledge that this addrec won't overflow,
+      // we don't need to do any further analysis.
+      if (AR->hasNoUnsignedOverflow())
+        return getAddRecExpr(getZeroExtendExpr(Start, Ty),
+                             getZeroExtendExpr(Step, Ty),
+                             L);
+
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
       // simply not analyzable, and it covers the case where this code is
@@ -866,6 +873,13 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
       unsigned BitWidth = getTypeSizeInBits(AR->getType());
       const Loop *L = AR->getLoop();
 
+      // If we have special knowledge that this addrec won't overflow,
+      // we don't need to do any further analysis.
+      if (AR->hasNoSignedOverflow())
+        return getAddRecExpr(getSignExtendExpr(Start, Ty),
+                             getSignExtendExpr(Step, Ty),
+                             L);
+
       // Check whether the backedge-taken count is SCEVCouldNotCompute.
       // Note that this serves two purposes: It filters out loops that are
       // simply not analyzable, and it covers the case where this code is
@@ -2344,8 +2358,29 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
                  cast<SCEVAddRecExpr>(Accum)->getLoop() == L)) {
               const SCEV *StartVal =
                 getSCEV(PN->getIncomingValue(IncomingEdge));
-              const SCEV *PHISCEV =
-                getAddRecExpr(StartVal, Accum, L);
+              const SCEVAddRecExpr *PHISCEV =
+                cast<SCEVAddRecExpr>(getAddRecExpr(StartVal, Accum, L));
+
+              // If the increment doesn't overflow, then neither the addrec nor the
+              // post-increment will overflow.
+              if (const AddOperator *OBO = dyn_cast<AddOperator>(BEValueV))
+                if (OBO->getOperand(0) == PN &&
+                    getSCEV(OBO->getOperand(1)) ==
+                      PHISCEV->getStepRecurrence(*this)) {
+                  const SCEVAddRecExpr *PostInc = PHISCEV->getPostIncExpr(*this);
+                  if (OBO->hasNoUnsignedOverflow()) {
+                    const_cast<SCEVAddRecExpr *>(PHISCEV)
+                      ->setHasNoUnsignedOverflow(true);
+                    const_cast<SCEVAddRecExpr *>(PostInc)
+                      ->setHasNoUnsignedOverflow(true);
+                  }
+                  if (OBO->hasNoSignedOverflow()) {
+                    const_cast<SCEVAddRecExpr *>(PHISCEV)
+                      ->setHasNoSignedOverflow(true);
+                    const_cast<SCEVAddRecExpr *>(PostInc)
+                      ->setHasNoSignedOverflow(true);
+                  }
+                }
 
               // Okay, for the entire analysis of this edge we assumed the PHI
               // to be symbolic.  We now need to go back and purge all of the
diff --git a/llvm/test/Analysis/ScalarEvolution/nsw.ll b/llvm/test/Analysis/ScalarEvolution/nsw.ll
new file mode 100644 (file)
index 0000000..245ed6f
--- /dev/null
@@ -0,0 +1,40 @@
+; RUN: llvm-as < %s | opt -analyze -scalar-evolution -disable-output | grep { -->  {.*,+,.*}<bb>} | count 8
+
+; The addrecs in this loop are analyzable only by using nsw information.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64"
+
+define void @foo(double* %p) nounwind {
+entry:
+       %tmp = load double* %p, align 8         ; <double> [#uses=1]
+       %tmp1 = fcmp ogt double %tmp, 2.000000e+00              ; <i1> [#uses=1]
+       br i1 %tmp1, label %bb.nph, label %return
+
+bb.nph:                ; preds = %entry
+       br label %bb
+
+bb:            ; preds = %bb1, %bb.nph
+       %i.01 = phi i32 [ %tmp8, %bb1 ], [ 0, %bb.nph ]         ; <i32> [#uses=3]
+       %tmp2 = sext i32 %i.01 to i64           ; <i64> [#uses=1]
+       %tmp3 = getelementptr double* %p, i64 %tmp2             ; <double*> [#uses=1]
+       %tmp4 = load double* %tmp3, align 8             ; <double> [#uses=1]
+       %tmp5 = fmul double %tmp4, 9.200000e+00         ; <double> [#uses=1]
+       %tmp6 = sext i32 %i.01 to i64           ; <i64> [#uses=1]
+       %tmp7 = getelementptr double* %p, i64 %tmp6             ; <double*> [#uses=1]
+       store double %tmp5, double* %tmp7, align 8
+       %tmp8 = nsw add i32 %i.01, 1            ; <i32> [#uses=2]
+       br label %bb1
+
+bb1:           ; preds = %bb
+       %phitmp = sext i32 %tmp8 to i64         ; <i64> [#uses=1]
+       %tmp9 = getelementptr double* %p, i64 %phitmp           ; <double*> [#uses=1]
+       %tmp10 = load double* %tmp9, align 8            ; <double> [#uses=1]
+       %tmp11 = fcmp ogt double %tmp10, 2.000000e+00           ; <i1> [#uses=1]
+       br i1 %tmp11, label %bb, label %bb1.return_crit_edge
+
+bb1.return_crit_edge:          ; preds = %bb1
+       br label %return
+
+return:                ; preds = %bb1.return_crit_edge, %entry
+       ret void
+}