From 67bf9a6154d4b82c6c01aad01141bf08c1bbd0f6 Mon Sep 17 00:00:00 2001
From: Sjoerd Meijer <sjoerd.meijer@arm.com>
Date: Fri, 10 Jan 2020 09:30:02 +0000
Subject: [PATCH] [SCEV] Recognise hardware-loop intrinsic loop.decrement.reg

Teach SCEV about the @loop.decrement.reg intrinsic, which has exactly the same
semantics as a sub expression. This allows us to query hardware-loops, which
contain this @loop.decrement.reg intrinsic, so that we can calculate iteration
counts, exit values, etc. of hardware-loops.

This "int_loop_decrement_reg" intrinsic is defined as "IntrNoDuplicate". Thus,
while hardware-loops and their trip counts now become analysable by SCEV, the
IntrNoDuplicate attribute still prevents the usual loop transformations from
being applied to hardware-loops, which is what we want at this point. I have
added test cases covering loop unrolling, IndVarSimplify and LFTR.

Differential Revision: https://reviews.llvm.org/D71563
---
 llvm/lib/Analysis/ScalarEvolution.cpp              | 11 ++++++
 llvm/test/Transforms/IndVarSimplify/lftr.ll        | 29 ++++++++++++++++
 .../LoopUnroll/ARM/dont-unroll-loopdec.ll          | 40 ++++++++++++++++++++++
 llvm/unittests/Analysis/ScalarEvolutionTest.cpp    | 28 +++++++++++++++
 4 files changed, 108 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopUnroll/ARM/dont-unroll-loopdec.ll
diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp
index e636854..7d5c71e 100644
--- a/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/llvm/lib/Analysis/ScalarEvolution.cpp
@@ -4507,6 +4507,17 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) {
   if (!Op)
     return None;
 
+  // Recognise the hardware-loop intrinsic loop.decrement.reg(a, b): it has the
+  // semantics of `a - b`, so return it as a Sub. Other intrinsics yield None.
+  if (auto *II = dyn_cast<IntrinsicInst>(V)) {
+    switch (II->getIntrinsicID()) {
+    case Intrinsic::loop_decrement_reg:
+      return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1));
+    default:
+      return None;
+    }
+  }
+
   // Implementation detail: all the cleverness here should happen without
   // creating new SCEV expressions -- our caller knowns tricks to avoid creating
   // SCEV expressions when possible, and we should not break that.
diff --git a/llvm/test/Transforms/IndVarSimplify/lftr.ll b/llvm/test/Transforms/IndVarSimplify/lftr.ll
index 098f98d..abeeb5b 100644
--- a/llvm/test/Transforms/IndVarSimplify/lftr.ll
+++ b/llvm/test/Transforms/IndVarSimplify/lftr.ll
@@ -153,6 +153,34 @@ loopexit:
   ret i32 %i
 }
 
+define i32 @quadratic_sgt_loopdec() {
+; CHECK-LABEL: @quadratic_sgt_loopdec(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 10, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I_NEXT]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[I]], i32 1)
+; CHECK-NEXT:    store i32 [[I]], i32* @A
+; CHECK-NEXT:    [[I2:%.*]] = mul i32 [[I]], [[I]]
+; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp sgt i32 [[I2]], 0
+; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP]], label [[LOOPEXIT:%.*]]
+; CHECK:       loopexit:
+; CHECK-NEXT:    ret i32 0
+; %i counts down from 10 via loop.decrement.reg; the exit value should be 0.
+entry:
+  br label %loop
+
+loop:
+  %i = phi i32 [ 10, %entry ], [ %i.next, %loop ]
+  %i.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %i, i32 1)
+  store i32 %i, i32* @A
+  %i2 = mul i32 %i, %i
+  %c = icmp sgt i32 %i2, 0
+  br i1 %c, label %loop, label %loopexit
+
+loopexit:
+  ret i32 %i
+}
 
 @data = common global [240 x i8] zeroinitializer, align 16
 
@@ -629,4 +657,5 @@ exit:
 }
 
 
+declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
 
diff --git a/llvm/test/Transforms/LoopUnroll/ARM/dont-unroll-loopdec.ll b/llvm/test/Transforms/LoopUnroll/ARM/dont-unroll-loopdec.ll
new file mode 100644
index 0000000..f183c67
--- /dev/null
+++ b/llvm/test/Transforms/LoopUnroll/ARM/dont-unroll-loopdec.ll
@@ -0,0 +1,40 @@
+; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -loop-unroll --loop-unroll -S < %s | FileCheck %s
+
+; CHECK-LABEL:  foo
+; CHECK:        5:
+; CHECK:        6:                 ; preds = %6, %5
+; CHECK:        15:                ; preds = %6
+; CHECK:          br label %16
+; CHECK:        16:                ; preds = %15, %3
+; CHECK:          ret void
+; CHECK:        }
+
+define void @foo(i8* nocapture, i8* nocapture readonly, i32) {
+  %4 = icmp sgt i32 %2, 0
+  br i1 %4, label %5, label %16
+
+; <label>:5:
+  br label %6
+
+; <label>:6:
+  %7 = phi i32 [ %13, %6 ], [ %2, %5 ]
+  %8 = phi i8* [ %10, %6 ], [ %1, %5 ]
+  %9 = phi i8* [ %12, %6 ], [ %0, %5 ]
+  %10 = getelementptr inbounds i8, i8* %8, i32 1
+  %11 = load i8, i8* %8, align 1
+  %12 = getelementptr inbounds i8, i8* %9, i32 1
+  store i8 %11, i8* %9, align 1
+  ; Count the loop down with the hardware-loop intrinsic (acts as %7 - 1).
+  %13 = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %7, i32 1)
+
+  %14 = icmp sgt i32 %7, 1
+  br i1 %14, label %6, label %15
+
+; <label>:15:
+  br label %16
+
+; <label>:16:
+  ret void
+}
+
+declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32)
diff --git a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
index c42ebf6..e147502 100644
--- a/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
+++ b/llvm/unittests/Analysis/ScalarEvolutionTest.cpp
@@ -1683,6 +1683,34 @@ TEST_F(ScalarEvolutionsTest, SCEVExpanderShlNSW) {
                "} ");
 }
 
+TEST_F(ScalarEvolutionsTest, SCEVLoopDecIntrinsic) {
+  LLVMContext C;
+  SMDiagnostic Err;
+  std::unique_ptr<Module> M = parseAssemblyString(
+      "define void @foo(i32 %N) { "
+      "entry: "
+      "  %cmp3 = icmp sgt i32 %N, 0 "
+      "  br i1 %cmp3, label %for.body, label %for.cond.cleanup "
+      "for.cond.cleanup: "
+      "  ret void "
+      "for.body: "
+      "  %i.04 = phi i32 [ %inc, %for.body ], [ 100, %entry ] "
+      "  %inc = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %i.04, i32 1) "
+      "  %exitcond = icmp ne i32 %inc, 0 "
+      "  br i1 %exitcond, label %for.cond.cleanup, label %for.body "
+      "} "
+      "declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) ",
+      Err, C);
+
+  ASSERT_TRUE(M && "Could not parse module?");
+  ASSERT_TRUE(!verifyModule(*M) && "Must have been well formed!");
+  // %inc is produced by loop.decrement.reg; SCEV should see an add recurrence.
+  runWithSE(*M, "foo", [&](Function &F, LoopInfo &LI, ScalarEvolution &SE) {
+    auto *ScevInc = SE.getSCEV(getInstructionByName(F, "inc"));
+    EXPECT_TRUE(isa<SCEVAddRecExpr>(ScevInc));
+  });
+}
+
 TEST_F(ScalarEvolutionsTest, SCEVComputeConstantDifference) {
   LLVMContext C;
   SMDiagnostic Err;
-- 
2.7.4