From 67bf9a6154d4b82c6c01aad01141bf08c1bbd0f6 Mon Sep 17 00:00:00 2001 From: Sjoerd Meijer Date: Fri, 10 Jan 2020 09:30:02 +0000 Subject: [PATCH] [SCEV] Recognise hardware-loop intrinsic loop.decrement.reg Teach SCEV about the @loop.decrement.reg intrinsic, which has exactly the same semantics as a sub expression. This allows us to query hardware-loops, which contain this @loop.decrement.reg intrinsic, so that we can calculate iteration counts, exit values, etc. of hardware-loops. This "int_loop_decrement_reg" intrinsic is defined as "IntrNoDuplicate". Thus, while hardware-loops and tripcounts now become analysable by SCEV, this prevents the usual loop transformations from applying transformations on hardware-loops, which is what we want at this point, for which I have added test cases for loop unrolling and IndVarSimplify and LFTR. Differential Revision: https://reviews.llvm.org/D71563 --- llvm/lib/Analysis/ScalarEvolution.cpp | 11 ++++++ llvm/test/Transforms/IndVarSimplify/lftr.ll | 29 ++++++++++++++++ .../LoopUnroll/ARM/dont-unroll-loopdec.ll | 40 ++++++++++++++++++++++ llvm/unittests/Analysis/ScalarEvolutionTest.cpp | 28 +++++++++++++++ 4 files changed, 108 insertions(+) create mode 100644 llvm/test/Transforms/LoopUnroll/ARM/dont-unroll-loopdec.ll diff --git a/llvm/lib/Analysis/ScalarEvolution.cpp b/llvm/lib/Analysis/ScalarEvolution.cpp index e636854..7d5c71e 100644 --- a/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/llvm/lib/Analysis/ScalarEvolution.cpp @@ -4507,6 +4507,17 @@ static Optional<BinaryOp> MatchBinaryOp(Value *V, DominatorTree &DT) { if (!Op) return None; + // Recognise intrinsic loop.decrement.reg, and as this has exactly the same + // semantics as a Sub, return a binary sub expression.
+ if (auto *II = dyn_cast<IntrinsicInst>(V)) { + switch (II->getIntrinsicID()) { + case Intrinsic::loop_decrement_reg: + return BinaryOp(Instruction::Sub, II->getOperand(0), II->getOperand(1)); + default: + return None; + } + } + // Implementation detail: all the cleverness here should happen without // creating new SCEV expressions -- our caller knowns tricks to avoid creating // SCEV expressions when possible, and we should not break that. diff --git a/llvm/test/Transforms/IndVarSimplify/lftr.ll b/llvm/test/Transforms/IndVarSimplify/lftr.ll index 098f98d..abeeb5b 100644 --- a/llvm/test/Transforms/IndVarSimplify/lftr.ll +++ b/llvm/test/Transforms/IndVarSimplify/lftr.ll @@ -153,6 +153,34 @@ loopexit: ret i32 %i } +define i32 @quadratic_sgt_loopdec() { +; CHECK-LABEL: @quadratic_sgt_loopdec( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[I:%.*]] = phi i32 [ 10, [[ENTRY:%.*]] ], [ [[I_NEXT:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[I_NEXT]] = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 [[I]], i32 1) +; CHECK-NEXT: store i32 [[I]], i32* @A +; CHECK-NEXT: [[I2:%.*]] = mul i32 [[I]], [[I]] +; CHECK-NEXT: [[EXITCOND:%.*]] = icmp sgt i32 [[I2]], 0 +; CHECK-NEXT: br i1 [[EXITCOND]], label [[LOOP]], label [[LOOPEXIT:%.*]] +; CHECK: loopexit: +; CHECK-NEXT: ret i32 0 + +entry: + br label %loop + +loop: + %i = phi i32 [ 10, %entry ], [ %i.next, %loop ] + %i.next = call i32 @llvm.loop.decrement.reg.i32.i32.i32(i32 %i, i32 1) + store i32 %i, i32* @A + %i2 = mul i32 %i, %i + %c = icmp sgt i32 %i2, 0 + br i1 %c, label %loop, label %loopexit + +loopexit: + ret i32 %i +} @data = common global [240 x i8] zeroinitializer, align 16 @@ -629,4 +657,5 @@ exit: } +declare i32 @llvm.loop.decrement.reg.i32.i32.i32(i32, i32) diff --git a/llvm/test/Transforms/LoopUnroll/ARM/dont-unroll-loopdec.ll b/llvm/test/Transforms/LoopUnroll/ARM/dont-unroll-loopdec.ll new file mode 100644 index 0000000..f183c67 --- /dev/null +++
b/llvm/test/Transforms/LoopUnroll/ARM/dont-unroll-loopdec.ll @@ -0,0 +1,40 @@ +; RUN: opt -mtriple=thumbv8.1m.main -mattr=+mve.fp -loop-unroll --loop-unroll -S < %s | FileCheck %s + +; CHECK-LABEL: foo +; CHECK: 5: +; CHECK: 6: ; preds = %6, %5 +; CHECK: 15: ; preds = %6 +; CHECK: br label %16 +; CHECK: 16: ; preds = %15, %3 +; CHECK: ret void +; CHECK: } + +define void @foo(i8* nocapture, i8* nocapture readonly, i32) { + %4 = icmp sgt i32 %2, 0 + br i1 %4, label %5, label %16 + +;