From c7e275388e3fe55163296e587eff1b6325b63da2 Mon Sep 17 00:00:00 2001 From: David Green Date: Tue, 10 Nov 2020 17:01:31 +0000 Subject: [PATCH] [ARM] Don't aggressively unroll vector remainder loops We already do not unroll loops with vector instructions under MVE, but that does not include the remainder loops that the vectorizer produces. These remainder loops will be rarely executed and are not worth unrolling, as the trip count is likely to be low if they get executed at all. Luckily they get llvm.loop.isvectorized to make recognizing them simpler. We have wanted to do this for a while but hit issues with low overhead loops being reverted due to difficult register allocation. With recent changes that seems to be less of an issue now. Differential Revision: https://reviews.llvm.org/D90055 --- llvm/include/llvm/Transforms/Utils/LoopUtils.h | 3 +++ llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 4 ++++ llvm/lib/Transforms/Utils/LoopUtils.cpp | 2 +- llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll | 8 ++------ 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/llvm/include/llvm/Transforms/Utils/LoopUtils.h b/llvm/include/llvm/Transforms/Utils/LoopUtils.h index d741b51..665ff37 100644 --- a/llvm/include/llvm/Transforms/Utils/LoopUtils.h +++ b/llvm/include/llvm/Transforms/Utils/LoopUtils.h @@ -271,6 +271,9 @@ TransformationMode hasLICMVersioningTransformation(Loop *L); void addStringMetadataToLoop(Loop *TheLoop, const char *MDString, unsigned V = 0); +/// Returns true if Name is applied to TheLoop and enabled. +bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name); + /// Returns a loop's estimated trip count based on branch weight metadata. /// In addition if \p EstimatedLoopInvocationWeight is not null it is /// initialized with weight of loop's latch leading to the exit. 
diff --git a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp index ccb3750..d9f83a0 100644 --- a/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -1946,6 +1946,10 @@ void ARMTTIImpl::getUnrollingPreferences(Loop *L, ScalarEvolution &SE, if (ST->hasBranchPredictor() && L->getNumBlocks() > 4) return; + // Don't unroll vectorized loops, including the remainder loop + if (getBooleanLoopAttribute(L, "llvm.loop.isvectorized")) + return; + // Scan the loop: don't unroll loops with calls as this could prevent // inlining. unsigned Cost = 0; diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp index e10a230..c2c5713 100644 --- a/llvm/lib/Transforms/Utils/LoopUtils.cpp +++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp @@ -297,7 +297,7 @@ static Optional getOptionalBoolLoopAttribute(const Loop *TheLoop, llvm_unreachable("unexpected number of options"); } -static bool getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { +bool llvm::getBooleanLoopAttribute(const Loop *TheLoop, StringRef Name) { return getOptionalBoolLoopAttribute(TheLoop, Name).getValueOr(false); } diff --git a/llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll b/llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll index 316ad64..41073e6 100644 --- a/llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll +++ b/llvm/test/Transforms/LoopUnroll/ARM/mve-nounroll.ll @@ -70,15 +70,11 @@ for.body: ; preds = %for.body.preheader1 ; CHECK-LABEL: @remainder ; CHECK: vector.body: -; CHECK: br i1 %13, label %middle.block, label %vector.body, !llvm.loop !0 +; CHECK: br i1 %7, label %middle.block, label %vector.body, !llvm.loop !0 ; CHECK: middle.block: ; CHECK: br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader13 ; CHECK: for.body: -; CHECK: br i1 %exitcond.3, label %for.cond.cleanup.loopexit.unr-lcssa, label %for.body, !llvm.loop !0 -; CHECK: for.body.prol.1: -; 
CHECK: br i1 %prol.iter.cmp.1, label %for.body.prol.2, label %for.body.prol.loopexit.unr-lcssa -; CHECK: for.body.prol.2: -; CHECK: br label %for.body.prol.loopexit.unr-lcssa +; CHECK: br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body, !llvm.loop !0 define void @remainder(float* %s1, float* %s2, float* %d, i32 %n) { entry: -- 2.7.4