From 715b01e979aad41b1138fb21ed08601cc328159e Mon Sep 17 00:00:00 2001 From: Kevin Qin Date: Mon, 9 Mar 2015 06:14:18 +0000 Subject: [PATCH] Introduce runtime unrolling disable matadata and use it to mark the scalar loop from vectorization. Runtime unrolling is an expensive optimization which can bring benefit only if the loop is hot and iteration number is relatively large enough. For some loops, we know they are not worth to be runtime unrolled. The scalar loop from vectorization is one of the cases. llvm-svn: 231631 --- llvm/docs/LangRef.rst | 10 ++++ llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 8 ++++ llvm/lib/Transforms/Vectorize/LoopVectorize.cpp | 54 +++++++++++++++++++++- llvm/test/Transforms/LoopUnroll/runtime-loop.ll | 33 +++++++++++++ .../LoopVectorize/X86/already-vectorized.ll | 3 +- .../Transforms/LoopVectorize/vectorize-once.ll | 3 +- 6 files changed, 107 insertions(+), 4 deletions(-) diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst index 218be34..ff9b4bf 100644 --- a/llvm/docs/LangRef.rst +++ b/llvm/docs/LangRef.rst @@ -3509,6 +3509,16 @@ which is the string ``llvm.loop.unroll.disable``. For example: !0 = !{!"llvm.loop.unroll.disable"} +'``llvm.loop.unroll.runtime.disable``' Metadata +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This metadata either disables runtime loop unrolling. The metadata has a single +operand which is the string ``llvm.loop.unroll.runtime.disable``. For example: + +.. code-block:: llvm + + !0 = !{!"llvm.loop.unroll.runtime.disable"} + '``llvm.loop.unroll.full``' Metadata ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp index 924be16..407595e 100644 --- a/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp +++ b/llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp @@ -619,6 +619,11 @@ static bool HasUnrollDisablePragma(const Loop *L) { return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.disable"); } +// Returns true if the loop has an runtime unroll(disable) pragma. +static bool HasRuntimeUnrollDisablePragma(const Loop *L) { + return GetUnrollMetadataForLoop(L, "llvm.loop.unroll.runtime.disable"); +} + // If loop has an unroll_count pragma return the (necessarily // positive) value from the pragma. Otherwise return 0. static unsigned UnrollCountPragmaValue(const Loop *L) { @@ -807,6 +812,9 @@ bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) { // Reduce count based on the type of unrolling and the threshold values. unsigned OriginalCount = Count; bool AllowRuntime = UserRuntime ? CurrentRuntime : UP.Runtime; + if (HasRuntimeUnrollDisablePragma(L)) { + AllowRuntime = false; + } if (Unrolling == Partial) { bool AllowPartial = UserAllowPartial ? CurrentAllowPartial : UP.Partial; if (!AllowPartial && !CountSetExplicitly) { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index ffa3fe1..781e224 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -250,7 +250,7 @@ public: : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), DL(DL), TLI(TLI), VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), - Legal(nullptr) {} + Legal(nullptr), AddedSafetyChecks(false) {} // Perform the actual loop widening (vectorization). void vectorize(LoopVectorizationLegality *L) { @@ -264,6 +264,11 @@ public: updateAnalysis(); } + // Return true if any runtime check is added. + bool IsSafetyChecksAdded() { + return AddedSafetyChecks; + } + virtual ~InnerLoopVectorizer() {} protected: @@ -443,6 +448,9 @@ protected: EdgeMaskCache MaskCache; LoopVectorizationLegality *Legal; + + // Record whether runtime check is added. + bool AddedSafetyChecks; }; class InnerLoopUnroller : public InnerLoopVectorizer { @@ -893,7 +901,7 @@ private: ValueToValueMap Strides; SmallPtrSet StrideSet; - + /// While vectorizing these instructions we have to generate a /// call to the appropriate masked intrinsic SmallPtrSet MaskedOp; @@ -1320,6 +1328,40 @@ struct LoopVectorize : public FunctionPass { return Changed; } + static void AddRuntimeUnrollDisableMetaData(Loop *L) { + SmallVector MDs; + // Reserve first location for self reference to the LoopID metadata node. + MDs.push_back(nullptr); + bool IsUnrollMetadata = false; + MDNode *LoopID = L->getLoopID(); + if (LoopID) { + // First find existing loop unrolling disable metadata. + for (unsigned i = 1, ie = LoopID->getNumOperands(); i < ie; ++i) { + MDNode *MD = dyn_cast(LoopID->getOperand(i)); + if (MD) { + const MDString *S = dyn_cast(MD->getOperand(0)); + IsUnrollMetadata = + S && S->getString().startswith("llvm.loop.unroll.disable"); + } + MDs.push_back(LoopID->getOperand(i)); + } + } + + if (!IsUnrollMetadata) { + // Add runtime unroll disable metadata. + LLVMContext &Context = L->getHeader()->getContext(); + SmallVector DisableOperands; + DisableOperands.push_back( + MDString::get(Context, "llvm.loop.unroll.runtime.disable")); + MDNode *DisableNode = MDNode::get(Context, DisableOperands); + MDs.push_back(DisableNode); + MDNode *NewLoopID = MDNode::get(Context, MDs); + // Set operand 0 to refer to the loop id itself. + NewLoopID->replaceOperandWith(0, NewLoopID); + L->setLoopID(NewLoopID); + } + } + bool processLoop(Loop *L) { assert(L->empty() && "Only process inner loops."); @@ -1475,6 +1517,12 @@ struct LoopVectorize : public FunctionPass { LB.vectorize(&LVL); ++LoopsVectorized; + // Add metadata to disable runtime unrolling scalar loop when there's no + // runtime check about strides and memory. Because at this situation, + // scalar loop is rarely used not worthy to be unrolled. + if (!LB.IsSafetyChecksAdded()) + AddRuntimeUnrollDisableMetaData(L); + // Report the vectorization decision. emitOptimizationRemark( F->getContext(), DEBUG_TYPE, *F, L->getStartLoc(), @@ -2221,6 +2269,7 @@ void InnerLoopVectorizer::createEmptyLoop() { std::tie(FirstCheckInst, StrideCheck) = addStrideCheck(LastBypassBlock->getTerminator()); if (StrideCheck) { + AddedSafetyChecks = true; // Create a new block containing the stride check. BasicBlock *CheckBlock = LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.stridecheck"); @@ -2245,6 +2294,7 @@ void InnerLoopVectorizer::createEmptyLoop() { std::tie(FirstCheckInst, MemRuntimeCheck) = Legal->getLAI()->addRuntimeCheck(LastBypassBlock->getTerminator()); if (MemRuntimeCheck) { + AddedSafetyChecks = true; // Create a new block containing the memory check. BasicBlock *CheckBlock = LastBypassBlock->splitBasicBlock(FirstCheckInst, "vector.memcheck"); diff --git a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll index 3bec939..9e78edf 100644 --- a/llvm/test/Transforms/LoopUnroll/runtime-loop.ll +++ b/llvm/test/Transforms/LoopUnroll/runtime-loop.ll @@ -113,6 +113,39 @@ for.end: ; preds = %for.cond.for.end_cr ret i16 %res.0.lcssa } +; Test run-time unrolling disable metadata. +; CHECK: for.body: +; CHECK-NOT: for.body.prol: + +define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly { +entry: + %cmp2 = icmp eq i32 %len, 0 + br i1 %cmp2, label %for.end, label %for.body + +for.body: ; preds = %for.body, %entry + %p.addr.05 = phi i16* [ %incdec.ptr, %for.body ], [ %p, %entry ] + %len.addr.04 = phi i32 [ %sub, %for.body ], [ %len, %entry ] + %res.03 = phi i32 [ %add, %for.body ], [ 0, %entry ] + %incdec.ptr = getelementptr inbounds i16, i16* %p.addr.05, i64 1 + %0 = load i16, i16* %p.addr.05, align 2 + %conv = zext i16 %0 to i32 + %add = add i32 %conv, %res.03 + %sub = add nsw i32 %len.addr.04, -2 + %cmp = icmp eq i32 %sub, 0 + br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body, !llvm.loop !0 + +for.cond.for.end_crit_edge: ; preds = %for.body + %phitmp = trunc i32 %add to i16 + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + %res.0.lcssa = phi i16 [ %phitmp, %for.cond.for.end_crit_edge ], [ 0, %entry ] + ret i16 %res.0.lcssa +} + +!0 = distinct !{!0, !1} +!1 = !{!"llvm.loop.unroll.runtime.disable"} + ; CHECK: !0 = distinct !{!0, !1} ; CHECK: !1 = !{!"llvm.loop.unroll.disable"} diff --git a/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll b/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll index 248d6dc..c400c76 100644 --- a/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll +++ b/llvm/test/Transforms/LoopVectorize/X86/already-vectorized.ll @@ -42,5 +42,6 @@ for.end: ; preds = %for.body ; CHECK: [[vect]] = distinct !{[[vect]], [[width:![0-9]+]], [[unroll:![0-9]+]]} ; CHECK: [[width]] = !{!"llvm.loop.vectorize.width", i32 1} ; CHECK: [[unroll]] = !{!"llvm.loop.interleave.count", i32 1} -; CHECK: [[scalar]] = distinct !{[[scalar]], [[width]], [[unroll]]} +; CHECK: [[scalar]] = distinct !{[[scalar]], [[runtime_unroll:![0-9]+]], [[width]], [[unroll]]} +; CHECK: [[runtime_unroll]] = !{!"llvm.loop.unroll.runtime.disable"} diff --git a/llvm/test/Transforms/LoopVectorize/vectorize-once.ll b/llvm/test/Transforms/LoopVectorize/vectorize-once.ll index 5d0e96b..9a91250 100644 --- a/llvm/test/Transforms/LoopVectorize/vectorize-once.ll +++ b/llvm/test/Transforms/LoopVectorize/vectorize-once.ll @@ -71,7 +71,8 @@ attributes #0 = { nounwind readonly ssp uwtable "fp-contract-model"="standard" " ; CHECK: !0 = distinct !{!0, !1, !2} ; CHECK: !1 = !{!"llvm.loop.vectorize.width", i32 1} ; CHECK: !2 = !{!"llvm.loop.interleave.count", i32 1} -; CHECK: !3 = distinct !{!3, !1, !2} +; CHECK: !3 = distinct !{!3, !4, !1, !2} +; CHECK: !4 = !{!"llvm.loop.unroll.runtime.disable"} !0 = !{!0, !1} !1 = !{!"llvm.loop.vectorize.width", i32 1} -- 2.7.4