#ifndef LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
#define LLVM_TRANSFORMS_SCALAR_LOOPUNROLLPASS_H
+#include "llvm/ADT/Optional.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/PassManager.h"
LoopStandardAnalysisResults &AR, LPMUpdater &U);
};
+/// A set of parameters used to control various transforms performed by the
+/// LoopUnroll pass. Each of the boolean parameters can be set to:
+/// true - enabling the transformation.
+/// false - disabling the transformation.
+/// None - relying on a global default.
+///
+/// There is also OptLevel parameter, which is used for additional loop unroll
+/// tuning.
+///
+/// Intended use is to create a default object, modify parameters with
+/// additional setters and then pass it to LoopUnrollPass.
+///
+struct LoopUnrollOptions {
+ Optional<bool> AllowPartial;
+ Optional<bool> AllowPeeling;
+ Optional<bool> AllowRuntime;
+ Optional<bool> AllowUpperBound;
+ int OptLevel;
+
+ LoopUnrollOptions(int OptLevel = 2) : OptLevel(OptLevel) {}
+
+ /// Enables or disables partial unrolling. When disabled only full unrolling
+ /// is allowed.
+ LoopUnrollOptions &setPartial(bool Partial) {
+ AllowPartial = Partial;
+ return *this;
+ }
+
+ /// Enables or disables unrolling of loops with runtime trip count.
+ LoopUnrollOptions &setRuntime(bool Runtime) {
+ AllowRuntime = Runtime;
+ return *this;
+ }
+
+ /// Enables or disables loop peeling.
+ LoopUnrollOptions &setPeeling(bool Peeling) {
+ AllowPeeling = Peeling;
+ return *this;
+ }
+
+ /// Enables or disables the use of trip count upper bound
+ /// in loop unrolling.
+ LoopUnrollOptions &setUpperBound(bool UpperBound) {
+ AllowUpperBound = UpperBound;
+ return *this;
+ }
+
+ // Sets "optimization level" tuning parameter for loop unrolling.
+ LoopUnrollOptions &setOptLevel(int O) {
+ OptLevel = O;
+ return *this;
+ }
+};
+
/// Loop unroll pass that will support both full and partial unrolling.
/// It is a function pass to have access to function and module analyses.
/// It will also put loops into canonical form (simplified and LCSSA).
class LoopUnrollPass : public PassInfoMixin<LoopUnrollPass> {
- const int OptLevel;
+ LoopUnrollOptions UnrollOpts;
public:
/// This uses the target information (or flags) to control the thresholds for
/// different unrolling stategies but supports all of them.
- explicit LoopUnrollPass(int OptLevel = 2) : OptLevel(OptLevel) {}
+ explicit LoopUnrollPass(LoopUnrollOptions UnrollOpts = {})
+ : UnrollOpts(UnrollOpts) {}
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
};
OptimizePM.addPass(
createFunctionToLoopPassAdaptor(LoopUnrollAndJamPass(Level)));
}
- OptimizePM.addPass(LoopUnrollPass(Level));
+ OptimizePM.addPass(LoopUnrollPass(LoopUnrollOptions(Level)));
OptimizePM.addPass(InstCombinePass());
OptimizePM.addPass(RequireAnalysisPass<OptimizationRemarkEmitterAnalysis, Function>());
OptimizePM.addPass(createFunctionToLoopPassAdaptor(LICMPass(), DebugLogging));
FUNCTION_PASS("tailcallelim", TailCallElimPass())
FUNCTION_PASS("unreachableblockelim", UnreachableBlockElimPass())
FUNCTION_PASS("unroll", LoopUnrollPass())
+FUNCTION_PASS("unroll<peeling;no-runtime>",LoopUnrollPass(LoopUnrollOptions().setPeeling(true).setRuntime(false)))
FUNCTION_PASS("verify", VerifierPass())
FUNCTION_PASS("verify<domtree>", DominatorTreeVerifierPass())
FUNCTION_PASS("verify<loops>", LoopVerifierPass())
Loop *ParentL = L.getParentLoop();
#endif
- // The API here is quite complex to call, but there are only two interesting
- // states we support: partial and full (or "simple") unrolling. However, to
- // enable these things we actually pass "None" in for the optional to avoid
- // providing an explicit choice.
- Optional<bool> AllowPartialParam, RuntimeParam, UpperBoundParam,
- AllowPeeling;
// Check if the profile summary indicates that the profiled application
// has a huge working set size, in which case we disable peeling to avoid
// bloating it further.
+ Optional<bool> LocalAllowPeeling = UnrollOpts.AllowPeeling;
if (PSI && PSI->hasHugeWorkingSetSize())
- AllowPeeling = false;
+ LocalAllowPeeling = false;
std::string LoopName = L.getName();
- LoopUnrollResult Result =
- tryToUnrollLoop(&L, DT, &LI, SE, TTI, AC, ORE,
- /*PreserveLCSSA*/ true, OptLevel, /*Count*/ None,
- /*Threshold*/ None, AllowPartialParam, RuntimeParam,
- UpperBoundParam, AllowPeeling);
+ // The API here is quite complex to call and we allow to select some
+ // flavors of unrolling during construction time (by setting UnrollOpts).
+ LoopUnrollResult Result = tryToUnrollLoop(
+ &L, DT, &LI, SE, TTI, AC, ORE,
+ /*PreserveLCSSA*/ true, UnrollOpts.OptLevel, /*Count*/ None,
+ /*Threshold*/ None, UnrollOpts.AllowPartial, UnrollOpts.AllowRuntime,
+ UnrollOpts.AllowUpperBound, LocalAllowPeeling);
Changed |= Result != LoopUnrollResult::Unmodified;
// The parent must not be damaged by unrolling!
; RUN: opt < %s -S -loop-unroll -unroll-force-peel-count=3 -verify-dom-info -simplifycfg -instcombine | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>,simplify-cfg,instcombine' -unroll-force-peel-count=3 -verify-dom-info | FileCheck %s
; Basic loop peeling - check that we can peel-off the first 3 loop iterations
; when explicitly requested.
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
-
+;
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=EPILOG,COMMON
; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=PROLOG,COMMON
+;
+; Restricted versions of unroll (unroll<peeling;noruntime>, unroll-full) should not be doing runtime unrolling
+; even if it is globally enabled through -unroll-runtime option
+;
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,unroll<peeling;no-runtime>' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=true | FileCheck %s -check-prefixes=NOEPILOG,COMMON
+; RUN: opt < %s -S -passes='require<opt-remark-emit>,loop(unroll-full)' -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefixes=NOPROLOG,COMMON
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
+; NOEPILOG-NOT: %xtraiter = and i32 %n
+
; PROLOG: %xtraiter = and i32 %n
; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+; NOPROLOG-NOT: %xtraiter = and i32 %n
+
; EPILOG: for.body.epil:
; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
; EPILOG: %epil.iter.sub = sub i32 %epil.iter, 1
; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
+; NOEPILOG: for.body:
+; NOEPILOG-NOT: for.body.epil:
+
; PROLOG: for.body.prol:
; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
; PROLOG: %prol.iter.sub = sub i32 %prol.iter, 1
; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit.unr-lcssa, !llvm.loop !0
+; NOPROLOG: for.body:
+; NOPROLOG-NOT: for.body.prol:
+
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
; COMMON-LABEL: @foo(
; EPILOG: bb72.2:
; PROLOG: bb72.2:
+; NOEPILOG-NOT: bb72.2:
+; NOPROLOG-NOT: bb72.2:
define void @foo(i32 %trips) {
entry:
; EPILOG: for.body.epil:
; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
+; NOEPILOG: for.body:
+; NOEPILOG-NOT: for.body.epil:
+
; PROLOG: for.body.prol:
; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
+; NOPROLOG: for.body:
+; NOPROLOG-NOT: for.body.prol:
+
define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
%cmp2 = icmp eq i32 %len, 0
; EPILOG: for.body:
; EPILOG-NOT: for.body.epil:
+; NOEPILOG: for.body:
+; NOEPILOG-NOT: for.body.epil:
+
; PROLOG: for.body:
; PROLOG-NOT: for.body.prol:
+; NOPROLOG: for.body:
+; NOPROLOG-NOT: for.body.prol:
+
define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
%cmp2 = icmp eq i32 %len, 0