--- /dev/null
+//===- HardwareLoops.h ------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+/// \file
+///
+/// Defines an IR pass for the creation of hardware loops.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_CODEGEN_HARDWARELOOPS_H
+#define LLVM_CODEGEN_HARDWARELOOPS_H
+
+#include "llvm/IR/PassManager.h"
+
+namespace llvm {
+
+struct HardwareLoopOptions {
+ std::optional<unsigned> Decrement;
+ std::optional<unsigned> Bitwidth;
+ std::optional<bool> Force;
+ std::optional<bool> ForcePhi;
+ std::optional<bool> ForceNested;
+ std::optional<bool> ForceGuard;
+
+ HardwareLoopOptions &setDecrement(unsigned Count) {
+ Decrement = Count;
+ return *this;
+ }
+ HardwareLoopOptions &setCounterBitwidth(unsigned Width) {
+ Bitwidth = Width;
+ return *this;
+ }
+ HardwareLoopOptions &setForce(bool Force) {
+ this->Force = Force;
+ return *this;
+ }
+ HardwareLoopOptions &setForcePhi(bool Force) {
+ ForcePhi = Force;
+ return *this;
+ }
+ HardwareLoopOptions &setForceNested(bool Force) {
+ ForceNested = Force;
+ return *this;
+ }
+ HardwareLoopOptions &setForceGuard(bool Force) {
+ ForceGuard = Force;
+ return *this;
+ }
+ bool getForcePhi() const {
+ return ForcePhi.has_value() && ForcePhi.value();
+ }
+ bool getForceNested() const {
+ return ForceNested.has_value() && ForceNested.value();
+ }
+ bool getForceGuard() const {
+ return ForceGuard.has_value() && ForceGuard.value();
+ }
+};
+
+class HardwareLoopsPass : public PassInfoMixin<HardwareLoopsPass> {
+ HardwareLoopOptions Opts;
+
+public:
+ explicit HardwareLoopsPass(HardwareLoopOptions Opts = {})
+ : Opts(Opts) { }
+
+ PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
+};
+
+} // end namespace llvm
+
+#endif // LLVM_CODEGEN_HARDWARELOOPS_H
FunctionPass *createEHContGuardCatchretPass();
/// Create Hardware Loop pass. \see HardwareLoops.cpp
- FunctionPass *createHardwareLoopsPass();
+ FunctionPass *createHardwareLoopsLegacyPass();
/// This pass inserts pseudo probe annotation for callsite profiling.
FunctionPass *createPseudoProbeInserter();
void initializeGlobalSplitPass(PassRegistry&);
void initializeGlobalsAAWrapperPassPass(PassRegistry&);
void initializeGuardWideningLegacyPassPass(PassRegistry&);
-void initializeHardwareLoopsPass(PassRegistry&);
+void initializeHardwareLoopsLegacyPass(PassRegistry&);
void initializeMIRProfileLoaderPassPass(PassRegistry &);
void initializeIPSCCPLegacyPassPass(PassRegistry&);
void initializeIRCELegacyPassPass(PassRegistry&);
(void) llvm::createFloat2IntPass();
(void) llvm::createEliminateAvailableExternallyPass();
(void)llvm::createScalarizeMaskedMemIntrinLegacyPass();
- (void) llvm::createHardwareLoopsPass();
+ (void) llvm::createHardwareLoopsLegacyPass();
(void) llvm::createInjectTLIMappingsLegacyPass();
(void) llvm::createUnifyLoopExitsPass();
(void) llvm::createFixIrreduciblePass();
initializeFuncletLayoutPass(Registry);
initializeGCMachineCodeAnalysisPass(Registry);
initializeGCModuleInfoPass(Registry);
- initializeHardwareLoopsPass(Registry);
+ initializeHardwareLoopsLegacyPass(Registry);
initializeIfConverterPass(Registry);
initializeImplicitNullChecksPass(Registry);
initializeIndirectBrExpandPassPass(Registry);
///
//===----------------------------------------------------------------------===//
+#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
using TTI = TargetTransformInfo;
- class HardwareLoops : public FunctionPass {
+ class HardwareLoopsLegacy : public FunctionPass {
public:
static char ID;
- HardwareLoops() : FunctionPass(ID) {
- initializeHardwareLoopsPass(*PassRegistry::getPassRegistry());
+ HardwareLoopsLegacy() : FunctionPass(ID) {
+ initializeHardwareLoopsLegacyPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override;
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
AU.addRequired<ScalarEvolutionWrapperPass>();
+ AU.addPreserved<ScalarEvolutionWrapperPass>();
AU.addRequired<AssumptionCacheTracker>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
+ AU.addPreserved<BranchProbabilityInfoWrapperPass>();
}
+ };
+
+ class HardwareLoopsImpl {
+ public:
+ HardwareLoopsImpl(ScalarEvolution &SE, LoopInfo &LI, bool PreserveLCSSA,
+ DominatorTree &DT, const DataLayout &DL,
+ const TargetTransformInfo &TTI, TargetLibraryInfo *TLI,
+ AssumptionCache &AC, OptimizationRemarkEmitter *ORE,
+ HardwareLoopOptions &Opts)
+ : SE(SE), LI(LI), PreserveLCSSA(PreserveLCSSA), DT(DT), DL(DL), TTI(TTI),
+ TLI(TLI), AC(AC), ORE(ORE), Opts(Opts) { }
+ bool run(Function &F);
+
+ private:
// Try to convert the given Loop into a hardware loop.
- bool TryConvertLoop(Loop *L);
+ bool TryConvertLoop(Loop *L, LLVMContext &Ctx);
// Given that the target believes the loop to be profitable, try to
// convert it.
bool TryConvertLoop(HardwareLoopInfo &HWLoopInfo);
- private:
- ScalarEvolution *SE = nullptr;
- LoopInfo *LI = nullptr;
- const DataLayout *DL = nullptr;
- OptimizationRemarkEmitter *ORE = nullptr;
- const TargetTransformInfo *TTI = nullptr;
- DominatorTree *DT = nullptr;
- bool PreserveLCSSA = false;
- AssumptionCache *AC = nullptr;
- TargetLibraryInfo *LibInfo = nullptr;
- Module *M = nullptr;
+ ScalarEvolution &SE;
+ LoopInfo &LI;
+ bool PreserveLCSSA;
+ DominatorTree &DT;
+ const DataLayout &DL;
+ const TargetTransformInfo &TTI;
+ TargetLibraryInfo *TLI = nullptr;
+ AssumptionCache &AC;
+ OptimizationRemarkEmitter *ORE;
+ HardwareLoopOptions &Opts;
bool MadeChange = false;
};
public:
HardwareLoop(HardwareLoopInfo &Info, ScalarEvolution &SE,
const DataLayout &DL,
- OptimizationRemarkEmitter *ORE) :
- SE(SE), DL(DL), ORE(ORE), L(Info.L), M(L->getHeader()->getModule()),
+ OptimizationRemarkEmitter *ORE,
+ HardwareLoopOptions &Opts) :
+ SE(SE), DL(DL), ORE(ORE), Opts(Opts), L(Info.L), M(L->getHeader()->getModule()),
ExitCount(Info.ExitCount),
CountType(Info.CountType),
ExitBranch(Info.ExitBranch),
ScalarEvolution &SE;
const DataLayout &DL;
OptimizationRemarkEmitter *ORE = nullptr;
+ HardwareLoopOptions &Opts;
Loop *L = nullptr;
Module *M = nullptr;
const SCEV *ExitCount = nullptr;
};
}
-char HardwareLoops::ID = 0;
+char HardwareLoopsLegacy::ID = 0;
-bool HardwareLoops::runOnFunction(Function &F) {
+bool HardwareLoopsLegacy::runOnFunction(Function &F) {
if (skipFunction(F))
return false;
LLVM_DEBUG(dbgs() << "HWLoops: Running on " << F.getName() << "\n");
- LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
- SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
- DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
- TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
- DL = &F.getParent()->getDataLayout();
- ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
+ auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
+ auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
+ auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
+ auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
+ auto &DL = F.getParent()->getDataLayout();
+ auto *ORE = &getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
- LibInfo = TLIP ? &TLIP->getTLI(F) : nullptr;
- PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
- AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
- M = F.getParent();
+ auto *TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
+ auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
+ bool PreserveLCSSA = mustPreserveAnalysisID(LCSSAID);
+
+ HardwareLoopOptions Opts;
+ if (ForceHardwareLoops.getNumOccurrences())
+ Opts.setForce(ForceHardwareLoops);
+ if (ForceHardwareLoopPHI.getNumOccurrences())
+ Opts.setForcePhi(ForceHardwareLoopPHI);
+ if (ForceNestedLoop.getNumOccurrences())
+ Opts.setForceNested(ForceNestedLoop);
+ if (ForceGuardLoopEntry.getNumOccurrences())
+ Opts.setForceGuard(ForceGuardLoopEntry);
+ if (LoopDecrement.getNumOccurrences())
+ Opts.setDecrement(LoopDecrement);
+ if (CounterBitWidth.getNumOccurrences())
+ Opts.setCounterBitwidth(CounterBitWidth);
- for (Loop *L : *LI)
- if (L->isOutermost())
- TryConvertLoop(L);
+ HardwareLoopsImpl Impl(SE, LI, PreserveLCSSA, DT, DL, TTI, TLI, AC, ORE,
+ Opts);
+ return Impl.run(F);
+}
+
+PreservedAnalyses HardwareLoopsPass::run(Function &F,
+ FunctionAnalysisManager &AM) {
+ auto &LI = AM.getResult<LoopAnalysis>(F);
+ auto &SE = AM.getResult<ScalarEvolutionAnalysis>(F);
+ auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
+ auto &TTI = AM.getResult<TargetIRAnalysis>(F);
+ auto *TLI = &AM.getResult<TargetLibraryAnalysis>(F);
+ auto &AC = AM.getResult<AssumptionAnalysis>(F);
+ auto *ORE = &AM.getResult<OptimizationRemarkEmitterAnalysis>(F);
+ auto &DL = F.getParent()->getDataLayout();
+
+ HardwareLoopsImpl Impl(SE, LI, true, DT, DL, TTI, TLI, AC, ORE, Opts);
+ bool Changed = Impl.run(F);
+ if (!Changed)
+ return PreservedAnalyses::all();
+
+ PreservedAnalyses PA;
+ PA.preserve<LoopAnalysis>();
+ PA.preserve<ScalarEvolutionAnalysis>();
+ PA.preserve<DominatorTreeAnalysis>();
+ PA.preserve<BranchProbabilityAnalysis>();
+ return PA;
+}
+bool HardwareLoopsImpl::run(Function &F) {
+ LLVMContext &Ctx = F.getParent()->getContext();
+ for (Loop *L : LI)
+ if (L->isOutermost())
+ TryConvertLoop(L, Ctx);
return MadeChange;
}
// Return true if the search should stop, which will be when an inner loop is
// converted and the parent loop doesn't support containing a hardware loop.
-bool HardwareLoops::TryConvertLoop(Loop *L) {
+bool HardwareLoopsImpl::TryConvertLoop(Loop *L, LLVMContext &Ctx) {
// Process nested loops first.
bool AnyChanged = false;
for (Loop *SL : *L)
- AnyChanged |= TryConvertLoop(SL);
+ AnyChanged |= TryConvertLoop(SL, Ctx);
if (AnyChanged) {
reportHWLoopFailure("nested hardware-loops not supported", "HWLoopNested",
ORE, L);
LLVM_DEBUG(dbgs() << "HWLoops: Loop " << L->getHeader()->getName() << "\n");
HardwareLoopInfo HWLoopInfo(L);
- if (!HWLoopInfo.canAnalyze(*LI)) {
+ if (!HWLoopInfo.canAnalyze(LI)) {
reportHWLoopFailure("cannot analyze loop, irreducible control flow",
"HWLoopCannotAnalyze", ORE, L);
return false;
}
- if (!ForceHardwareLoops &&
- !TTI->isHardwareLoopProfitable(L, *SE, *AC, LibInfo, HWLoopInfo)) {
+ if (!Opts.Force &&
+ !TTI.isHardwareLoopProfitable(L, SE, AC, TLI, HWLoopInfo)) {
reportHWLoopFailure("it's not profitable to create a hardware-loop",
"HWLoopNotProfitable", ORE, L);
return false;
}
// Allow overriding of the counter width and loop decrement value.
- if (CounterBitWidth.getNumOccurrences())
- HWLoopInfo.CountType =
- IntegerType::get(M->getContext(), CounterBitWidth);
+ if (Opts.Bitwidth.has_value()) {
+ HWLoopInfo.CountType = IntegerType::get(Ctx, Opts.Bitwidth.value());
+ }
- if (LoopDecrement.getNumOccurrences())
+ if (Opts.Decrement.has_value())
HWLoopInfo.LoopDecrement =
- ConstantInt::get(HWLoopInfo.CountType, LoopDecrement);
+ ConstantInt::get(HWLoopInfo.CountType, Opts.Decrement.value());
MadeChange |= TryConvertLoop(HWLoopInfo);
- return MadeChange && (!HWLoopInfo.IsNestingLegal && !ForceNestedLoop);
+ return MadeChange && (!HWLoopInfo.IsNestingLegal && !Opts.ForceNested);
}
-bool HardwareLoops::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
+bool HardwareLoopsImpl::TryConvertLoop(HardwareLoopInfo &HWLoopInfo) {
Loop *L = HWLoopInfo.L;
LLVM_DEBUG(dbgs() << "HWLoops: Try to convert profitable loop: " << *L);
- if (!HWLoopInfo.isHardwareLoopCandidate(*SE, *LI, *DT, ForceNestedLoop,
- ForceHardwareLoopPHI)) {
+ if (!HWLoopInfo.isHardwareLoopCandidate(SE, LI, DT, Opts.getForceNested(),
+ Opts.getForcePhi())) {
// TODO: there can be many reasons a loop is not considered a
// candidate, so we should let isHardwareLoopCandidate fill in the
// reason and then report a better message here.
// If we don't have a preheader, then insert one.
if (!Preheader)
- Preheader = InsertPreheaderForLoop(L, DT, LI, nullptr, PreserveLCSSA);
+ Preheader = InsertPreheaderForLoop(L, &DT, &LI, nullptr, PreserveLCSSA);
if (!Preheader)
return false;
- HardwareLoop HWLoop(HWLoopInfo, *SE, *DL, ORE);
+ HardwareLoop HWLoop(HWLoopInfo, SE, DL, ORE, Opts);
HWLoop.Create();
++NumHWLoops;
return true;
Value *Setup = InsertIterationSetup(LoopCountInit);
- if (UsePHICounter || ForceHardwareLoopPHI) {
+ if (UsePHICounter || Opts.ForcePhi) {
Instruction *LoopDec = InsertLoopRegDec(LoopCountInit);
Value *EltsRem = InsertPHICounter(Setup, LoopDec);
LoopDec->setOperand(0, EltsRem);
if (SE.isLoopEntryGuardedByCond(L, ICmpInst::ICMP_NE, ExitCount,
SE.getZero(ExitCount->getType()))) {
LLVM_DEBUG(dbgs() << " - Attempting to use test.set counter.\n");
- UseLoopGuard |= ForceGuardLoopEntry;
+ if (Opts.ForceGuard)
+ UseLoopGuard = true;
} else
UseLoopGuard = false;
Value* HardwareLoop::InsertIterationSetup(Value *LoopCountInit) {
IRBuilder<> Builder(BeginBB->getTerminator());
Type *Ty = LoopCountInit->getType();
- bool UsePhi = UsePHICounter || ForceHardwareLoopPHI;
+ bool UsePhi = UsePHICounter || Opts.ForcePhi;
Intrinsic::ID ID = UseLoopGuard
? (UsePhi ? Intrinsic::test_start_loop_iterations
: Intrinsic::test_set_loop_iterations)
RecursivelyDeleteTriviallyDeadInstructions(OldCond);
}
-INITIALIZE_PASS_BEGIN(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_BEGIN(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
-INITIALIZE_PASS_END(HardwareLoops, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
+INITIALIZE_PASS_END(HardwareLoopsLegacy, DEBUG_TYPE, HW_LOOPS_NAME, false, false)
-FunctionPass *llvm::createHardwareLoopsPass() { return new HardwareLoops(); }
+FunctionPass *llvm::createHardwareLoopsLegacyPass() { return new HardwareLoopsLegacy(); }
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TypeBasedAliasAnalysis.h"
#include "llvm/Analysis/UniformityAnalysis.h"
+#include "llvm/CodeGen/HardwareLoops.h"
#include "llvm/CodeGen/TypePromotion.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Dominators.h"
return Result;
}
+/// Parser of parameters for HardwareLoops pass.
+Expected<HardwareLoopOptions> parseHardwareLoopOptions(StringRef Params) {
+ HardwareLoopOptions HardwareLoopOpts;
+
+ while (!Params.empty()) {
+ StringRef ParamName;
+ std::tie(ParamName, Params) = Params.split(';');
+ if (ParamName.consume_front("hardware-loop-decrement=")) {
+ int Count;
+ if (ParamName.getAsInteger(0, Count))
+ return make_error<StringError>(
+ formatv("invalid HardwareLoopPass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ HardwareLoopOpts.setDecrement(Count);
+ continue;
+ }
+ if (ParamName.consume_front("hardware-loop-counter-bitwidth=")) {
+ int Count;
+ if (ParamName.getAsInteger(0, Count))
+ return make_error<StringError>(
+ formatv("invalid HardwareLoopPass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ HardwareLoopOpts.setCounterBitwidth(Count);
+ continue;
+ }
+ if (ParamName == "force-hardware-loops") {
+ HardwareLoopOpts.setForce(true);
+ } else if (ParamName == "force-hardware-loop-phi") {
+ HardwareLoopOpts.setForcePhi(true);
+ } else if (ParamName == "force-nested-hardware-loop") {
+ HardwareLoopOpts.setForceNested(true);
+ } else if (ParamName == "force-hardware-loop-guard") {
+ HardwareLoopOpts.setForceGuard(true);
+ } else {
+ return make_error<StringError>(
+ formatv("invalid HardwarePass parameter '{0}' ", ParamName).str(),
+ inconvertibleErrorCode());
+ }
+ }
+ return HardwareLoopOpts;
+}
+
/// Parser of parameters for LoopUnroll pass.
Expected<LoopUnrollOptions> parseLoopUnrollOptions(StringRef Params) {
LoopUnrollOptions UnrollOpts;
},
parseEntryExitInstrumenterPassOptions,
"post-inline")
+FUNCTION_PASS_WITH_PARAMS("hardware-loops",
+ "HardwareLoopsPass",
+ [](HardwareLoopOptions Opts) {
+ return HardwareLoopsPass(Opts);
+ },
+ parseHardwareLoopOptions,
+ "force-hardware-loops;"
+ "force-hardware-loop-phi;"
+ "force-nested-hardware-loop;"
+ "force-hardware-loop-guard;"
+ "hardware-loop-decrement=N;"
+ "hardware-loop-counter-bitwidth=N")
FUNCTION_PASS_WITH_PARAMS("lower-matrix-intrinsics",
"LowerMatrixIntrinsicsPass",
[](bool Minimal) {
}
if (TM->getOptLevel() != CodeGenOpt::None) {
- addPass(createHardwareLoopsPass());
+ addPass(createHardwareLoopsLegacyPass());
addPass(createMVETailPredicationPass());
// FIXME: IR passes can delete address-taken basic blocks, deleting
// corresponding blockaddresses. ARMConstantPoolConstant holds references to
addPass(createPPCLoopInstrFormPrepPass(getPPCTargetMachine()));
if (!DisableCTRLoops && getOptLevel() != CodeGenOpt::None)
- addPass(createHardwareLoopsPass());
+ addPass(createHardwareLoopsLegacyPass());
return false;
}
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -hardware-loops -S -verify-loop-lcssa %s | FileCheck %s
+; RUN: opt < %s -passes=hardware-loops -verify-loop-lcssa -S | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "ppc64-unknown-linux-elf"
; CHECK-NEXT: [[C_0:%.*]] = call i1 @cond()
; CHECK-NEXT: br i1 [[C_0]], label [[WHILE_COND25_PREHEADER:%.*]], label [[FOR_BODY]]
; CHECK: while.cond25.preheader:
+; CHECK-NEXT: [[INDVARS_IV349_PH:%.*]] = phi i64 [ 50, [[FOR_INC]] ]
; CHECK-NEXT: call void @llvm.set.loop.iterations.i64(i64 51)
; CHECK-NEXT: br label [[WHILE_COND25:%.*]]
; CHECK: while.cond25:
; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[WHILE_COND25_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[LAND_RHS:%.*]] ]
-; CHECK-NEXT: [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ 50, [[WHILE_COND25_PREHEADER]] ]
+; CHECK-NEXT: [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ [[INDVARS_IV349_PH]], [[WHILE_COND25_PREHEADER]] ]
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1)
; CHECK-NEXT: br i1 [[TMP0]], label [[LAND_RHS]], label [[WHILE_END187:%.*]]
; CHECK: land.rhs:
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MAIN
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8,+fullfp16 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MAIN
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fullfp16 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8,+fullfp16 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP64
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVE
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-MVEFP
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
; DISABLED-NOT: call i32 @llvm.loop.decrement
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -o - | FileCheck %s
@g = common local_unnamed_addr global ptr null, align 4
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s
@g = common local_unnamed_addr global ptr null, align 4
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8 -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+soft-float -hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+fp-armv8 -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-FP
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+soft-float -passes=hardware-loops %s -S -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-SOFT
; CHECK-LABEL: test_fptosi
; CHECK-SOFT-NOT: call i32 @llvm.start.loop.iterations
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | FileCheck %s
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | FileCheck %s
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops -disable-arm-loloops=true %s -S -o - | FileCheck %s --check-prefix=DISABLED
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi %s -o - | FileCheck %s --check-prefix=CHECK-LLC
; DISABLED-NOT: llvm.{{.*}}.loop.iterations
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops %s -S -o - | \
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops %s -S -o - | \
; RUN: FileCheck %s
; RUN: opt -mtriple=thumbv8.1m.main -passes=loop-unroll -unroll-remainder=false -S < %s | \
; RUN: llc -mtriple=thumbv8.1m.main | FileCheck %s --check-prefix=CHECK-UNROLL
-; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -hardware-loops \
+; RUN: opt -mtriple=thumbv8.1m.main-none-none-eabi -passes=hardware-loops \
; RUN: -pass-remarks-analysis=hardware-loops %s -S -o - 2>&1 | \
; RUN: FileCheck %s --check-prefix=CHECK-REMARKS
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
-; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: nested hardware-loops not supported
+; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: loop is not a candidate
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
; CHECK-REMARKS: remark: <unknown>:0:0: hardware-loop not created: it's not profitable to create a hardware-loop
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EXIT
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -force-hardware-loop-phi=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=false -S %s -o - | FileCheck %s --check-prefix=NO-GUARD
+; RUN: opt -passes='hardware-loops<force-hardware-loops;force-hardware-loop-guard;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-EXIT
+; RUN: opt -passes='hardware-loops<force-hardware-loops;force-hardware-loop-guard;force-hardware-loop-phi;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=NO-GUARD
; NO-GUARD-NOT: @llvm.test.set.loop.iterations
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s --check-prefix=CHECK-DEC
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s --check-prefix=CHECK-PHI
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s --check-prefix=CHECK-NESTED
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK-GUARD
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK-PHIGUARD
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK-DEC
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi>' -S %s -o - | FileCheck %s --check-prefix=CHECK-PHI
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-nested-hardware-loop>' -S %s -o - | FileCheck %s --check-prefix=CHECK-NESTED
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK-GUARD
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi;force-hardware-loop-guard>' -S %s -o - | FileCheck %s --check-prefix=CHECK-PHIGUARD
define void @while_lt(i32 %i, i32 %N, ptr nocapture %A) {
; CHECK-DEC-LABEL: @while_lt(
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S | FileCheck %s
+; RUN: opt < %s -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S | FileCheck %s
define arm_aapcs_vfpcc void @test(ptr noalias nocapture readonly %off, ptr noalias nocapture %data, ptr noalias nocapture %dst, i32 %n) {
; CHECK-LABEL: @test(
; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i32(i32 1)
; CHECK-NEXT: br i1 [[TMP0]], label [[FOR_BODY4_US]], label [[FOR_BODY15_US_PREHEADER:%.*]]
; CHECK: for.body15.us.preheader:
+; CHECK-NEXT: [[J10_055_US_PH:%.*]] = phi i32 [ 0, [[FOR_BODY4_US]] ]
; CHECK-NEXT: call void @llvm.set.loop.iterations.i32(i32 [[N]])
; CHECK-NEXT: br label [[FOR_BODY15_US:%.*]]
; CHECK: for.body15.us:
-; CHECK-NEXT: [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ 0, [[FOR_BODY15_US_PREHEADER]] ]
+; CHECK-NEXT: [[J10_055_US:%.*]] = phi i32 [ [[INC26_US:%.*]], [[FOR_BODY15_US]] ], [ [[J10_055_US_PH]], [[FOR_BODY15_US_PREHEADER]] ]
; CHECK-NEXT: [[ARRAYIDX16_US:%.*]] = getelementptr inbounds i16, ptr [[OFF]], i32 [[J10_055_US]]
; CHECK-NEXT: [[L0:%.*]] = load i16, ptr [[ARRAYIDX16_US]], align 2
; CHECK-NEXT: [[ARRAYIDX18_US:%.*]] = getelementptr inbounds i16, ptr [[DATA]], i32 [[J10_055_US]]
-; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
-; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -hardware-loops -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
-; RUN: opt -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -hardware-loops -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32>' -force-hardware-loop-guard=true -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-ALLOW
+; RUN: opt -passes='hardware-loops<force-hardware-loops;hardware-loop-decrement=1;hardware-loop-counter-bitwidth=32;force-hardware-loop-phi>' -S %s -o - | FileCheck %s --check-prefix=CHECK --check-prefix=CHECK-LATCH
; CHECK-LABEL: not_rotated
; CHECK-LATCH-NOT: call void @llvm.set.loop.iterations
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s
-; RUN: opt -hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s
+; RUN: opt -passes=hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -S %s -o - | FileCheck %s
+; RUN: opt -passes=hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-hardware-loop-phi=true -S %s -o - | FileCheck %s
+; RUN: opt -passes=hardware-loops -force-hardware-loops=true -hardware-loop-decrement=1 -hardware-loop-counter-bitwidth=32 -force-nested-hardware-loop=true -S %s -o - | FileCheck %s
; CHECK-LABEL: float_counter
; CHECK-NOT: set.loop.iterations
initializeScalarizeMaskedMemIntrinLegacyPassPass(*Registry);
initializeExpandReductionsPass(*Registry);
initializeExpandVectorPredicationPass(*Registry);
- initializeHardwareLoopsPass(*Registry);
+ initializeHardwareLoopsLegacyPass(*Registry);
initializeTransformUtils(*Registry);
initializeReplaceWithVeclibLegacyPass(*Registry);
initializeTLSVariableHoistLegacyPassPass(*Registry);
"verify-safepoint-ir",
"atomic-expand",
"expandvp",
- "hardware-loops",
"mve-tail-predication",
"interleaved-access",
"global-merge",
initializeExpandVectorPredicationPass(Registry);
initializeWasmEHPreparePass(Registry);
initializeWriteBitcodePassPass(Registry);
- initializeHardwareLoopsPass(Registry);
initializeReplaceWithVeclibLegacyPass(Registry);
initializeJMCInstrumenterPass(Registry);
initializeExpandVectorPredicationPass(Registry);
initializeWasmEHPreparePass(Registry);
initializeWriteBitcodePassPass(Registry);
- initializeHardwareLoopsPass(Registry);
+ initializeHardwareLoopsLegacyPass(Registry);
initializeTypePromotionLegacyPass(Registry);
initializeReplaceWithVeclibLegacyPass(Registry);
initializeJMCInstrumenterPass(Registry);