SmallVector<MachineOperand, 4> BrCond;
MachineInstr *LoopInductionVar = nullptr;
MachineInstr *LoopCompare = nullptr;
- std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo> LoopPipelinerInfo =
- nullptr;
};
LoopInfo LI;
LiveIntervals &LIS;
const RegisterClassInfo &RegClassInfo;
unsigned II_setByPragma = 0;
- TargetInstrInfo::PipelinerLoopInfo *LoopPipelinerInfo = nullptr;
/// A toplogical ordering of the SUnits, which is needed for changing
/// dependences and iterating over the SUnits.
public:
SwingSchedulerDAG(MachinePipeliner &P, MachineLoop &L, LiveIntervals &lis,
- const RegisterClassInfo &rci, unsigned II,
- TargetInstrInfo::PipelinerLoopInfo *PLI)
+ const RegisterClassInfo &rci, unsigned II)
: ScheduleDAGInstrs(*P.MF, P.MLI, false), Pass(P), Loop(L), LIS(lis),
- RegClassInfo(rci), II_setByPragma(II), LoopPipelinerInfo(PLI),
- Topo(SUnits, &ExitSU) {
+ RegClassInfo(rci), II_setByPragma(II), Topo(SUnits, &ExitSU) {
P.MF->getSubtarget().getSMSMutations(Mutations);
if (SwpEnableCopyToPhi)
Mutations.push_back(std::make_unique<CopyToPhiMutation>());
return ScheduledInstrs[cycle];
}
- SmallSet<SUnit *, 8>
- computeUnpipelineableNodes(SwingSchedulerDAG *SSD,
- TargetInstrInfo::PipelinerLoopInfo *PLI);
-
- bool
- normalizeNonPipelinedInstructions(SwingSchedulerDAG *SSD,
- TargetInstrInfo::PipelinerLoopInfo *PLI);
bool isValidSchedule(SwingSchedulerDAG *SSD);
void finalizeSchedule(SwingSchedulerDAG *SSD);
void orderDependence(SwingSchedulerDAG *SSD, SUnit *SU,
void generateProlog(unsigned LastStage, MachineBasicBlock *KernelBB,
ValueMapTy *VRMap, MBBVectorTy &PrologBBs);
void generateEpilog(unsigned LastStage, MachineBasicBlock *KernelBB,
- MachineBasicBlock *OrigBB, ValueMapTy *VRMap,
- MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs);
+ ValueMapTy *VRMap, MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs);
void generateExistingPhis(MachineBasicBlock *NewBB, MachineBasicBlock *BB1,
MachineBasicBlock *BB2, MachineBasicBlock *KernelBB,
ValueMapTy *VRMap, InstrMapTy &InstrMap,
<< "Failed to pipeline loop";
});
- LI.LoopPipelinerInfo.reset();
return Changed;
}
Changed = swingModuloScheduler(L);
- LI.LoopPipelinerInfo.reset();
return Changed;
}
LI.LoopInductionVar = nullptr;
LI.LoopCompare = nullptr;
- LI.LoopPipelinerInfo = TII->analyzeLoopForPipelining(L.getTopBlock());
- if (!LI.LoopPipelinerInfo) {
+ if (!TII->analyzeLoopForPipelining(L.getTopBlock())) {
LLVM_DEBUG(dbgs() << "Unable to analyzeLoop, can NOT pipeline Loop\n");
NumFailLoop++;
ORE->emit([&]() {
assert(L.getBlocks().size() == 1 && "SMS works on single blocks only.");
SwingSchedulerDAG SMS(*this, L, getAnalysis<LiveIntervals>(), RegClassInfo,
- II_setByPragma, LI.LoopPipelinerInfo.get());
+ II_setByPragma);
MachineBasicBlock *MBB = L.getHeader();
// The kernel should not include any terminator instructions. These
/// We ignore the back-edge recurrence in order to avoid unbounded recursion
/// in the calculation of the ASAP, ALAP, etc functions.
static bool ignoreDependence(const SDep &D, bool isPred) {
- if (D.isArtificial() || D.getSUnit()->isBoundaryNode())
+ if (D.isArtificial())
return true;
return D.getKind() == SDep::Anti && isPred;
}
SUnit *SU = &SUnits[I];
for (const SDep &S : SU->Succs) {
SUnit *succ = S.getSUnit();
- if (succ->isBoundaryNode())
- continue;
if (S.getLatency() == 0)
zeroLatencyHeight =
std::max(zeroLatencyHeight, getZeroLatencyHeight(succ) + 1);
NodesAdded.insert(SU);
for (auto &SI : SU->Succs) {
SUnit *Successor = SI.getSUnit();
- if (!SI.isArtificial() && !Successor->isBoundaryNode() &&
- NodesAdded.count(Successor) == 0)
+ if (!SI.isArtificial() && NodesAdded.count(Successor) == 0)
addConnectedNodes(Successor, NewSet, NodesAdded);
}
for (auto &PI : SU->Preds) {
});
} while (++NI != NE && scheduleFound);
- // If a schedule is found, ensure non-pipelined instructions are in stage 0
- if (scheduleFound)
- scheduleFound =
- Schedule.normalizeNonPipelinedInstructions(this, LoopPipelinerInfo);
-
// If a schedule is found, check if it is a valid schedule too.
if (scheduleFound)
scheduleFound = Schedule.isValidSchedule(this);
bool SwingSchedulerDAG::isLoopCarriedDep(SUnit *Source, const SDep &Dep,
bool isSucc) {
if ((Dep.getKind() != SDep::Order && Dep.getKind() != SDep::Output) ||
- Dep.isArtificial() || Dep.getSUnit()->isBoundaryNode())
+ Dep.isArtificial())
return false;
if (!SwpPruneLoopCarried)
while (!Worklist.empty()) {
const SDep &Cur = Worklist.pop_back_val();
SUnit *SuccSU = Cur.getSUnit();
- if (Visited.count(SuccSU) || SuccSU->isBoundaryNode())
+ if (Visited.count(SuccSU))
continue;
std::map<SUnit *, int>::const_iterator it = InstrToCycle.find(SuccSU);
if (it == InstrToCycle.end())
return false;
}
-/// Determine transitive dependences of unpipelineable instructions
-SmallSet<SUnit *, 8> SMSchedule::computeUnpipelineableNodes(
- SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
- SmallSet<SUnit *, 8> DoNotPipeline;
- SmallVector<SUnit *, 8> Worklist;
-
- for (auto &SU : SSD->SUnits)
- if (SU.isInstr() && PLI->shouldIgnoreForPipelining(SU.getInstr()))
- Worklist.push_back(&SU);
-
- while (!Worklist.empty()) {
- auto SU = Worklist.pop_back_val();
- if (DoNotPipeline.count(SU))
- continue;
- LLVM_DEBUG(dbgs() << "Do not pipeline SU(" << SU->NodeNum << ")\n");
- DoNotPipeline.insert(SU);
- for (auto &Dep : SU->Preds)
- Worklist.push_back(Dep.getSUnit());
- if (SU->getInstr()->isPHI())
- for (auto &Dep : SU->Succs)
- if (Dep.getKind() == SDep::Anti)
- Worklist.push_back(Dep.getSUnit());
- }
- return DoNotPipeline;
-}
-
-// Determine all instructions upon which any unpipelineable instruction depends
-// and ensure that they are in stage 0. If unable to do so, return false.
-bool SMSchedule::normalizeNonPipelinedInstructions(
- SwingSchedulerDAG *SSD, TargetInstrInfo::PipelinerLoopInfo *PLI) {
- SmallSet<SUnit *, 8> DNP = computeUnpipelineableNodes(SSD, PLI);
-
- int NewLastCycle = INT_MIN;
- for (SUnit &SU : SSD->SUnits) {
- if (!SU.isInstr())
- continue;
- if (!DNP.contains(&SU) || stageScheduled(&SU) == 0) {
- NewLastCycle = std::max(NewLastCycle, InstrToCycle[&SU]);
- continue;
- }
-
- // Put the non-pipelined instruction as early as possible in the schedule
- int NewCycle = getFirstCycle();
- for (auto &Dep : SU.Preds)
- NewCycle = std::max(InstrToCycle[Dep.getSUnit()], NewCycle);
-
- int OldCycle = InstrToCycle[&SU];
- if (OldCycle != NewCycle) {
- InstrToCycle[&SU] = NewCycle;
- auto &OldS = getInstructions(OldCycle);
- OldS.erase(std::remove(OldS.begin(), OldS.end(), &SU), OldS.end());
- getInstructions(NewCycle).emplace_back(&SU);
- LLVM_DEBUG(dbgs() << "SU(" << SU.NodeNum
- << ") is not pipelined; moving from cycle " << OldCycle
- << " to " << NewCycle << " Instr:" << *SU.getInstr());
- }
- NewLastCycle = std::max(NewLastCycle, NewCycle);
- }
- LastCycle = NewLastCycle;
- return true;
-}
-
// Check if the generated schedule is valid. This function checks if
// an instruction that uses a physical register is scheduled in a
// different stage than the definition. The pipeliner does not handle
// physical register values that may cross a basic block boundary.
-// Furthermore, if a physical def/use pair is assigned to the same
-// cycle, orderDependence does not guarantee def/use ordering, so that
-// case should be considered invalid. (The test checks for both
-// earlier and same-cycle use to be more robust.)
bool SMSchedule::isValidSchedule(SwingSchedulerDAG *SSD) {
for (SUnit &SU : SSD->SUnits) {
if (!SU.hasPhysRegDefs)
continue;
int StageDef = stageScheduled(&SU);
- int CycleDef = InstrToCycle[&SU];
assert(StageDef != -1 && "Instruction should have been scheduled.");
for (auto &SI : SU.Succs)
- if (SI.isAssignedRegDep() && !SI.getSUnit()->isBoundaryNode())
- if (Register::isPhysicalRegister(SI.getReg())) {
+ if (SI.isAssignedRegDep())
+ if (Register::isPhysicalRegister(SI.getReg()))
if (stageScheduled(SI.getSUnit()) != StageDef)
return false;
- if (InstrToCycle[SI.getSUnit()] <= CycleDef)
- return false;
- }
}
return true;
}
SmallVector<MachineBasicBlock *, 4> EpilogBBs;
// Generate the epilog instructions to complete the pipeline.
- generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs);
+ generateEpilog(MaxStageCount, KernelBB, VRMap, EpilogBBs, PrologBBs);
// We need this step because the register allocation doesn't handle some
// situations well, so we insert copies to help out.
/// Generate the pipeline epilog code. The epilog code finishes the iterations
/// that were started in either the prolog or the kernel. We create a basic
/// block for each stage that needs to complete.
-void ModuloScheduleExpander::generateEpilog(
- unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
- ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) {
+void ModuloScheduleExpander::generateEpilog(unsigned LastStage,
+ MachineBasicBlock *KernelBB,
+ ValueMapTy *VRMap,
+ MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs) {
// We need to change the branch from the kernel to the first epilog block, so
// this call to analyze branch uses the kernel rather than the original BB.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
// Create a branch to the new epilog from the kernel.
// Remove the original branch and add a new branch to the epilog.
TII->removeBranch(*KernelBB);
- assert((OrigBB == TBB || OrigBB == FBB) &&
- "Unable to determine looping branch direction");
- if (OrigBB != TBB)
- TII->insertBranch(*KernelBB, EpilogStart, KernelBB, Cond, DebugLoc());
- else
- TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
+ TII->insertBranch(*KernelBB, KernelBB, EpilogStart, Cond, DebugLoc());
// Add a branch to the loop exit.
if (EpilogBBs.size() > 0) {
MachineBasicBlock *LastEpilogBB = EpilogBBs.back();
def FeatureUseMISched: SubtargetFeature<"use-misched", "UseMISched", "true",
"Use the MachineScheduler">;
-// Use the MachinePipeliner for instruction scheduling for the subtarget.
-def FeatureUseMIPipeliner: SubtargetFeature<"use-mipipeliner", "UseMIPipeliner", "true",
- "Use the MachinePipeliner">;
-
// False if scheduling should happen again after register allocation.
def FeatureNoPostRASched : SubtargetFeature<"disable-postra-scheduler",
"DisablePostRAScheduler", "true",
def : ProcessorModel<"cortex-m7", CortexM7Model, [ARMv7em,
ProcM7,
FeatureFPARMv8_D16,
- FeatureUseMIPipeliner,
FeatureUseMISched]>;
def : ProcNoItin<"cortex-m23", [ARMv8mBaseline,
return (MF.getSubtarget<ARMSubtarget>().hardenSlsBlr()) ? ARM::BLX_pred_noip
: ARM::BLX_pred;
}
-
-namespace {
-class ARMPipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
- MachineInstr *Loop, *EndLoop, *LoopCount;
- MachineFunction *MF;
- const TargetInstrInfo *TII;
-
- // Meanings of the various stuff with loop types:
- // t2Bcc:
- // Loop = null -- there is no setup.
- // EndLoop = branch at end of original BB that will become a kernel
- // LoopCount = CC setter live into branch
-public:
- ARMPipelinerLoopInfo(MachineInstr *Loop, MachineInstr *EndLoop,
- MachineInstr *LoopCount)
- : Loop(Loop), EndLoop(EndLoop), LoopCount(LoopCount),
- MF(EndLoop->getParent()->getParent()),
- TII(MF->getSubtarget().getInstrInfo()) {}
-
- bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
- // Only ignore the terminator.
- return MI == EndLoop || MI == LoopCount;
- }
-
- Optional<bool> createTripCountGreaterCondition(
- int TC, MachineBasicBlock &MBB,
- SmallVectorImpl<MachineOperand> &Cond) override {
-
- if (isCondBranchOpcode(EndLoop->getOpcode())) {
- Cond.push_back(EndLoop->getOperand(1));
- Cond.push_back(EndLoop->getOperand(2));
- if (EndLoop->getOperand(0).getMBB() == EndLoop->getParent()) {
- TII->reverseBranchCondition(Cond);
- }
- return {};
- } else
- llvm_unreachable("Unknown EndLoop");
- }
-
- void setPreheader(MachineBasicBlock *NewPreheader) override {}
-
- void adjustTripCount(int TripCountAdjust) override {}
-
- void disposed() override {}
-};
-} // namespace
-
-std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
-ARMBaseInstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
- MachineBasicBlock::iterator I = LoopBB->getFirstTerminator();
- MachineBasicBlock *Preheader = *LoopBB->pred_begin();
- if (Preheader == LoopBB)
- Preheader = *std::next(LoopBB->pred_begin());
-
- if (I != LoopBB->end() && I->getOpcode() == ARM::t2Bcc) {
- // If the branch is a Bcc, then the CPSR should be set somewhere within the
- // block. We need to determine the reaching definition of CPSR so that
- // it can be marked as non-pipelineable, allowing the pipeliner to force
- // it into stage 0 or give up if it cannot or will not do so.
- MachineInstr *CCSetter = nullptr;
- for (auto &L : LoopBB->instrs()) {
- if (L.isCall())
- return nullptr;
- if (isCPSRDefined(L))
- CCSetter = &L;
- }
- if (CCSetter)
- return std::make_unique<ARMPipelinerLoopInfo>(nullptr, &*I, CCSetter);
- else
- return nullptr; // Unable to find the CC setter, so unable to guarantee
- // that pipeline will work
- }
-
- return nullptr;
-}
MI->getOpcode() == ARM::t2WhileLoopStartTP;
}
- /// Analyze loop L, which must be a single-basic-block loop, and if the
- /// conditions can be understood enough produce a PipelinerLoopInfo object.
- std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
- analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override;
-
private:
/// Returns an unused general-purpose register which can be used for
/// constructing an outlined call if one exists. Returns 0 otherwise.
return hasMVEIntegerOps();
}
-bool ARMSubtarget::enableMachinePipeliner() const {
- // Enable the MachinePipeliner before register allocation for subtargets
- // with the use-mipipeliner feature.
- return getSchedModel().hasInstrSchedModel() && useMachinePipeliner();
-}
-
-bool ARMSubtarget::useDFAforSMS() const { return false; }
-
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostRAScheduler() const {
if (enableMachineScheduler())
bool isRWPI() const;
bool useMachineScheduler() const { return UseMISched; }
- bool useMachinePipeliner() const { return UseMIPipeliner; }
bool hasMinSize() const { return OptMinSize; }
bool isThumb1Only() const { return isThumb() && !hasThumb2(); }
bool isThumb2() const { return isThumb() && hasThumb2(); }
/// Returns true if machine scheduler should be enabled.
bool enableMachineScheduler() const override;
- /// Returns true if machine pipeliner should be enabled.
- bool enableMachinePipeliner() const override;
- bool useDFAforSMS() const override;
-
/// True for some subtargets at > -O0.
bool enablePostRAScheduler() const override;
void ARMPassConfig::addPreRegAlloc() {
if (getOptLevel() != CodeGenOpt::None) {
- if (getOptLevel() == CodeGenOpt::Aggressive)
- addPass(&MachinePipelinerID);
-
addPass(createMVETPAndVPTOptimisationsPass());
addPass(createMLxExpansionPass());
; CHECK-NEXT: Peephole Optimizations
; CHECK-NEXT: Remove dead machine instructions
; CHECK-NEXT: MachineDominator Tree Construction
-; CHECK-NEXT: Slot index numbering
-; CHECK-NEXT: Live Interval Analysis
-; CHECK-NEXT: Lazy Machine Block Frequency Analysis
-; CHECK-NEXT: Machine Optimization Remark Emitter
-; CHECK-NEXT: Modulo Software Pipelining
-; CHECK-NEXT: MachineDominator Tree Construction
-; CHECK-NEXT: Machine Natural Loop Construction
; CHECK-NEXT: MVE TailPred and VPT Optimisation Pass
; CHECK-NEXT: ARM MLA / MLS expansion pass
; CHECK-NEXT: MachineDominator Tree Construction
+++ /dev/null
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK
-
---- |
- define hidden float @dot(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 {
- entry:
- %cmp8 = icmp sgt i32 %sz, 0
- br i1 %cmp8, label %for.body.preheader, label %for.end
-
- for.body.preheader: ; preds = %entry
- %scevgep = getelementptr float, float* %b, i32 -1
- %scevgep4 = getelementptr float, float* %a, i32 -1
- br label %for.body
-
- for.body: ; preds = %for.body.preheader, %for.body
- %lsr.iv5 = phi float* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
- %lsr.iv1 = phi float* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
- %lsr.iv = phi i32 [ %sz, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
- %sum.010 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
- %scevgep7 = getelementptr float, float* %lsr.iv5, i32 1
- %0 = load float, float* %scevgep7, align 4
- %scevgep3 = getelementptr float, float* %lsr.iv1, i32 1
- %1 = load float, float* %scevgep3, align 4
- %mul = fmul fast float %1, %0
- %add = fadd fast float %mul, %sum.010
- %lsr.iv.next = add i32 %lsr.iv, -1
- %scevgep2 = getelementptr float, float* %lsr.iv1, i32 1
- %scevgep6 = getelementptr float, float* %lsr.iv5, i32 1
- %exitcond.not = icmp ne i32 %lsr.iv.next, 0
- br i1 %exitcond.not, label %for.body, label %for.end, !llvm.loop !0
-
- for.end: ; preds = %for.body, %entry
- %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
- ret float %sum.0.lcssa
- }
-
- !0 = distinct !{!0, !1, !2, !3}
- !1 = !{!"llvm.loop.mustprogress"}
- !2 = !{!"llvm.loop.unroll.disable"}
- !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3}
-
-...
----
-name: dot
-alignment: 2
-tracksRegLiveness: true
-constants:
- - id: 0
- value: 'float 0.000000e+00'
- alignment: 4
- isTargetSpecific: false
-body: |
- ; CHECK-LABEL: name: dot
- ; CHECK: bb.0.entry:
- ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000)
- ; CHECK-NEXT: liveins: $r0, $r1, $r2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnopc = COPY $r2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnopc = COPY $r1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnopc = COPY $r0
- ; CHECK-NEXT: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
- ; CHECK-NEXT: t2Bcc %bb.2, 10 /* CC::ge */, $cpsr
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.4(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[VLDRS:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- ; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2.for.body.preheader:
- ; CHECK-NEXT: successors: %bb.5(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[COPY1]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprnopc = COPY [[t2SUBri]]
- ; CHECK-NEXT: [[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[COPY2]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS1:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gprnopc = COPY [[t2SUBri1]]
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.5.for.body:
- ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.9(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY4]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
- ; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gprnopc = COPY [[t2ADDri]]
- ; CHECK-NEXT: t2Bcc %bb.9, 0 /* CC::eq */, $cpsr
- ; CHECK-NEXT: t2B %bb.6, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.6.for.body:
- ; CHECK-NEXT: successors: %bb.7(0x80000000), %bb.8(0x00000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[COPY7]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
- ; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
- ; CHECK-NEXT: t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
- ; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.7.for.body:
- ; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.7(0x7c000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY10]], %bb.6, %49, %bb.7
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY9]], %bb.6, %50, %bb.7
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, %51, %bb.7
- ; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
- ; CHECK-NEXT: [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
- ; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
- ; CHECK-NEXT: [[t2ADDri4:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[t2ADDri5:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
- ; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.8:
- ; CHECK-NEXT: successors: %bb.9(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
- ; CHECK-NEXT: [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
- ; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.9:
- ; CHECK-NEXT: successors: %bb.4(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
- ; CHECK-NEXT: [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4.for.end:
- ; CHECK-NEXT: [[PHI11:%[0-9]+]]:spr = PHI [[VLDRS]], %bb.1, [[VADDS2]], %bb.9
- ; CHECK-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS [[PHI11]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: $r0 = COPY [[VMOVRS]]
- ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
- bb.0.entry:
- successors: %bb.1(0x50000000), %bb.4(0x30000000)
- liveins: $r0, $r1, $r2
-
- %13:gprnopc = COPY $r2
- %12:gprnopc = COPY $r1
- %11:gprnopc = COPY $r0
- t2CMPri %13, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
- t2Bcc %bb.1, 10 /* CC::ge */, $cpsr
-
- bb.4:
- successors: %bb.3(0x80000000)
-
- %14:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- t2B %bb.3, 14 /* CC::al */, $noreg
-
- bb.1.for.body.preheader:
- successors: %bb.2(0x80000000)
-
- %16:rgpr = t2SUBri %12, 4, 14 /* CC::al */, $noreg, $noreg
- %0:gpr = COPY %16
- %17:rgpr = t2SUBri %11, 4, 14 /* CC::al */, $noreg, $noreg
- %15:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- %1:gpr = COPY %17
-
- bb.2.for.body:
- successors: %bb.3(0x04000000), %bb.2(0x7c000000)
-
- %2:gprnopc = PHI %1, %bb.1, %9, %bb.2
- %3:gprnopc = PHI %0, %bb.1, %8, %bb.2
- %4:gprnopc = PHI %13, %bb.1, %7, %bb.2
- %5:spr = PHI %15, %bb.1, %6, %bb.2
- %18:rgpr = t2ADDri %2, 4, 14 /* CC::al */, $noreg, $noreg
- %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
- %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
- %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
- %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
- %7:gpr = COPY %23
- %8:gpr = COPY %20
- %9:gpr = COPY %18
- t2Bcc %bb.3, 0 /* CC::eq */, $cpsr
- t2B %bb.2, 14 /* CC::al */, $noreg
-
- bb.3.for.end:
- %10:spr = PHI %14, %bb.4, %6, %bb.2
- %24:gpr = VMOVRS %10, 14 /* CC::al */, $noreg
- $r0 = COPY %24
- tBX_RET 14 /* CC::al */, $noreg, implicit $r0
-
-...
+++ /dev/null
-# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
-# RUN: llc -mtriple=thumbv7m-none-eabi -mcpu=cortex-m7 -run-pass=pipeliner -o - %s | FileCheck %s --check-prefix=CHECK
-
---- |
- define hidden float @dot(float* nocapture noundef readonly %a, float* nocapture noundef readonly %b, i32 noundef %sz) local_unnamed_addr #0 {
- entry:
- %cmp8 = icmp sgt i32 %sz, 0
- br i1 %cmp8, label %for.body.preheader, label %for.end
-
- for.body.preheader: ; preds = %entry
- %scevgep = getelementptr float, float* %b, i32 -1
- %scevgep4 = getelementptr float, float* %a, i32 -1
- br label %for.body
-
- for.body: ; preds = %for.body.preheader, %for.body
- %lsr.iv5 = phi float* [ %scevgep4, %for.body.preheader ], [ %scevgep6, %for.body ]
- %lsr.iv1 = phi float* [ %scevgep, %for.body.preheader ], [ %scevgep2, %for.body ]
- %lsr.iv = phi i32 [ %sz, %for.body.preheader ], [ %lsr.iv.next, %for.body ]
- %sum.010 = phi float [ %add, %for.body ], [ 0.000000e+00, %for.body.preheader ]
- %scevgep7 = getelementptr float, float* %lsr.iv5, i32 1
- %0 = load float, float* %scevgep7, align 4
- %scevgep3 = getelementptr float, float* %lsr.iv1, i32 1
- %1 = load float, float* %scevgep3, align 4
- %mul = fmul fast float %1, %0
- %add = fadd fast float %mul, %sum.010
- %lsr.iv.next = add i32 %lsr.iv, -1
- %scevgep2 = getelementptr float, float* %lsr.iv1, i32 1
- %scevgep6 = getelementptr float, float* %lsr.iv5, i32 1
- %exitcond.not = icmp eq i32 %lsr.iv.next, 0
- br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0
-
- for.end: ; preds = %for.body, %entry
- %sum.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
- ret float %sum.0.lcssa
- }
-
- !0 = distinct !{!0, !1, !2, !3}
- !1 = !{!"llvm.loop.mustprogress"}
- !2 = !{!"llvm.loop.unroll.disable"}
- !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3}
-
-...
----
-name: dot
-alignment: 2
-tracksRegLiveness: true
-constants:
- - id: 0
- value: 'float 0.000000e+00'
- alignment: 4
- isTargetSpecific: false
-body: |
- ; CHECK-LABEL: name: dot
- ; CHECK: bb.0.entry:
- ; CHECK-NEXT: successors: %bb.2(0x50000000), %bb.1(0x30000000)
- ; CHECK-NEXT: liveins: $r0, $r1, $r2
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[COPY:%[0-9]+]]:gprnopc = COPY $r2
- ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gprnopc = COPY $r1
- ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gprnopc = COPY $r0
- ; CHECK-NEXT: t2CMPri [[COPY]], 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
- ; CHECK-NEXT: t2Bcc %bb.2, 10 /* CC::ge */, $cpsr
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.1:
- ; CHECK-NEXT: successors: %bb.4(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[VLDRS:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- ; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.2.for.body.preheader:
- ; CHECK-NEXT: successors: %bb.5(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[t2SUBri:%[0-9]+]]:rgpr = t2SUBri [[COPY1]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[COPY3:%[0-9]+]]:gprnopc = COPY [[t2SUBri]]
- ; CHECK-NEXT: [[t2SUBri1:%[0-9]+]]:rgpr = t2SUBri [[COPY2]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS1:%[0-9]+]]:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- ; CHECK-NEXT: [[COPY4:%[0-9]+]]:gprnopc = COPY [[t2SUBri1]]
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.5.for.body:
- ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.9(0x40000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[t2ADDri:%[0-9]+]]:rgpr = t2ADDri [[COPY4]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS2:%[0-9]+]]:spr = VLDRS [[COPY4]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
- ; CHECK-NEXT: [[t2ADDri1:%[0-9]+]]:rgpr = t2ADDri [[COPY3]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS3:%[0-9]+]]:spr = VLDRS [[COPY3]], 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- ; CHECK-NEXT: [[VMULS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS3]], [[VLDRS2]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[t2SUBri2:%[0-9]+]]:rgpr = t2SUBri [[COPY]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[COPY5:%[0-9]+]]:gprnopc = COPY [[t2SUBri2]]
- ; CHECK-NEXT: [[COPY6:%[0-9]+]]:gprnopc = COPY [[t2ADDri1]]
- ; CHECK-NEXT: [[COPY7:%[0-9]+]]:gprnopc = COPY [[t2ADDri]]
- ; CHECK-NEXT: t2Bcc %bb.9, 0 /* CC::eq */, $cpsr
- ; CHECK-NEXT: t2B %bb.6, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.6.for.body:
- ; CHECK-NEXT: successors: %bb.7(0x80000000), %bb.8(0x00000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[t2ADDri2:%[0-9]+]]:rgpr = t2ADDri [[COPY7]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS4:%[0-9]+]]:spr = VLDRS [[COPY7]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
- ; CHECK-NEXT: [[t2ADDri3:%[0-9]+]]:rgpr = t2ADDri [[COPY6]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[VLDRS5:%[0-9]+]]:spr = VLDRS [[COPY6]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VMULS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS5]], [[VLDRS4]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[t2SUBri3:%[0-9]+]]:rgpr = t2SUBri [[COPY5]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[COPY8:%[0-9]+]]:gpr = COPY [[t2SUBri3]]
- ; CHECK-NEXT: [[COPY9:%[0-9]+]]:gpr = COPY [[t2ADDri3]]
- ; CHECK-NEXT: [[COPY10:%[0-9]+]]:gpr = COPY [[t2ADDri2]]
- ; CHECK-NEXT: t2Bcc %bb.8, 0 /* CC::eq */, $cpsr
- ; CHECK-NEXT: t2B %bb.7, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.7.for.body:
- ; CHECK-NEXT: successors: %bb.8(0x04000000), %bb.7(0x7c000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI:%[0-9]+]]:gprnopc = PHI [[COPY10]], %bb.6, %49, %bb.7
- ; CHECK-NEXT: [[PHI1:%[0-9]+]]:gprnopc = PHI [[COPY9]], %bb.6, %50, %bb.7
- ; CHECK-NEXT: [[PHI2:%[0-9]+]]:gprnopc = PHI [[COPY8]], %bb.6, %51, %bb.7
- ; CHECK-NEXT: [[PHI3:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, %43, %bb.7
- ; CHECK-NEXT: [[PHI4:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, %52, %bb.7
- ; CHECK-NEXT: [[PHI5:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI5]], [[PHI3]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: [[t2SUBri4:%[0-9]+]]:rgpr = t2SUBri [[PHI2]], 1, 14 /* CC::al */, $noreg, def $cpsr
- ; CHECK-NEXT: [[VLDRS6:%[0-9]+]]:spr = VLDRS [[PHI1]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep3, align 4)
- ; CHECK-NEXT: [[VLDRS7:%[0-9]+]]:spr = VLDRS [[PHI]], 1, 14 /* CC::al */, $noreg :: (load unknown-size from %ir.scevgep7, align 4)
- ; CHECK-NEXT: [[t2ADDri4:%[0-9]+]]:rgpr = t2ADDri [[PHI]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[t2ADDri5:%[0-9]+]]:rgpr = t2ADDri [[PHI1]], 4, 14 /* CC::al */, $noreg, $noreg
- ; CHECK-NEXT: [[COPY11:%[0-9]+]]:gpr = COPY [[t2ADDri4]]
- ; CHECK-NEXT: [[COPY12:%[0-9]+]]:gpr = COPY [[t2ADDri5]]
- ; CHECK-NEXT: [[COPY13:%[0-9]+]]:gpr = COPY [[t2SUBri4]]
- ; CHECK-NEXT: [[VMULS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VMULS [[VLDRS6]], [[VLDRS7]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: t2Bcc %bb.7, 1 /* CC::ne */, $cpsr
- ; CHECK-NEXT: t2B %bb.8, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.8:
- ; CHECK-NEXT: successors: %bb.9(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI6:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.6, [[VADDS]], %bb.7
- ; CHECK-NEXT: [[PHI7:%[0-9]+]]:spr = PHI [[VMULS1]], %bb.6, [[VMULS2]], %bb.7
- ; CHECK-NEXT: [[PHI8:%[0-9]+]]:spr = PHI [[VMULS]], %bb.6, [[PHI4]], %bb.7
- ; CHECK-NEXT: [[VADDS1:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI8]], [[PHI6]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.9:
- ; CHECK-NEXT: successors: %bb.4(0x80000000)
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: [[PHI9:%[0-9]+]]:spr = PHI [[VLDRS1]], %bb.5, [[VADDS1]], %bb.8
- ; CHECK-NEXT: [[PHI10:%[0-9]+]]:spr = PHI [[VMULS]], %bb.5, [[PHI7]], %bb.8
- ; CHECK-NEXT: [[VADDS2:%[0-9]+]]:spr = nnan ninf nsz arcp contract afn reassoc VADDS [[PHI10]], [[PHI9]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: t2B %bb.4, 14 /* CC::al */, $noreg
- ; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: bb.4.for.end:
- ; CHECK-NEXT: [[PHI11:%[0-9]+]]:spr = PHI [[VLDRS]], %bb.1, [[VADDS2]], %bb.9
- ; CHECK-NEXT: [[VMOVRS:%[0-9]+]]:gpr = VMOVRS [[PHI11]], 14 /* CC::al */, $noreg
- ; CHECK-NEXT: $r0 = COPY [[VMOVRS]]
- ; CHECK-NEXT: tBX_RET 14 /* CC::al */, $noreg, implicit $r0
- bb.0.entry:
- successors: %bb.1(0x50000000), %bb.4(0x30000000)
- liveins: $r0, $r1, $r2
-
- %13:gprnopc = COPY $r2
- %12:gprnopc = COPY $r1
- %11:gprnopc = COPY $r0
- t2CMPri %13, 1, 14 /* CC::al */, $noreg, implicit-def $cpsr
- t2Bcc %bb.1, 10 /* CC::ge */, $cpsr
-
- bb.4:
- successors: %bb.3(0x80000000)
-
- %14:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- t2B %bb.3, 14 /* CC::al */, $noreg
-
- bb.1.for.body.preheader:
- successors: %bb.2(0x80000000)
-
- %16:rgpr = t2SUBri %12, 4, 14 /* CC::al */, $noreg, $noreg
- %0:gpr = COPY %16
- %17:rgpr = t2SUBri %11, 4, 14 /* CC::al */, $noreg, $noreg
- %15:spr = VLDRS %const.0, 0, 14 /* CC::al */, $noreg :: (load (s32) from constant-pool)
- %1:gpr = COPY %17
-
- bb.2.for.body:
- successors: %bb.3(0x04000000), %bb.2(0x7c000000)
-
- %2:gprnopc = PHI %1, %bb.1, %9, %bb.2
- %3:gprnopc = PHI %0, %bb.1, %8, %bb.2
- %4:gprnopc = PHI %13, %bb.1, %7, %bb.2
- %5:spr = PHI %15, %bb.1, %6, %bb.2
- %18:rgpr = t2ADDri %2, 4, 14 /* CC::al */, $noreg, $noreg
- %19:spr = VLDRS %2, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep7)
- %20:rgpr = t2ADDri %3, 4, 14 /* CC::al */, $noreg, $noreg
- %21:spr = VLDRS %3, 1, 14 /* CC::al */, $noreg :: (load (s32) from %ir.scevgep3)
- %22:spr = nnan ninf nsz arcp contract afn reassoc VMULS killed %21, killed %19, 14 /* CC::al */, $noreg
- %6:spr = nnan ninf nsz arcp contract afn reassoc VADDS killed %22, %5, 14 /* CC::al */, $noreg
- %23:rgpr = t2SUBri %4, 1, 14 /* CC::al */, $noreg, def $cpsr
- %7:gpr = COPY %23
- %8:gpr = COPY %20
- %9:gpr = COPY %18
- t2Bcc %bb.2, 1 /* CC::ne */, $cpsr
- t2B %bb.3, 14 /* CC::al */, $noreg
-
- bb.3.for.end:
- %10:spr = PHI %14, %bb.4, %6, %bb.2
- %24:gpr = VMOVRS %10, 14 /* CC::al */, $noreg
- $r0 = COPY %24
- tBX_RET 14 /* CC::al */, $noreg, implicit $r0
-
-...