// a map between register names in the original block and the names created
// in each stage of the pipelined loop.
ValueMapTy *VRMap = new ValueMapTy[(MaxStageCount + 1) * 2];
+
+ // For each stage, maps a register to the name most recently assigned to it
+ // by a generated Phi. The map is updated during Phi generation so that it
+ // always points to the latest renaming destination.
+ ValueMapTy *VRMapPhi = new ValueMapTy[(MaxStageCount + 1) * 2];
+
InstrMapTy InstrMap;
SmallVector<MachineBasicBlock *, 4> PrologBBs;
generateExistingPhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap,
InstrMap, MaxStageCount, MaxStageCount, false);
- generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, InstrMap,
- MaxStageCount, MaxStageCount, false);
+ generatePhis(KernelBB, PrologBBs.back(), KernelBB, KernelBB, VRMap, VRMapPhi,
+ InstrMap, MaxStageCount, MaxStageCount, false);
LLVM_DEBUG(dbgs() << "New block\n"; KernelBB->dump(););
SmallVector<MachineBasicBlock *, 4> EpilogBBs;
// Generate the epilog instructions to complete the pipeline.
- generateEpilog(MaxStageCount, KernelBB, BB, VRMap, EpilogBBs, PrologBBs);
+ generateEpilog(MaxStageCount, KernelBB, BB, VRMap, VRMapPhi, EpilogBBs,
+ PrologBBs);
// We need this step because the register allocation doesn't handle some
// situations well, so we insert copies to help out.
addBranches(*Preheader, PrologBBs, KernelBB, EpilogBBs, VRMap);
delete[] VRMap;
+ delete[] VRMapPhi;
}
void ModuloScheduleExpander::cleanup() {
/// block for each stage that needs to complete.
void ModuloScheduleExpander::generateEpilog(
unsigned LastStage, MachineBasicBlock *KernelBB, MachineBasicBlock *OrigBB,
- ValueMapTy *VRMap, MBBVectorTy &EpilogBBs, MBBVectorTy &PrologBBs) {
+ ValueMapTy *VRMap, ValueMapTy *VRMapPhi, MBBVectorTy &EpilogBBs,
+ MBBVectorTy &PrologBBs) {
// We need to change the branch from the kernel to the first epilog block, so
// this call to analyze branch uses the kernel rather than the original BB.
MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
}
generateExistingPhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap,
InstrMap, LastStage, EpilogStage, i == 1);
- generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, InstrMap,
- LastStage, EpilogStage, i == 1);
+ generatePhis(NewBB, PrologBBs[i - 1], PredBB, KernelBB, VRMap, VRMapPhi,
+ InstrMap, LastStage, EpilogStage, i == 1);
PredBB = NewBB;
LLVM_DEBUG({
/// use in the pipelined sequence.
void ModuloScheduleExpander::generatePhis(
MachineBasicBlock *NewBB, MachineBasicBlock *BB1, MachineBasicBlock *BB2,
- MachineBasicBlock *KernelBB, ValueMapTy *VRMap, InstrMapTy &InstrMap,
- unsigned LastStageNum, unsigned CurStageNum, bool IsLast) {
+ MachineBasicBlock *KernelBB, ValueMapTy *VRMap, ValueMapTy *VRMapPhi,
+ InstrMapTy &InstrMap, unsigned LastStageNum, unsigned CurStageNum,
+ bool IsLast) {
// Compute the stage number that contains the initial Phi value, and
// the Phi from the previous stage.
unsigned PrologStage = 0;
if (!InKernel && (unsigned)StageScheduled > PrologStage)
continue;
- unsigned PhiOp2 = VRMap[PrevStage][Def];
- if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
- if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
- PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+ unsigned PhiOp2;
+ if (InKernel) {
+ PhiOp2 = VRMap[PrevStage][Def];
+ if (MachineInstr *InstOp2 = MRI.getVRegDef(PhiOp2))
+ if (InstOp2->isPHI() && InstOp2->getParent() == NewBB)
+ PhiOp2 = getLoopPhiReg(*InstOp2, BB2);
+ }
// The number of Phis can't exceed the number of prolog stages. The
// prolog stage number is zero based.
if (NumPhis > PrologStage + 1 - StageScheduled)
NumPhis = PrologStage + 1 - StageScheduled;
for (unsigned np = 0; np < NumPhis; ++np) {
+ // Example for an original instruction scheduled at stage 0 with
+ // NumPhis = 2:
+ // Org:
+ // %Org = ...
+ //
+ // Prolog0 (Stage0):
+ // %Clone0 = ...
+ // Prolog1 (Stage1):
+ // %Clone1 = ...
+ // Kernel (Stage2):
+ // %Phi0 = Phi %Clone1, Prolog1, %Clone2, Kernel
+ // %Phi1 = Phi %Clone0, Prolog1, %Phi0, Kernel
+ // %Clone2 = ...
+ // Epilog0 (Stage3):
+ // %Phi2 = Phi %Clone1, Prolog1, %Clone2, Kernel
+ // %Phi3 = Phi %Clone0, Prolog1, %Phi0, Kernel
+ // Epilog1 (Stage4):
+ // %Phi4 = Phi %Clone0, Prolog0, %Phi2, Epilog0
+ //
+ // VRMap = {0: %Clone0, 1: %Clone1, 2: %Clone2}
+ // VRMapPhi (after Kernel) = {0: %Phi1, 1: %Phi0}
+ // VRMapPhi (after Epilog0) = {0: %Phi3, 1: %Phi2}
+
unsigned PhiOp1 = VRMap[PrologStage][Def];
if (np <= PrologStage)
PhiOp1 = VRMap[PrologStage - np][Def];
- if (MachineInstr *InstOp1 = MRI.getVRegDef(PhiOp1)) {
- if (InstOp1->isPHI() && InstOp1->getParent() == KernelBB)
- PhiOp1 = getInitPhiReg(*InstOp1, KernelBB);
- if (InstOp1->isPHI() && InstOp1->getParent() == NewBB)
- PhiOp1 = getInitPhiReg(*InstOp1, NewBB);
+ if (!InKernel) {
+ if (PrevStage == LastStageNum && np == 0)
+ PhiOp2 = VRMap[LastStageNum][Def];
+ else
+ PhiOp2 = VRMapPhi[PrevStage - np][Def];
}
- if (!InKernel)
- PhiOp2 = VRMap[PrevStage - np][Def];
const TargetRegisterClass *RC = MRI.getRegClass(Def);
Register NewReg = MRI.createVirtualRegister(RC);
NewReg);
PhiOp2 = NewReg;
- VRMap[PrevStage - np - 1][Def] = NewReg;
+ VRMapPhi[PrevStage - np - 1][Def] = NewReg;
} else {
- VRMap[CurStageNum - np][Def] = NewReg;
+ VRMapPhi[CurStageNum - np][Def] = NewReg;
if (np == NumPhis - 1)
rewriteScheduledInstr(NewBB, InstrMap, CurStageNum, np, &*BBI, Def,
NewReg);
--- /dev/null
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+# RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr9 --ppc-enable-pipeliner -pipeliner-max-stages=10 -run-pass=pipeliner -verify-machineinstrs -o - %s | FileCheck %s --check-prefix=CHECK
+
+--- |
+ define dso_local void @f(ptr noalias nocapture noundef writeonly %a, ptr nocapture noundef readonly %b, i32 noundef signext %n) local_unnamed_addr #0 {
+ entry:
+ %wide.trip.count = zext i32 %n to i64
+ %uglygep2 = getelementptr i8, ptr %b, i64 -4
+ %uglygep3 = getelementptr i8, ptr %a, i64 -4
+ call void @llvm.set.loop.iterations.i64(i64 %wide.trip.count)
+ br label %for.body
+
+ for.cond.cleanup: ; preds = %for.body
+ ret void
+
+ for.body: ; preds = %for.body, %entry
+ %0 = phi ptr [ %uglygep2, %entry ], [ %3, %for.body ]
+ %1 = phi ptr [ %uglygep3, %entry ], [ %2, %for.body ]
+ %2 = getelementptr i8, ptr %1, i64 4
+ %3 = getelementptr i8, ptr %0, i64 4
+ %4 = load float, ptr %3, align 4
+ %add = fadd float %4, %4
+ %add3 = fadd float %4, %add
+ store float %add3, ptr %2, align 4
+ %5 = call i1 @llvm.loop.decrement.i64(i64 1)
+ br i1 %5, label %for.body, label %for.cond.cleanup, !llvm.loop !0
+ }
+
+ ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
+ declare void @llvm.set.loop.iterations.i64(i64) #1
+
+ ; Function Attrs: nocallback noduplicate nofree nosync nounwind willreturn
+ declare i1 @llvm.loop.decrement.i64(i64) #1
+
+ attributes #0 = { argmemonly nofree norecurse nosync nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="pwr9" "target-features"="+altivec,+bpermd,+crbits,+crypto,+direct-move,+extdiv,+htm,+isa-v206-instructions,+isa-v207-instructions,+isa-v30-instructions,+power8-vector,+power9-vector,+quadword-atomics,+vsx,-privileged,-rop-protect,-spe" }
+ attributes #1 = { nocallback noduplicate nofree nosync nounwind willreturn }
+
+ !0 = distinct !{!0, !1, !2, !3}
+ !1 = !{!"llvm.loop.mustprogress"}
+ !2 = !{!"llvm.loop.unroll.disable"}
+ !3 = !{!"llvm.loop.pipeline.initiationinterval", i32 3}
+
+...
+---
+name: f
+alignment: 16
+tracksRegLiveness: true
+registers:
+ - { id: 0, class: g8rc }
+ - { id: 1, class: g8rc }
+ - { id: 2, class: g8rc_and_g8rc_nox0 }
+ - { id: 3, class: g8rc_and_g8rc_nox0 }
+ - { id: 4, class: g8rc }
+ - { id: 5, class: g8rc }
+ - { id: 6, class: g8rc_and_g8rc_nox0 }
+ - { id: 7, class: g8rc_and_g8rc_nox0 }
+ - { id: 8, class: g8rc }
+ - { id: 9, class: g8rc }
+ - { id: 10, class: f4rc }
+ - { id: 11, class: g8rc_and_g8rc_nox0 }
+ - { id: 12, class: vssrc }
+ - { id: 13, class: f4rc }
+ - { id: 14, class: g8rc_and_g8rc_nox0 }
+liveins:
+ - { reg: '$x3', virtual-reg: '%6' }
+ - { reg: '$x4', virtual-reg: '%7' }
+ - { reg: '$x5', virtual-reg: '%8' }
+frameInfo:
+ maxAlignment: 1
+machineFunctionInfo: {}
+body: |
+ ; CHECK-LABEL: name: f
+ ; CHECK: bb.0.entry:
+ ; CHECK-NEXT: successors: %bb.3(0x80000000)
+ ; CHECK-NEXT: liveins: $x3, $x4, $x5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[COPY:%[0-9]+]]:g8rc = COPY $x5
+ ; CHECK-NEXT: [[COPY1:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x4
+ ; CHECK-NEXT: [[COPY2:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY $x3
+ ; CHECK-NEXT: [[RLDICL:%[0-9]+]]:g8rc = RLDICL [[COPY]], 0, 32
+ ; CHECK-NEXT: [[ADDI8_:%[0-9]+]]:g8rc_and_g8rc_nox0 = ADDI8 [[COPY1]], -4
+ ; CHECK-NEXT: [[ADDI8_1:%[0-9]+]]:g8rc = ADDI8 [[COPY2]], -4
+ ; CHECK-NEXT: MTCTR8loop [[RLDICL]], implicit-def dead $ctr8
+ ; CHECK-NEXT: B %bb.3
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.1.for.cond.cleanup:
+ ; CHECK-NEXT: BLR8 implicit $lr8, implicit $rm
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.3.for.body:
+ ; CHECK-NEXT: successors: %bb.4(0x40000000), %bb.17(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LFSU:%[0-9]+]]:f4rc, [[LFSU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[ADDI8_]] :: (load (s32) from %ir.3)
+ ; CHECK-NEXT: [[COPY3:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU1]]
+ ; CHECK-NEXT: BDZ8 %bb.17, implicit-def $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.4
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.4.for.body:
+ ; CHECK-NEXT: successors: %bb.5(0x40000000), %bb.16(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[LFSU2:%[0-9]+]]:f4rc, [[LFSU3:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY3]] :: (load unknown-size from %ir.3, align 4)
+ ; CHECK-NEXT: [[COPY4:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU3]]
+ ; CHECK-NEXT: BDZ8 %bb.16, implicit-def $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.5
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.5.for.body:
+ ; CHECK-NEXT: successors: %bb.6(0x40000000), %bb.15(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[XSADDSP:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU]], [[LFSU]]
+ ; CHECK-NEXT: [[LFSU4:%[0-9]+]]:f4rc, [[LFSU5:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY4]] :: (load unknown-size from %ir.3, align 4)
+ ; CHECK-NEXT: [[COPY5:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU5]]
+ ; CHECK-NEXT: BDZ8 %bb.15, implicit-def $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.6
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.6.for.body:
+ ; CHECK-NEXT: successors: %bb.7(0x40000000), %bb.14(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[XSADDSP1:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU2]], [[LFSU2]]
+ ; CHECK-NEXT: [[LFSU6:%[0-9]+]]:f4rc, [[LFSU7:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY5]] :: (load unknown-size from %ir.3, align 4)
+ ; CHECK-NEXT: [[COPY6:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU7]]
+ ; CHECK-NEXT: BDZ8 %bb.14, implicit-def $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.7
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.7.for.body:
+ ; CHECK-NEXT: successors: %bb.8(0x40000000), %bb.13(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[XSADDSP2:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[LFSU]], [[XSADDSP]]
+ ; CHECK-NEXT: [[XSADDSP3:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU4]], [[LFSU4]]
+ ; CHECK-NEXT: [[LFSU8:%[0-9]+]]:f4rc, [[LFSU9:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY6]] :: (load unknown-size from %ir.3, align 4)
+ ; CHECK-NEXT: [[COPY7:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU9]]
+ ; CHECK-NEXT: BDZ8 %bb.13, implicit-def $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.8
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.8.for.body:
+ ; CHECK-NEXT: successors: %bb.9(0x40000000), %bb.12(0x40000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[XSADDSP4:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[LFSU2]], [[XSADDSP1]]
+ ; CHECK-NEXT: [[XSADDSP5:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU6]], [[LFSU6]]
+ ; CHECK-NEXT: [[LFSU10:%[0-9]+]]:f4rc, [[LFSU11:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY7]] :: (load unknown-size from %ir.3, align 4)
+ ; CHECK-NEXT: [[COPY8:%[0-9]+]]:g8rc_and_g8rc_nox0 = COPY [[LFSU11]]
+ ; CHECK-NEXT: BDZ8 %bb.12, implicit-def $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.9
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.9.for.body:
+ ; CHECK-NEXT: successors: %bb.10(0x80000000), %bb.11(0x00000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[XSADDSP6:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[LFSU4]], [[XSADDSP3]]
+ ; CHECK-NEXT: [[XSADDSP7:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[LFSU8]], [[LFSU8]]
+ ; CHECK-NEXT: [[LFSU12:%[0-9]+]]:f4rc, [[LFSU13:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[COPY8]] :: (load unknown-size from %ir.3, align 4)
+ ; CHECK-NEXT: [[COPY9:%[0-9]+]]:g8rc = COPY [[LFSU13]]
+ ; CHECK-NEXT: BDZ8 %bb.11, implicit-def $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.10
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.10.for.body:
+ ; CHECK-NEXT: successors: %bb.10(0x7c000000), %bb.11(0x04000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[COPY9]], %bb.9, %50, %bb.10
+ ; CHECK-NEXT: [[PHI1:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.9, %47, %bb.10
+ ; CHECK-NEXT: [[PHI2:%[0-9]+]]:f4rc = PHI [[LFSU12]], %bb.9, %45, %bb.10
+ ; CHECK-NEXT: [[PHI3:%[0-9]+]]:f4rc = PHI [[LFSU10]], %bb.9, [[PHI2]], %bb.10
+ ; CHECK-NEXT: [[PHI4:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.9, [[PHI3]], %bb.10
+ ; CHECK-NEXT: [[PHI5:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.9, [[PHI4]], %bb.10
+ ; CHECK-NEXT: [[PHI6:%[0-9]+]]:vssrc = PHI [[XSADDSP7]], %bb.9, %48, %bb.10
+ ; CHECK-NEXT: [[PHI7:%[0-9]+]]:vssrc = PHI [[XSADDSP5]], %bb.9, [[PHI6]], %bb.10
+ ; CHECK-NEXT: [[PHI8:%[0-9]+]]:f4rc = PHI [[XSADDSP6]], %bb.9, %49, %bb.10
+ ; CHECK-NEXT: [[PHI9:%[0-9]+]]:f4rc = PHI [[XSADDSP4]], %bb.9, [[PHI8]], %bb.10
+ ; CHECK-NEXT: [[PHI10:%[0-9]+]]:f4rc = PHI [[XSADDSP2]], %bb.9, [[PHI9]], %bb.10
+ ; CHECK-NEXT: [[STFSU:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI10]], 4, [[PHI1]] :: (store (s32) into %ir.2)
+ ; CHECK-NEXT: [[LFSU14:%[0-9]+]]:f4rc, [[LFSU15:%[0-9]+]]:g8rc_and_g8rc_nox0 = LFSU 4, [[PHI]] :: (load unknown-size from %ir.3, align 4)
+ ; CHECK-NEXT: [[COPY10:%[0-9]+]]:g8rc = COPY [[STFSU]]
+ ; CHECK-NEXT: [[XSADDSP8:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[PHI3]], [[PHI3]]
+ ; CHECK-NEXT: [[XSADDSP9:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI5]], [[PHI7]]
+ ; CHECK-NEXT: [[COPY11:%[0-9]+]]:g8rc = COPY [[LFSU15]]
+ ; CHECK-NEXT: BDNZ8 %bb.10, implicit-def $ctr8, implicit $ctr8
+ ; CHECK-NEXT: B %bb.11
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.11:
+ ; CHECK-NEXT: successors: %bb.12(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI11:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.9, [[COPY10]], %bb.10
+ ; CHECK-NEXT: [[PHI12:%[0-9]+]]:f4rc = PHI [[LFSU12]], %bb.9, [[LFSU14]], %bb.10
+ ; CHECK-NEXT: [[PHI13:%[0-9]+]]:f4rc = PHI [[LFSU10]], %bb.9, [[PHI2]], %bb.10
+ ; CHECK-NEXT: [[PHI14:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.9, [[PHI3]], %bb.10
+ ; CHECK-NEXT: [[PHI15:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.9, [[PHI4]], %bb.10
+ ; CHECK-NEXT: [[PHI16:%[0-9]+]]:vssrc = PHI [[XSADDSP7]], %bb.9, [[XSADDSP8]], %bb.10
+ ; CHECK-NEXT: [[PHI17:%[0-9]+]]:vssrc = PHI [[XSADDSP5]], %bb.9, [[PHI6]], %bb.10
+ ; CHECK-NEXT: [[PHI18:%[0-9]+]]:f4rc = PHI [[XSADDSP6]], %bb.9, [[XSADDSP9]], %bb.10
+ ; CHECK-NEXT: [[PHI19:%[0-9]+]]:f4rc = PHI [[XSADDSP4]], %bb.9, [[PHI8]], %bb.10
+ ; CHECK-NEXT: [[PHI20:%[0-9]+]]:f4rc = PHI [[XSADDSP2]], %bb.9, [[PHI9]], %bb.10
+ ; CHECK-NEXT: [[STFSU1:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI20]], 4, [[PHI11]] :: (store unknown-size into %ir.2, align 4)
+ ; CHECK-NEXT: [[COPY12:%[0-9]+]]:g8rc = COPY [[STFSU1]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.12:
+ ; CHECK-NEXT: successors: %bb.13(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI21:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.8, [[COPY12]], %bb.11
+ ; CHECK-NEXT: [[PHI22:%[0-9]+]]:f4rc = PHI [[LFSU10]], %bb.8, [[PHI12]], %bb.11
+ ; CHECK-NEXT: [[PHI23:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.8, [[PHI13]], %bb.11
+ ; CHECK-NEXT: [[PHI24:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.8, [[PHI14]], %bb.11
+ ; CHECK-NEXT: [[PHI25:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.8, [[PHI15]], %bb.11
+ ; CHECK-NEXT: [[PHI26:%[0-9]+]]:vssrc = PHI [[XSADDSP5]], %bb.8, [[PHI16]], %bb.11
+ ; CHECK-NEXT: [[PHI27:%[0-9]+]]:vssrc = PHI [[XSADDSP3]], %bb.8, [[PHI17]], %bb.11
+ ; CHECK-NEXT: [[PHI28:%[0-9]+]]:f4rc = PHI [[XSADDSP4]], %bb.8, [[PHI18]], %bb.11
+ ; CHECK-NEXT: [[PHI29:%[0-9]+]]:f4rc = PHI [[XSADDSP2]], %bb.8, [[PHI19]], %bb.11
+ ; CHECK-NEXT: [[STFSU2:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI29]], 4, [[PHI21]] :: (store unknown-size into %ir.2, align 4)
+ ; CHECK-NEXT: [[COPY13:%[0-9]+]]:g8rc = COPY [[STFSU2]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.13:
+ ; CHECK-NEXT: successors: %bb.14(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI30:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.7, [[COPY13]], %bb.12
+ ; CHECK-NEXT: [[PHI31:%[0-9]+]]:f4rc = PHI [[LFSU8]], %bb.7, [[PHI22]], %bb.12
+ ; CHECK-NEXT: [[PHI32:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.7, [[PHI23]], %bb.12
+ ; CHECK-NEXT: [[PHI33:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.7, [[PHI24]], %bb.12
+ ; CHECK-NEXT: [[PHI34:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.7, [[PHI25]], %bb.12
+ ; CHECK-NEXT: [[PHI35:%[0-9]+]]:vssrc = PHI [[XSADDSP3]], %bb.7, [[PHI26]], %bb.12
+ ; CHECK-NEXT: [[PHI36:%[0-9]+]]:vssrc = PHI [[XSADDSP1]], %bb.7, [[PHI27]], %bb.12
+ ; CHECK-NEXT: [[PHI37:%[0-9]+]]:f4rc = PHI [[XSADDSP2]], %bb.7, [[PHI28]], %bb.12
+ ; CHECK-NEXT: [[STFSU3:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[PHI37]], 4, [[PHI30]] :: (store unknown-size into %ir.2, align 4)
+ ; CHECK-NEXT: [[COPY14:%[0-9]+]]:g8rc = COPY [[STFSU3]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.14:
+ ; CHECK-NEXT: successors: %bb.15(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI38:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.6, [[COPY14]], %bb.13
+ ; CHECK-NEXT: [[PHI39:%[0-9]+]]:f4rc = PHI [[LFSU6]], %bb.6, [[PHI31]], %bb.13
+ ; CHECK-NEXT: [[PHI40:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.6, [[PHI32]], %bb.13
+ ; CHECK-NEXT: [[PHI41:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.6, [[PHI33]], %bb.13
+ ; CHECK-NEXT: [[PHI42:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.6, [[PHI34]], %bb.13
+ ; CHECK-NEXT: [[PHI43:%[0-9]+]]:vssrc = PHI [[XSADDSP1]], %bb.6, [[PHI35]], %bb.13
+ ; CHECK-NEXT: [[PHI44:%[0-9]+]]:vssrc = PHI [[XSADDSP]], %bb.6, [[PHI36]], %bb.13
+ ; CHECK-NEXT: [[XSADDSP10:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI42]], [[PHI44]]
+ ; CHECK-NEXT: [[STFSU4:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[XSADDSP10]], 4, [[PHI38]] :: (store unknown-size into %ir.2, align 4)
+ ; CHECK-NEXT: [[COPY15:%[0-9]+]]:g8rc = COPY [[STFSU4]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.15:
+ ; CHECK-NEXT: successors: %bb.16(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI45:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.5, [[COPY15]], %bb.14
+ ; CHECK-NEXT: [[PHI46:%[0-9]+]]:f4rc = PHI [[LFSU4]], %bb.5, [[PHI39]], %bb.14
+ ; CHECK-NEXT: [[PHI47:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.5, [[PHI40]], %bb.14
+ ; CHECK-NEXT: [[PHI48:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.5, [[PHI41]], %bb.14
+ ; CHECK-NEXT: [[PHI49:%[0-9]+]]:vssrc = PHI [[XSADDSP]], %bb.5, [[PHI43]], %bb.14
+ ; CHECK-NEXT: [[XSADDSP11:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI48]], [[PHI49]]
+ ; CHECK-NEXT: [[STFSU5:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[XSADDSP11]], 4, [[PHI45]] :: (store unknown-size into %ir.2, align 4)
+ ; CHECK-NEXT: [[COPY16:%[0-9]+]]:g8rc = COPY [[STFSU5]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.16:
+ ; CHECK-NEXT: successors: %bb.17(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI50:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.4, [[COPY16]], %bb.15
+ ; CHECK-NEXT: [[PHI51:%[0-9]+]]:f4rc = PHI [[LFSU2]], %bb.4, [[PHI46]], %bb.15
+ ; CHECK-NEXT: [[PHI52:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.4, [[PHI47]], %bb.15
+ ; CHECK-NEXT: [[XSADDSP12:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[PHI52]], [[PHI52]]
+ ; CHECK-NEXT: [[XSADDSP13:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI52]], [[XSADDSP12]]
+ ; CHECK-NEXT: [[STFSU6:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[XSADDSP13]], 4, [[PHI50]] :: (store unknown-size into %ir.2, align 4)
+ ; CHECK-NEXT: [[COPY17:%[0-9]+]]:g8rc = COPY [[STFSU6]]
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: bb.17:
+ ; CHECK-NEXT: successors: %bb.1(0x80000000)
+ ; CHECK-NEXT: {{ $}}
+ ; CHECK-NEXT: [[PHI53:%[0-9]+]]:g8rc_and_g8rc_nox0 = PHI [[ADDI8_1]], %bb.3, [[COPY17]], %bb.16
+ ; CHECK-NEXT: [[PHI54:%[0-9]+]]:f4rc = PHI [[LFSU]], %bb.3, [[PHI51]], %bb.16
+ ; CHECK-NEXT: [[XSADDSP14:%[0-9]+]]:vssrc = nofpexcept XSADDSP [[PHI54]], [[PHI54]]
+ ; CHECK-NEXT: [[XSADDSP15:%[0-9]+]]:f4rc = nofpexcept XSADDSP [[PHI54]], [[XSADDSP14]]
+ ; CHECK-NEXT: [[STFSU7:%[0-9]+]]:g8rc_and_g8rc_nox0 = STFSU [[XSADDSP15]], 4, [[PHI53]] :: (store unknown-size into %ir.2, align 4)
+ ; CHECK-NEXT: B %bb.1
+ bb.0.entry:
+ liveins: $x3, $x4, $x5
+
+ %8:g8rc = COPY $x5
+ %7:g8rc_and_g8rc_nox0 = COPY $x4
+ %6:g8rc_and_g8rc_nox0 = COPY $x3
+ %9:g8rc = RLDICL %8, 0, 32
+ %0:g8rc = ADDI8 %7, -4
+ %1:g8rc = ADDI8 %6, -4
+ MTCTR8loop killed %9, implicit-def dead $ctr8
+ B %bb.2
+
+ bb.1.for.cond.cleanup:
+ BLR8 implicit $lr8, implicit $rm
+
+ bb.2.for.body:
+ successors: %bb.2(0x7c000000), %bb.1(0x04000000)
+
+ %2:g8rc_and_g8rc_nox0 = PHI %0, %bb.0, %5, %bb.2
+ %3:g8rc_and_g8rc_nox0 = PHI %1, %bb.0, %4, %bb.2
+ %10:f4rc, %11:g8rc_and_g8rc_nox0 = LFSU 4, %2 :: (load (s32) from %ir.3)
+ %12:vssrc = nofpexcept XSADDSP %10, %10
+ %13:f4rc = nofpexcept XSADDSP %10, killed %12
+ %14:g8rc_and_g8rc_nox0 = STFSU killed %13, 4, %3 :: (store (s32) into %ir.2)
+ %4:g8rc = COPY %14
+ %5:g8rc = COPY %11
+ BDNZ8 %bb.2, implicit-def dead $ctr8, implicit $ctr8
+ B %bb.1
+
+...