BuildMI(MBB, MBBI, dl,
TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64
: PPC::PROBED_STACKALLOC_32))
- .addDef(ScratchReg)
- .addDef(TempReg) // TempReg stores the old sp.
+ .addDef(TempReg)
+ .addDef(ScratchReg) // ScratchReg stores the old sp.
.addImm(NegFrameSize);
// FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we
// update the ScratchReg to meet the assumption that ScratchReg contains
// the NegFrameSize. This solution is rather tricky.
if (!HasRedZone) {
BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
- .addReg(TempReg)
+ .addReg(ScratchReg)
.addReg(SPReg);
HasSTUX = true;
}
void PPCFrameLowering::inlineStackProbe(MachineFunction &MF,
MachineBasicBlock &PrologMBB) const {
- // TODO: Generate CFI instructions.
bool isPPC64 = Subtarget.isPPC64();
const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();
const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
bool HasBP = RegInfo->hasBasePointer(MF);
Register BPReg = RegInfo->getBaseRegister(MF);
Align MaxAlign = MFI.getMaxAlign();
+ bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
// Subroutines to generate .cfi_* directives.
auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
.addReg(SPReg)
.addReg(NegSizeReg);
};
- // Used to probe realignment gap [stackptr - (stackptr % align), stackptr)
- // when HasBP && isPPC64. In such scenario, normally we have r0, r1, r12, r30
- // available and r1 is already copied to r30 which is BPReg. So BPReg stores
- // the value of stackptr.
- // First we have to probe tail interval whose size is less than probesize,
- // i.e., [stackptr - (stackptr % align) % probesize, stackptr). At this stage,
- // ScratchReg stores the value of ((stackptr % align) % probesize). Then we
- // probe each block sized probesize until stackptr meets
- // (stackptr - (stackptr % align)). At this stage, ScratchReg is materialized
- // as negprobesize. At both stages, TempReg stores the value of
- // (stackptr - (stackptr % align)).
- auto dynamicProbe = [&](MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI, Register ScratchReg,
- Register TempReg) {
- assert(HasBP && isPPC64 && "Probe alignment part not available");
+ // Used to probe stack when realignment is required.
+ // Note that, according to the ABI's requirement, *sp must always equal the
+ // value of the back-chain pointer, so only st(w|d)u(x) can be used to update sp.
+ // The following is pseudo code:
+ // final_sp = (sp - sp % align) + negframesize;
+ // neg_gap = final_sp - sp;
+ // while (neg_gap < negprobesize) {
+ // stdu fp, negprobesize(sp);
+ // neg_gap -= negprobesize;
+ // }
+ // stdux fp, sp, neg_gap
+ //
+ // When HasBP && HasRedZone, the back-chain pointer is already saved in BPReg
+ // before the probe code, so we don't need to save it and we get one additional
+ // reg that can be used to materialize the probesize if we need to use xform.
+ // Otherwise, we can NOT materialize the probesize, so we can only use Dform
+ // for now.
+ //
+ // The allocations are:
+ // if (HasBP && HasRedzone) {
+ // r0: materialize the probesize if needed so that we can use xform.
+ // r12: `neg_gap`
+ // } else {
+ // r0: back-chain pointer
+ // r12: `neg_gap`.
+ // }
+ auto probeRealignedStack = [&](MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ Register ScratchReg, Register TempReg) {
+ assert(HasBP && "The function is supposed to have base pointer when its "
+ "stack is realigned.");
assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");
- // ScratchReg = stackptr % align
- BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(BPReg)
- .addImm(0)
- .addImm(64 - Log2(MaxAlign));
- // TempReg = stackptr - (stackptr % align)
- BuildMI(MBB, MBBI, DL, TII.get(PPC::SUBFC8), TempReg)
- .addReg(ScratchReg)
- .addReg(BPReg);
- // ScratchReg = (stackptr % align) % probesize
- BuildMI(MBB, MBBI, DL, TII.get(PPC::RLDICL), ScratchReg)
- .addReg(ScratchReg)
- .addImm(0)
- .addImm(64 - Log2(ProbeSize));
+
+ // FIXME: We can eliminate this limitation if we get more information about
+ // which parts of the redzone are already used. The used redzone can be treated
+ // as probed. But there might be `holes' in the probed redzone, which could
+ // complicate the implementation.
+ assert(ProbeSize >= Subtarget.getRedZoneSize() &&
+ "Probe size should be larger or equal to the size of red-zone so "
+ "that red-zone is not clobbered by probing.");
+
+ Register &FinalStackPtr = TempReg;
+ // FIXME: We only support NegProbeSize materializable by DForm currently.
+ // When HasBP && HasRedzone, we can use xform if we have an additional idle
+ // register.
+ NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
+ assert(isInt<16>(NegProbeSize) &&
+ "NegProbeSize should be materializable by DForm");
Register CRReg = PPC::CR0;
- // If (stackptr % align) % probesize == 0, we should not generate probe
- // code. Layout of output assembly kinda like:
+ // Layout of output assembly kinda like:
// bb.0:
// ...
- // cmpldi $scratchreg, 0
- // beq bb.2
- // bb.1: # Probe tail interval
- // neg $scratchreg, $scratchreg
- // stdux $bpreg, r1, $scratchreg
+ // sub $scratchreg, $finalsp, r1
+ // cmpdi $scratchreg, <negprobesize>
+ // bge bb.2
+ // bb.1:
+ // stdu <backchain>, <negprobesize>(r1)
+ // sub $scratchreg, $scratchreg, negprobesize
+ // cmpdi $scratchreg, <negprobesize>
+ // blt bb.1
// bb.2:
- // <materialize negprobesize into $scratchreg>
- // cmpd r1, $tempreg
- // beq bb.4
- // bb.3: # Loop to probe each block
- // stdux $bpreg, r1, $scratchreg
- // cmpd r1, $tempreg
- // bne bb.3
- // bb.4:
- // ...
+ // stdux <backchain>, r1, $scratchreg
MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
- MachineBasicBlock *ProbeResidualMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, ProbeResidualMBB);
- MachineBasicBlock *ProbeLoopPreHeaderMBB =
- MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, ProbeLoopPreHeaderMBB);
MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
MF.insert(MBBInsertPoint, ProbeExitMBB);
- // bb.4
- ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
- ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ // bb.2
+ {
+ Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+ allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
+ BackChainPointer);
+ if (HasRedZone)
+ // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
+ // to TempReg to satisfy it.
+ BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
+ .addReg(BPReg)
+ .addReg(BPReg);
+ ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
+ ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
+ }
// bb.0
- BuildMI(&MBB, DL, TII.get(PPC::CMPDI), CRReg).addReg(ScratchReg).addImm(0);
- BuildMI(&MBB, DL, TII.get(PPC::BCC))
- .addImm(PPC::PRED_EQ)
- .addReg(CRReg)
- .addMBB(ProbeLoopPreHeaderMBB);
- MBB.addSuccessor(ProbeResidualMBB);
- MBB.addSuccessor(ProbeLoopPreHeaderMBB);
+ {
+ BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
+ .addReg(SPReg)
+ .addReg(FinalStackPtr);
+ if (!HasRedZone)
+ BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
+ BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
+ .addReg(ScratchReg)
+ .addImm(NegProbeSize);
+ BuildMI(&MBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_GE)
+ .addReg(CRReg)
+ .addMBB(ProbeExitMBB);
+ MBB.addSuccessor(ProbeLoopBodyMBB);
+ MBB.addSuccessor(ProbeExitMBB);
+ }
// bb.1
- BuildMI(ProbeResidualMBB, DL, TII.get(PPC::NEG8), ScratchReg)
- .addReg(ScratchReg);
- allocateAndProbe(*ProbeResidualMBB, ProbeResidualMBB->end(), 0, ScratchReg,
- false, BPReg);
- ProbeResidualMBB->addSuccessor(ProbeLoopPreHeaderMBB);
- // bb.2
- MaterializeImm(*ProbeLoopPreHeaderMBB, ProbeLoopPreHeaderMBB->end(),
- NegProbeSize, ScratchReg);
- BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::CMPD), CRReg)
- .addReg(SPReg)
- .addReg(TempReg);
- BuildMI(ProbeLoopPreHeaderMBB, DL, TII.get(PPC::BCC))
- .addImm(PPC::PRED_EQ)
- .addReg(CRReg)
- .addMBB(ProbeExitMBB);
- ProbeLoopPreHeaderMBB->addSuccessor(ProbeLoopBodyMBB);
- ProbeLoopPreHeaderMBB->addSuccessor(ProbeExitMBB);
- // bb.3
- allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), 0, ScratchReg,
- false, BPReg);
- BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::CMPD), CRReg)
- .addReg(SPReg)
- .addReg(TempReg);
- BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
- .addImm(PPC::PRED_NE)
- .addReg(CRReg)
- .addMBB(ProbeLoopBodyMBB);
- ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
- ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
+ {
+ Register BackChainPointer = HasRedZone ? BPReg : TempReg;
+ allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
+ 0, true /*UseDForm*/, BackChainPointer);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
+ ScratchReg)
+ .addReg(ScratchReg)
+ .addImm(-NegProbeSize);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
+ CRReg)
+ .addReg(ScratchReg)
+ .addImm(NegProbeSize);
+ BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
+ .addImm(PPC::PRED_LT)
+ .addReg(CRReg)
+ .addMBB(ProbeLoopBodyMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
+ ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
+ }
// Update liveins.
- recomputeLiveIns(*ProbeResidualMBB);
- recomputeLiveIns(*ProbeLoopPreHeaderMBB);
recomputeLiveIns(*ProbeLoopBodyMBB);
recomputeLiveIns(*ProbeExitMBB);
return ProbeExitMBB;
};
// For case HasBP && MaxAlign > 1, we have to realign the SP by performing
- // SP = SP - SP % MaxAlign.
+ // SP = SP - SP % MaxAlign, which makes the probe more like a dynamic probe
+ // since the offset subtracted from SP is determined by SP's runtime value.
if (HasBP && MaxAlign > 1) {
- // FIXME: Currently only probe the gap [stackptr & alignmask, stackptr) in
- // 64-bit mode.
- if (isPPC64) {
- // Use BPReg to calculate CFA.
- if (needsCFI)
- buildDefCFA(*CurrentMBB, {MI}, BPReg, 0);
- // Since we have SPReg copied to BPReg at the moment, FPReg can be used as
- // TempReg.
- Register TempReg = FPReg;
- CurrentMBB = dynamicProbe(*CurrentMBB, {MI}, ScratchReg, TempReg);
- // Copy BPReg to FPReg to meet the definition of PROBED_STACKALLOC_64.
- BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
- .addReg(BPReg)
- .addReg(BPReg);
- } else {
- // Initialize current frame pointer.
- BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg)
+ // Calculate final stack pointer.
+ if (isPPC64)
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
.addReg(SPReg)
- .addReg(SPReg);
- // Use FPReg to calculate CFA.
- if (needsCFI)
- buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
+ .addImm(0)
+ .addImm(64 - Log2(MaxAlign));
+ else
BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
- .addReg(FPReg)
+ .addReg(SPReg)
.addImm(0)
.addImm(32 - Log2(MaxAlign))
.addImm(31);
- BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::SUBFC), SPReg)
- .addReg(ScratchReg)
- .addReg(SPReg);
- }
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
+ FPReg)
+ .addReg(ScratchReg)
+ .addReg(SPReg);
+ MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
+ FPReg)
+ .addReg(ScratchReg)
+ .addReg(FPReg);
+ CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
+ if (needsCFI)
+ buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
} else {
// Initialize current frame pointer.
BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
// Use FPReg to calculate CFA.
if (needsCFI)
buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
- }
- // Probe residual part.
- if (NegResidualSize) {
- bool ResidualUseDForm = CanUseDForm(NegResidualSize);
- if (!ResidualUseDForm)
- MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
- allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
- ResidualUseDForm, FPReg);
- }
- bool UseDForm = CanUseDForm(NegProbeSize);
- // If number of blocks is small, just probe them directly.
- if (NumBlocks < 3) {
- if (!UseDForm)
- MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
- for (int i = 0; i < NumBlocks; ++i)
- allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
- FPReg);
- if (needsCFI) {
- // Restore using SPReg to calculate CFA.
- buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
+ // Probe residual part.
+ if (NegResidualSize) {
+ bool ResidualUseDForm = CanUseDForm(NegResidualSize);
+ if (!ResidualUseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
+ allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
+ ResidualUseDForm, FPReg);
}
- } else {
- // Since CTR is a volatile register and current shrinkwrap implementation
- // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
- // CTR loop to probe.
- // Calculate trip count and stores it in CTRReg.
- MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
- BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
- .addReg(ScratchReg, RegState::Kill);
- if (!UseDForm)
- MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
- // Create MBBs of the loop.
- MachineFunction::iterator MBBInsertPoint =
- std::next(CurrentMBB->getIterator());
- MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, LoopMBB);
- MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
- MF.insert(MBBInsertPoint, ExitMBB);
- // Synthesize the loop body.
- allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
- UseDForm, FPReg);
- BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
- .addMBB(LoopMBB);
- LoopMBB->addSuccessor(ExitMBB);
- LoopMBB->addSuccessor(LoopMBB);
- // Synthesize the exit MBB.
- ExitMBB->splice(ExitMBB->end(), CurrentMBB,
- std::next(MachineBasicBlock::iterator(MI)),
- CurrentMBB->end());
- ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
- CurrentMBB->addSuccessor(LoopMBB);
- if (needsCFI) {
- // Restore using SPReg to calculate CFA.
- buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ bool UseDForm = CanUseDForm(NegProbeSize);
+ // If number of blocks is small, just probe them directly.
+ if (NumBlocks < 3) {
+ if (!UseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+ for (int i = 0; i < NumBlocks; ++i)
+ allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
+ FPReg);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
+ }
+ } else {
+ // Since CTR is a volatile register and current shrinkwrap implementation
+ // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
+ // CTR loop to probe.
+ // Calculate trip count and stores it in CTRReg.
+ MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
+ BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
+ .addReg(ScratchReg, RegState::Kill);
+ if (!UseDForm)
+ MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
+ // Create MBBs of the loop.
+ MachineFunction::iterator MBBInsertPoint =
+ std::next(CurrentMBB->getIterator());
+ MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, LoopMBB);
+ MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
+ MF.insert(MBBInsertPoint, ExitMBB);
+ // Synthesize the loop body.
+ allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
+ UseDForm, FPReg);
+ BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
+ .addMBB(LoopMBB);
+ LoopMBB->addSuccessor(ExitMBB);
+ LoopMBB->addSuccessor(LoopMBB);
+ // Synthesize the exit MBB.
+ ExitMBB->splice(ExitMBB->end(), CurrentMBB,
+ std::next(MachineBasicBlock::iterator(MI)),
+ CurrentMBB->end());
+ ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
+ CurrentMBB->addSuccessor(LoopMBB);
+ if (needsCFI) {
+ // Restore using SPReg to calculate CFA.
+ buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
+ }
+ // Update liveins.
+ recomputeLiveIns(*LoopMBB);
+ recomputeLiveIns(*ExitMBB);
}
- // Update liveins.
- recomputeLiveIns(*LoopMBB);
- recomputeLiveIns(*ExitMBB);
}
++NumPrologProbed;
MI.eraseFromParent();
define i8 @f1() #0 "stack-probe-size"="0" nounwind {
; CHECK-LE-LABEL: f1:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: li r0, 259
-; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: li r12, 259
+; CHECK-LE-NEXT: mtctr r12
; CHECK-LE-NEXT: .LBB1_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdu r12, -16(r1)
+; CHECK-LE-NEXT: stdu r0, -16(r1)
; CHECK-LE-NEXT: bdnz .LBB1_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: li r3, 3
;
; CHECK-BE-LABEL: f1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: li r0, 260
-; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: li r12, 260
+; CHECK-BE-NEXT: mtctr r12
; CHECK-BE-NEXT: .LBB1_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdu r12, -16(r1)
+; CHECK-BE-NEXT: stdu r0, -16(r1)
; CHECK-BE-NEXT: bdnz .LBB1_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: li r3, 3
;
; CHECK-32-LABEL: f1:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: li r0, 257
-; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: li r12, 257
+; CHECK-32-NEXT: mtctr r12
; CHECK-32-NEXT: .LBB1_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwu r12, -16(r1)
+; CHECK-32-NEXT: stwu r0, -16(r1)
; CHECK-32-NEXT: bdnz .LBB1_1
; CHECK-32-NEXT: # %bb.2: # %entry
; CHECK-32-NEXT: li r3, 3
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: stb r3, 16(r1)
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: lbz r3, 16(r1)
define i8 @f2() #0 nounwind {
; CHECK-LE-LABEL: f2:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: stdu r12, -48(r1)
-; CHECK-LE-NEXT: li r0, 16
-; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: stdu r0, -48(r1)
+; CHECK-LE-NEXT: li r12, 16
+; CHECK-LE-NEXT: mtctr r12
; CHECK-LE-NEXT: .LBB2_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdu r12, -4096(r1)
+; CHECK-LE-NEXT: stdu r0, -4096(r1)
; CHECK-LE-NEXT: bdnz .LBB2_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: li r3, 3
;
; CHECK-BE-LABEL: f2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: stdu r12, -64(r1)
-; CHECK-BE-NEXT: li r0, 16
-; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: stdu r0, -64(r1)
+; CHECK-BE-NEXT: li r12, 16
+; CHECK-BE-NEXT: mtctr r12
; CHECK-BE-NEXT: .LBB2_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdu r12, -4096(r1)
+; CHECK-BE-NEXT: stdu r0, -4096(r1)
; CHECK-BE-NEXT: bdnz .LBB2_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: li r3, 3
;
; CHECK-32-LABEL: f2:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: stwu r12, -16(r1)
-; CHECK-32-NEXT: li r0, 16
-; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: stwu r0, -16(r1)
+; CHECK-32-NEXT: li r12, 16
+; CHECK-32-NEXT: mtctr r12
; CHECK-32-NEXT: .LBB2_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwu r12, -4096(r1)
+; CHECK-32-NEXT: stwu r0, -4096(r1)
; CHECK-32-NEXT: bdnz .LBB2_1
; CHECK-32-NEXT: # %bb.2: # %entry
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: li r3, 3
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: stb r3, 16(r1)
define i8 @f3() #0 "stack-probe-size"="32768" nounwind {
; CHECK-LE-LABEL: f3:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: stdu r12, -48(r1)
-; CHECK-LE-NEXT: stdu r12, -32768(r1)
-; CHECK-LE-NEXT: stdu r12, -32768(r1)
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: stdu r0, -48(r1)
+; CHECK-LE-NEXT: stdu r0, -32768(r1)
+; CHECK-LE-NEXT: stdu r0, -32768(r1)
; CHECK-LE-NEXT: li r3, 3
; CHECK-LE-NEXT: stb r3, 48(r1)
; CHECK-LE-NEXT: lbz r3, 48(r1)
;
; CHECK-BE-LABEL: f3:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: stdu r12, -64(r1)
-; CHECK-BE-NEXT: stdu r12, -32768(r1)
-; CHECK-BE-NEXT: stdu r12, -32768(r1)
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: stdu r0, -64(r1)
+; CHECK-BE-NEXT: stdu r0, -32768(r1)
+; CHECK-BE-NEXT: stdu r0, -32768(r1)
; CHECK-BE-NEXT: li r3, 3
; CHECK-BE-NEXT: stb r3, 64(r1)
; CHECK-BE-NEXT: lbz r3, 64(r1)
;
; CHECK-32-LABEL: f3:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: stwu r12, -16(r1)
-; CHECK-32-NEXT: stwu r12, -32768(r1)
-; CHECK-32-NEXT: stwu r12, -32768(r1)
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: stwu r0, -16(r1)
+; CHECK-32-NEXT: stwu r0, -32768(r1)
+; CHECK-32-NEXT: stwu r0, -32768(r1)
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: li r3, 3
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: stb r3, 16(r1)
define i8 @f5() #0 "stack-probe-size"="65536" nounwind {
; CHECK-LE-LABEL: f5:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: stdu r12, -48(r1)
-; CHECK-LE-NEXT: li r0, 16
-; CHECK-LE-NEXT: mtctr r0
-; CHECK-LE-NEXT: lis r0, -1
-; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: stdu r0, -48(r1)
+; CHECK-LE-NEXT: li r12, 16
+; CHECK-LE-NEXT: mtctr r12
+; CHECK-LE-NEXT: lis r12, -1
+; CHECK-LE-NEXT: ori r12, r12, 0
; CHECK-LE-NEXT: .LBB5_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdux r12, r1, r0
+; CHECK-LE-NEXT: stdux r0, r1, r12
; CHECK-LE-NEXT: bdnz .LBB5_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: li r3, 3
;
; CHECK-BE-LABEL: f5:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: stdu r12, -64(r1)
-; CHECK-BE-NEXT: li r0, 16
-; CHECK-BE-NEXT: mtctr r0
-; CHECK-BE-NEXT: lis r0, -1
-; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: stdu r0, -64(r1)
+; CHECK-BE-NEXT: li r12, 16
+; CHECK-BE-NEXT: mtctr r12
+; CHECK-BE-NEXT: lis r12, -1
+; CHECK-BE-NEXT: ori r12, r12, 0
; CHECK-BE-NEXT: .LBB5_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdux r12, r1, r0
+; CHECK-BE-NEXT: stdux r0, r1, r12
; CHECK-BE-NEXT: bdnz .LBB5_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: li r3, 3
;
; CHECK-32-LABEL: f5:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: stwu r12, -16(r1)
-; CHECK-32-NEXT: li r0, 16
-; CHECK-32-NEXT: mtctr r0
-; CHECK-32-NEXT: lis r0, -1
-; CHECK-32-NEXT: nop
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: stwu r0, -16(r1)
+; CHECK-32-NEXT: li r12, 16
+; CHECK-32-NEXT: mtctr r12
+; CHECK-32-NEXT: lis r12, -1
+; CHECK-32-NEXT: ori r12, r12, 0
; CHECK-32-NEXT: .LBB5_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwux r12, r1, r0
+; CHECK-32-NEXT: stwux r0, r1, r12
; CHECK-32-NEXT: bdnz .LBB5_1
; CHECK-32-NEXT: # %bb.2: # %entry
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: li r3, 3
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: stb r3, 16(r1)
define i8 @f6() #0 nounwind {
; CHECK-LE-LABEL: f6:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: stdu r12, -48(r1)
-; CHECK-LE-NEXT: lis r0, 4
-; CHECK-LE-NEXT: nop
-; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: stdu r0, -48(r1)
+; CHECK-LE-NEXT: lis r12, 4
+; CHECK-LE-NEXT: ori r12, r12, 0
+; CHECK-LE-NEXT: mtctr r12
; CHECK-LE-NEXT: .LBB6_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdu r12, -4096(r1)
+; CHECK-LE-NEXT: stdu r0, -4096(r1)
; CHECK-LE-NEXT: bdnz .LBB6_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: li r3, 3
;
; CHECK-BE-LABEL: f6:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: stdu r12, -64(r1)
-; CHECK-BE-NEXT: lis r0, 4
-; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: stdu r0, -64(r1)
+; CHECK-BE-NEXT: lis r12, 4
+; CHECK-BE-NEXT: ori r12, r12, 0
+; CHECK-BE-NEXT: mtctr r12
; CHECK-BE-NEXT: .LBB6_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdu r12, -4096(r1)
+; CHECK-BE-NEXT: stdu r0, -4096(r1)
; CHECK-BE-NEXT: bdnz .LBB6_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: li r3, 3
;
; CHECK-32-LABEL: f6:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: stwu r12, -16(r1)
-; CHECK-32-NEXT: lis r0, 4
-; CHECK-32-NEXT: nop
-; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: stwu r0, -16(r1)
+; CHECK-32-NEXT: lis r12, 4
+; CHECK-32-NEXT: ori r12, r12, 0
+; CHECK-32-NEXT: mtctr r12
; CHECK-32-NEXT: .LBB6_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwu r12, -4096(r1)
+; CHECK-32-NEXT: stwu r0, -4096(r1)
; CHECK-32-NEXT: bdnz .LBB6_1
; CHECK-32-NEXT: # %bb.2: # %entry
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: li r3, 3
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: stb r3, 16(r1)
define i8 @f7() #0 "stack-probe-size"="65536" nounwind {
; CHECK-LE-LABEL: f7:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: lis r0, -1
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: ori r0, r0, 13776
-; CHECK-LE-NEXT: stdux r12, r1, r0
-; CHECK-LE-NEXT: li r0, 15258
-; CHECK-LE-NEXT: mtctr r0
-; CHECK-LE-NEXT: lis r0, -1
-; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: lis r12, -1
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: ori r12, r12, 13776
+; CHECK-LE-NEXT: stdux r0, r1, r12
+; CHECK-LE-NEXT: li r12, 15258
+; CHECK-LE-NEXT: mtctr r12
+; CHECK-LE-NEXT: lis r12, -1
+; CHECK-LE-NEXT: ori r12, r12, 0
; CHECK-LE-NEXT: .LBB7_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdux r12, r1, r0
+; CHECK-LE-NEXT: stdux r0, r1, r12
; CHECK-LE-NEXT: bdnz .LBB7_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: li r3, 3
;
; CHECK-BE-LABEL: f7:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: lis r0, -1
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: ori r0, r0, 13760
-; CHECK-BE-NEXT: stdux r12, r1, r0
-; CHECK-BE-NEXT: li r0, 15258
-; CHECK-BE-NEXT: mtctr r0
-; CHECK-BE-NEXT: lis r0, -1
-; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: lis r12, -1
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: ori r12, r12, 13760
+; CHECK-BE-NEXT: stdux r0, r1, r12
+; CHECK-BE-NEXT: li r12, 15258
+; CHECK-BE-NEXT: mtctr r12
+; CHECK-BE-NEXT: lis r12, -1
+; CHECK-BE-NEXT: ori r12, r12, 0
; CHECK-BE-NEXT: .LBB7_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdux r12, r1, r0
+; CHECK-BE-NEXT: stdux r0, r1, r12
; CHECK-BE-NEXT: bdnz .LBB7_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: li r3, 3
;
; CHECK-32-LABEL: f7:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: lis r0, -1
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: ori r0, r0, 13808
-; CHECK-32-NEXT: stwux r12, r1, r0
-; CHECK-32-NEXT: li r0, 15258
-; CHECK-32-NEXT: mtctr r0
-; CHECK-32-NEXT: lis r0, -1
-; CHECK-32-NEXT: nop
+; CHECK-32-NEXT: lis r12, -1
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: ori r12, r12, 13808
+; CHECK-32-NEXT: stwux r0, r1, r12
+; CHECK-32-NEXT: li r12, 15258
+; CHECK-32-NEXT: mtctr r12
+; CHECK-32-NEXT: lis r12, -1
+; CHECK-32-NEXT: ori r12, r12, 0
; CHECK-32-NEXT: .LBB7_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwux r12, r1, r0
+; CHECK-32-NEXT: stwux r0, r1, r12
; CHECK-32-NEXT: bdnz .LBB7_1
; CHECK-32-NEXT: # %bb.2: # %entry
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: li r3, 3
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: stb r3, 9(r1)
define i8 @f1() #0 "stack-probe-size"="0" {
; CHECK-LE-LABEL: f1:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT: li r0, 259
-; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT: li r12, 259
+; CHECK-LE-NEXT: mtctr r12
; CHECK-LE-NEXT: .LBB1_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdu r12, -16(r1)
+; CHECK-LE-NEXT: stdu r0, -16(r1)
; CHECK-LE-NEXT: bdnz .LBB1_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-BE-LABEL: f1:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT: li r0, 260
-; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT: li r12, 260
+; CHECK-BE-NEXT: mtctr r12
; CHECK-BE-NEXT: .LBB1_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdu r12, -16(r1)
+; CHECK-BE-NEXT: stdu r0, -16(r1)
; CHECK-BE-NEXT: bdnz .LBB1_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-32-LABEL: f1:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: .cfi_def_cfa r12, 0
-; CHECK-32-NEXT: li r0, 257
-; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: .cfi_def_cfa r0, 0
+; CHECK-32-NEXT: li r12, 257
+; CHECK-32-NEXT: mtctr r12
; CHECK-32-NEXT: .LBB1_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwu r12, -16(r1)
+; CHECK-32-NEXT: stwu r0, -16(r1)
; CHECK-32-NEXT: bdnz .LBB1_1
; CHECK-32-NEXT: # %bb.2: # %entry
; CHECK-32-NEXT: .cfi_def_cfa_register r1
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: .cfi_def_cfa_offset 4112
; CHECK-32-NEXT: li r3, 3
define i8 @f2() #0 {
; CHECK-LE-LABEL: f2:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT: stdu r12, -48(r1)
-; CHECK-LE-NEXT: li r0, 16
-; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT: stdu r0, -48(r1)
+; CHECK-LE-NEXT: li r12, 16
+; CHECK-LE-NEXT: mtctr r12
; CHECK-LE-NEXT: .LBB2_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdu r12, -4096(r1)
+; CHECK-LE-NEXT: stdu r0, -4096(r1)
; CHECK-LE-NEXT: bdnz .LBB2_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-BE-LABEL: f2:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT: stdu r12, -64(r1)
-; CHECK-BE-NEXT: li r0, 16
-; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT: stdu r0, -64(r1)
+; CHECK-BE-NEXT: li r12, 16
+; CHECK-BE-NEXT: mtctr r12
; CHECK-BE-NEXT: .LBB2_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdu r12, -4096(r1)
+; CHECK-BE-NEXT: stdu r0, -4096(r1)
; CHECK-BE-NEXT: bdnz .LBB2_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-32-LABEL: f2:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: .cfi_def_cfa r12, 0
-; CHECK-32-NEXT: stwu r12, -16(r1)
-; CHECK-32-NEXT: li r0, 16
-; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: .cfi_def_cfa r0, 0
+; CHECK-32-NEXT: stwu r0, -16(r1)
+; CHECK-32-NEXT: li r12, 16
+; CHECK-32-NEXT: mtctr r12
; CHECK-32-NEXT: .LBB2_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwu r12, -4096(r1)
+; CHECK-32-NEXT: stwu r0, -4096(r1)
; CHECK-32-NEXT: bdnz .LBB2_1
; CHECK-32-NEXT: # %bb.2: # %entry
; CHECK-32-NEXT: .cfi_def_cfa_register r1
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: .cfi_def_cfa_offset 65552
; CHECK-32-NEXT: li r3, 3
define i8 @f3() #0 "stack-probe-size"="32768" {
; CHECK-LE-LABEL: f3:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT: stdu r12, -48(r1)
-; CHECK-LE-NEXT: stdu r12, -32768(r1)
-; CHECK-LE-NEXT: stdu r12, -32768(r1)
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT: stdu r0, -48(r1)
+; CHECK-LE-NEXT: stdu r0, -32768(r1)
+; CHECK-LE-NEXT: stdu r0, -32768(r1)
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
; CHECK-LE-NEXT: .cfi_def_cfa_offset 65584
; CHECK-LE-NEXT: li r3, 3
;
; CHECK-BE-LABEL: f3:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT: stdu r12, -64(r1)
-; CHECK-BE-NEXT: stdu r12, -32768(r1)
-; CHECK-BE-NEXT: stdu r12, -32768(r1)
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT: stdu r0, -64(r1)
+; CHECK-BE-NEXT: stdu r0, -32768(r1)
+; CHECK-BE-NEXT: stdu r0, -32768(r1)
; CHECK-BE-NEXT: .cfi_def_cfa_register r1
; CHECK-BE-NEXT: .cfi_def_cfa_offset 65600
; CHECK-BE-NEXT: li r3, 3
;
; CHECK-32-LABEL: f3:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: .cfi_def_cfa r12, 0
-; CHECK-32-NEXT: stwu r12, -16(r1)
-; CHECK-32-NEXT: stwu r12, -32768(r1)
-; CHECK-32-NEXT: stwu r12, -32768(r1)
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: .cfi_def_cfa r0, 0
+; CHECK-32-NEXT: stwu r0, -16(r1)
+; CHECK-32-NEXT: stwu r0, -32768(r1)
+; CHECK-32-NEXT: stwu r0, -32768(r1)
; CHECK-32-NEXT: .cfi_def_cfa_register r1
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: .cfi_def_cfa_offset 65552
; CHECK-32-NEXT: li r3, 3
define i8 @f5() #0 "stack-probe-size"="65536" {
; CHECK-LE-LABEL: f5:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT: stdu r12, -48(r1)
-; CHECK-LE-NEXT: li r0, 16
-; CHECK-LE-NEXT: mtctr r0
-; CHECK-LE-NEXT: lis r0, -1
-; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT: stdu r0, -48(r1)
+; CHECK-LE-NEXT: li r12, 16
+; CHECK-LE-NEXT: mtctr r12
+; CHECK-LE-NEXT: lis r12, -1
+; CHECK-LE-NEXT: ori r12, r12, 0
; CHECK-LE-NEXT: .LBB5_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdux r12, r1, r0
+; CHECK-LE-NEXT: stdux r0, r1, r12
; CHECK-LE-NEXT: bdnz .LBB5_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-BE-LABEL: f5:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT: stdu r12, -64(r1)
-; CHECK-BE-NEXT: li r0, 16
-; CHECK-BE-NEXT: mtctr r0
-; CHECK-BE-NEXT: lis r0, -1
-; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT: stdu r0, -64(r1)
+; CHECK-BE-NEXT: li r12, 16
+; CHECK-BE-NEXT: mtctr r12
+; CHECK-BE-NEXT: lis r12, -1
+; CHECK-BE-NEXT: ori r12, r12, 0
; CHECK-BE-NEXT: .LBB5_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdux r12, r1, r0
+; CHECK-BE-NEXT: stdux r0, r1, r12
; CHECK-BE-NEXT: bdnz .LBB5_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-32-LABEL: f5:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: .cfi_def_cfa r12, 0
-; CHECK-32-NEXT: stwu r12, -16(r1)
-; CHECK-32-NEXT: li r0, 16
-; CHECK-32-NEXT: mtctr r0
-; CHECK-32-NEXT: lis r0, -1
-; CHECK-32-NEXT: nop
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: .cfi_def_cfa r0, 0
+; CHECK-32-NEXT: stwu r0, -16(r1)
+; CHECK-32-NEXT: li r12, 16
+; CHECK-32-NEXT: mtctr r12
+; CHECK-32-NEXT: lis r12, -1
+; CHECK-32-NEXT: ori r12, r12, 0
; CHECK-32-NEXT: .LBB5_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwux r12, r1, r0
+; CHECK-32-NEXT: stwux r0, r1, r12
; CHECK-32-NEXT: bdnz .LBB5_1
; CHECK-32-NEXT: # %bb.2: # %entry
; CHECK-32-NEXT: .cfi_def_cfa_register r1
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: .cfi_def_cfa_offset 1048592
; CHECK-32-NEXT: li r3, 3
define i8 @f6() #0 {
; CHECK-LE-LABEL: f6:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT: stdu r12, -48(r1)
-; CHECK-LE-NEXT: lis r0, 4
-; CHECK-LE-NEXT: nop
-; CHECK-LE-NEXT: mtctr r0
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT: stdu r0, -48(r1)
+; CHECK-LE-NEXT: lis r12, 4
+; CHECK-LE-NEXT: ori r12, r12, 0
+; CHECK-LE-NEXT: mtctr r12
; CHECK-LE-NEXT: .LBB6_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdu r12, -4096(r1)
+; CHECK-LE-NEXT: stdu r0, -4096(r1)
; CHECK-LE-NEXT: bdnz .LBB6_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-BE-LABEL: f6:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT: stdu r12, -64(r1)
-; CHECK-BE-NEXT: lis r0, 4
-; CHECK-BE-NEXT: nop
-; CHECK-BE-NEXT: mtctr r0
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT: stdu r0, -64(r1)
+; CHECK-BE-NEXT: lis r12, 4
+; CHECK-BE-NEXT: ori r12, r12, 0
+; CHECK-BE-NEXT: mtctr r12
; CHECK-BE-NEXT: .LBB6_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdu r12, -4096(r1)
+; CHECK-BE-NEXT: stdu r0, -4096(r1)
; CHECK-BE-NEXT: bdnz .LBB6_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-32-LABEL: f6:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: .cfi_def_cfa r12, 0
-; CHECK-32-NEXT: stwu r12, -16(r1)
-; CHECK-32-NEXT: lis r0, 4
-; CHECK-32-NEXT: nop
-; CHECK-32-NEXT: mtctr r0
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: .cfi_def_cfa r0, 0
+; CHECK-32-NEXT: stwu r0, -16(r1)
+; CHECK-32-NEXT: lis r12, 4
+; CHECK-32-NEXT: ori r12, r12, 0
+; CHECK-32-NEXT: mtctr r12
; CHECK-32-NEXT: .LBB6_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwu r12, -4096(r1)
+; CHECK-32-NEXT: stwu r0, -4096(r1)
; CHECK-32-NEXT: bdnz .LBB6_1
; CHECK-32-NEXT: # %bb.2: # %entry
; CHECK-32-NEXT: .cfi_def_cfa_register r1
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: .cfi_def_cfa_offset 1073741840
; CHECK-32-NEXT: li r3, 3
define i8 @f7() #0 "stack-probe-size"="65536" {
; CHECK-LE-LABEL: f7:
; CHECK-LE: # %bb.0: # %entry
-; CHECK-LE-NEXT: mr r12, r1
-; CHECK-LE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-LE-NEXT: lis r0, -1
-; CHECK-LE-NEXT: ori r0, r0, 13776
-; CHECK-LE-NEXT: stdux r12, r1, r0
-; CHECK-LE-NEXT: li r0, 15258
-; CHECK-LE-NEXT: mtctr r0
-; CHECK-LE-NEXT: lis r0, -1
-; CHECK-LE-NEXT: nop
+; CHECK-LE-NEXT: mr r0, r1
+; CHECK-LE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-LE-NEXT: lis r12, -1
+; CHECK-LE-NEXT: ori r12, r12, 13776
+; CHECK-LE-NEXT: stdux r0, r1, r12
+; CHECK-LE-NEXT: li r12, 15258
+; CHECK-LE-NEXT: mtctr r12
+; CHECK-LE-NEXT: lis r12, -1
+; CHECK-LE-NEXT: ori r12, r12, 0
; CHECK-LE-NEXT: .LBB7_1: # %entry
; CHECK-LE-NEXT: #
-; CHECK-LE-NEXT: stdux r12, r1, r0
+; CHECK-LE-NEXT: stdux r0, r1, r12
; CHECK-LE-NEXT: bdnz .LBB7_1
; CHECK-LE-NEXT: # %bb.2: # %entry
; CHECK-LE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-BE-LABEL: f7:
; CHECK-BE: # %bb.0: # %entry
-; CHECK-BE-NEXT: mr r12, r1
-; CHECK-BE-NEXT: .cfi_def_cfa r12, 0
-; CHECK-BE-NEXT: lis r0, -1
-; CHECK-BE-NEXT: ori r0, r0, 13760
-; CHECK-BE-NEXT: stdux r12, r1, r0
-; CHECK-BE-NEXT: li r0, 15258
-; CHECK-BE-NEXT: mtctr r0
-; CHECK-BE-NEXT: lis r0, -1
-; CHECK-BE-NEXT: nop
+; CHECK-BE-NEXT: mr r0, r1
+; CHECK-BE-NEXT: .cfi_def_cfa r0, 0
+; CHECK-BE-NEXT: lis r12, -1
+; CHECK-BE-NEXT: ori r12, r12, 13760
+; CHECK-BE-NEXT: stdux r0, r1, r12
+; CHECK-BE-NEXT: li r12, 15258
+; CHECK-BE-NEXT: mtctr r12
+; CHECK-BE-NEXT: lis r12, -1
+; CHECK-BE-NEXT: ori r12, r12, 0
; CHECK-BE-NEXT: .LBB7_1: # %entry
; CHECK-BE-NEXT: #
-; CHECK-BE-NEXT: stdux r12, r1, r0
+; CHECK-BE-NEXT: stdux r0, r1, r12
; CHECK-BE-NEXT: bdnz .LBB7_1
; CHECK-BE-NEXT: # %bb.2: # %entry
; CHECK-BE-NEXT: .cfi_def_cfa_register r1
;
; CHECK-32-LABEL: f7:
; CHECK-32: # %bb.0: # %entry
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: .cfi_def_cfa r12, 0
-; CHECK-32-NEXT: lis r0, -1
-; CHECK-32-NEXT: ori r0, r0, 13808
-; CHECK-32-NEXT: stwux r12, r1, r0
-; CHECK-32-NEXT: li r0, 15258
-; CHECK-32-NEXT: mtctr r0
-; CHECK-32-NEXT: lis r0, -1
-; CHECK-32-NEXT: nop
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: .cfi_def_cfa r0, 0
+; CHECK-32-NEXT: lis r12, -1
+; CHECK-32-NEXT: ori r12, r12, 13808
+; CHECK-32-NEXT: stwux r0, r1, r12
+; CHECK-32-NEXT: li r12, 15258
+; CHECK-32-NEXT: mtctr r12
+; CHECK-32-NEXT: lis r12, -1
+; CHECK-32-NEXT: ori r12, r12, 0
; CHECK-32-NEXT: .LBB7_1: # %entry
; CHECK-32-NEXT: #
-; CHECK-32-NEXT: stwux r12, r1, r0
+; CHECK-32-NEXT: stwux r0, r1, r12
; CHECK-32-NEXT: bdnz .LBB7_1
; CHECK-32-NEXT: # %bb.2: # %entry
; CHECK-32-NEXT: .cfi_def_cfa_register r1
-; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: .cfi_def_cfa_offset 1000000016
; CHECK-32-NEXT: li r3, 3
define i32 @f9(i64 %i) local_unnamed_addr #0 {
; CHECK-LE-LABEL: f9:
; CHECK-LE: # %bb.0:
+; CHECK-LE-NEXT: clrldi r12, r1, 53
; CHECK-LE-NEXT: std r30, -16(r1)
; CHECK-LE-NEXT: mr r30, r1
-; CHECK-LE-NEXT: .cfi_def_cfa r30, 0
-; CHECK-LE-NEXT: clrldi r0, r30, 53
-; CHECK-LE-NEXT: subc r12, r30, r0
-; CHECK-LE-NEXT: clrldi r0, r0, 52
-; CHECK-LE-NEXT: cmpdi r0, 0
-; CHECK-LE-NEXT: beq cr0, .LBB9_2
-; CHECK-LE-NEXT: # %bb.1:
-; CHECK-LE-NEXT: neg r0, r0
-; CHECK-LE-NEXT: stdux r30, r1, r0
+; CHECK-LE-NEXT: sub r0, r1, r12
+; CHECK-LE-NEXT: li r12, -10240
+; CHECK-LE-NEXT: add r0, r12, r0
+; CHECK-LE-NEXT: sub r12, r0, r1
+; CHECK-LE-NEXT: cmpdi r12, -4096
+; CHECK-LE-NEXT: bge cr0, .LBB9_2
+; CHECK-LE-NEXT: .LBB9_1:
+; CHECK-LE-NEXT: stdu r30, -4096(r1)
+; CHECK-LE-NEXT: addi r12, r12, 4096
+; CHECK-LE-NEXT: cmpdi r12, -4096
+; CHECK-LE-NEXT: blt cr0, .LBB9_1
; CHECK-LE-NEXT: .LBB9_2:
-; CHECK-LE-NEXT: li r0, -4096
-; CHECK-LE-NEXT: cmpd r1, r12
-; CHECK-LE-NEXT: beq cr0, .LBB9_4
-; CHECK-LE-NEXT: .LBB9_3:
-; CHECK-LE-NEXT: stdux r30, r1, r0
-; CHECK-LE-NEXT: cmpd r1, r12
-; CHECK-LE-NEXT: bne cr0, .LBB9_3
-; CHECK-LE-NEXT: .LBB9_4:
-; CHECK-LE-NEXT: mr r12, r30
-; CHECK-LE-NEXT: stdu r12, -2048(r1)
-; CHECK-LE-NEXT: stdu r12, -4096(r1)
-; CHECK-LE-NEXT: stdu r12, -4096(r1)
-; CHECK-LE-NEXT: .cfi_def_cfa_register r1
+; CHECK-LE-NEXT: stdux r30, r1, r12
+; CHECK-LE-NEXT: mr r0, r30
+; CHECK-LE-NEXT: .cfi_def_cfa_register r0
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
; CHECK-LE-NEXT: .cfi_offset r30, -16
; CHECK-LE-NEXT: addi r4, r1, 2048
;
; CHECK-BE-LABEL: f9:
; CHECK-BE: # %bb.0:
+; CHECK-BE-NEXT: clrldi r12, r1, 53
; CHECK-BE-NEXT: std r30, -16(r1)
; CHECK-BE-NEXT: mr r30, r1
-; CHECK-BE-NEXT: .cfi_def_cfa r30, 0
-; CHECK-BE-NEXT: clrldi r0, r30, 53
-; CHECK-BE-NEXT: subc r12, r30, r0
-; CHECK-BE-NEXT: clrldi r0, r0, 52
-; CHECK-BE-NEXT: cmpdi r0, 0
-; CHECK-BE-NEXT: beq cr0, .LBB9_2
-; CHECK-BE-NEXT: # %bb.1:
-; CHECK-BE-NEXT: neg r0, r0
-; CHECK-BE-NEXT: stdux r30, r1, r0
+; CHECK-BE-NEXT: sub r0, r1, r12
+; CHECK-BE-NEXT: li r12, -10240
+; CHECK-BE-NEXT: add r0, r12, r0
+; CHECK-BE-NEXT: sub r12, r0, r1
+; CHECK-BE-NEXT: cmpdi r12, -4096
+; CHECK-BE-NEXT: bge cr0, .LBB9_2
+; CHECK-BE-NEXT: .LBB9_1:
+; CHECK-BE-NEXT: stdu r30, -4096(r1)
+; CHECK-BE-NEXT: addi r12, r12, 4096
+; CHECK-BE-NEXT: cmpdi r12, -4096
+; CHECK-BE-NEXT: blt cr0, .LBB9_1
; CHECK-BE-NEXT: .LBB9_2:
-; CHECK-BE-NEXT: li r0, -4096
-; CHECK-BE-NEXT: cmpd r1, r12
-; CHECK-BE-NEXT: beq cr0, .LBB9_4
-; CHECK-BE-NEXT: .LBB9_3:
-; CHECK-BE-NEXT: stdux r30, r1, r0
-; CHECK-BE-NEXT: cmpd r1, r12
-; CHECK-BE-NEXT: bne cr0, .LBB9_3
-; CHECK-BE-NEXT: .LBB9_4:
-; CHECK-BE-NEXT: mr r12, r30
-; CHECK-BE-NEXT: stdu r12, -2048(r1)
-; CHECK-BE-NEXT: stdu r12, -4096(r1)
-; CHECK-BE-NEXT: stdu r12, -4096(r1)
-; CHECK-BE-NEXT: .cfi_def_cfa_register r1
+; CHECK-BE-NEXT: stdux r30, r1, r12
+; CHECK-BE-NEXT: mr r0, r30
+; CHECK-BE-NEXT: .cfi_def_cfa_register r0
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
; CHECK-BE-NEXT: .cfi_offset r30, -16
; CHECK-BE-NEXT: addi r4, r1, 2048
;
; CHECK-32-LABEL: f9:
; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: .cfi_def_cfa r12, 0
-; CHECK-32-NEXT: clrlwi r0, r12, 21
-; CHECK-32-NEXT: subc r1, r1, r0
-; CHECK-32-NEXT: stwu r12, -2048(r1)
-; CHECK-32-NEXT: stwu r12, -4096(r1)
-; CHECK-32-NEXT: stwu r12, -4096(r1)
-; CHECK-32-NEXT: .cfi_def_cfa_register r1
+; CHECK-32-NEXT: clrlwi r12, r1, 21
; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: li r12, -10240
+; CHECK-32-NEXT: add r0, r12, r0
+; CHECK-32-NEXT: sub r12, r0, r1
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: cmpwi r12, -4096
+; CHECK-32-NEXT: bge cr0, .LBB9_2
+; CHECK-32-NEXT: .LBB9_1:
+; CHECK-32-NEXT: stwu r0, -4096(r1)
+; CHECK-32-NEXT: addi r12, r12, 4096
+; CHECK-32-NEXT: cmpwi r12, -4096
+; CHECK-32-NEXT: blt cr0, .LBB9_1
+; CHECK-32-NEXT: .LBB9_2:
+; CHECK-32-NEXT: stwux r0, r1, r12
+; CHECK-32-NEXT: .cfi_def_cfa_register r0
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: addic r0, r0, -8
; CHECK-32-NEXT: stwx r30, 0, r0
define i32 @f10(i64 %i) local_unnamed_addr #0 {
; CHECK-LE-LABEL: f10:
; CHECK-LE: # %bb.0:
+; CHECK-LE-NEXT: clrldi r12, r1, 54
; CHECK-LE-NEXT: std r30, -16(r1)
; CHECK-LE-NEXT: mr r30, r1
-; CHECK-LE-NEXT: .cfi_def_cfa r30, 0
-; CHECK-LE-NEXT: clrldi r0, r30, 54
-; CHECK-LE-NEXT: subc r12, r30, r0
-; CHECK-LE-NEXT: clrldi r0, r0, 52
-; CHECK-LE-NEXT: cmpdi r0, 0
-; CHECK-LE-NEXT: beq cr0, .LBB10_2
-; CHECK-LE-NEXT: # %bb.1:
-; CHECK-LE-NEXT: neg r0, r0
-; CHECK-LE-NEXT: stdux r30, r1, r0
+; CHECK-LE-NEXT: sub r0, r1, r12
+; CHECK-LE-NEXT: li r12, -5120
+; CHECK-LE-NEXT: add r0, r12, r0
+; CHECK-LE-NEXT: sub r12, r0, r1
+; CHECK-LE-NEXT: cmpdi r12, -4096
+; CHECK-LE-NEXT: bge cr0, .LBB10_2
+; CHECK-LE-NEXT: .LBB10_1:
+; CHECK-LE-NEXT: stdu r30, -4096(r1)
+; CHECK-LE-NEXT: addi r12, r12, 4096
+; CHECK-LE-NEXT: cmpdi r12, -4096
+; CHECK-LE-NEXT: blt cr0, .LBB10_1
; CHECK-LE-NEXT: .LBB10_2:
-; CHECK-LE-NEXT: li r0, -4096
-; CHECK-LE-NEXT: cmpd r1, r12
-; CHECK-LE-NEXT: beq cr0, .LBB10_4
-; CHECK-LE-NEXT: .LBB10_3:
-; CHECK-LE-NEXT: stdux r30, r1, r0
-; CHECK-LE-NEXT: cmpd r1, r12
-; CHECK-LE-NEXT: bne cr0, .LBB10_3
-; CHECK-LE-NEXT: .LBB10_4:
-; CHECK-LE-NEXT: mr r12, r30
-; CHECK-LE-NEXT: stdu r12, -1024(r1)
-; CHECK-LE-NEXT: stdu r12, -4096(r1)
-; CHECK-LE-NEXT: .cfi_def_cfa_register r1
+; CHECK-LE-NEXT: stdux r30, r1, r12
+; CHECK-LE-NEXT: mr r0, r30
+; CHECK-LE-NEXT: .cfi_def_cfa_register r0
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
; CHECK-LE-NEXT: .cfi_offset r30, -16
; CHECK-LE-NEXT: addi r4, r1, 1024
;
; CHECK-BE-LABEL: f10:
; CHECK-BE: # %bb.0:
+; CHECK-BE-NEXT: clrldi r12, r1, 54
; CHECK-BE-NEXT: std r30, -16(r1)
; CHECK-BE-NEXT: mr r30, r1
-; CHECK-BE-NEXT: .cfi_def_cfa r30, 0
-; CHECK-BE-NEXT: clrldi r0, r30, 54
-; CHECK-BE-NEXT: subc r12, r30, r0
-; CHECK-BE-NEXT: clrldi r0, r0, 52
-; CHECK-BE-NEXT: cmpdi r0, 0
-; CHECK-BE-NEXT: beq cr0, .LBB10_2
-; CHECK-BE-NEXT: # %bb.1:
-; CHECK-BE-NEXT: neg r0, r0
-; CHECK-BE-NEXT: stdux r30, r1, r0
+; CHECK-BE-NEXT: sub r0, r1, r12
+; CHECK-BE-NEXT: li r12, -5120
+; CHECK-BE-NEXT: add r0, r12, r0
+; CHECK-BE-NEXT: sub r12, r0, r1
+; CHECK-BE-NEXT: cmpdi r12, -4096
+; CHECK-BE-NEXT: bge cr0, .LBB10_2
+; CHECK-BE-NEXT: .LBB10_1:
+; CHECK-BE-NEXT: stdu r30, -4096(r1)
+; CHECK-BE-NEXT: addi r12, r12, 4096
+; CHECK-BE-NEXT: cmpdi r12, -4096
+; CHECK-BE-NEXT: blt cr0, .LBB10_1
; CHECK-BE-NEXT: .LBB10_2:
-; CHECK-BE-NEXT: li r0, -4096
-; CHECK-BE-NEXT: cmpd r1, r12
-; CHECK-BE-NEXT: beq cr0, .LBB10_4
-; CHECK-BE-NEXT: .LBB10_3:
-; CHECK-BE-NEXT: stdux r30, r1, r0
-; CHECK-BE-NEXT: cmpd r1, r12
-; CHECK-BE-NEXT: bne cr0, .LBB10_3
-; CHECK-BE-NEXT: .LBB10_4:
-; CHECK-BE-NEXT: mr r12, r30
-; CHECK-BE-NEXT: stdu r12, -1024(r1)
-; CHECK-BE-NEXT: stdu r12, -4096(r1)
-; CHECK-BE-NEXT: .cfi_def_cfa_register r1
+; CHECK-BE-NEXT: stdux r30, r1, r12
+; CHECK-BE-NEXT: mr r0, r30
+; CHECK-BE-NEXT: .cfi_def_cfa_register r0
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
; CHECK-BE-NEXT: .cfi_offset r30, -16
; CHECK-BE-NEXT: addi r4, r1, 1024
;
; CHECK-32-LABEL: f10:
; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: .cfi_def_cfa r12, 0
-; CHECK-32-NEXT: clrlwi r0, r12, 22
-; CHECK-32-NEXT: subc r1, r1, r0
-; CHECK-32-NEXT: stwu r12, -1024(r1)
-; CHECK-32-NEXT: stwu r12, -4096(r1)
-; CHECK-32-NEXT: .cfi_def_cfa_register r1
+; CHECK-32-NEXT: clrlwi r12, r1, 22
; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: li r12, -5120
+; CHECK-32-NEXT: add r0, r12, r0
+; CHECK-32-NEXT: sub r12, r0, r1
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: cmpwi r12, -4096
+; CHECK-32-NEXT: bge cr0, .LBB10_2
+; CHECK-32-NEXT: .LBB10_1:
+; CHECK-32-NEXT: stwu r0, -4096(r1)
+; CHECK-32-NEXT: addi r12, r12, 4096
+; CHECK-32-NEXT: cmpwi r12, -4096
+; CHECK-32-NEXT: blt cr0, .LBB10_1
+; CHECK-32-NEXT: .LBB10_2:
+; CHECK-32-NEXT: stwux r0, r1, r12
+; CHECK-32-NEXT: .cfi_def_cfa_register r0
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: addic r0, r0, -8
; CHECK-32-NEXT: stwx r30, 0, r0
define void @f11(i32 %vla_size, i64 %i) #0 {
; CHECK-LE-LABEL: f11:
; CHECK-LE: # %bb.0:
+; CHECK-LE-NEXT: clrldi r12, r1, 49
; CHECK-LE-NEXT: std r31, -8(r1)
; CHECK-LE-NEXT: std r30, -16(r1)
; CHECK-LE-NEXT: mr r30, r1
-; CHECK-LE-NEXT: .cfi_def_cfa r30, 0
-; CHECK-LE-NEXT: clrldi r0, r30, 49
-; CHECK-LE-NEXT: subc r12, r30, r0
-; CHECK-LE-NEXT: clrldi r0, r0, 52
-; CHECK-LE-NEXT: cmpdi r0, 0
-; CHECK-LE-NEXT: beq cr0, .LBB11_2
-; CHECK-LE-NEXT: # %bb.1:
-; CHECK-LE-NEXT: neg r0, r0
-; CHECK-LE-NEXT: stdux r30, r1, r0
+; CHECK-LE-NEXT: sub r0, r1, r12
+; CHECK-LE-NEXT: lis r12, -2
+; CHECK-LE-NEXT: ori r12, r12, 32768
+; CHECK-LE-NEXT: add r0, r12, r0
+; CHECK-LE-NEXT: sub r12, r0, r1
+; CHECK-LE-NEXT: cmpdi r12, -4096
+; CHECK-LE-NEXT: bge cr0, .LBB11_2
+; CHECK-LE-NEXT: .LBB11_1:
+; CHECK-LE-NEXT: stdu r30, -4096(r1)
+; CHECK-LE-NEXT: addi r12, r12, 4096
+; CHECK-LE-NEXT: cmpdi r12, -4096
+; CHECK-LE-NEXT: blt cr0, .LBB11_1
; CHECK-LE-NEXT: .LBB11_2:
-; CHECK-LE-NEXT: li r0, -4096
-; CHECK-LE-NEXT: cmpd r1, r12
-; CHECK-LE-NEXT: beq cr0, .LBB11_4
-; CHECK-LE-NEXT: .LBB11_3:
-; CHECK-LE-NEXT: stdux r30, r1, r0
-; CHECK-LE-NEXT: cmpd r1, r12
-; CHECK-LE-NEXT: bne cr0, .LBB11_3
-; CHECK-LE-NEXT: .LBB11_4:
-; CHECK-LE-NEXT: mr r12, r30
-; CHECK-LE-NEXT: li r0, 24
-; CHECK-LE-NEXT: mtctr r0
-; CHECK-LE-NEXT: .LBB11_5:
-; CHECK-LE-NEXT: stdu r12, -4096(r1)
-; CHECK-LE-NEXT: bdnz .LBB11_5
-; CHECK-LE-NEXT: # %bb.6:
-; CHECK-LE-NEXT: .cfi_def_cfa_register r1
+; CHECK-LE-NEXT: stdux r30, r1, r12
+; CHECK-LE-NEXT: mr r0, r30
+; CHECK-LE-NEXT: .cfi_def_cfa_register r0
; CHECK-LE-NEXT: .cfi_def_cfa_register r30
; CHECK-LE-NEXT: .cfi_offset r31, -8
; CHECK-LE-NEXT: .cfi_offset r30, -16
; CHECK-LE-NEXT: add r4, r1, r7
; CHECK-LE-NEXT: stdux r3, r1, r5
; CHECK-LE-NEXT: cmpd r1, r4
-; CHECK-LE-NEXT: beq cr0, .LBB11_8
-; CHECK-LE-NEXT: .LBB11_7:
+; CHECK-LE-NEXT: beq cr0, .LBB11_4
+; CHECK-LE-NEXT: .LBB11_3:
; CHECK-LE-NEXT: stdu r3, -4096(r1)
; CHECK-LE-NEXT: cmpd r1, r4
-; CHECK-LE-NEXT: bne cr0, .LBB11_7
-; CHECK-LE-NEXT: .LBB11_8:
+; CHECK-LE-NEXT: bne cr0, .LBB11_3
+; CHECK-LE-NEXT: .LBB11_4:
; CHECK-LE-NEXT: addi r3, r1, -32768
; CHECK-LE-NEXT: lbz r3, 0(r3)
; CHECK-LE-NEXT: mr r1, r30
;
; CHECK-BE-LABEL: f11:
; CHECK-BE: # %bb.0:
+; CHECK-BE-NEXT: clrldi r12, r1, 49
; CHECK-BE-NEXT: std r31, -8(r1)
; CHECK-BE-NEXT: std r30, -16(r1)
; CHECK-BE-NEXT: mr r30, r1
-; CHECK-BE-NEXT: .cfi_def_cfa r30, 0
-; CHECK-BE-NEXT: clrldi r0, r30, 49
-; CHECK-BE-NEXT: subc r12, r30, r0
-; CHECK-BE-NEXT: clrldi r0, r0, 52
-; CHECK-BE-NEXT: cmpdi r0, 0
-; CHECK-BE-NEXT: beq cr0, .LBB11_2
-; CHECK-BE-NEXT: # %bb.1:
-; CHECK-BE-NEXT: neg r0, r0
-; CHECK-BE-NEXT: stdux r30, r1, r0
+; CHECK-BE-NEXT: sub r0, r1, r12
+; CHECK-BE-NEXT: lis r12, -2
+; CHECK-BE-NEXT: ori r12, r12, 32768
+; CHECK-BE-NEXT: add r0, r12, r0
+; CHECK-BE-NEXT: sub r12, r0, r1
+; CHECK-BE-NEXT: cmpdi r12, -4096
+; CHECK-BE-NEXT: bge cr0, .LBB11_2
+; CHECK-BE-NEXT: .LBB11_1:
+; CHECK-BE-NEXT: stdu r30, -4096(r1)
+; CHECK-BE-NEXT: addi r12, r12, 4096
+; CHECK-BE-NEXT: cmpdi r12, -4096
+; CHECK-BE-NEXT: blt cr0, .LBB11_1
; CHECK-BE-NEXT: .LBB11_2:
-; CHECK-BE-NEXT: li r0, -4096
-; CHECK-BE-NEXT: cmpd r1, r12
-; CHECK-BE-NEXT: beq cr0, .LBB11_4
-; CHECK-BE-NEXT: .LBB11_3:
-; CHECK-BE-NEXT: stdux r30, r1, r0
-; CHECK-BE-NEXT: cmpd r1, r12
-; CHECK-BE-NEXT: bne cr0, .LBB11_3
-; CHECK-BE-NEXT: .LBB11_4:
-; CHECK-BE-NEXT: mr r12, r30
-; CHECK-BE-NEXT: li r0, 24
-; CHECK-BE-NEXT: mtctr r0
-; CHECK-BE-NEXT: .LBB11_5:
-; CHECK-BE-NEXT: stdu r12, -4096(r1)
-; CHECK-BE-NEXT: bdnz .LBB11_5
-; CHECK-BE-NEXT: # %bb.6:
-; CHECK-BE-NEXT: .cfi_def_cfa_register r1
+; CHECK-BE-NEXT: stdux r30, r1, r12
+; CHECK-BE-NEXT: mr r0, r30
+; CHECK-BE-NEXT: .cfi_def_cfa_register r0
; CHECK-BE-NEXT: .cfi_def_cfa_register r30
; CHECK-BE-NEXT: .cfi_offset r31, -8
; CHECK-BE-NEXT: .cfi_offset r30, -16
; CHECK-BE-NEXT: add r4, r1, r7
; CHECK-BE-NEXT: stdux r3, r1, r5
; CHECK-BE-NEXT: cmpd r1, r4
-; CHECK-BE-NEXT: beq cr0, .LBB11_8
-; CHECK-BE-NEXT: .LBB11_7:
+; CHECK-BE-NEXT: beq cr0, .LBB11_4
+; CHECK-BE-NEXT: .LBB11_3:
; CHECK-BE-NEXT: stdu r3, -4096(r1)
; CHECK-BE-NEXT: cmpd r1, r4
-; CHECK-BE-NEXT: bne cr0, .LBB11_7
-; CHECK-BE-NEXT: .LBB11_8:
+; CHECK-BE-NEXT: bne cr0, .LBB11_3
+; CHECK-BE-NEXT: .LBB11_4:
; CHECK-BE-NEXT: addi r3, r1, -32768
; CHECK-BE-NEXT: lbz r3, 0(r3)
; CHECK-BE-NEXT: mr r1, r30
;
; CHECK-32-LABEL: f11:
; CHECK-32: # %bb.0:
-; CHECK-32-NEXT: mr r12, r1
-; CHECK-32-NEXT: .cfi_def_cfa r12, 0
-; CHECK-32-NEXT: clrlwi r0, r12, 17
-; CHECK-32-NEXT: subc r1, r1, r0
-; CHECK-32-NEXT: li r0, 24
-; CHECK-32-NEXT: mtctr r0
-; CHECK-32-NEXT: .LBB11_1:
-; CHECK-32-NEXT: stwu r12, -4096(r1)
-; CHECK-32-NEXT: bdnz .LBB11_1
-; CHECK-32-NEXT: # %bb.2:
-; CHECK-32-NEXT: .cfi_def_cfa_register r1
+; CHECK-32-NEXT: clrlwi r12, r1, 17
; CHECK-32-NEXT: sub r0, r1, r12
+; CHECK-32-NEXT: lis r12, -2
+; CHECK-32-NEXT: ori r12, r12, 32768
+; CHECK-32-NEXT: add r0, r12, r0
+; CHECK-32-NEXT: sub r12, r0, r1
+; CHECK-32-NEXT: mr r0, r1
+; CHECK-32-NEXT: cmpwi r12, -4096
+; CHECK-32-NEXT: bge cr0, .LBB11_2
+; CHECK-32-NEXT: .LBB11_1:
+; CHECK-32-NEXT: stwu r0, -4096(r1)
+; CHECK-32-NEXT: addi r12, r12, 4096
+; CHECK-32-NEXT: cmpwi r12, -4096
+; CHECK-32-NEXT: blt cr0, .LBB11_1
+; CHECK-32-NEXT: .LBB11_2:
+; CHECK-32-NEXT: stwux r0, r1, r12
+; CHECK-32-NEXT: .cfi_def_cfa_register r0
+; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: sub r0, r1, r0
; CHECK-32-NEXT: addic r0, r0, -4
; CHECK-32-NEXT: stwx r31, 0, r0