return Model == TLSModel::GeneralDynamic || Model == TLSModel::LocalDynamic;
}
-bool PPCTTIImpl::mightUseCTR(BasicBlock *BB, TargetLibraryInfo *LibInfo,
- SmallPtrSetImpl<const Value *> &Visited) {
- const PPCTargetMachine &TM = ST->getTargetMachine();
-
- // Loop through the inline asm constraints and look for something that
- // clobbers ctr.
- auto asmClobbersCTR = [](InlineAsm *IA) {
- InlineAsm::ConstraintInfoVector CIV = IA->ParseConstraints();
- for (const InlineAsm::ConstraintInfo &C : CIV) {
- if (C.Type != InlineAsm::isInput)
- for (const auto &Code : C.Codes)
- if (StringRef(Code).equals_insensitive("{ctr}"))
- return true;
- }
- return false;
- };
-
- auto isLargeIntegerTy = [](bool Is32Bit, Type *Ty) {
- if (IntegerType *ITy = dyn_cast<IntegerType>(Ty))
- return ITy->getBitWidth() > (Is32Bit ? 32U : 64U);
-
- return false;
- };
-
- auto supportedHalfPrecisionOp = [](Instruction *Inst) {
- switch (Inst->getOpcode()) {
- default:
- return false;
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::Load:
- case Instruction::Store:
- case Instruction::FPToUI:
- case Instruction::UIToFP:
- case Instruction::FPToSI:
- case Instruction::SIToFP:
- return true;
- }
- };
-
- for (BasicBlock::iterator J = BB->begin(), JE = BB->end();
- J != JE; ++J) {
- // There are no direct operations on half precision so assume that
- // anything with that type requires a call except for a few select
- // operations with Power9.
- if (Instruction *CurrInst = dyn_cast<Instruction>(J)) {
- for (const auto &Op : CurrInst->operands()) {
- if (Op->getType()->getScalarType()->isHalfTy() ||
- CurrInst->getType()->getScalarType()->isHalfTy())
- return !(ST->isISA3_0() && supportedHalfPrecisionOp(CurrInst));
- }
- }
- if (CallInst *CI = dyn_cast<CallInst>(J)) {
- // Inline ASM is okay, unless it clobbers the ctr register.
- if (InlineAsm *IA = dyn_cast<InlineAsm>(CI->getCalledOperand())) {
- if (asmClobbersCTR(IA))
- return true;
- continue;
- }
-
- if (Function *F = CI->getCalledFunction()) {
- // Most intrinsics don't become function calls, but some might.
- // sin, cos, exp and log are always calls.
- unsigned Opcode = 0;
- if (F->getIntrinsicID() != Intrinsic::not_intrinsic) {
- switch (F->getIntrinsicID()) {
- default: continue;
- // If we have a call to loop_decrement or set_loop_iterations,
- // we're definitely using CTR.
- case Intrinsic::set_loop_iterations:
- case Intrinsic::loop_decrement:
- return true;
-
- // Binary operations on 128-bit value will use CTR.
- case Intrinsic::experimental_constrained_fadd:
- case Intrinsic::experimental_constrained_fsub:
- case Intrinsic::experimental_constrained_fmul:
- case Intrinsic::experimental_constrained_fdiv:
- case Intrinsic::experimental_constrained_frem:
- if (F->getType()->getScalarType()->isFP128Ty() ||
- F->getType()->getScalarType()->isPPC_FP128Ty())
- return true;
- break;
-
- case Intrinsic::experimental_constrained_fptosi:
- case Intrinsic::experimental_constrained_fptoui:
- case Intrinsic::experimental_constrained_sitofp:
- case Intrinsic::experimental_constrained_uitofp: {
- Type *SrcType = CI->getArgOperand(0)->getType()->getScalarType();
- Type *DstType = CI->getType()->getScalarType();
- if (SrcType->isPPC_FP128Ty() || DstType->isPPC_FP128Ty() ||
- isLargeIntegerTy(!TM.isPPC64(), SrcType) ||
- isLargeIntegerTy(!TM.isPPC64(), DstType))
- return true;
- break;
- }
-
- // Exclude eh_sjlj_setjmp; we don't need to exclude eh_sjlj_longjmp
- // because, although it does clobber the counter register, the
- // control can't then return to inside the loop unless there is also
- // an eh_sjlj_setjmp.
- case Intrinsic::eh_sjlj_setjmp:
-
- case Intrinsic::memcpy:
- case Intrinsic::memmove:
- case Intrinsic::memset:
- case Intrinsic::powi:
- case Intrinsic::log:
- case Intrinsic::log2:
- case Intrinsic::log10:
- case Intrinsic::exp:
- case Intrinsic::exp2:
- case Intrinsic::pow:
- case Intrinsic::sin:
- case Intrinsic::cos:
- case Intrinsic::experimental_constrained_powi:
- case Intrinsic::experimental_constrained_log:
- case Intrinsic::experimental_constrained_log2:
- case Intrinsic::experimental_constrained_log10:
- case Intrinsic::experimental_constrained_exp:
- case Intrinsic::experimental_constrained_exp2:
- case Intrinsic::experimental_constrained_pow:
- case Intrinsic::experimental_constrained_sin:
- case Intrinsic::experimental_constrained_cos:
- return true;
- case Intrinsic::copysign:
- if (CI->getArgOperand(0)->getType()->getScalarType()->
- isPPC_FP128Ty())
- return true;
- else
- continue; // ISD::FCOPYSIGN is never a library call.
- case Intrinsic::fmuladd:
- case Intrinsic::fma: Opcode = ISD::FMA; break;
- case Intrinsic::sqrt: Opcode = ISD::FSQRT; break;
- case Intrinsic::floor: Opcode = ISD::FFLOOR; break;
- case Intrinsic::ceil: Opcode = ISD::FCEIL; break;
- case Intrinsic::trunc: Opcode = ISD::FTRUNC; break;
- case Intrinsic::rint: Opcode = ISD::FRINT; break;
- case Intrinsic::lrint: Opcode = ISD::LRINT; break;
- case Intrinsic::llrint: Opcode = ISD::LLRINT; break;
- case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break;
- case Intrinsic::round: Opcode = ISD::FROUND; break;
- case Intrinsic::lround: Opcode = ISD::LROUND; break;
- case Intrinsic::llround: Opcode = ISD::LLROUND; break;
- case Intrinsic::minnum: Opcode = ISD::FMINNUM; break;
- case Intrinsic::maxnum: Opcode = ISD::FMAXNUM; break;
- case Intrinsic::experimental_constrained_fcmp:
- Opcode = ISD::STRICT_FSETCC;
- break;
- case Intrinsic::experimental_constrained_fcmps:
- Opcode = ISD::STRICT_FSETCCS;
- break;
- case Intrinsic::experimental_constrained_fma:
- Opcode = ISD::STRICT_FMA;
- break;
- case Intrinsic::experimental_constrained_sqrt:
- Opcode = ISD::STRICT_FSQRT;
- break;
- case Intrinsic::experimental_constrained_floor:
- Opcode = ISD::STRICT_FFLOOR;
- break;
- case Intrinsic::experimental_constrained_ceil:
- Opcode = ISD::STRICT_FCEIL;
- break;
- case Intrinsic::experimental_constrained_trunc:
- Opcode = ISD::STRICT_FTRUNC;
- break;
- case Intrinsic::experimental_constrained_rint:
- Opcode = ISD::STRICT_FRINT;
- break;
- case Intrinsic::experimental_constrained_lrint:
- Opcode = ISD::STRICT_LRINT;
- break;
- case Intrinsic::experimental_constrained_llrint:
- Opcode = ISD::STRICT_LLRINT;
- break;
- case Intrinsic::experimental_constrained_nearbyint:
- Opcode = ISD::STRICT_FNEARBYINT;
- break;
- case Intrinsic::experimental_constrained_round:
- Opcode = ISD::STRICT_FROUND;
- break;
- case Intrinsic::experimental_constrained_lround:
- Opcode = ISD::STRICT_LROUND;
- break;
- case Intrinsic::experimental_constrained_llround:
- Opcode = ISD::STRICT_LLROUND;
- break;
- case Intrinsic::experimental_constrained_minnum:
- Opcode = ISD::STRICT_FMINNUM;
- break;
- case Intrinsic::experimental_constrained_maxnum:
- Opcode = ISD::STRICT_FMAXNUM;
- break;
- case Intrinsic::umul_with_overflow: Opcode = ISD::UMULO; break;
- case Intrinsic::smul_with_overflow: Opcode = ISD::SMULO; break;
- }
- }
-
- // PowerPC does not use [US]DIVREM or other library calls for
- // operations on regular types which are not otherwise library calls
- // (i.e. soft float or atomics). If adapting for targets that do,
- // additional care is required here.
-
- LibFunc Func;
- if (!F->hasLocalLinkage() && F->hasName() && LibInfo &&
- LibInfo->getLibFunc(F->getName(), Func) &&
- LibInfo->hasOptimizedCodeGen(Func)) {
- // Non-read-only functions are never treated as intrinsics.
- if (!CI->onlyReadsMemory())
- return true;
-
- // Conversion happens only for FP calls.
- if (!CI->getArgOperand(0)->getType()->isFloatingPointTy())
- return true;
-
- switch (Func) {
- default: return true;
- case LibFunc_copysign:
- case LibFunc_copysignf:
- continue; // ISD::FCOPYSIGN is never a library call.
- case LibFunc_copysignl:
- return true;
- case LibFunc_fabs:
- case LibFunc_fabsf:
- case LibFunc_fabsl:
- continue; // ISD::FABS is never a library call.
- case LibFunc_sqrt:
- case LibFunc_sqrtf:
- case LibFunc_sqrtl:
- Opcode = ISD::FSQRT; break;
- case LibFunc_floor:
- case LibFunc_floorf:
- case LibFunc_floorl:
- Opcode = ISD::FFLOOR; break;
- case LibFunc_nearbyint:
- case LibFunc_nearbyintf:
- case LibFunc_nearbyintl:
- Opcode = ISD::FNEARBYINT; break;
- case LibFunc_ceil:
- case LibFunc_ceilf:
- case LibFunc_ceill:
- Opcode = ISD::FCEIL; break;
- case LibFunc_rint:
- case LibFunc_rintf:
- case LibFunc_rintl:
- Opcode = ISD::FRINT; break;
- case LibFunc_round:
- case LibFunc_roundf:
- case LibFunc_roundl:
- Opcode = ISD::FROUND; break;
- case LibFunc_trunc:
- case LibFunc_truncf:
- case LibFunc_truncl:
- Opcode = ISD::FTRUNC; break;
- case LibFunc_fmin:
- case LibFunc_fminf:
- case LibFunc_fminl:
- Opcode = ISD::FMINNUM; break;
- case LibFunc_fmax:
- case LibFunc_fmaxf:
- case LibFunc_fmaxl:
- Opcode = ISD::FMAXNUM; break;
- }
- }
-
- if (Opcode) {
- EVT EVTy =
- TLI->getValueType(DL, CI->getArgOperand(0)->getType(), true);
-
- if (EVTy == MVT::Other)
- return true;
-
- if (TLI->isOperationLegalOrCustom(Opcode, EVTy))
- continue;
- else if (EVTy.isVector() &&
- TLI->isOperationLegalOrCustom(Opcode, EVTy.getScalarType()))
- continue;
-
- return true;
- }
- }
-
- return true;
- } else if ((J->getType()->getScalarType()->isFP128Ty() ||
- J->getType()->getScalarType()->isPPC_FP128Ty())) {
- // Most operations on f128 or ppc_f128 values become calls.
- return true;
- } else if (isa<FCmpInst>(J) &&
- J->getOperand(0)->getType()->getScalarType()->isFP128Ty()) {
- return true;
- } else if ((isa<FPTruncInst>(J) || isa<FPExtInst>(J)) &&
- (cast<CastInst>(J)->getSrcTy()->getScalarType()->isFP128Ty() ||
- cast<CastInst>(J)->getDestTy()->getScalarType()->isFP128Ty())) {
- return true;
- } else if (isa<UIToFPInst>(J) || isa<SIToFPInst>(J) ||
- isa<FPToUIInst>(J) || isa<FPToSIInst>(J)) {
- CastInst *CI = cast<CastInst>(J);
- if (CI->getSrcTy()->getScalarType()->isPPC_FP128Ty() ||
- CI->getDestTy()->getScalarType()->isPPC_FP128Ty() ||
- isLargeIntegerTy(!TM.isPPC64(), CI->getSrcTy()->getScalarType()) ||
- isLargeIntegerTy(!TM.isPPC64(), CI->getDestTy()->getScalarType()))
- return true;
- } else if (isLargeIntegerTy(!TM.isPPC64(),
- J->getType()->getScalarType()) &&
- (J->getOpcode() == Instruction::UDiv ||
- J->getOpcode() == Instruction::SDiv ||
- J->getOpcode() == Instruction::URem ||
- J->getOpcode() == Instruction::SRem)) {
- return true;
- } else if (!TM.isPPC64() &&
- isLargeIntegerTy(false, J->getType()->getScalarType()) &&
- (J->getOpcode() == Instruction::Shl ||
- J->getOpcode() == Instruction::AShr ||
- J->getOpcode() == Instruction::LShr)) {
- // Only on PPC32, for 128-bit integers (specifically not 64-bit
- // integers), these might be runtime calls.
- return true;
- } else if (isa<IndirectBrInst>(J) || isa<InvokeInst>(J)) {
- // On PowerPC, indirect jumps use the counter register.
- return true;
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(J)) {
- if (SI->getNumCases() + 1 >= (unsigned)TLI->getMinimumJumpTableEntries())
- return true;
- }
-
- // FREM is always a call.
- if (J->getOpcode() == Instruction::FRem)
- return true;
-
- if (ST->useSoftFloat()) {
- switch(J->getOpcode()) {
- case Instruction::FAdd:
- case Instruction::FSub:
- case Instruction::FMul:
- case Instruction::FDiv:
- case Instruction::FPTrunc:
- case Instruction::FPExt:
- case Instruction::FPToUI:
- case Instruction::FPToSI:
- case Instruction::UIToFP:
- case Instruction::SIToFP:
- case Instruction::FCmp:
- return true;
- }
- }
-
- for (Value *Operand : J->operands())
- if (memAddrUsesCTR(Operand, TM, Visited))
- return true;
- }
-
- return false;
-}
-
bool PPCTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
return false;
}
- // We don't want to spill/restore the counter register, and so we don't
- // want to use the counter register if the loop contains calls.
- SmallPtrSet<const Value *, 4> Visited;
- for (Loop::block_iterator I = L->block_begin(), IE = L->block_end();
- I != IE; ++I)
- if (mightUseCTR(*I, LibInfo, Visited))
- return false;
-
SmallVector<BasicBlock*, 4> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
// address for that value will be computed in the loop.
SmallVector<BasicBlock *, 4> ExitBlocks;
L->getExitBlocks(ExitBlocks);
+ SmallPtrSet<const Value *, 4> Visited;
for (auto &BB : ExitBlocks) {
for (auto &PHI : BB->phis()) {
for (int Idx = 0, EndIdx = PHI.getNumIncomingValues(); Idx < EndIdx;
; The last (whichever it is) should have a fallthrough exit, and the other three
; need an unconditional branch. No other block should have an unconditional
; branch to cond_next48
+; One of the blocks ends up with a loop exit block that gets a tail-duplicated copy
+; of %cond_next48, so there should only be two unconditional branches.
-;CHECK: .LBB0_7: # %cond_next48
-;CHECK: b .LBB0_7
-;CHECK: b .LBB0_7
-;CHECK: b .LBB0_7
+;CHECK: b .LBB0_13
+;CHECK: b .LBB0_13
+;CHECK-NOT: b .LBB0_13
+;CHECK: .LBB0_13: # %cond_next48
define void @foo(i32 %W, i32 %X, i32 %Y, i32 %Z) {
entry:
; CHECK-NEXT: std 29, -24(1) # 8-byte Folded Spill
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT: stdu 1, -64(1)
-; CHECK-NEXT: li 30, 0
+; CHECK-NEXT: li 30, 255
; CHECK-NEXT: addi 29, 3, -8
; CHECK-NEXT: std 0, 80(1)
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: lfdu 1, 8(29)
; CHECK-NEXT: bl cos
; CHECK-NEXT: nop
-; CHECK-NEXT: addi 30, 30, 8
+; CHECK-NEXT: addi 30, 30, -1
; CHECK-NEXT: stfd 1, 0(29)
-; CHECK-NEXT: cmpldi 30, 2040
-; CHECK-NEXT: bne 0, .LBB0_1
+; CHECK-NEXT: cmpldi 30, 0
+; CHECK-NEXT: bc 12, 1, .LBB0_1
; CHECK-NEXT: # %bb.2: # %exit
; CHECK-NEXT: addi 1, 1, 64
; CHECK-NEXT: ld 0, 16(1)
define void @fmul_ctrloop_fp128() nounwind {
; PWR9-LABEL: fmul_ctrloop_fp128:
; PWR9: # %bb.0: # %entry
-; PWR9-NEXT: addis 5, 2, a@toc@ha
+; PWR9-NEXT: addis 3, 2, a@toc@ha
+; PWR9-NEXT: li 4, 4
+; PWR9-NEXT: addi 3, 3, a@toc@l
+; PWR9-NEXT: lxv 34, 0(3)
; PWR9-NEXT: addis 3, 2, y@toc@ha
+; PWR9-NEXT: mtctr 4
; PWR9-NEXT: addis 4, 2, x@toc@ha
-; PWR9-NEXT: addi 5, 5, a@toc@l
; PWR9-NEXT: addi 3, 3, y@toc@l
; PWR9-NEXT: addi 4, 4, x@toc@l
-; PWR9-NEXT: lxv 34, 0(5)
; PWR9-NEXT: addi 3, 3, -16
; PWR9-NEXT: addi 4, 4, -16
-; PWR9-NEXT: li 5, 0
; PWR9-NEXT: .p2align 5
; PWR9-NEXT: .LBB0_1: # %for.body
; PWR9-NEXT: #
; PWR9-NEXT: lxv 35, 16(4)
-; PWR9-NEXT: addi 5, 5, 16
; PWR9-NEXT: addi 4, 4, 16
-; PWR9-NEXT: cmpldi 5, 64
; PWR9-NEXT: xsmulqp 3, 2, 3
; PWR9-NEXT: stxv 35, 16(3)
; PWR9-NEXT: addi 3, 3, 16
-; PWR9-NEXT: bne 0, .LBB0_1
+; PWR9-NEXT: bdnz .LBB0_1
; PWR9-NEXT: # %bb.2: # %for.end
; PWR9-NEXT: blr
;
; PWR8-NEXT: std 28, 80(1) # 8-byte Folded Spill
; PWR8-NEXT: std 29, 88(1) # 8-byte Folded Spill
; PWR8-NEXT: std 30, 96(1) # 8-byte Folded Spill
-; PWR8-NEXT: li 30, 0
+; PWR8-NEXT: li 30, 4
; PWR8-NEXT: addi 4, 4, x@toc@l
; PWR8-NEXT: li 29, 16
; PWR8-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
; PWR8-NEXT: bl __mulkf3
; PWR8-NEXT: nop
; PWR8-NEXT: xxswapd 0, 34
-; PWR8-NEXT: addi 30, 30, 16
+; PWR8-NEXT: addi 30, 30, -1
; PWR8-NEXT: mr 3, 26
-; PWR8-NEXT: cmpldi 30, 64
+; PWR8-NEXT: cmpldi 30, 0
; PWR8-NEXT: stxvd2x 0, 28, 29
; PWR8-NEXT: mr 28, 27
-; PWR8-NEXT: bne 0, .LBB0_1
+; PWR8-NEXT: bc 12, 1, .LBB0_1
; PWR8-NEXT: # %bb.2: # %for.end
; PWR8-NEXT: li 3, 48
; PWR8-NEXT: ld 30, 96(1) # 8-byte Folded Reload
define void @fpext_ctrloop_fp128(ptr %a) nounwind {
; PWR9-LABEL: fpext_ctrloop_fp128:
; PWR9: # %bb.0: # %entry
-; PWR9-NEXT: addis 4, 2, y@toc@ha
+; PWR9-NEXT: li 4, 4
; PWR9-NEXT: addi 3, 3, -8
+; PWR9-NEXT: mtctr 4
+; PWR9-NEXT: addis 4, 2, y@toc@ha
; PWR9-NEXT: addi 4, 4, y@toc@l
-; PWR9-NEXT: addi 5, 4, -16
-; PWR9-NEXT: li 4, 0
+; PWR9-NEXT: addi 4, 4, -16
; PWR9-NEXT: .p2align 5
; PWR9-NEXT: .LBB1_1: # %for.body
; PWR9-NEXT: #
; PWR9-NEXT: lfdu 0, 8(3)
-; PWR9-NEXT: addi 4, 4, 8
-; PWR9-NEXT: cmpldi 4, 32
; PWR9-NEXT: xscpsgndp 34, 0, 0
; PWR9-NEXT: xscvdpqp 2, 2
-; PWR9-NEXT: stxv 34, 16(5)
-; PWR9-NEXT: addi 5, 5, 16
-; PWR9-NEXT: bne 0, .LBB1_1
+; PWR9-NEXT: stxv 34, 16(4)
+; PWR9-NEXT: addi 4, 4, 16
+; PWR9-NEXT: bdnz .LBB1_1
; PWR9-NEXT: # %bb.2: # %for.end
; PWR9-NEXT: blr
;
; PWR8-NEXT: stdu 1, -64(1)
; PWR8-NEXT: addis 4, 2, y@toc@ha
; PWR8-NEXT: addi 30, 3, -8
-; PWR8-NEXT: li 28, 0
+; PWR8-NEXT: li 29, 4
; PWR8-NEXT: std 0, 80(1)
; PWR8-NEXT: addi 4, 4, y@toc@l
-; PWR8-NEXT: addi 29, 4, -16
+; PWR8-NEXT: addi 28, 4, -16
; PWR8-NEXT: .p2align 4
; PWR8-NEXT: .LBB1_1: # %for.body
; PWR8-NEXT: #
; PWR8-NEXT: lfdu 1, 8(30)
-; PWR8-NEXT: addi 29, 29, 16
+; PWR8-NEXT: addi 28, 28, 16
; PWR8-NEXT: bl __extenddfkf2
; PWR8-NEXT: nop
; PWR8-NEXT: xxswapd 0, 34
-; PWR8-NEXT: addi 28, 28, 8
-; PWR8-NEXT: cmpldi 28, 32
-; PWR8-NEXT: stxvd2x 0, 0, 29
-; PWR8-NEXT: bne 0, .LBB1_1
+; PWR8-NEXT: addi 29, 29, -1
+; PWR8-NEXT: cmpldi 29, 0
+; PWR8-NEXT: stxvd2x 0, 0, 28
+; PWR8-NEXT: bc 12, 1, .LBB1_1
; PWR8-NEXT: # %bb.2: # %for.end
; PWR8-NEXT: addi 1, 1, 64
; PWR8-NEXT: ld 0, 16(1)
define void @fptrunc_ctrloop_fp128(ptr %a) nounwind {
; PWR9-LABEL: fptrunc_ctrloop_fp128:
; PWR9: # %bb.0: # %entry
-; PWR9-NEXT: addis 4, 2, x@toc@ha
+; PWR9-NEXT: li 4, 4
; PWR9-NEXT: addi 3, 3, -8
-; PWR9-NEXT: li 5, 0
+; PWR9-NEXT: mtctr 4
+; PWR9-NEXT: addis 4, 2, x@toc@ha
; PWR9-NEXT: addi 4, 4, x@toc@l
; PWR9-NEXT: addi 4, 4, -16
; PWR9-NEXT: .p2align 5
; PWR9-NEXT: .LBB2_1: # %for.body
; PWR9-NEXT: #
; PWR9-NEXT: lxv 34, 16(4)
-; PWR9-NEXT: addi 5, 5, 8
; PWR9-NEXT: addi 4, 4, 16
-; PWR9-NEXT: cmpldi 5, 32
; PWR9-NEXT: xscvqpdp 2, 2
; PWR9-NEXT: xscpsgndp 0, 34, 34
; PWR9-NEXT: stfdu 0, 8(3)
-; PWR9-NEXT: bne 0, .LBB2_1
+; PWR9-NEXT: bdnz .LBB2_1
; PWR9-NEXT: # %bb.2: # %for.end
; PWR9-NEXT: blr
;
; PWR8-NEXT: stdu 1, -64(1)
; PWR8-NEXT: addis 4, 2, x@toc@ha
; PWR8-NEXT: addi 30, 3, -8
-; PWR8-NEXT: li 28, 0
+; PWR8-NEXT: li 29, 4
; PWR8-NEXT: std 0, 80(1)
; PWR8-NEXT: addi 4, 4, x@toc@l
-; PWR8-NEXT: addi 29, 4, -16
+; PWR8-NEXT: addi 28, 4, -16
; PWR8-NEXT: .p2align 4
; PWR8-NEXT: .LBB2_1: # %for.body
; PWR8-NEXT: #
-; PWR8-NEXT: addi 29, 29, 16
-; PWR8-NEXT: lxvd2x 0, 0, 29
+; PWR8-NEXT: addi 28, 28, 16
+; PWR8-NEXT: lxvd2x 0, 0, 28
; PWR8-NEXT: xxswapd 34, 0
; PWR8-NEXT: bl __trunckfdf2
; PWR8-NEXT: nop
-; PWR8-NEXT: addi 28, 28, 8
+; PWR8-NEXT: addi 29, 29, -1
; PWR8-NEXT: stfdu 1, 8(30)
-; PWR8-NEXT: cmpldi 28, 32
-; PWR8-NEXT: bne 0, .LBB2_1
+; PWR8-NEXT: cmpldi 29, 0
+; PWR8-NEXT: bc 12, 1, .LBB2_1
; PWR8-NEXT: # %bb.2: # %for.end
; PWR8-NEXT: addi 1, 1, 64
; PWR8-NEXT: ld 0, 16(1)
; CHECK-LABEL: foo1:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stwu 1, -48(1)
-; CHECK-NEXT: li 6, 2048
-; CHECK-NEXT: li 7, 0
; CHECK-NEXT: stw 24, 16(1) # 4-byte Folded Spill
+; CHECK-NEXT: li 6, 2048
; CHECK-NEXT: stw 25, 20(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 26, 24(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 27, 28(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT: mtctr 6
+; CHECK-NEXT: li 6, 0
; CHECK-NEXT: .LBB0_1: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lwz 10, 12(5)
-; CHECK-NEXT: addi 6, 6, -1
-; CHECK-NEXT: lwz 12, 12(4)
-; CHECK-NEXT: lwz 11, 8(4)
-; CHECK-NEXT: subfic 0, 10, 96
-; CHECK-NEXT: lwz 8, 4(4)
-; CHECK-NEXT: addi 30, 10, -64
-; CHECK-NEXT: lwz 9, 0(4)
-; CHECK-NEXT: cmplwi 10, 64
-; CHECK-NEXT: slw 25, 12, 10
-; CHECK-NEXT: addi 29, 10, -96
-; CHECK-NEXT: subfic 27, 10, 32
-; CHECK-NEXT: srw 0, 12, 0
-; CHECK-NEXT: slw 24, 11, 30
+; CHECK-NEXT: lwz 9, 12(5)
+; CHECK-NEXT: lwz 10, 8(4)
+; CHECK-NEXT: lwz 11, 12(4)
+; CHECK-NEXT: subfic 12, 9, 96
+; CHECK-NEXT: lwz 7, 4(4)
+; CHECK-NEXT: addi 0, 9, -64
+; CHECK-NEXT: lwz 8, 0(4)
+; CHECK-NEXT: subfic 28, 9, 32
+; CHECK-NEXT: cmplwi 9, 64
+; CHECK-NEXT: slw 26, 11, 9
+; CHECK-NEXT: srw 12, 11, 12
+; CHECK-NEXT: slw 25, 10, 0
+; CHECK-NEXT: addi 30, 9, -96
+; CHECK-NEXT: slw 29, 8, 9
+; CHECK-NEXT: or 12, 25, 12
+; CHECK-NEXT: srw 25, 7, 28
; CHECK-NEXT: bc 12, 0, .LBB0_3
; CHECK-NEXT: # %bb.2: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 25, 7, 0
+; CHECK-NEXT: ori 26, 6, 0
; CHECK-NEXT: b .LBB0_3
; CHECK-NEXT: .LBB0_3: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: slw 28, 9, 10
-; CHECK-NEXT: or 0, 24, 0
-; CHECK-NEXT: srw 24, 8, 27
-; CHECK-NEXT: stw 25, 12(3)
-; CHECK-NEXT: subfic 25, 10, 64
-; CHECK-NEXT: slw 29, 12, 29
-; CHECK-NEXT: slw 26, 11, 10
-; CHECK-NEXT: or 28, 28, 24
-; CHECK-NEXT: srw 24, 12, 27
-; CHECK-NEXT: or 29, 0, 29
-; CHECK-NEXT: subfic 0, 25, 32
-; CHECK-NEXT: or 26, 26, 24
-; CHECK-NEXT: addi 24, 10, -32
-; CHECK-NEXT: srw 27, 11, 27
-; CHECK-NEXT: slw 0, 11, 0
-; CHECK-NEXT: srw 11, 11, 25
-; CHECK-NEXT: srw 25, 12, 25
-; CHECK-NEXT: slw 30, 12, 30
-; CHECK-NEXT: slw 12, 12, 24
-; CHECK-NEXT: slw 24, 8, 24
-; CHECK-NEXT: or 0, 25, 0
-; CHECK-NEXT: or 28, 28, 24
-; CHECK-NEXT: cmplwi 1, 10, 0
-; CHECK-NEXT: slw 10, 8, 10
-; CHECK-NEXT: or 0, 0, 27
-; CHECK-NEXT: or 11, 28, 11
-; CHECK-NEXT: or 10, 10, 0
-; CHECK-NEXT: or 12, 26, 12
+; CHECK-NEXT: slw 27, 10, 9
+; CHECK-NEXT: or 29, 29, 25
+; CHECK-NEXT: srw 25, 11, 28
+; CHECK-NEXT: stw 26, 12(3)
+; CHECK-NEXT: subfic 26, 9, 64
+; CHECK-NEXT: slw 30, 11, 30
+; CHECK-NEXT: or 27, 27, 25
+; CHECK-NEXT: addi 25, 9, -32
+; CHECK-NEXT: or 12, 12, 30
+; CHECK-NEXT: subfic 30, 26, 32
+; CHECK-NEXT: srw 28, 10, 28
+; CHECK-NEXT: slw 30, 10, 30
+; CHECK-NEXT: srw 10, 10, 26
+; CHECK-NEXT: srw 26, 11, 26
+; CHECK-NEXT: slw 24, 11, 0
+; CHECK-NEXT: slw 0, 7, 25
+; CHECK-NEXT: or 0, 29, 0
+; CHECK-NEXT: or 30, 26, 30
+; CHECK-NEXT: cmplwi 1, 9, 0
+; CHECK-NEXT: slw 9, 7, 9
+; CHECK-NEXT: or 10, 0, 10
+; CHECK-NEXT: or 0, 30, 28
+; CHECK-NEXT: slw 11, 11, 25
+; CHECK-NEXT: or 9, 9, 0
+; CHECK-NEXT: or 11, 27, 11
; CHECK-NEXT: bc 12, 0, .LBB0_5
; CHECK-NEXT: # %bb.4: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 11, 29, 0
-; CHECK-NEXT: ori 10, 30, 0
-; CHECK-NEXT: ori 12, 7, 0
+; CHECK-NEXT: ori 10, 12, 0
+; CHECK-NEXT: ori 9, 24, 0
+; CHECK-NEXT: ori 11, 6, 0
; CHECK-NEXT: b .LBB0_5
; CHECK-NEXT: .LBB0_5: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: cmplwi 6, 0
; CHECK-NEXT: bc 12, 6, .LBB0_7
; CHECK-NEXT: # %bb.6: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 9, 11, 0
; CHECK-NEXT: ori 8, 10, 0
+; CHECK-NEXT: ori 7, 9, 0
; CHECK-NEXT: b .LBB0_7
; CHECK-NEXT: .LBB0_7: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: stw 12, 8(3)
-; CHECK-NEXT: stw 9, 0(3)
-; CHECK-NEXT: stw 8, 4(3)
-; CHECK-NEXT: bne 0, .LBB0_1
+; CHECK-NEXT: stw 11, 8(3)
+; CHECK-NEXT: stw 8, 0(3)
+; CHECK-NEXT: stw 7, 4(3)
+; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.8: # %for.end
; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload
; CHECK-LABEL: foo2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stwu 1, -48(1)
-; CHECK-NEXT: li 6, 2048
-; CHECK-NEXT: stw 23, 12(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 24, 16(1) # 4-byte Folded Spill
+; CHECK-NEXT: li 6, 2048
; CHECK-NEXT: stw 25, 20(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 26, 24(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 27, 28(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT: mtctr 6
; CHECK-NEXT: .LBB1_1: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lwz 9, 12(5)
-; CHECK-NEXT: addi 6, 6, -1
-; CHECK-NEXT: lwz 10, 4(4)
-; CHECK-NEXT: lwz 11, 0(4)
-; CHECK-NEXT: subfic 12, 9, 96
-; CHECK-NEXT: lwz 7, 8(4)
-; CHECK-NEXT: addi 0, 9, -64
-; CHECK-NEXT: lwz 8, 12(4)
-; CHECK-NEXT: subfic 28, 9, 32
-; CHECK-NEXT: slw 12, 11, 12
-; CHECK-NEXT: srw 24, 10, 0
-; CHECK-NEXT: srw 29, 8, 9
-; CHECK-NEXT: or 12, 24, 12
-; CHECK-NEXT: slw 24, 7, 28
-; CHECK-NEXT: srw 26, 10, 9
-; CHECK-NEXT: or 29, 29, 24
-; CHECK-NEXT: slw 24, 11, 28
-; CHECK-NEXT: cmplwi 9, 64
-; CHECK-NEXT: srawi 25, 11, 31
-; CHECK-NEXT: or 26, 26, 24
-; CHECK-NEXT: sraw 24, 11, 9
-; CHECK-NEXT: addi 30, 9, -96
+; CHECK-NEXT: lwz 8, 12(5)
+; CHECK-NEXT: lwz 9, 4(4)
+; CHECK-NEXT: lwz 10, 0(4)
+; CHECK-NEXT: subfic 11, 8, 96
+; CHECK-NEXT: lwz 6, 8(4)
+; CHECK-NEXT: addi 12, 8, -64
+; CHECK-NEXT: lwz 7, 12(4)
+; CHECK-NEXT: subfic 29, 8, 32
+; CHECK-NEXT: slw 11, 10, 11
+; CHECK-NEXT: srw 25, 9, 12
+; CHECK-NEXT: srw 30, 7, 8
+; CHECK-NEXT: or 11, 25, 11
+; CHECK-NEXT: slw 25, 6, 29
+; CHECK-NEXT: srw 27, 9, 8
+; CHECK-NEXT: or 30, 30, 25
+; CHECK-NEXT: slw 25, 10, 29
+; CHECK-NEXT: addi 0, 8, -96
+; CHECK-NEXT: cmplwi 8, 64
+; CHECK-NEXT: srawi 26, 10, 31
+; CHECK-NEXT: or 27, 27, 25
+; CHECK-NEXT: sraw 25, 10, 8
+; CHECK-NEXT: cmpwi 1, 0, 1
+; CHECK-NEXT: sraw 24, 10, 0
; CHECK-NEXT: bc 12, 0, .LBB1_3
; CHECK-NEXT: # %bb.2: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 24, 25, 0
-; CHECK-NEXT: b .LBB1_3
+; CHECK-NEXT: ori 0, 26, 0
+; CHECK-NEXT: b .LBB1_4
; CHECK-NEXT: .LBB1_3: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: cmpwi 1, 30, 1
-; CHECK-NEXT: sraw 30, 11, 30
-; CHECK-NEXT: stw 24, 0(3)
-; CHECK-NEXT: subfic 24, 9, 64
-; CHECK-NEXT: addi 27, 9, -32
-; CHECK-NEXT: bc 12, 4, .LBB1_5
-; CHECK-NEXT: # %bb.4: # %for.body
+; CHECK-NEXT: addi 0, 25, 0
+; CHECK-NEXT: .LBB1_4: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 12, 30, 0
-; CHECK-NEXT: b .LBB1_5
-; CHECK-NEXT: .LBB1_5: # %for.body
-; CHECK-NEXT: #
-; CHECK-NEXT: subfic 30, 24, 32
-; CHECK-NEXT: slw 28, 10, 28
-; CHECK-NEXT: srw 30, 10, 30
-; CHECK-NEXT: slw 10, 10, 24
-; CHECK-NEXT: slw 24, 11, 24
-; CHECK-NEXT: sraw 23, 11, 0
-; CHECK-NEXT: srw 0, 7, 27
-; CHECK-NEXT: sraw 11, 11, 27
-; CHECK-NEXT: cmpwi 1, 27, 1
-; CHECK-NEXT: or 0, 29, 0
-; CHECK-NEXT: or 30, 24, 30
+; CHECK-NEXT: addi 28, 8, -32
+; CHECK-NEXT: stw 0, 0(3)
+; CHECK-NEXT: subfic 0, 8, 64
+; CHECK-NEXT: subfic 25, 0, 32
+; CHECK-NEXT: slw 29, 9, 29
+; CHECK-NEXT: srw 25, 9, 25
+; CHECK-NEXT: slw 9, 9, 0
+; CHECK-NEXT: slw 0, 10, 0
; CHECK-NEXT: bc 12, 4, .LBB1_6
-; CHECK-NEXT: b .LBB1_7
+; CHECK-NEXT: # %bb.5: # %for.body
+; CHECK-NEXT: #
+; CHECK-NEXT: ori 11, 24, 0
+; CHECK-NEXT: b .LBB1_6
; CHECK-NEXT: .LBB1_6: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: addi 11, 26, 0
+; CHECK-NEXT: sraw 12, 10, 12
+; CHECK-NEXT: sraw 10, 10, 28
+; CHECK-NEXT: cmpwi 1, 28, 1
+; CHECK-NEXT: srw 28, 6, 28
+; CHECK-NEXT: or 0, 0, 25
+; CHECK-NEXT: or 30, 30, 28
+; CHECK-NEXT: bc 12, 4, .LBB1_7
+; CHECK-NEXT: b .LBB1_8
; CHECK-NEXT: .LBB1_7: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: cmplwi 1, 9, 0
-; CHECK-NEXT: srw 9, 7, 9
-; CHECK-NEXT: or 10, 0, 10
-; CHECK-NEXT: or 0, 30, 28
-; CHECK-NEXT: or 9, 9, 0
-; CHECK-NEXT: bc 12, 0, .LBB1_9
-; CHECK-NEXT: # %bb.8: # %for.body
+; CHECK-NEXT: addi 10, 27, 0
+; CHECK-NEXT: .LBB1_8: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 10, 12, 0
-; CHECK-NEXT: ori 9, 23, 0
-; CHECK-NEXT: ori 11, 25, 0
-; CHECK-NEXT: b .LBB1_9
-; CHECK-NEXT: .LBB1_9: # %for.body
+; CHECK-NEXT: cmplwi 1, 8, 0
+; CHECK-NEXT: srw 8, 6, 8
+; CHECK-NEXT: or 0, 0, 29
+; CHECK-NEXT: or 9, 30, 9
+; CHECK-NEXT: or 8, 8, 0
+; CHECK-NEXT: bc 12, 0, .LBB1_10
+; CHECK-NEXT: # %bb.9: # %for.body
+; CHECK-NEXT: #
+; CHECK-NEXT: ori 9, 11, 0
+; CHECK-NEXT: ori 8, 12, 0
+; CHECK-NEXT: ori 10, 26, 0
+; CHECK-NEXT: b .LBB1_10
+; CHECK-NEXT: .LBB1_10: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: cmplwi 6, 0
-; CHECK-NEXT: bc 12, 6, .LBB1_11
-; CHECK-NEXT: # %bb.10: # %for.body
+; CHECK-NEXT: bc 12, 6, .LBB1_12
+; CHECK-NEXT: # %bb.11: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 8, 10, 0
; CHECK-NEXT: ori 7, 9, 0
-; CHECK-NEXT: b .LBB1_11
-; CHECK-NEXT: .LBB1_11: # %for.body
+; CHECK-NEXT: ori 6, 8, 0
+; CHECK-NEXT: b .LBB1_12
+; CHECK-NEXT: .LBB1_12: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: stw 11, 4(3)
-; CHECK-NEXT: stw 8, 12(3)
-; CHECK-NEXT: stw 7, 8(3)
-; CHECK-NEXT: bne 0, .LBB1_1
-; CHECK-NEXT: # %bb.12: # %for.end
+; CHECK-NEXT: stw 10, 4(3)
+; CHECK-NEXT: stw 7, 12(3)
+; CHECK-NEXT: stw 6, 8(3)
+; CHECK-NEXT: bdnz .LBB1_1
+; CHECK-NEXT: # %bb.13: # %for.end
; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 28, 32(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 26, 24(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 25, 20(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 24, 16(1) # 4-byte Folded Reload
-; CHECK-NEXT: lwz 23, 12(1) # 4-byte Folded Reload
; CHECK-NEXT: addi 1, 1, 48
; CHECK-NEXT: blr
entry:
; CHECK-LABEL: foo3:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: stwu 1, -48(1)
-; CHECK-NEXT: li 6, 2048
-; CHECK-NEXT: li 7, 0
; CHECK-NEXT: stw 24, 16(1) # 4-byte Folded Spill
+; CHECK-NEXT: li 6, 2048
; CHECK-NEXT: stw 25, 20(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 26, 24(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 27, 28(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 28, 32(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 29, 36(1) # 4-byte Folded Spill
; CHECK-NEXT: stw 30, 40(1) # 4-byte Folded Spill
+; CHECK-NEXT: mtctr 6
+; CHECK-NEXT: li 6, 0
; CHECK-NEXT: .LBB2_1: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: lwz 10, 12(5)
-; CHECK-NEXT: addi 6, 6, -1
-; CHECK-NEXT: lwz 12, 0(4)
-; CHECK-NEXT: lwz 11, 4(4)
-; CHECK-NEXT: subfic 0, 10, 96
-; CHECK-NEXT: lwz 8, 8(4)
-; CHECK-NEXT: addi 30, 10, -64
-; CHECK-NEXT: lwz 9, 12(4)
-; CHECK-NEXT: cmplwi 10, 64
-; CHECK-NEXT: srw 25, 12, 10
-; CHECK-NEXT: addi 29, 10, -96
-; CHECK-NEXT: subfic 27, 10, 32
-; CHECK-NEXT: slw 0, 12, 0
-; CHECK-NEXT: srw 24, 11, 30
+; CHECK-NEXT: lwz 9, 12(5)
+; CHECK-NEXT: lwz 10, 4(4)
+; CHECK-NEXT: lwz 11, 0(4)
+; CHECK-NEXT: subfic 12, 9, 96
+; CHECK-NEXT: lwz 7, 8(4)
+; CHECK-NEXT: addi 0, 9, -64
+; CHECK-NEXT: lwz 8, 12(4)
+; CHECK-NEXT: subfic 28, 9, 32
+; CHECK-NEXT: cmplwi 9, 64
+; CHECK-NEXT: srw 26, 11, 9
+; CHECK-NEXT: slw 12, 11, 12
+; CHECK-NEXT: srw 25, 10, 0
+; CHECK-NEXT: addi 30, 9, -96
+; CHECK-NEXT: srw 29, 8, 9
+; CHECK-NEXT: or 12, 25, 12
+; CHECK-NEXT: slw 25, 7, 28
; CHECK-NEXT: bc 12, 0, .LBB2_3
; CHECK-NEXT: # %bb.2: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 25, 7, 0
+; CHECK-NEXT: ori 26, 6, 0
; CHECK-NEXT: b .LBB2_3
; CHECK-NEXT: .LBB2_3: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: srw 28, 9, 10
-; CHECK-NEXT: or 0, 24, 0
-; CHECK-NEXT: slw 24, 8, 27
-; CHECK-NEXT: stw 25, 0(3)
-; CHECK-NEXT: subfic 25, 10, 64
-; CHECK-NEXT: srw 29, 12, 29
-; CHECK-NEXT: srw 26, 11, 10
-; CHECK-NEXT: or 28, 28, 24
-; CHECK-NEXT: slw 24, 12, 27
-; CHECK-NEXT: or 29, 0, 29
-; CHECK-NEXT: subfic 0, 25, 32
-; CHECK-NEXT: or 26, 26, 24
-; CHECK-NEXT: addi 24, 10, -32
-; CHECK-NEXT: slw 27, 11, 27
-; CHECK-NEXT: srw 0, 11, 0
-; CHECK-NEXT: slw 11, 11, 25
-; CHECK-NEXT: slw 25, 12, 25
-; CHECK-NEXT: srw 30, 12, 30
-; CHECK-NEXT: srw 12, 12, 24
-; CHECK-NEXT: srw 24, 8, 24
-; CHECK-NEXT: or 0, 25, 0
-; CHECK-NEXT: or 28, 28, 24
-; CHECK-NEXT: cmplwi 1, 10, 0
-; CHECK-NEXT: srw 10, 8, 10
-; CHECK-NEXT: or 0, 0, 27
-; CHECK-NEXT: or 11, 28, 11
-; CHECK-NEXT: or 10, 10, 0
-; CHECK-NEXT: or 12, 26, 12
+; CHECK-NEXT: srw 27, 10, 9
+; CHECK-NEXT: or 29, 29, 25
+; CHECK-NEXT: slw 25, 11, 28
+; CHECK-NEXT: stw 26, 0(3)
+; CHECK-NEXT: subfic 26, 9, 64
+; CHECK-NEXT: srw 30, 11, 30
+; CHECK-NEXT: or 27, 27, 25
+; CHECK-NEXT: addi 25, 9, -32
+; CHECK-NEXT: or 12, 12, 30
+; CHECK-NEXT: subfic 30, 26, 32
+; CHECK-NEXT: slw 28, 10, 28
+; CHECK-NEXT: srw 30, 10, 30
+; CHECK-NEXT: slw 10, 10, 26
+; CHECK-NEXT: slw 26, 11, 26
+; CHECK-NEXT: srw 24, 11, 0
+; CHECK-NEXT: srw 0, 7, 25
+; CHECK-NEXT: or 0, 29, 0
+; CHECK-NEXT: or 30, 26, 30
+; CHECK-NEXT: cmplwi 1, 9, 0
+; CHECK-NEXT: srw 9, 7, 9
+; CHECK-NEXT: or 10, 0, 10
+; CHECK-NEXT: or 0, 30, 28
+; CHECK-NEXT: srw 11, 11, 25
+; CHECK-NEXT: or 9, 9, 0
+; CHECK-NEXT: or 11, 27, 11
; CHECK-NEXT: bc 12, 0, .LBB2_5
; CHECK-NEXT: # %bb.4: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 11, 29, 0
-; CHECK-NEXT: ori 10, 30, 0
-; CHECK-NEXT: ori 12, 7, 0
+; CHECK-NEXT: ori 10, 12, 0
+; CHECK-NEXT: ori 9, 24, 0
+; CHECK-NEXT: ori 11, 6, 0
; CHECK-NEXT: b .LBB2_5
; CHECK-NEXT: .LBB2_5: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: cmplwi 6, 0
; CHECK-NEXT: bc 12, 6, .LBB2_7
; CHECK-NEXT: # %bb.6: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: ori 9, 11, 0
; CHECK-NEXT: ori 8, 10, 0
+; CHECK-NEXT: ori 7, 9, 0
; CHECK-NEXT: b .LBB2_7
; CHECK-NEXT: .LBB2_7: # %for.body
; CHECK-NEXT: #
-; CHECK-NEXT: stw 12, 4(3)
-; CHECK-NEXT: stw 9, 12(3)
-; CHECK-NEXT: stw 8, 8(3)
-; CHECK-NEXT: bne 0, .LBB2_1
+; CHECK-NEXT: stw 11, 4(3)
+; CHECK-NEXT: stw 8, 12(3)
+; CHECK-NEXT: stw 7, 8(3)
+; CHECK-NEXT: bdnz .LBB2_1
; CHECK-NEXT: # %bb.8: # %for.end
; CHECK-NEXT: lwz 30, 40(1) # 4-byte Folded Reload
; CHECK-NEXT: lwz 29, 36(1) # 4-byte Folded Reload
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \
-; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \
-; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=LE
+; RUN: -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-asm-full-reg-names \
; RUN: -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
declare ppc_fp128 @llvm.fmuladd.ppcf128(ppc_fp128, ppc_fp128, ppc_fp128) #2
define ppc_fp128 @test_ctr0() {
+; P9LE-LABEL: test_ctr0:
+; P9LE: # %bb.0: # %bb
+; P9LE-NEXT: mflr r0
+; P9LE-NEXT: .cfi_def_cfa_offset 48
+; P9LE-NEXT: .cfi_offset lr, 16
+; P9LE-NEXT: .cfi_offset r30, -16
+; P9LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P9LE-NEXT: stdu r1, -48(r1)
+; P9LE-NEXT: li r3, 1
+; P9LE-NEXT: xxlxor f1, f1, f1
+; P9LE-NEXT: xxlxor f2, f2, f2
+; P9LE-NEXT: std r0, 64(r1)
+; P9LE-NEXT: rldic r30, r3, 62, 1
+; P9LE-NEXT: .p2align 5
+; P9LE-NEXT: .LBB0_1: # %bb6
+; P9LE-NEXT: #
+; P9LE-NEXT: xxlxor f3, f3, f3
+; P9LE-NEXT: xxlxor f4, f4, f4
+; P9LE-NEXT: bl __gcc_qadd
+; P9LE-NEXT: nop
+; P9LE-NEXT: addi r30, r30, -1
+; P9LE-NEXT: cmpldi r30, 0
+; P9LE-NEXT: bc 12, gt, .LBB0_1
+; P9LE-NEXT: # %bb.2: # %bb14
+; P9LE-NEXT: addi r1, r1, 48
+; P9LE-NEXT: ld r0, 16(r1)
+; P9LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P9LE-NEXT: mtlr r0
+; P9LE-NEXT: blr
+;
; P9BE-LABEL: test_ctr0:
; P9BE: # %bb.0: # %bb
; P9BE-NEXT: mflr r0
; P9BE-NEXT: .cfi_def_cfa_offset 128
; P9BE-NEXT: .cfi_offset lr, 16
; P9BE-NEXT: .cfi_offset r30, -16
+; P9BE-NEXT: li r3, 1
; P9BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
; P9BE-NEXT: xxlxor f1, f1, f1
-; P9BE-NEXT: li r30, 0
+; P9BE-NEXT: rldic r30, r3, 62, 1
; P9BE-NEXT: xxlxor f2, f2, f2
; P9BE-NEXT: .p2align 5
; P9BE-NEXT: .LBB0_1: # %bb6
; P9BE-NEXT: xxlxor f4, f4, f4
; P9BE-NEXT: bl __gcc_qadd
; P9BE-NEXT: nop
-; P9BE-NEXT: addi r30, r30, 4
+; P9BE-NEXT: addi r30, r30, -1
; P9BE-NEXT: cmpldi r30, 0
-; P9BE-NEXT: bne cr0, .LBB0_1
+; P9BE-NEXT: bc 12, gt, .LBB0_1
; P9BE-NEXT: # %bb.2: # %bb14
; P9BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
; P9BE-NEXT: addi r1, r1, 128
; P9BE-NEXT: mtlr r0
; P9BE-NEXT: blr
;
+; P8LE-LABEL: test_ctr0:
+; P8LE: # %bb.0: # %bb
+; P8LE-NEXT: mflr r0
+; P8LE-NEXT: .cfi_def_cfa_offset 48
+; P8LE-NEXT: .cfi_offset lr, 16
+; P8LE-NEXT: .cfi_offset r30, -16
+; P8LE-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P8LE-NEXT: stdu r1, -48(r1)
+; P8LE-NEXT: xxlxor f1, f1, f1
+; P8LE-NEXT: li r3, 1
+; P8LE-NEXT: std r0, 64(r1)
+; P8LE-NEXT: xxlxor f2, f2, f2
+; P8LE-NEXT: rldic r30, r3, 62, 1
+; P8LE-NEXT: .p2align 5
+; P8LE-NEXT: .LBB0_1: # %bb6
+; P8LE-NEXT: #
+; P8LE-NEXT: xxlxor f3, f3, f3
+; P8LE-NEXT: xxlxor f4, f4, f4
+; P8LE-NEXT: bl __gcc_qadd
+; P8LE-NEXT: nop
+; P8LE-NEXT: addi r30, r30, -1
+; P8LE-NEXT: cmpldi r30, 0
+; P8LE-NEXT: bc 12, gt, .LBB0_1
+; P8LE-NEXT: # %bb.2: # %bb14
+; P8LE-NEXT: addi r1, r1, 48
+; P8LE-NEXT: ld r0, 16(r1)
+; P8LE-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
+; P8LE-NEXT: mtlr r0
+; P8LE-NEXT: blr
+;
; P8BE-LABEL: test_ctr0:
; P8BE: # %bb.0: # %bb
; P8BE-NEXT: mflr r0
; P8BE-NEXT: .cfi_offset lr, 16
; P8BE-NEXT: .cfi_offset r30, -16
; P8BE-NEXT: xxlxor f1, f1, f1
+; P8BE-NEXT: li r3, 1
; P8BE-NEXT: std r30, 112(r1) # 8-byte Folded Spill
-; P8BE-NEXT: li r30, 0
; P8BE-NEXT: xxlxor f2, f2, f2
+; P8BE-NEXT: rldic r30, r3, 62, 1
; P8BE-NEXT: .p2align 5
; P8BE-NEXT: .LBB0_1: # %bb6
; P8BE-NEXT: #
; P8BE-NEXT: xxlxor f4, f4, f4
; P8BE-NEXT: bl __gcc_qadd
; P8BE-NEXT: nop
-; P8BE-NEXT: addi r30, r30, 4
+; P8BE-NEXT: addi r30, r30, -1
; P8BE-NEXT: cmpldi r30, 0
-; P8BE-NEXT: bne cr0, .LBB0_1
+; P8BE-NEXT: bc 12, gt, .LBB0_1
; P8BE-NEXT: # %bb.2: # %bb14
; P8BE-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
; P8BE-NEXT: addi r1, r1, 128
bb14: ; preds = %bb6
ret ppc_fp128 %i8
}
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; LE: {{.*}}
; CHECK-NEXT: br label [[FOR_INC:%.*]]
; CHECK: for.inc:
; CHECK-NEXT: [[C_0:%.*]] = call i1 @cond()
-; CHECK-NEXT: br i1 [[C_0]], label [[WHILE_COND25:%.*]], label [[FOR_BODY]]
+; CHECK-NEXT: br i1 [[C_0]], label [[WHILE_COND25_PREHEADER:%.*]], label [[FOR_BODY]]
+; CHECK: while.cond25.preheader:
+; CHECK-NEXT: call void @llvm.set.loop.iterations.i64(i64 51)
+; CHECK-NEXT: br label [[WHILE_COND25:%.*]]
; CHECK: while.cond25:
-; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ [[INDVAR_NEXT:%.*]], [[LAND_RHS:%.*]] ], [ 0, [[FOR_INC]] ]
-; CHECK-NEXT: [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ 50, [[FOR_INC]] ]
-; CHECK-NEXT: [[CMP26_NOT:%.*]] = icmp eq i64 [[INDVARS_IV349]], 0
-; CHECK-NEXT: br i1 [[CMP26_NOT]], label [[WHILE_END187:%.*]], label [[LAND_RHS]]
+; CHECK-NEXT: [[INDVAR:%.*]] = phi i64 [ 0, [[WHILE_COND25_PREHEADER]] ], [ [[INDVAR_NEXT:%.*]], [[LAND_RHS:%.*]] ]
+; CHECK-NEXT: [[INDVARS_IV349:%.*]] = phi i64 [ [[INDVARS_IV_NEXT350:%.*]], [[LAND_RHS]] ], [ 50, [[WHILE_COND25_PREHEADER]] ]
+; CHECK-NEXT: [[TMP0:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1)
+; CHECK-NEXT: br i1 [[TMP0]], label [[LAND_RHS]], label [[WHILE_END187:%.*]]
; CHECK: land.rhs:
; CHECK-NEXT: [[INDVARS_IV_NEXT350]] = add nsw i64 [[INDVARS_IV349]], -1
; CHECK-NEXT: [[C_1:%.*]] = call i1 @cond()
; CHECK-NEXT: [[C_2:%.*]] = call i1 @cond()
; CHECK-NEXT: br i1 [[C_2]], label [[WHILE_END187]], label [[WHILE_COND35_PREHEADER:%.*]]
; CHECK: while.cond35.preheader:
-; CHECK-NEXT: [[TMP0:%.*]] = mul nsw i64 [[INDVAR_LCSSA1]], -1
-; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 51
-; CHECK-NEXT: call void @llvm.set.loop.iterations.i64(i64 [[TMP1]])
+; CHECK-NEXT: [[TMP1:%.*]] = mul nsw i64 [[INDVAR_LCSSA1]], -1
+; CHECK-NEXT: [[TMP2:%.*]] = add i64 [[TMP1]], 51
+; CHECK-NEXT: call void @llvm.set.loop.iterations.i64(i64 [[TMP2]])
; CHECK-NEXT: br label [[WHILE_COND35:%.*]]
; CHECK: while.cond35:
-; CHECK-NEXT: [[TMP2:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1)
-; CHECK-NEXT: br i1 [[TMP2]], label [[LAND_RHS37:%.*]], label [[IF_END51:%.*]]
+; CHECK-NEXT: [[TMP3:%.*]] = call i1 @llvm.loop.decrement.i64(i64 1)
+; CHECK-NEXT: br i1 [[TMP3]], label [[LAND_RHS37:%.*]], label [[IF_END51:%.*]]
; CHECK: land.rhs37:
; CHECK-NEXT: br label [[WHILE_COND35]]
; CHECK: if.end51:
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
;
-; CHECK-DAG: addi [[IV]], [[IV]], -1
-; CHECK-DAG: add [[SUM]], 3, [[SUM]]
-; CHECK-DAG: cmplwi [[IV]], 0
-; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
+; CHECK-64-DAG: addi [[IV]], [[IV]], -1
+; CHECK-64-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-64-DAG: cmpldi [[IV]], 0
+; CHECK-32-DAG: addi [[IV]], [[IV]], -1
+; CHECK-32-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-32-DAG: cmplwi [[IV]], 0
+; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]]
;
; Next BB.
; CHECK: slwi 3, [[SUM]], 3
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
;
-; CHECK-DAG: addi [[IV]], [[IV]], -1
-; CHECK-DAG: add [[SUM]], 3, [[SUM]]
-; CHECK-DAG: cmplwi [[IV]], 0
+; CHECK-64-DAG: addi [[IV]], [[IV]], -1
+; CHECK-64-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-64-DAG: cmpldi [[IV]], 0
+; CHECK-32-DAG: addi [[IV]], [[IV]], -1
+; CHECK-32-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-32-DAG: cmplwi [[IV]], 0
;
-; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
+; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]]
;
; Next BB
; CHECK: %for.exit
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
;
-; CHECK-DAG: addi [[IV]], [[IV]], -1
-; CHECK-DAG: add [[SUM]], 3, [[SUM]]
-; CHECK-DAG: cmplwi [[IV]], 0
+; CHECK-64-DAG: addi [[IV]], [[IV]], -1
+; CHECK-64-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-64-DAG: cmpldi [[IV]], 0
+; CHECK-32-DAG: addi [[IV]], [[IV]], -1
+; CHECK-32-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-32-DAG: cmplwi [[IV]], 0
;
-; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
+; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]]
;
; Next BB
; CHECK: bl {{.*}}somethingElse
; CHECK: {{.*}}[[LOOP:BB[0-9_]+]]: # %for.body
; CHECK: bl {{.*}}something
;
-; CHECK-DAG: addi [[IV]], [[IV]], -1
-; CHECK-DAG: add [[SUM]], 3, [[SUM]]
-; CHECK-DAG: cmplwi [[IV]], 0
+; CHECK-64-DAG: addi [[IV]], [[IV]], -1
+; CHECK-64-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-64-DAG: cmpldi [[IV]], 0
+; CHECK-32-DAG: addi [[IV]], [[IV]], -1
+; CHECK-32-DAG: add [[SUM]], 3, [[SUM]]
+; CHECK-32-DAG: cmplwi [[IV]], 0
;
-; CHECK-NEXT: bne 0, {{.*}}[[LOOP]]
+; CHECK-NEXT: bc 12, 1, {{.*}}[[LOOP]]
;
; Next BB.
; CHECK: slwi 3, [[SUM]], 3
; CHECK-NEXT: std 30, -16(1) # 8-byte Folded Spill
; CHECK-NEXT: stdu 1, -64(1)
; CHECK-NEXT: std 0, 80(1)
-; CHECK-NEXT: ld 29, 0(3)
+; CHECK-NEXT: ld 3, 0(3)
; CHECK-NEXT: ld 30, 32(1)
-; CHECK-NEXT: cmpld 30, 29
-; CHECK-NEXT: bge- 0, .LBB0_2
-; CHECK-NEXT: .p2align 5
-; CHECK-NEXT: .LBB0_1: # %bounds.ok
+; CHECK-NEXT: sub 4, 3, 30
+; CHECK-NEXT: cmpld 4, 3
+; CHECK-NEXT: iselgt 3, 0, 4
+; CHECK-NEXT: addi 29, 3, 1
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB0_1: # %forcond
+; CHECK-NEXT: #
+; CHECK-NEXT: addi 29, 29, -1
+; CHECK-NEXT: cmpldi 29, 0
+; CHECK-NEXT: bc 4, 1, .LBB0_3
+; CHECK-NEXT: # %bb.2: # %bounds.ok
; CHECK-NEXT: #
; CHECK-NEXT: lfs 2, 0(3)
; CHECK-NEXT: xxlxor 1, 1, 1
; CHECK-NEXT: nop
; CHECK-NEXT: addi 30, 30, 1
; CHECK-NEXT: stfs 1, 0(3)
-; CHECK-NEXT: cmpld 30, 29
-; CHECK-NEXT: blt+ 0, .LBB0_1
-; CHECK-NEXT: .LBB0_2: # %bounds.fail
+; CHECK-NEXT: b .LBB0_1
+; CHECK-NEXT: .LBB0_3: # %bounds.fail
; CHECK-NEXT: std 30, 32(1)
%pos = alloca i64, align 8
br label %forcond
; CHECK-NEXT: nop
; CHECK-NEXT: addi r30, r30, -1
; CHECK-NEXT: cmpldi r30, 0
-; CHECK-NEXT: bne cr0, .LBB0_3
+; CHECK-NEXT: bc 12, gt, .LBB0_3
; CHECK-NEXT: # %bb.4: # %bb15
; CHECK-NEXT: stb r3, 0(r3)
; CHECK-NEXT: addi r1, r1, 64
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -48(r1)
-; CHECK-NEXT: li r3, 1
; CHECK-NEXT: li r30, 0
+; CHECK-NEXT: li r3, 1
; CHECK-NEXT: std r0, 64(r1)
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb3
; CHECK-NEXT: addi r30, r30, -1
; CHECK-NEXT: li r3, 0
; CHECK-NEXT: cmpldi r30, 0
-; CHECK-NEXT: bne+ cr0, .LBB0_1
+; CHECK-NEXT: bc 12, gt, .LBB0_1
; CHECK-NEXT: # %bb.2: # %bb11
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: nop
; CHECK-NEXT: #
; CHECK-NEXT: addi r30, r30, -1
; CHECK-NEXT: cmpldi r30, 0
-; CHECK-NEXT: beq cr0, .LBB1_3
+; CHECK-NEXT: bc 4, gt, .LBB1_3
; CHECK-NEXT: # %bb.2: # %bb3
; CHECK-NEXT: #
; CHECK-NEXT: lhz r3, 0(0)
; CHECK-NEXT: ld r3, 0(r3)
; CHECK-NEXT: cmpdi r3, 0
; CHECK-NEXT: crnot 4*cr2+lt, eq
-; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB2_3
+; CHECK-NEXT: b .LBB2_2
; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB2_1: # %bb4
+; CHECK-NEXT: .LBB2_1: # %bb10
+; CHECK-NEXT: #
+; CHECK-NEXT: addi r30, r30, -1
+; CHECK-NEXT: cmpldi r30, 0
+; CHECK-NEXT: bc 4, gt, .LBB2_5
+; CHECK-NEXT: .LBB2_2: # %bb2
+; CHECK-NEXT: #
+; CHECK-NEXT: bc 12, 4*cr5+lt, .LBB2_1
+; CHECK-NEXT: # %bb.3: # %bb4
+; CHECK-NEXT: #
; CHECK-NEXT: lhz r3, 0(r3)
; CHECK-NEXT: bl __gnu_h2f_ieee
; CHECK-NEXT: nop
; CHECK-NEXT: bc 4, 4*cr2+lt, .LBB2_6
-; CHECK-NEXT: # %bb.2: # %bb8
+; CHECK-NEXT: # %bb.4: # %bb8
+; CHECK-NEXT: #
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: nop
; CHECK-NEXT: sth r3, 0(0)
-; CHECK-NEXT: .LBB2_3: # %bb10
-; CHECK-NEXT: #
-; CHECK-NEXT: cmpldi r30, 0
-; CHECK-NEXT: beq cr0, .LBB2_5
-; CHECK-NEXT: # %bb.4: # %bb12
-; CHECK-NEXT: #
-; CHECK-NEXT: addi r30, r30, 1
-; CHECK-NEXT: bc 4, 4*cr5+lt, .LBB2_1
-; CHECK-NEXT: b .LBB2_3
+; CHECK-NEXT: b .LBB2_1
; CHECK-NEXT: .LBB2_5: # %bb14
; CHECK-NEXT: ld r30, 32(r1) # 8-byte Folded Reload
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: stfd f31, -8(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -64(r1)
; CHECK-NEXT: fmr f31, f1
-; CHECK-NEXT: li r30, 0
+; CHECK-NEXT: li r3, 1
; CHECK-NEXT: li r29, 0
; CHECK-NEXT: std r0, 80(r1)
+; CHECK-NEXT: rldic r30, r3, 62, 1
; CHECK-NEXT: .p2align 5
; CHECK-NEXT: .LBB3_1: # %bb1
; CHECK-NEXT: #
; CHECK-NEXT: fmr f1, f31
; CHECK-NEXT: bl __gnu_f2h_ieee
; CHECK-NEXT: nop
-; CHECK-NEXT: addi r29, r29, -12
-; CHECK-NEXT: sth r3, 0(r30)
-; CHECK-NEXT: addi r30, r30, 24
-; CHECK-NEXT: cmpldi r29, 0
-; CHECK-NEXT: bne+ cr0, .LBB3_1
+; CHECK-NEXT: addi r30, r30, -1
+; CHECK-NEXT: sth r3, 0(r29)
+; CHECK-NEXT: addi r29, r29, 24
+; CHECK-NEXT: cmpldi r30, 0
+; CHECK-NEXT: bc 12, gt, .LBB3_1
; CHECK-NEXT: # %bb.2: # %bb5
;
; CHECK-P9-LABEL: func_48785:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: mflr 0
; CHECK-NEXT: stwu 1, -16(1)
-; CHECK-NEXT: # implicit-def: $r3
; CHECK-NEXT: stw 0, 20(1)
+; CHECK-NEXT: mtctr 3
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb1
; CHECK-NEXT: #
-; CHECK-NEXT: addi 3, 3, 1
-; CHECK-NEXT: cmplwi 3, 0
-; CHECK-NEXT: bne 0, .LBB0_1
+; CHECK-NEXT: bdnz .LBB0_1
; CHECK-NEXT: # %bb.2: # %bb8
; CHECK-NEXT: bl wibble
; CHECK-NEXT: lwz 0, 20(1)
; CHECK-NEXT: # kill: def $r7 killed $r7 killed $s7
; CHECK-NEXT: bl fma
; CHECK-NEXT: evmergelo 3, 3, 4
-; CHECK-NEXT: addi 28, 28, 1
+; CHECK-NEXT: addi 28, 28, -1
; CHECK-NEXT: cmplwi 28, 0
; CHECK-NEXT: efdctsiz 3, 3
-; CHECK-NEXT: bne 0, .LBB1_1
+; CHECK-NEXT: bc 12, 1, .LBB1_1
; CHECK-NEXT: # %bb.2: # %bb8
; CHECK-NEXT: bl wibble
; CHECK-NEXT: evldd 30, 32(1) # 8-byte Folded Reload
; CHECK-NEXT: efscfsi 3, 29
; CHECK-NEXT: mr 4, 3
; CHECK-NEXT: bl fmaf
-; CHECK-NEXT: addi 29, 29, 1
-; CHECK-NEXT: cmplw 30, 29
+; CHECK-NEXT: addi 30, 30, -1
; CHECK-NEXT: mr 5, 3
-; CHECK-NEXT: bne 0, .LBB56_2
+; CHECK-NEXT: cmplwi 30, 0
+; CHECK-NEXT: addi 29, 29, 1
+; CHECK-NEXT: bc 12, 1, .LBB56_2
; CHECK-NEXT: b .LBB56_4
; CHECK-NEXT: .LBB56_3:
; CHECK-NEXT: # implicit-def: $r5
; CHECK-NEXT: std r29, -24(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: stdu r1, -64(r1)
-; CHECK-NEXT: mr r29, r5
; CHECK-NEXT: mr r30, r4
-; CHECK-NEXT: mr r28, r3
+; CHECK-NEXT: cmpwi r5, 1
+; CHECK-NEXT: mr r29, r3
; CHECK-NEXT: std r2, 24(r1)
; CHECK-NEXT: std r0, 80(r1)
-; CHECK-NEXT: cmpwi r29, 1
-; CHECK-NEXT: bc 12, lt, .LBB0_3
+; CHECK-NEXT: bc 12, lt, .LBB0_4
; CHECK-NEXT: # %bb.1: # %entry
; CHECK-NEXT: cmpwi r30, 11
-; CHECK-NEXT: bc 12, lt, .LBB0_3
+; CHECK-NEXT: bc 12, lt, .LBB0_4
+; CHECK-NEXT: # %bb.2: # %for.body.us.preheader
+; CHECK-NEXT: addi r3, r5, -1
+; CHECK-NEXT: clrldi r3, r3, 32
+; CHECK-NEXT: addi r28, r3, 1
; CHECK-NEXT: .p2align 5
-; CHECK-NEXT: .LBB0_2: # %for.body.us
+; CHECK-NEXT: .LBB0_3: # %for.body.us
; CHECK-NEXT: #
-; CHECK-NEXT: mtctr r28
+; CHECK-NEXT: mtctr r29
; CHECK-NEXT: mr r3, r30
-; CHECK-NEXT: mr r12, r28
+; CHECK-NEXT: mr r12, r29
; CHECK-NEXT: bctrl
; CHECK-NEXT: ld 2, 24(r1)
-; CHECK-NEXT: addi r29, r29, -1
-; CHECK-NEXT: cmplwi r29, 0
-; CHECK-NEXT: bne cr0, .LBB0_2
-; CHECK-NEXT: .LBB0_3: # %for.cond.cleanup
-; CHECK-NEXT: mtctr r28
+; CHECK-NEXT: addi r28, r28, -1
+; CHECK-NEXT: cmpldi r28, 0
+; CHECK-NEXT: bc 12, gt, .LBB0_3
+; CHECK-NEXT: .LBB0_4: # %for.cond.cleanup
+; CHECK-NEXT: mtctr r29
; CHECK-NEXT: mr r3, r30
-; CHECK-NEXT: mr r12, r28
+; CHECK-NEXT: mr r12, r29
; CHECK-NEXT: bctrl
; CHECK-NEXT: ld 2, 24(r1)
; CHECK-NEXT: addi r1, r1, 64