// PPC (the libcall is not available).
setOperationAction(ISD::FP_TO_SINT, MVT::ppcf128, Custom);
setOperationAction(ISD::FP_TO_UINT, MVT::ppcf128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::ppcf128, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::ppcf128, Custom);
// We do not currently implement these libm ops for PowerPC.
setOperationAction(ISD::FFLOOR, MVT::ppcf128, Expand);
return "PPCISD::TLS_DYNAMIC_MAT_PCREL_ADDR";
case PPCISD::LD_SPLAT: return "PPCISD::LD_SPLAT";
case PPCISD::FNMSUB: return "PPCISD::FNMSUB";
+ case PPCISD::STRICT_FADDRTZ:
+ return "PPCISD::STRICT_FADDRTZ";
case PPCISD::STRICT_FCTIDZ:
return "PPCISD::STRICT_FCTIDZ";
case PPCISD::STRICT_FCTIWZ:
bool IsSigned = Op.getOpcode() == ISD::FP_TO_SINT ||
Op.getOpcode() == ISD::STRICT_FP_TO_SINT;
SDValue Src = Op.getOperand(IsStrict ? 1 : 0);
+ EVT SrcVT = Src.getValueType();
+ EVT DstVT = Op.getValueType();
+
// FP to INT conversions are legal for f128.
- if (Src.getValueType() == MVT::f128)
+ if (SrcVT == MVT::f128)
return Op;
// Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on
// PPC (the libcall is not available).
- if (Src.getValueType() == MVT::ppcf128 && !IsStrict) {
- if (Op.getValueType() == MVT::i32) {
+ if (SrcVT == MVT::ppcf128) {
+ if (DstVT == MVT::i32) {
+ // TODO: Conservatively propagate only the nofpexcept flag here. We still
+ // need to check and set the other fast-math flags on the FP operations
+ // (FP_TO_SINT, FSUB) in both the strict and non-strict cases.
+ SDNodeFlags Flags;
+ Flags.setNoFPExcept(Op->getFlags().hasNoFPExcept());
+
if (IsSigned) {
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
DAG.getIntPtrConstant(0, dl));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::f64, Src,
DAG.getIntPtrConstant(1, dl));
- // Add the two halves of the long double in round-to-zero mode.
- SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
-
- // Now use a smaller FP_TO_SINT.
- return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ // Add the two halves of the long double in round-to-zero mode, and use
+ // a smaller FP_TO_SINT.
+ if (IsStrict) {
+ SDValue Res = DAG.getNode(PPCISD::STRICT_FADDRTZ, dl,
+ DAG.getVTList(MVT::f64, MVT::Other),
+ {Op.getOperand(0), Lo, Hi}, Flags);
+ return DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
+ DAG.getVTList(MVT::i32, MVT::Other),
+ {Res.getValue(1), Res}, Flags);
+ } else {
+ SDValue Res = DAG.getNode(PPCISD::FADDRTZ, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Res);
+ }
} else {
const uint64_t TwoE31[] = {0x41e0000000000000LL, 0};
APFloat APF = APFloat(APFloat::PPCDoubleDouble(), APInt(128, TwoE31));
- SDValue Tmp = DAG.getConstantFP(APF, dl, MVT::ppcf128);
- // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
- // FIXME: generated code sucks.
- // TODO: Are there fast-math-flags to propagate to this FSUB?
- SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Tmp);
- True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
- True = DAG.getNode(ISD::ADD, dl, MVT::i32, True,
- DAG.getConstant(0x80000000, dl, MVT::i32));
- SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
- return DAG.getSelectCC(dl, Src, Tmp, True, False, ISD::SETGE);
+ SDValue Cst = DAG.getConstantFP(APF, dl, SrcVT);
+ SDValue SignMask = DAG.getConstant(0x80000000, dl, DstVT);
+ if (IsStrict) {
+ // Sel = Src < 0x80000000
+ // FltOfs = select Sel, 0.0, 0x80000000
+ // IntOfs = select Sel, 0, 0x80000000
+ // Result = fp_to_sint(Src - FltOfs) ^ IntOfs
+ SDValue Chain = Op.getOperand(0);
+ EVT SetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), SrcVT);
+ EVT DstSetCCVT =
+ getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), DstVT);
+ SDValue Sel =
+ DAG.getSetCC(dl, SetCCVT, Src, Cst, ISD::SETLT, Chain, true);
+ Chain = Sel.getValue(1);
+
+ SDValue FltOfs = DAG.getSelect(
+ dl, SrcVT, Sel, DAG.getConstantFP(0.0, dl, SrcVT), Cst);
+ Sel = DAG.getBoolExtOrTrunc(Sel, dl, DstSetCCVT, DstVT);
+
+ SDValue Val = DAG.getNode(ISD::STRICT_FSUB, dl,
+ DAG.getVTList(SrcVT, MVT::Other),
+ {Chain, Src, FltOfs}, Flags);
+ Chain = Val.getValue(1);
+ SDValue SInt = DAG.getNode(ISD::STRICT_FP_TO_SINT, dl,
+ DAG.getVTList(DstVT, MVT::Other),
+ {Chain, Val}, Flags);
+ Chain = SInt.getValue(1);
+ SDValue IntOfs = DAG.getSelect(
+ dl, DstVT, Sel, DAG.getConstant(0, dl, DstVT), SignMask);
+ SDValue Result = DAG.getNode(ISD::XOR, dl, DstVT, SInt, IntOfs);
+ return DAG.getMergeValues({Result, Chain}, dl);
+ } else {
+ // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X
+ // FIXME: The code generated for this sequence is poor.
+ SDValue True = DAG.getNode(ISD::FSUB, dl, MVT::ppcf128, Src, Cst);
+ True = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, True);
+ True = DAG.getNode(ISD::ADD, dl, MVT::i32, True, SignMask);
+ SDValue False = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Src);
+ return DAG.getSelectCC(dl, Src, Cst, True, False, ISD::SETGE);
+ }
}
}
.addReg(PPC::RM, RegState::ImplicitDefine);
// Perform addition.
- BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest).addReg(Src1).addReg(Src2);
+ auto MIB = BuildMI(*BB, MI, dl, TII->get(PPC::FADD), Dest)
+ .addReg(Src1)
+ .addReg(Src2);
+ if (MI.getFlag(MachineInstr::NoFPExcept))
+ MIB.setMIFlag(MachineInstr::NoFPExcept);
// Restore FPSCR value.
BuildMI(*BB, MI, dl, TII->get(PPC::MTFSFb)).addImm(1).addReg(MFFSReg);
STRICT_FCFIDS,
STRICT_FCFIDUS,
+ /// Constrained floating point add in round-to-zero mode.
+ STRICT_FADDRTZ,
+
/// CHAIN = STBRX CHAIN, GPRC, Ptr, Type - This is a
/// byte-swapping store instruction. It byte-swaps the low "Type" bits of
/// the GPRC input, then stores it through Ptr. Type can be either i16 or
// Perform FADD in round-to-zero mode.
def PPCfaddrtz: SDNode<"PPCISD::FADDRTZ", SDTFPBinOp, []>;
+def PPCstrict_faddrtz: SDNode<"PPCISD::STRICT_FADDRTZ", SDTFPBinOp,
+ [SDNPHasChain]>;
+def PPCany_faddrtz: PatFrags<(ops node:$lhs, node:$rhs),
+ [(PPCfaddrtz node:$lhs, node:$rhs),
+ (PPCstrict_faddrtz node:$lhs, node:$rhs)]>;
def PPCfsel : SDNode<"PPCISD::FSEL",
// Type constraint for fsel.
let Predicates = [HasFPU] in {
// Custom inserter instruction to perform FADD in round-to-zero mode.
-let Uses = [RM] in {
+let Uses = [RM], mayRaiseFPException = 1 in {
def FADDrtz: PPCCustomInserterPseudo<(outs f8rc:$FRT), (ins f8rc:$FRA, f8rc:$FRB), "",
- [(set f64:$FRT, (PPCfaddrtz f64:$FRA, f64:$FRB))]>;
+ [(set f64:$FRT, (PPCany_faddrtz f64:$FRA, f64:$FRB))]>;
}
// The above pseudo gets expanded to make use of the following instructions
define signext i32 @ppcq_to_i32(ppc_fp128 %m) #0 {
; P8-LABEL: ppcq_to_i32:
; P8: # %bb.0: # %entry
-; P8-NEXT: mflr r0
-; P8-NEXT: std r0, 16(r1)
-; P8-NEXT: stdu r1, -112(r1)
-; P8-NEXT: .cfi_def_cfa_offset 112
-; P8-NEXT: .cfi_offset lr, 16
-; P8-NEXT: bl __gcc_qtou
-; P8-NEXT: nop
+; P8-NEXT: mffs f0
+; P8-NEXT: mtfsb1 31
+; P8-NEXT: mtfsb0 30
+; P8-NEXT: fadd f1, f2, f1
+; P8-NEXT: mtfsf 1, f0
+; P8-NEXT: xscvdpsxws f0, f1
+; P8-NEXT: mffprwz r3, f0
; P8-NEXT: extsw r3, r3
-; P8-NEXT: addi r1, r1, 112
-; P8-NEXT: ld r0, 16(r1)
-; P8-NEXT: mtlr r0
; P8-NEXT: blr
;
; P9-LABEL: ppcq_to_i32:
; P9: # %bb.0: # %entry
-; P9-NEXT: mflr r0
-; P9-NEXT: std r0, 16(r1)
-; P9-NEXT: stdu r1, -32(r1)
-; P9-NEXT: .cfi_def_cfa_offset 32
-; P9-NEXT: .cfi_offset lr, 16
-; P9-NEXT: bl __gcc_qtou
-; P9-NEXT: nop
+; P9-NEXT: mffs f0
+; P9-NEXT: mtfsb1 31
+; P9-NEXT: mtfsb0 30
+; P9-NEXT: fadd f1, f2, f1
+; P9-NEXT: mtfsf 1, f0
+; P9-NEXT: xscvdpsxws f0, f1
+; P9-NEXT: mffprwz r3, f0
; P9-NEXT: extsw r3, r3
-; P9-NEXT: addi r1, r1, 32
-; P9-NEXT: ld r0, 16(r1)
-; P9-NEXT: mtlr r0
; P9-NEXT: blr
;
; NOVSX-LABEL: ppcq_to_i32:
; NOVSX: # %bb.0: # %entry
-; NOVSX-NEXT: mflr r0
-; NOVSX-NEXT: std r0, 16(r1)
-; NOVSX-NEXT: stdu r1, -32(r1)
-; NOVSX-NEXT: .cfi_def_cfa_offset 32
-; NOVSX-NEXT: .cfi_offset lr, 16
-; NOVSX-NEXT: bl __gcc_qtou
-; NOVSX-NEXT: nop
-; NOVSX-NEXT: extsw r3, r3
-; NOVSX-NEXT: addi r1, r1, 32
-; NOVSX-NEXT: ld r0, 16(r1)
-; NOVSX-NEXT: mtlr r0
+; NOVSX-NEXT: mffs f0
+; NOVSX-NEXT: mtfsb1 31
+; NOVSX-NEXT: addi r3, r1, -4
+; NOVSX-NEXT: mtfsb0 30
+; NOVSX-NEXT: fadd f1, f2, f1
+; NOVSX-NEXT: mtfsf 1, f0
+; NOVSX-NEXT: fctiwz f0, f1
+; NOVSX-NEXT: stfiwx f0, 0, r3
+; NOVSX-NEXT: lwa r3, -4(r1)
; NOVSX-NEXT: blr
entry:
%conv = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %m, metadata !"fpexcept.strict") #0
; P8: # %bb.0: # %entry
; P8-NEXT: mflr r0
; P8-NEXT: std r0, 16(r1)
-; P8-NEXT: stdu r1, -112(r1)
-; P8-NEXT: .cfi_def_cfa_offset 112
+; P8-NEXT: stdu r1, -128(r1)
+; P8-NEXT: .cfi_def_cfa_offset 128
; P8-NEXT: .cfi_offset lr, 16
-; P8-NEXT: bl __fixunstfsi
+; P8-NEXT: .cfi_offset r30, -16
+; P8-NEXT: addis r3, r2, .LCPI11_0@toc@ha
+; P8-NEXT: xxlxor f3, f3, f3
+; P8-NEXT: std r30, 112(r1) # 8-byte Folded Spill
+; P8-NEXT: lfs f0, .LCPI11_0@toc@l(r3)
+; P8-NEXT: fcmpo cr0, f2, f3
+; P8-NEXT: lis r3, -32768
+; P8-NEXT: xxlxor f3, f3, f3
+; P8-NEXT: fcmpo cr1, f1, f0
+; P8-NEXT: crand 4*cr5+lt, 4*cr1+eq, lt
+; P8-NEXT: crandc 4*cr5+gt, 4*cr1+lt, 4*cr1+eq
+; P8-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
+; P8-NEXT: isel r30, 0, r3, 4*cr5+lt
+; P8-NEXT: bc 12, 4*cr5+lt, .LBB11_2
+; P8-NEXT: # %bb.1: # %entry
+; P8-NEXT: fmr f3, f0
+; P8-NEXT: .LBB11_2: # %entry
+; P8-NEXT: xxlxor f4, f4, f4
+; P8-NEXT: bl __gcc_qsub
; P8-NEXT: nop
-; P8-NEXT: addi r1, r1, 112
+; P8-NEXT: mffs f0
+; P8-NEXT: mtfsb1 31
+; P8-NEXT: mtfsb0 30
+; P8-NEXT: fadd f1, f2, f1
+; P8-NEXT: mtfsf 1, f0
+; P8-NEXT: xscvdpsxws f0, f1
+; P8-NEXT: mffprwz r3, f0
+; P8-NEXT: xor r3, r3, r30
+; P8-NEXT: ld r30, 112(r1) # 8-byte Folded Reload
+; P8-NEXT: clrldi r3, r3, 32
+; P8-NEXT: addi r1, r1, 128
; P8-NEXT: ld r0, 16(r1)
; P8-NEXT: mtlr r0
; P8-NEXT: blr
; P9-LABEL: ppcq_to_u32:
; P9: # %bb.0: # %entry
; P9-NEXT: mflr r0
-; P9-NEXT: std r0, 16(r1)
-; P9-NEXT: stdu r1, -32(r1)
-; P9-NEXT: .cfi_def_cfa_offset 32
+; P9-NEXT: .cfi_def_cfa_offset 48
; P9-NEXT: .cfi_offset lr, 16
-; P9-NEXT: bl __fixunstfsi
+; P9-NEXT: .cfi_offset r30, -16
+; P9-NEXT: std r30, -16(r1) # 8-byte Folded Spill
+; P9-NEXT: std r0, 16(r1)
+; P9-NEXT: stdu r1, -48(r1)
+; P9-NEXT: addis r3, r2, .LCPI11_0@toc@ha
+; P9-NEXT: xxlxor f3, f3, f3
+; P9-NEXT: lfs f0, .LCPI11_0@toc@l(r3)
+; P9-NEXT: fcmpo cr1, f2, f3
+; P9-NEXT: lis r3, -32768
+; P9-NEXT: fcmpo cr0, f1, f0
+; P9-NEXT: xxlxor f3, f3, f3
+; P9-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
+; P9-NEXT: crandc 4*cr5+gt, lt, eq
+; P9-NEXT: cror 4*cr5+lt, 4*cr5+gt, 4*cr5+lt
+; P9-NEXT: isel r30, 0, r3, 4*cr5+lt
+; P9-NEXT: bc 12, 4*cr5+lt, .LBB11_2
+; P9-NEXT: # %bb.1: # %entry
+; P9-NEXT: fmr f3, f0
+; P9-NEXT: .LBB11_2: # %entry
+; P9-NEXT: xxlxor f4, f4, f4
+; P9-NEXT: bl __gcc_qsub
; P9-NEXT: nop
-; P9-NEXT: addi r1, r1, 32
+; P9-NEXT: mffs f0
+; P9-NEXT: mtfsb1 31
+; P9-NEXT: mtfsb0 30
+; P9-NEXT: fadd f1, f2, f1
+; P9-NEXT: mtfsf 1, f0
+; P9-NEXT: xscvdpsxws f0, f1
+; P9-NEXT: mffprwz r3, f0
+; P9-NEXT: xor r3, r3, r30
+; P9-NEXT: clrldi r3, r3, 32
+; P9-NEXT: addi r1, r1, 48
; P9-NEXT: ld r0, 16(r1)
+; P9-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; P9-NEXT: mtlr r0
; P9-NEXT: blr
;
; NOVSX-LABEL: ppcq_to_u32:
; NOVSX: # %bb.0: # %entry
+; NOVSX-NEXT: mfocrf r12, 32
; NOVSX-NEXT: mflr r0
; NOVSX-NEXT: std r0, 16(r1)
-; NOVSX-NEXT: stdu r1, -32(r1)
-; NOVSX-NEXT: .cfi_def_cfa_offset 32
+; NOVSX-NEXT: stw r12, 8(r1)
+; NOVSX-NEXT: stdu r1, -48(r1)
+; NOVSX-NEXT: .cfi_def_cfa_offset 48
; NOVSX-NEXT: .cfi_offset lr, 16
-; NOVSX-NEXT: bl __fixunstfsi
+; NOVSX-NEXT: .cfi_offset cr2, 8
+; NOVSX-NEXT: addis r3, r2, .LCPI11_0@toc@ha
+; NOVSX-NEXT: addis r4, r2, .LCPI11_1@toc@ha
+; NOVSX-NEXT: lfs f0, .LCPI11_0@toc@l(r3)
+; NOVSX-NEXT: lfs f4, .LCPI11_1@toc@l(r4)
+; NOVSX-NEXT: fcmpo cr0, f1, f0
+; NOVSX-NEXT: fcmpo cr1, f2, f4
+; NOVSX-NEXT: fmr f3, f4
+; NOVSX-NEXT: crand 4*cr5+lt, eq, 4*cr1+lt
+; NOVSX-NEXT: crandc 4*cr5+gt, lt, eq
+; NOVSX-NEXT: cror 4*cr2+lt, 4*cr5+gt, 4*cr5+lt
+; NOVSX-NEXT: bc 12, 4*cr2+lt, .LBB11_2
+; NOVSX-NEXT: # %bb.1: # %entry
+; NOVSX-NEXT: fmr f3, f0
+; NOVSX-NEXT: .LBB11_2: # %entry
+; NOVSX-NEXT: bl __gcc_qsub
; NOVSX-NEXT: nop
-; NOVSX-NEXT: addi r1, r1, 32
+; NOVSX-NEXT: mffs f0
+; NOVSX-NEXT: mtfsb1 31
+; NOVSX-NEXT: addi r3, r1, 44
+; NOVSX-NEXT: mtfsb0 30
+; NOVSX-NEXT: fadd f1, f2, f1
+; NOVSX-NEXT: mtfsf 1, f0
+; NOVSX-NEXT: fctiwz f0, f1
+; NOVSX-NEXT: stfiwx f0, 0, r3
+; NOVSX-NEXT: lis r3, -32768
+; NOVSX-NEXT: lwz r4, 44(r1)
+; NOVSX-NEXT: isel r3, 0, r3, 4*cr2+lt
+; NOVSX-NEXT: xor r3, r4, r3
+; NOVSX-NEXT: clrldi r3, r3, 32
+; NOVSX-NEXT: addi r1, r1, 48
; NOVSX-NEXT: ld r0, 16(r1)
+; NOVSX-NEXT: lwz r12, 8(r1)
+; NOVSX-NEXT: mtocrf 32, r12
; NOVSX-NEXT: mtlr r0
; NOVSX-NEXT: blr
entry:
ret fp128 %conv
}
-define void @fptoint_nofpexcept(fp128 %m, i32* %addr1, i64* %addr2) {
+define void @fptoint_nofpexcept(ppc_fp128 %p, fp128 %m, i32* %addr1, i64* %addr2) {
; MIR-LABEL: name: fptoint_nofpexcept
; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSWZ
; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPUWZ
; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPSDZ
; MIR: renamable $v{{[0-9]+}} = nofpexcept XSCVQPUDZ
+;
+; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD
+; MIR: renamable $f{{[0-9]+}} = XSCVDPSXWS
+; MIR: renamable $f{{[0-9]+}} = nofpexcept FADD
+; MIR: renamable $f{{[0-9]+}} = XSCVDPSXWS
entry:
%conv1 = tail call i32 @llvm.experimental.constrained.fptosi.i32.f128(fp128 %m, metadata !"fpexcept.ignore") #0
store volatile i32 %conv1, i32* %addr1, align 4
store volatile i64 %conv3, i64* %addr2, align 8
%conv4 = tail call i64 @llvm.experimental.constrained.fptoui.i64.f128(fp128 %m, metadata !"fpexcept.ignore") #0
store volatile i64 %conv4, i64* %addr2, align 8
+
+ %conv5 = tail call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0
+ store volatile i32 %conv5, i32* %addr1, align 4
+ %conv6 = tail call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(ppc_fp128 %p, metadata !"fpexcept.ignore") #0
+ store volatile i32 %conv6, i32* %addr1, align 4
ret void
}
define i32 @test_fptosi_ppc_i32_ppc_fp128(ppc_fp128 %first) #0 {
; PC64LE-LABEL: test_fptosi_ppc_i32_ppc_fp128:
; PC64LE: # %bb.0: # %entry
-; PC64LE-NEXT: mflr 0
-; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -32(1)
-; PC64LE-NEXT: bl __gcc_qtou
-; PC64LE-NEXT: nop
-; PC64LE-NEXT: addi 1, 1, 32
-; PC64LE-NEXT: ld 0, 16(1)
-; PC64LE-NEXT: mtlr 0
+; PC64LE-NEXT: mffs 0
+; PC64LE-NEXT: mtfsb1 31
+; PC64LE-NEXT: mtfsb0 30
+; PC64LE-NEXT: fadd 1, 2, 1
+; PC64LE-NEXT: mtfsf 1, 0
+; PC64LE-NEXT: xscvdpsxws 0, 1
+; PC64LE-NEXT: mffprwz 3, 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: test_fptosi_ppc_i32_ppc_fp128:
; PC64LE9: # %bb.0: # %entry
-; PC64LE9-NEXT: mflr 0
-; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -32(1)
-; PC64LE9-NEXT: bl __gcc_qtou
-; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: addi 1, 1, 32
-; PC64LE9-NEXT: ld 0, 16(1)
-; PC64LE9-NEXT: mtlr 0
+; PC64LE9-NEXT: mffs 0
+; PC64LE9-NEXT: mtfsb1 31
+; PC64LE9-NEXT: mtfsb0 30
+; PC64LE9-NEXT: fadd 1, 2, 1
+; PC64LE9-NEXT: mtfsf 1, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 1
+; PC64LE9-NEXT: mffprwz 3, 0
; PC64LE9-NEXT: blr
;
; PC64-LABEL: test_fptosi_ppc_i32_ppc_fp128:
; PC64: # %bb.0: # %entry
-; PC64-NEXT: mflr 0
-; PC64-NEXT: std 0, 16(1)
-; PC64-NEXT: stdu 1, -112(1)
-; PC64-NEXT: bl __gcc_qtou
-; PC64-NEXT: nop
-; PC64-NEXT: addi 1, 1, 112
-; PC64-NEXT: ld 0, 16(1)
-; PC64-NEXT: mtlr 0
+; PC64-NEXT: mffs 0
+; PC64-NEXT: mtfsb1 31
+; PC64-NEXT: mtfsb0 30
+; PC64-NEXT: fadd 1, 2, 1
+; PC64-NEXT: mtfsf 1, 0
+; PC64-NEXT: fctiwz 0, 1
+; PC64-NEXT: stfd 0, -8(1)
+; PC64-NEXT: lwz 3, -4(1)
; PC64-NEXT: blr
entry:
%fpext = call i32 @llvm.experimental.constrained.fptosi.i32.ppcf128(
; PC64LE-LABEL: test_fptoui_ppc_i32_ppc_fp128:
; PC64LE: # %bb.0: # %entry
; PC64LE-NEXT: mflr 0
+; PC64LE-NEXT: std 30, -16(1) # 8-byte Folded Spill
; PC64LE-NEXT: std 0, 16(1)
-; PC64LE-NEXT: stdu 1, -32(1)
-; PC64LE-NEXT: bl __fixunstfsi
+; PC64LE-NEXT: stdu 1, -48(1)
+; PC64LE-NEXT: addis 3, 2, .LCPI31_0@toc@ha
+; PC64LE-NEXT: xxlxor 3, 3, 3
+; PC64LE-NEXT: lfs 0, .LCPI31_0@toc@l(3)
+; PC64LE-NEXT: fcmpo 0, 2, 3
+; PC64LE-NEXT: lis 3, -32768
+; PC64LE-NEXT: xxlxor 3, 3, 3
+; PC64LE-NEXT: fcmpo 1, 1, 0
+; PC64LE-NEXT: crand 20, 6, 0
+; PC64LE-NEXT: crandc 21, 4, 6
+; PC64LE-NEXT: cror 20, 21, 20
+; PC64LE-NEXT: isel 30, 0, 3, 20
+; PC64LE-NEXT: bc 12, 20, .LBB31_2
+; PC64LE-NEXT: # %bb.1: # %entry
+; PC64LE-NEXT: fmr 3, 0
+; PC64LE-NEXT: .LBB31_2: # %entry
+; PC64LE-NEXT: xxlxor 4, 4, 4
+; PC64LE-NEXT: bl __gcc_qsub
; PC64LE-NEXT: nop
-; PC64LE-NEXT: addi 1, 1, 32
+; PC64LE-NEXT: mffs 0
+; PC64LE-NEXT: mtfsb1 31
+; PC64LE-NEXT: mtfsb0 30
+; PC64LE-NEXT: fadd 1, 2, 1
+; PC64LE-NEXT: mtfsf 1, 0
+; PC64LE-NEXT: xscvdpsxws 0, 1
+; PC64LE-NEXT: mffprwz 3, 0
+; PC64LE-NEXT: xor 3, 3, 30
+; PC64LE-NEXT: addi 1, 1, 48
; PC64LE-NEXT: ld 0, 16(1)
+; PC64LE-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; PC64LE-NEXT: mtlr 0
; PC64LE-NEXT: blr
;
; PC64LE9-LABEL: test_fptoui_ppc_i32_ppc_fp128:
; PC64LE9: # %bb.0: # %entry
; PC64LE9-NEXT: mflr 0
+; PC64LE9-NEXT: std 30, -16(1) # 8-byte Folded Spill
; PC64LE9-NEXT: std 0, 16(1)
-; PC64LE9-NEXT: stdu 1, -32(1)
-; PC64LE9-NEXT: bl __fixunstfsi
+; PC64LE9-NEXT: stdu 1, -48(1)
+; PC64LE9-NEXT: addis 3, 2, .LCPI31_0@toc@ha
+; PC64LE9-NEXT: xxlxor 3, 3, 3
+; PC64LE9-NEXT: lfs 0, .LCPI31_0@toc@l(3)
+; PC64LE9-NEXT: fcmpo 1, 2, 3
+; PC64LE9-NEXT: lis 3, -32768
+; PC64LE9-NEXT: fcmpo 0, 1, 0
+; PC64LE9-NEXT: xxlxor 3, 3, 3
+; PC64LE9-NEXT: crand 20, 2, 4
+; PC64LE9-NEXT: crandc 21, 0, 2
+; PC64LE9-NEXT: cror 20, 21, 20
+; PC64LE9-NEXT: isel 30, 0, 3, 20
+; PC64LE9-NEXT: bc 12, 20, .LBB31_2
+; PC64LE9-NEXT: # %bb.1: # %entry
+; PC64LE9-NEXT: fmr 3, 0
+; PC64LE9-NEXT: .LBB31_2: # %entry
+; PC64LE9-NEXT: xxlxor 4, 4, 4
+; PC64LE9-NEXT: bl __gcc_qsub
; PC64LE9-NEXT: nop
-; PC64LE9-NEXT: addi 1, 1, 32
+; PC64LE9-NEXT: mffs 0
+; PC64LE9-NEXT: mtfsb1 31
+; PC64LE9-NEXT: mtfsb0 30
+; PC64LE9-NEXT: fadd 1, 2, 1
+; PC64LE9-NEXT: mtfsf 1, 0
+; PC64LE9-NEXT: xscvdpsxws 0, 1
+; PC64LE9-NEXT: mffprwz 3, 0
+; PC64LE9-NEXT: xor 3, 3, 30
+; PC64LE9-NEXT: addi 1, 1, 48
; PC64LE9-NEXT: ld 0, 16(1)
+; PC64LE9-NEXT: ld 30, -16(1) # 8-byte Folded Reload
; PC64LE9-NEXT: mtlr 0
; PC64LE9-NEXT: blr
;
; PC64: # %bb.0: # %entry
; PC64-NEXT: mflr 0
; PC64-NEXT: std 0, 16(1)
-; PC64-NEXT: stdu 1, -112(1)
-; PC64-NEXT: bl __fixunstfsi
+; PC64-NEXT: mfcr 12
+; PC64-NEXT: stw 12, 8(1)
+; PC64-NEXT: stdu 1, -128(1)
+; PC64-NEXT: addis 3, 2, .LCPI31_0@toc@ha
+; PC64-NEXT: lfs 0, .LCPI31_0@toc@l(3)
+; PC64-NEXT: addis 3, 2, .LCPI31_1@toc@ha
+; PC64-NEXT: lfs 4, .LCPI31_1@toc@l(3)
+; PC64-NEXT: fcmpo 0, 1, 0
+; PC64-NEXT: crandc 21, 0, 2
+; PC64-NEXT: fcmpo 1, 2, 4
+; PC64-NEXT: crand 20, 2, 4
+; PC64-NEXT: cror 8, 21, 20
+; PC64-NEXT: fmr 3, 4
+; PC64-NEXT: bc 12, 8, .LBB31_2
+; PC64-NEXT: # %bb.1: # %entry
+; PC64-NEXT: fmr 3, 0
+; PC64-NEXT: .LBB31_2: # %entry
+; PC64-NEXT: bl __gcc_qsub
; PC64-NEXT: nop
-; PC64-NEXT: addi 1, 1, 112
+; PC64-NEXT: mffs 0
+; PC64-NEXT: mtfsb1 31
+; PC64-NEXT: lis 4, -32768
+; PC64-NEXT: bc 12, 8, .LBB31_3
+; PC64-NEXT: b .LBB31_4
+; PC64-NEXT: .LBB31_3: # %entry
+; PC64-NEXT: li 4, 0
+; PC64-NEXT: .LBB31_4: # %entry
+; PC64-NEXT: mtfsb0 30
+; PC64-NEXT: fadd 1, 2, 1
+; PC64-NEXT: mtfsf 1, 0
+; PC64-NEXT: fctiwz 0, 1
+; PC64-NEXT: stfd 0, 120(1)
+; PC64-NEXT: lwz 3, 124(1)
+; PC64-NEXT: xor 3, 3, 4
+; PC64-NEXT: addi 1, 1, 128
; PC64-NEXT: ld 0, 16(1)
+; PC64-NEXT: lwz 12, 8(1)
; PC64-NEXT: mtlr 0
+; PC64-NEXT: mtcrf 32, 12 # cr2
; PC64-NEXT: blr
entry:
%fpext = call i32 @llvm.experimental.constrained.fptoui.i32.ppcf128(