From c43b8ec735e88472593ca420a5c6a17630f94066 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Mon, 25 Nov 2019 17:14:52 -0800
Subject: [PATCH] [X86] Add support for STRICT_FP_ROUND/STRICT_FP_EXTEND
 from/to fp128 to/from f32/f64/f80 in 64-bit mode.

These need to emit a libcall like we do for the non-strict version.

32-bit mode needs SoftenFloat support to be implemented for strict FP nodes.
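As an illustration, a constrained truncation like the following (adapted
from the new test; the value name %v is just a placeholder) should now be
lowered to a call to __trunctfsf2, with the strict node's chain threaded
through the libcall:

  %v = load fp128, fp128* @vf128, align 16
  %conv = call float @llvm.experimental.constrained.fptrunc.f32.f128(fp128 %v,
              metadata !"round.dynamic", metadata !"fpexcept.strict")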
Differential Revision: https://reviews.llvm.org/D70504
---
 llvm/lib/Target/X86/X86ISelLowering.cpp    |  54 ++++++---
 llvm/test/CodeGen/X86/fp128-cast-strict.ll | 175 +++++++++++++++++++++++++++++
 2 files changed, 212 insertions(+), 17 deletions(-)
 create mode 100644 llvm/test/CodeGen/X86/fp128-cast-strict.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c658363..33f50e5 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -690,7 +690,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
     setOperationAction(ISD::FSQRT, MVT::f128, LibCall);
     setOperationAction(ISD::STRICT_FSQRT, MVT::f128, LibCall);
 
-    setOperationAction(ISD::FP_EXTEND, MVT::f128, Custom);
+    setOperationAction(ISD::FP_EXTEND,        MVT::f128, Custom);
+    setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f128, Custom);
     // We need to custom handle any FP_ROUND with an f128 input, but
     // LegalizeDAG uses the result type to know when to run a custom handler.
     // So we have to list all legal floating point result types here.
@@ -19714,9 +19715,11 @@ SDValue X86TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) const {
 }
 
 SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
+  bool IsStrict = Op->isStrictFPOpcode();
+
   SDLoc DL(Op);
   MVT VT = Op.getSimpleValueType();
-  SDValue In = Op.getOperand(0);
+  SDValue In = Op.getOperand(IsStrict ? 1 : 0);
   MVT SVT = In.getSimpleValueType();
 
   if (VT == MVT::f128) {
@@ -19725,6 +19728,8 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
   }
 
   assert(SVT == MVT::v2f32 && "Only customize MVT::v2f32 type legalization!");
+  // FIXME: Strict fp.
+  assert(!IsStrict && "Strict FP not supported yet!");
 
   return DAG.getNode(X86ISD::VFPEXT, DL, VT,
                      DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v4f32,
@@ -19732,8 +19737,10 @@ SDValue X86TargetLowering::LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const {
 }
 
 SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
+  bool IsStrict = Op->isStrictFPOpcode();
+
   MVT VT = Op.getSimpleValueType();
-  SDValue In = Op.getOperand(0);
+  SDValue In = Op.getOperand(IsStrict ? 1 : 0);
   MVT SVT = In.getSimpleValueType();
 
   // It's legal except when f128 is involved
@@ -19745,17 +19752,17 @@ SDValue X86TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const {
   // FP_ROUND node has a second operand indicating whether it is known to be
   // precise. That doesn't take part in the LibCall so we can't directly use
   // LowerF128Call.
+
+  SDLoc dl(Op);
+  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
   MakeLibCallOptions CallOptions;
-  return makeLibCall(DAG, LC, VT, In, CallOptions, SDLoc(Op)).first;
-}
+  std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, LC, VT, In, CallOptions,
+                                                dl, Chain);
 
-// FIXME: This is a hack to allow FP_ROUND to be marked Custom without breaking
-// the default expansion of STRICT_FP_ROUND.
-static SDValue LowerSTRICT_FP_ROUND(SDValue Op, SelectionDAG &DAG) {
-  // FIXME: Need to form a libcall with an input chain for f128.
-  assert(Op.getOperand(0).getValueType() != MVT::f128 &&
-         "Don't know how to handle f128 yet!");
-  return Op;
+  if (IsStrict)
+    return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+
+  return Tmp.first;
 }
 
 /// Depending on uarch and/or optimizing for size, we might prefer to use a
@@ -27773,9 +27780,21 @@ SDValue X86TargetLowering::LowerGC_TRANSITION_END(SDValue Op,
 
 SDValue X86TargetLowering::LowerF128Call(SDValue Op, SelectionDAG &DAG,
                                          RTLIB::Libcall Call) const {
-  SmallVector<SDValue, 4> Ops(Op->op_begin(), Op->op_end());
+
+  bool IsStrict = Op->isStrictFPOpcode();
+  unsigned Offset = IsStrict ? 1 : 0;
+  SmallVector<SDValue, 4> Ops(Op->op_begin() + Offset, Op->op_end());
+
+  SDLoc dl(Op);
+  SDValue Chain = IsStrict ? Op.getOperand(0) : SDValue();
   MakeLibCallOptions CallOptions;
-  return makeLibCall(DAG, Call, MVT::f128, Ops, CallOptions, SDLoc(Op)).first;
+  std::pair<SDValue, SDValue> Tmp = makeLibCall(DAG, Call, MVT::f128, Ops,
+                                                CallOptions, dl, Chain);
+
+  if (IsStrict)
+    return DAG.getMergeValues({ Tmp.first, Tmp.second }, dl);
+
+  return Tmp.first;
 }
 
 /// Provide custom lowering hooks for some operations.
@@ -27825,9 +27844,10 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   case ISD::STRICT_FP_TO_SINT:
  case ISD::FP_TO_UINT:
   case ISD::STRICT_FP_TO_UINT:  return LowerFP_TO_INT(Op, DAG);
-  case ISD::FP_EXTEND:          return LowerFP_EXTEND(Op, DAG);
-  case ISD::FP_ROUND:           return LowerFP_ROUND(Op, DAG);
-  case ISD::STRICT_FP_ROUND:    return LowerSTRICT_FP_ROUND(Op, DAG);
+  case ISD::FP_EXTEND:
+  case ISD::STRICT_FP_EXTEND:   return LowerFP_EXTEND(Op, DAG);
+  case ISD::FP_ROUND:
+  case ISD::STRICT_FP_ROUND:    return LowerFP_ROUND(Op, DAG);
   case ISD::LOAD:               return LowerLoad(Op, Subtarget, DAG);
   case ISD::STORE:              return LowerStore(Op, Subtarget, DAG);
   case ISD::FADD:
diff --git a/llvm/test/CodeGen/X86/fp128-cast-strict.ll b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
new file mode 100644
index 0000000..84964d7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/fp128-cast-strict.ll
@@ -0,0 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+sse | FileCheck %s --check-prefixes=X64,X64-SSE
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-android -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
+; RUN: llc < %s -disable-strictnode-mutation -mtriple=x86_64-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefixes=X64,X64-AVX
+
+; Check soft floating point conversion function calls.
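+; Each conversion is expected to lower to a call to the matching soft-float
+; routine (__extendsftf2, __extenddftf2, __extendxftf2, __trunctfsf2,
+; __trunctfdf2, __trunctfxf2) rather than to inline code, since fp128
+; operations on x86-64 are implemented via libcalls.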
+
+@vf32 = common global float 0.000000e+00, align 4
+@vf64 = common global double 0.000000e+00, align 8
+@vf80 = common global x86_fp80 0xK00000000000000000000, align 8
+@vf128 = common global fp128 0xL00000000000000000000000000000000, align 16
+
+define void @TestFPExtF32_F128() nounwind strictfp {
+; X64-SSE-LABEL: TestFPExtF32_F128:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-SSE-NEXT:    callq __extendsftf2
+; X64-SSE-NEXT:    movaps %xmm0, {{.*}}(%rip)
+; X64-SSE-NEXT:    popq %rax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: TestFPExtF32_F128:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    pushq %rax
+; X64-AVX-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; X64-AVX-NEXT:    callq __extendsftf2
+; X64-AVX-NEXT:    vmovaps %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT:    popq %rax
+; X64-AVX-NEXT:    retq
+entry:
+  %0 = load float, float* @vf32, align 4
+  %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f32(float %0, metadata !"fpexcept.strict") #0
+  store fp128 %conv, fp128* @vf128, align 16
+  ret void
+}
+
+define void @TestFPExtF64_F128() nounwind strictfp {
+; X64-SSE-LABEL: TestFPExtF64_F128:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    pushq %rax
+; X64-SSE-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
+; X64-SSE-NEXT:    callq __extenddftf2
+; X64-SSE-NEXT:    movaps %xmm0, {{.*}}(%rip)
+; X64-SSE-NEXT:    popq %rax
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: TestFPExtF64_F128:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    pushq %rax
+; X64-AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X64-AVX-NEXT:    callq __extenddftf2
+; X64-AVX-NEXT:    vmovaps %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT:    popq %rax
+; X64-AVX-NEXT:    retq
+entry:
+  %0 = load double, double* @vf64, align 8
+  %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f64(double %0, metadata !"fpexcept.strict") #0
+  store fp128 %conv, fp128* @vf128, align 16
+  ret void
+}
+
+define void @TestFPExtF80_F128() nounwind strictfp {
+; X64-SSE-LABEL: TestFPExtF80_F128:
+; X64-SSE:       # %bb.0: # %entry
+; X64-SSE-NEXT:    subq $24, %rsp
+; X64-SSE-NEXT:    fldt {{.*}}(%rip)
+; X64-SSE-NEXT:    fstpt (%rsp)
+; X64-SSE-NEXT:    callq __extendxftf2
+; X64-SSE-NEXT:    movaps %xmm0, {{.*}}(%rip)
+; X64-SSE-NEXT:    addq $24, %rsp
+; X64-SSE-NEXT:    retq
+;
+; X64-AVX-LABEL: TestFPExtF80_F128:
+; X64-AVX:       # %bb.0: # %entry
+; X64-AVX-NEXT:    subq $24, %rsp
+; X64-AVX-NEXT:    fldt {{.*}}(%rip)
+; X64-AVX-NEXT:    fstpt (%rsp)
+; X64-AVX-NEXT:    callq __extendxftf2
+; X64-AVX-NEXT:    vmovaps %xmm0, {{.*}}(%rip)
+; X64-AVX-NEXT:    addq $24, %rsp
+; X64-AVX-NEXT:    retq
+entry:
+  %0 = load x86_fp80, x86_fp80* @vf80, align 8
+  %conv = call fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80 %0, metadata !"fpexcept.strict") #0
+  store fp128 %conv, fp128* @vf128, align 16
+  ret void
+}
+
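+; Note: the constrained fptrunc intrinsics below take a rounding-mode argument
+; (!"round.dynamic") because truncation may round, while the fpext intrinsics
+; above are exact and carry only the exception-behavior argument.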
!"round.dynamic", metadata !"fpexcept.strict") #0 + store float %conv, float* @vf32, align 4 + ret void +} + +define void @TestFPTruncF128_F64() nounwind strictfp { +; X64-SSE-LABEL: TestFPTruncF128_F64: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: callq __trunctfdf2 +; X64-SSE-NEXT: movsd %xmm0, {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPTruncF128_F64: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: callq __trunctfdf2 +; X64-AVX-NEXT: vmovsd %xmm0, {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = call double @llvm.experimental.constrained.fptrunc.f64.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store double %conv, double* @vf64, align 8 + ret void +} + +define void @TestFPTruncF128_F80() nounwind strictfp { +; X64-SSE-LABEL: TestFPTruncF128_F80: +; X64-SSE: # %bb.0: # %entry +; X64-SSE-NEXT: pushq %rax +; X64-SSE-NEXT: movaps {{.*}}(%rip), %xmm0 +; X64-SSE-NEXT: callq __trunctfxf2 +; X64-SSE-NEXT: fstpt {{.*}}(%rip) +; X64-SSE-NEXT: popq %rax +; X64-SSE-NEXT: retq +; +; X64-AVX-LABEL: TestFPTruncF128_F80: +; X64-AVX: # %bb.0: # %entry +; X64-AVX-NEXT: pushq %rax +; X64-AVX-NEXT: vmovaps {{.*}}(%rip), %xmm0 +; X64-AVX-NEXT: callq __trunctfxf2 +; X64-AVX-NEXT: fstpt {{.*}}(%rip) +; X64-AVX-NEXT: popq %rax +; X64-AVX-NEXT: retq +entry: + %0 = load fp128, fp128* @vf128, align 16 + %conv = call x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128 %0, metadata !"round.dynamic", metadata !"fpexcept.strict") #0 + store x86_fp80 %conv, x86_fp80* @vf80, align 8 + ret void +} + +attributes #0 = { strictfp } + +declare float @llvm.experimental.constrained.fptrunc.f32.f128(fp128, metadata, metadata) +declare double @llvm.experimental.constrained.fptrunc.f64.f128(fp128, metadata, metadata) +declare x86_fp80 @llvm.experimental.constrained.fptrunc.f80.f128(fp128, metadata, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f32(float, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f64(double, metadata) +declare fp128 @llvm.experimental.constrained.fpext.f128.f80(x86_fp80, metadata) -- 2.7.4