From: Cameron McInally
Date: Tue, 30 Oct 2018 21:01:29 +0000 (+0000)
Subject: [FPEnv] Add constrained intrinsics for MAXNUM and MINNUM
X-Git-Tag: llvmorg-8.0.0-rc1~5431
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=2ad870e785213d9c322305983bf6f31763ab9ae0;p=platform%2Fupstream%2Fllvm.git

[FPEnv] Add constrained intrinsics for MAXNUM and MINNUM

Differential Revision: https://reviews.llvm.org/D53216

llvm-svn: 345650
---

diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index d396e3f..39134fa 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -14485,6 +14485,80 @@ mode is determined by the runtime floating-point environment. The rounding
 mode argument is only intended as information to the compiler.
 
+'``llvm.experimental.constrained.maxnum``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.maxnum(<type> <op1>, <type> <op2>,
+                                            metadata <rounding mode>,
+                                            metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.maxnum``' intrinsic returns the maximum
+of the two arguments.
+
+Arguments:
+""""""""""
+
+The first two arguments and the return value are floating-point numbers
+of the same type.
+
+The third and fourth arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function follows the IEEE-754 semantics for maxNum. The rounding mode is
+described, not determined, by the rounding mode argument. The actual rounding
+mode is determined by the runtime floating-point environment. The rounding
+mode argument is only intended as information to the compiler.
+
+
+'``llvm.experimental.constrained.minnum``' Intrinsic
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Syntax:
+"""""""
+
+::
+
+      declare <type>
+      @llvm.experimental.constrained.minnum(<type> <op1>, <type> <op2>,
+                                            metadata <rounding mode>,
+                                            metadata <exception behavior>)
+
+Overview:
+"""""""""
+
+The '``llvm.experimental.constrained.minnum``' intrinsic returns the minimum
+of the two arguments.
+
+Arguments:
+""""""""""
+
+The first two arguments and the return value are floating-point numbers
+of the same type.
+
+The third and fourth arguments specify the rounding mode and exception
+behavior as described above.
+
+Semantics:
+""""""""""
+
+This function follows the IEEE-754 semantics for minNum. The rounding mode is
+described, not determined, by the rounding mode argument. The actual rounding
+mode is determined by the runtime floating-point environment. The rounding
+mode argument is only intended as information to the compiler.
+
+
 General Intrinsics
 ------------------
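For illustration only (not part of the patch): calling the new intrinsics from
IR follows the same pattern as the other constrained intrinsics. A minimal
sketch, assuming the standard overload-name mangling (here the f64 overload)
and the metadata strings documented above:

    define double @maxmin_demo(double %a, double %b) {
    entry:
      ; maxNum/minNum of %a and %b; the operations themselves do not round,
      ; so the rounding-mode metadata is purely advisory here.
      %max = call double @llvm.experimental.constrained.maxnum.f64(
                             double %a, double %b,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict")
      %min = call double @llvm.experimental.constrained.minnum.f64(
                             double %a, double %b,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict")
      ; Combine the results with a constrained add to stay inside the
      ; constrained-FP model end to end.
      %sum = call double @llvm.experimental.constrained.fadd.f64(
                             double %max, double %min,
                             metadata !"round.dynamic",
                             metadata !"fpexcept.strict")
      ret double %sum
    }

    declare double @llvm.experimental.constrained.maxnum.f64(double, double, metadata, metadata)
    declare double @llvm.experimental.constrained.minnum.f64(double, double, metadata, metadata)
    declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)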
diff --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index b8e3129..a023aa5 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -288,7 +288,7 @@ namespace ISD {
     /// They are used to limit optimizations while the DAG is being optimized.
     STRICT_FSQRT, STRICT_FPOW, STRICT_FPOWI, STRICT_FSIN, STRICT_FCOS,
     STRICT_FEXP, STRICT_FEXP2, STRICT_FLOG, STRICT_FLOG10, STRICT_FLOG2,
-    STRICT_FRINT, STRICT_FNEARBYINT,
+    STRICT_FRINT, STRICT_FNEARBYINT, STRICT_FMAXNUM, STRICT_FMINNUM,
 
     /// FMA - Perform a * b + c with no intermediate rounding step.
     FMA,

diff --git a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
index 28d27b7..262c7b7 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAGNodes.h
@@ -672,6 +672,8 @@ public:
     case ISD::STRICT_FLOG2:
     case ISD::STRICT_FRINT:
     case ISD::STRICT_FNEARBYINT:
+    case ISD::STRICT_FMAXNUM:
+    case ISD::STRICT_FMINNUM:
       return true;
     }
   }

diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index a4356db..7a779f0 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -819,6 +819,8 @@ public:
     case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
     case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
     case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
+    case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
+    case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
     }
 
     auto Action = getOperationAction(EqOpc, VT);

diff --git a/llvm/include/llvm/IR/IntrinsicInst.h b/llvm/include/llvm/IR/IntrinsicInst.h
index 32a62a4..54e344d 100644
--- a/llvm/include/llvm/IR/IntrinsicInst.h
+++ b/llvm/include/llvm/IR/IntrinsicInst.h
@@ -251,6 +251,8 @@ namespace llvm {
       case Intrinsic::experimental_constrained_log2:
       case Intrinsic::experimental_constrained_rint:
       case Intrinsic::experimental_constrained_nearbyint:
+      case Intrinsic::experimental_constrained_maxnum:
+      case Intrinsic::experimental_constrained_minnum:
         return true;
       default: return false;
       }

diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 989a04d..47a66a2 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -565,6 +565,16 @@ let IntrProperties = [IntrInaccessibleMemOnly] in {
                                                       [ LLVMMatchType<0>,
                                                         llvm_metadata_ty,
                                                         llvm_metadata_ty ]>;
+  def int_experimental_constrained_maxnum : Intrinsic<[ llvm_anyfloat_ty ],
+                                                      [ LLVMMatchType<0>,
+                                                        LLVMMatchType<0>,
+                                                        llvm_metadata_ty,
+                                                        llvm_metadata_ty ]>;
+  def int_experimental_constrained_minnum : Intrinsic<[ llvm_anyfloat_ty ],
+                                                      [ LLVMMatchType<0>,
+                                                        LLVMMatchType<0>,
+                                                        llvm_metadata_ty,
+                                                        llvm_metadata_ty ]>;
 }
 // FIXME: Add intrinsics for fcmp, fptrunc, fpext, fptoui and fptosi.
 // FIXME: Add intrinsics for fabs, copysign, floor, ceil, trunc and round?

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 6d9e69e..65f7877 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1107,6 +1107,8 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
   case ISD::STRICT_FLOG2:
   case ISD::STRICT_FRINT:
   case ISD::STRICT_FNEARBYINT:
+  case ISD::STRICT_FMAXNUM:
+  case ISD::STRICT_FMINNUM:
     // These pseudo-ops get legalized as if they were their non-strict
     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
     // is also legal, but if ISD::FSQRT requires expansion then so does
@@ -3833,11 +3835,13 @@ void SelectionDAGLegalize::ConvertNodeToLibcall(SDNode *Node) {
     break;
   }
   case ISD::FMINNUM:
+  case ISD::STRICT_FMINNUM:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::FMIN_F32, RTLIB::FMIN_F64,
                                       RTLIB::FMIN_F80, RTLIB::FMIN_F128,
                                       RTLIB::FMIN_PPCF128));
     break;
   case ISD::FMAXNUM:
+  case ISD::STRICT_FMAXNUM:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::FMAX_F32, RTLIB::FMAX_F64,
                                       RTLIB::FMAX_F80, RTLIB::FMAX_F128,
                                       RTLIB::FMAX_PPCF128));

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 109276a..1b68f21 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -305,6 +305,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
   case ISD::STRICT_FLOG2:
   case ISD::STRICT_FRINT:
   case ISD::STRICT_FNEARBYINT:
+  case ISD::STRICT_FMAXNUM:
+  case ISD::STRICT_FMINNUM:
     // These pseudo-ops get legalized as if they were their non-strict
     // equivalent.  For instance, if ISD::FSQRT is legal then ISD::STRICT_FSQRT
     // is also legal, but if ISD::FSQRT requires expansion then so does
@@ -751,6 +753,8 @@ SDValue VectorLegalizer::Expand(SDValue Op) {
   case ISD::STRICT_FLOG2:
   case ISD::STRICT_FRINT:
   case ISD::STRICT_FNEARBYINT:
+  case ISD::STRICT_FMAXNUM:
+  case ISD::STRICT_FMINNUM:
     return ExpandStrictFPOp(Op);
   default:
     return DAG.UnrollVectorOp(Op.getNode());

diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index e7ad251..59bd751 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -164,6 +164,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::STRICT_FLOG2:
   case ISD::STRICT_FRINT:
   case ISD::STRICT_FNEARBYINT:
+  case ISD::STRICT_FMAXNUM:
+  case ISD::STRICT_FMINNUM:
     R = ScalarizeVecRes_StrictFPOp(N);
     break;
   }
@@ -834,6 +836,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::STRICT_FLOG2:
   case ISD::STRICT_FRINT:
   case ISD::STRICT_FNEARBYINT:
+  case ISD::STRICT_FMAXNUM:
+  case ISD::STRICT_FMINNUM:
     SplitVecRes_StrictFPOp(N, Lo, Hi);
     break;
   }
@@ -2400,6 +2404,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
   case ISD::STRICT_FLOG2:
   case ISD::STRICT_FRINT:
   case ISD::STRICT_FNEARBYINT:
+  case ISD::STRICT_FMAXNUM:
+  case ISD::STRICT_FMINNUM:
     Res = WidenVecRes_StrictFP(N);
     break;

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 84e955f..4d509c9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -7404,6 +7404,8 @@ SDNode* SelectionDAG::mutateStrictFPToFP(SDNode *Node) {
   case ISD::STRICT_FNEARBYINT:
     NewOpc = ISD::FNEARBYINT;
     IsUnary = true;
     break;
+  case ISD::STRICT_FMAXNUM: NewOpc = ISD::FMAXNUM; break;
+  case ISD::STRICT_FMINNUM: NewOpc = ISD::FMINNUM; break;
   }
 
   // We're taking this node out of the chain, so we need to re-link things.
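As background (not part of the patch): because getStrictFPOperationAction()
above maps each STRICT_ opcode to its non-strict equivalent before consulting
the action table, a target with native max/min instructions does not need to
register the STRICT_ opcodes separately. A sketch of the usual opt-in,
assuming a hypothetical target's ISelLowering constructor:

    // Marking the non-strict opcodes Legal also makes STRICT_FMAXNUM and
    // STRICT_FMINNUM legal, since legalization queries the action of the
    // equivalent opcode (EqOpc) computed in the switch added above.
    setOperationAction(ISD::FMAXNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMAXNUM, MVT::f64, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f32, Legal);
    setOperationAction(ISD::FMINNUM, MVT::f64, Legal);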
diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index ddead1d..5ecb2ab 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5627,6 +5627,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
   case Intrinsic::experimental_constrained_log2:
   case Intrinsic::experimental_constrained_rint:
   case Intrinsic::experimental_constrained_nearbyint:
+  case Intrinsic::experimental_constrained_maxnum:
+  case Intrinsic::experimental_constrained_minnum:
     visitConstrainedFPIntrinsic(cast<ConstrainedFPIntrinsic>(I));
     return nullptr;
   case Intrinsic::fmuladd: {
@@ -6374,6 +6376,12 @@ void SelectionDAGBuilder::visitConstrainedFPIntrinsic(
   case Intrinsic::experimental_constrained_nearbyint:
     Opcode = ISD::STRICT_FNEARBYINT;
     break;
+  case Intrinsic::experimental_constrained_maxnum:
+    Opcode = ISD::STRICT_FMAXNUM;
+    break;
+  case Intrinsic::experimental_constrained_minnum:
+    Opcode = ISD::STRICT_FMINNUM;
+    break;
   }
   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   SDValue Chain = getRoot();

diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index bae163d..5c17a5d 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -175,7 +175,9 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   // Unary operators
   case ISD::FABS:           return "fabs";
   case ISD::FMINNUM:        return "fminnum";
+  case ISD::STRICT_FMINNUM: return "strict_fminnum";
   case ISD::FMAXNUM:        return "fmaxnum";
+  case ISD::STRICT_FMAXNUM: return "strict_fmaxnum";
   case ISD::FMINNUM_IEEE:   return "fminnum_ieee";
   case ISD::FMAXNUM_IEEE:   return "fmaxnum_ieee";
   case ISD::FMINIMUM:       return "fminimum";

diff --git a/llvm/lib/IR/Verifier.cpp b/llvm/lib/IR/Verifier.cpp
index 3c6defd..7c0381a 100644
--- a/llvm/lib/IR/Verifier.cpp
+++ b/llvm/lib/IR/Verifier.cpp
@@ -4104,6 +4104,8 @@ void Verifier::visitIntrinsicCallSite(Intrinsic::ID ID, CallSite CS) {
   case Intrinsic::experimental_constrained_log2:
   case Intrinsic::experimental_constrained_rint:
   case Intrinsic::experimental_constrained_nearbyint:
+  case Intrinsic::experimental_constrained_maxnum:
+  case Intrinsic::experimental_constrained_minnum:
     visitConstrainedFPIntrinsic(
         cast<ConstrainedFPIntrinsic>(*CS.getInstruction()));
     break;

diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
index 44c1948..ad07e9b 100644
--- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
+++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll
@@ -3668,6 +3668,515 @@ entry:
   ret <4 x double> %nearby
 }
 
+define <1 x float> @constrained_vector_maxnum_v1f32() {
+; NO-FMA-LABEL: constrained_vector_maxnum_v1f32:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: pushq %rax
+; NO-FMA-NEXT: .cfi_def_cfa_offset 16
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fmaxf
+; NO-FMA-NEXT: popq %rax
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_maxnum_v1f32:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: pushq %rax
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 16
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fmaxf
+; HAS-FMA-NEXT: popq %rax
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %max = call <1 x float> @llvm.experimental.constrained.maxnum.v1f32(
+           <1 x float> , <1 x float> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %max
+}
+
+define <2 x double> @constrained_vector_maxnum_v2f64() {
+; NO-FMA-LABEL: constrained_vector_maxnum_v2f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 32
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; NO-FMA-NEXT: addq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_maxnum_v2f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $24, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: addq $24, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %max = call <2 x double> @llvm.experimental.constrained.maxnum.v2f64(
+           <2 x double> ,
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %max
+}
+
+define <3 x float> @constrained_vector_maxnum_v3f32() {
+; NO-FMA-LABEL: constrained_vector_maxnum_v3f32:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 48
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fmaxf
+; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fmaxf
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fmaxf
+; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
+; NO-FMA-NEXT: movaps %xmm1, %xmm0
+; NO-FMA-NEXT: addq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_maxnum_v3f32:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fmaxf
+; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fmaxf
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fmaxf
+; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; HAS-FMA-NEXT: addq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %max = call <3 x float> @llvm.experimental.constrained.maxnum.v3f32(
+           <3 x float> ,
+           <3 x float> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %max
+}
+
+define <3 x double> @constrained_vector_max_v3f64() {
+; NO-FMA-LABEL: constrained_vector_max_v3f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 32
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
+; NO-FMA-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; NO-FMA-NEXT: # xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; NO-FMA-NEXT: # xmm1 = mem[0],zero
+; NO-FMA-NEXT: addq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_max_v3f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $56, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: vzeroupper
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; HAS-FMA-NEXT: addq $56, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %max = call <3 x double> @llvm.experimental.constrained.maxnum.v3f64(
+           <3 x double> ,
+           <3 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %max
+}
+
+define <4 x double> @constrained_vector_maxnum_v4f64() {
+; NO-FMA-LABEL: constrained_vector_maxnum_v4f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 48
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmax
+; NO-FMA-NEXT: movaps %xmm0, %xmm1
+; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
+; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; NO-FMA-NEXT: addq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_maxnum_v4f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmax
+; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: addq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %max = call <4 x double> @llvm.experimental.constrained.maxnum.v4f64(
+           <4 x double> ,
+           <4 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %max
+}
+
+define <1 x float> @constrained_vector_minnum_v1f32() {
+; NO-FMA-LABEL: constrained_vector_minnum_v1f32:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: pushq %rax
+; NO-FMA-NEXT: .cfi_def_cfa_offset 16
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fminf
+; NO-FMA-NEXT: popq %rax
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_minnum_v1f32:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: pushq %rax
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 16
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fminf
+; HAS-FMA-NEXT: popq %rax
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %min = call <1 x float> @llvm.experimental.constrained.minnum.v1f32(
+           <1 x float> , <1 x float> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <1 x float> %min
+}
+
+define <2 x double> @constrained_vector_minnum_v2f64() {
+; NO-FMA-LABEL: constrained_vector_minnum_v2f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 32
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; NO-FMA-NEXT: addq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_minnum_v2f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $24, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 32
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: addq $24, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %min = call <2 x double> @llvm.experimental.constrained.minnum.v2f64(
+           <2 x double> ,
+           <2 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <2 x double> %min
+}
+
+define <3 x float> @constrained_vector_minnum_v3f32() {
+; NO-FMA-LABEL: constrained_vector_minnum_v3f32:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 48
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fminf
+; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fminf
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; NO-FMA-NEXT: callq fminf
+; NO-FMA-NEXT: movaps (%rsp), %xmm1 # 16-byte Reload
+; NO-FMA-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
+; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
+; NO-FMA-NEXT: movaps %xmm1, %xmm0
+; NO-FMA-NEXT: addq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_minnum_v3f32:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fminf
+; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fminf
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; HAS-FMA-NEXT: callq fminf
+; HAS-FMA-NEXT: vmovaps (%rsp), %xmm1 # 16-byte Reload
+; HAS-FMA-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[2,3]
+; HAS-FMA-NEXT: vinsertps $32, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0,1],mem[0],xmm0[3]
+; HAS-FMA-NEXT: addq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %min = call <3 x float> @llvm.experimental.constrained.minnum.v3f32(
+           <3 x float> ,
+           <3 x float> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x float> %min
+}
+
+define <3 x double> @constrained_vector_min_v3f64() {
+; NO-FMA-LABEL: constrained_vector_min_v3f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 32
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movsd %xmm0, {{[0-9]+}}(%rsp)
+; NO-FMA-NEXT: fldl {{[0-9]+}}(%rsp)
+; NO-FMA-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Reload
+; NO-FMA-NEXT: # xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd (%rsp), %xmm1 # 8-byte Reload
+; NO-FMA-NEXT: # xmm1 = mem[0],zero
+; NO-FMA-NEXT: addq $24, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_min_v3f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $56, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 64
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vmovups %ymm0, (%rsp) # 32-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: vzeroupper
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovups (%rsp), %ymm1 # 32-byte Reload
+; HAS-FMA-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; HAS-FMA-NEXT: addq $56, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %min = call <3 x double> @llvm.experimental.constrained.minnum.v3f64(
+           <3 x double> ,
+           <3 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <3 x double> %min
+}
+
+define <4 x double> @constrained_vector_minnum_v4f64() {
+; NO-FMA-LABEL: constrained_vector_minnum_v4f64:
+; NO-FMA: # %bb.0: # %entry
+; NO-FMA-NEXT: subq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 48
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; NO-FMA-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; NO-FMA-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
+; NO-FMA-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
+; NO-FMA-NEXT: callq fmin
+; NO-FMA-NEXT: movaps %xmm0, %xmm1
+; NO-FMA-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
+; NO-FMA-NEXT: # xmm1 = xmm1[0],mem[0]
+; NO-FMA-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
+; NO-FMA-NEXT: addq $40, %rsp
+; NO-FMA-NEXT: .cfi_def_cfa_offset 8
+; NO-FMA-NEXT: retq
+;
+; HAS-FMA-LABEL: constrained_vector_minnum_v4f64:
+; HAS-FMA: # %bb.0: # %entry
+; HAS-FMA-NEXT: subq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 48
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vunpcklpd (%rsp), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
+; HAS-FMA-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
+; HAS-FMA-NEXT: callq fmin
+; HAS-FMA-NEXT: vunpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: # xmm0 = xmm0[0],mem[0]
+; HAS-FMA-NEXT: vinsertf128 $1, (%rsp), %ymm0, %ymm0 # 16-byte Folded Reload
+; HAS-FMA-NEXT: addq $40, %rsp
+; HAS-FMA-NEXT: .cfi_def_cfa_offset 8
+; HAS-FMA-NEXT: retq
+entry:
+  %min = call <4 x double> @llvm.experimental.constrained.minnum.v4f64(
+           <4 x double> ,
+           <4 x double> ,
+           metadata !"round.dynamic",
+           metadata !"fpexcept.strict")
+  ret <4 x double> %min
+}
+
 ; Single width declarations
 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
@@ -3688,6 +4197,8 @@ declare <2 x double> @llvm.experimental.constrained.log10.v2f64(<2 x double>, me
 declare <2 x double> @llvm.experimental.constrained.log2.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.rint.v2f64(<2 x double>, metadata, metadata)
 declare <2 x double> @llvm.experimental.constrained.nearbyint.v2f64(<2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.maxnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
+declare <2 x double> @llvm.experimental.constrained.minnum.v2f64(<2 x double>, <2 x double>, metadata, metadata)
 
 ; Scalar width declarations
 declare <1 x float> @llvm.experimental.constrained.fadd.v1f32(<1 x float>, <1 x float>, metadata, metadata)
@@ -3708,6 +4219,8 @@ declare <1 x float> @llvm.experimental.constrained.log10.v1f32(<1 x float>, meta
 declare <1 x float> @llvm.experimental.constrained.log2.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.rint.v1f32(<1 x float>, metadata, metadata)
 declare <1 x float> @llvm.experimental.constrained.nearbyint.v1f32(<1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.maxnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
+declare <1 x float> @llvm.experimental.constrained.minnum.v1f32(<1 x float>, <1 x float>, metadata, metadata)
 
 ; Illegal width declarations
 declare <3 x float> @llvm.experimental.constrained.fadd.v3f32(<3 x float>, <3 x float>, metadata, metadata)
@@ -3746,6 +4259,10 @@ declare <3 x float> @llvm.experimental.constrained.rint.v3f32(<3 x float>, metad
 declare <3 x double> @llvm.experimental.constrained.rint.v3f64(<3 x double>, metadata, metadata)
 declare <3 x float> @llvm.experimental.constrained.nearbyint.v3f32(<3 x float>, metadata, metadata)
 declare <3 x double> @llvm.experimental.constrained.nearbyint.v3f64(<3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.maxnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.maxnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
+declare <3 x float> @llvm.experimental.constrained.minnum.v3f32(<3 x float>, <3 x float>, metadata, metadata)
+declare <3 x double> @llvm.experimental.constrained.minnum.v3f64(<3 x double>, <3 x double>, metadata, metadata)
 
 ; Double width declarations
 declare <4 x double> @llvm.experimental.constrained.fadd.v4f64(<4 x double>, <4 x double>, metadata, metadata)
@@ -3767,3 +4284,5 @@ declare <4 x double> @llvm.experimental.constrained.log10.v4f64(<4 x double>, me
 declare <4 x double> @llvm.experimental.constrained.log2.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.rint.v4f64(<4 x double>, metadata, metadata)
 declare <4 x double> @llvm.experimental.constrained.nearbyint.v4f64(<4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.maxnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)
+declare <4 x double> @llvm.experimental.constrained.minnum.v4f64(<4 x double>, <4 x double>, metadata, metadata)