From c06e53119b1f04696fbcf710aaa0818cbfc99600 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Tue, 24 Dec 2019 11:08:06 -0800 Subject: [PATCH] [X86] Use 128-bit vector instructions for f32/f64->i64 conversions on 32-bit targets with avx512dq and avx512vl instructions. On 32-bit targets we can't use the scalar instruction so we insert the scalar into a vector and use packed conversions. Previously we used either v4f32->v4i64 or v4f64->v4i64 to avoid some complexity creating target specific ISD opcodes for v4f32->v2i64. But this causes extra vzeroupper instructions and possibly frequency throttling on Intel CPUs. This patch changes this to create a 128-bit vector and uses a target specific ISD opcode if needed. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 21 ++++++++++++++------- llvm/test/CodeGen/X86/scalar-fp-to-i64.ll | 24 ++++++++---------------- 2 files changed, 22 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index c6f834c..2e7d3d8 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -28690,12 +28690,19 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, if (Subtarget.hasDQI() && VT == MVT::i64 && (SrcVT == MVT::f32 || SrcVT == MVT::f64)) { assert(!Subtarget.is64Bit() && "i64 should be legal"); - unsigned NumElts = Subtarget.hasVLX() ? 4 : 8; - // Using a 256-bit input here to guarantee 128-bit input for f32 case. - // TODO: Use 128-bit vectors for f64 case? - // TODO: Use 128-bit vectors for f32 by using CVTTP2SI/CVTTP2UI. + unsigned NumElts = Subtarget.hasVLX() ? 2 : 8; + // If we use a 128-bit result we might need to use a target specific node. + unsigned SrcElts = + std::max(NumElts, 128U / (unsigned)SrcVT.getSizeInBits()); MVT VecVT = MVT::getVectorVT(MVT::i64, NumElts); - MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), NumElts); + MVT VecInVT = MVT::getVectorVT(SrcVT.getSimpleVT(), SrcElts); + unsigned Opc = N->getOpcode(); + if (NumElts != SrcElts) { + if (IsStrict) + Opc = IsSigned ? X86ISD::STRICT_CVTTP2SI : X86ISD::STRICT_CVTTP2UI; + else + Opc = IsSigned ? X86ISD::CVTTP2SI : X86ISD::CVTTP2UI; + } SDValue ZeroIdx = DAG.getIntPtrConstant(0, dl); SDValue Res = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecInVT, @@ -28704,10 +28711,10 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, SDValue Chain; if (IsStrict) { SDVTList Tys = DAG.getVTList(VecVT, MVT::Other); - Res = DAG.getNode(N->getOpcode(), SDLoc(N), Tys, N->getOperand(0), Res); + Res = DAG.getNode(Opc, SDLoc(N), Tys, N->getOperand(0), Res); Chain = Res.getValue(1); } else - Res = DAG.getNode(N->getOpcode(), SDLoc(N), VecVT, Res); + Res = DAG.getNode(Opc, SDLoc(N), VecVT, Res); Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, Res, ZeroIdx); Results.push_back(Res); if (IsStrict) diff --git a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll index 6164ebe..30e4996 100644 --- a/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll +++ b/llvm/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -42,19 +42,17 @@ define i64 @f_to_u64(float %a) nounwind { ; AVX512DQVL_32_WIN-LABEL: f_to_u64: ; AVX512DQVL_32_WIN: # %bb.0: ; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512DQVL_32_WIN-NEXT: vcvttps2uqq %xmm0, %ymm0 +; AVX512DQVL_32_WIN-NEXT: vcvttps2uqq %xmm0, %xmm0 ; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax ; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx -; AVX512DQVL_32_WIN-NEXT: vzeroupper ; AVX512DQVL_32_WIN-NEXT: retl ; ; AVX512DQVL_32_LIN-LABEL: f_to_u64: ; AVX512DQVL_32_LIN: # %bb.0: ; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512DQVL_32_LIN-NEXT: vcvttps2uqq %xmm0, %ymm0 +; AVX512DQVL_32_LIN-NEXT: vcvttps2uqq %xmm0, %xmm0 ; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax ; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx -; AVX512DQVL_32_LIN-NEXT: vzeroupper ; AVX512DQVL_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: f_to_u64: @@ -337,19 +335,17 @@ define i64 @f_to_s64(float %a) nounwind { ; AVX512DQVL_32_WIN-LABEL: f_to_s64: ; AVX512DQVL_32_WIN: # %bb.0: ; AVX512DQVL_32_WIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512DQVL_32_WIN-NEXT: vcvttps2qq %xmm0, %ymm0 +; AVX512DQVL_32_WIN-NEXT: vcvttps2qq %xmm0, %xmm0 ; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax ; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx -; AVX512DQVL_32_WIN-NEXT: vzeroupper ; AVX512DQVL_32_WIN-NEXT: retl ; ; AVX512DQVL_32_LIN-LABEL: f_to_s64: ; AVX512DQVL_32_LIN: # %bb.0: ; AVX512DQVL_32_LIN-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero -; AVX512DQVL_32_LIN-NEXT: vcvttps2qq %xmm0, %ymm0 +; AVX512DQVL_32_LIN-NEXT: vcvttps2qq %xmm0, %xmm0 ; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax ; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx -; AVX512DQVL_32_LIN-NEXT: vzeroupper ; AVX512DQVL_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: f_to_s64: @@ -524,19 +520,17 @@ define i64 @d_to_u64(double %a) nounwind { ; AVX512DQVL_32_WIN-LABEL: d_to_u64: ; AVX512DQVL_32_WIN: # %bb.0: ; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512DQVL_32_WIN-NEXT: vcvttpd2uqq %ymm0, %ymm0 +; AVX512DQVL_32_WIN-NEXT: vcvttpd2uqq %xmm0, %xmm0 ; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax ; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx -; AVX512DQVL_32_WIN-NEXT: vzeroupper ; AVX512DQVL_32_WIN-NEXT: retl ; ; AVX512DQVL_32_LIN-LABEL: d_to_u64: ; AVX512DQVL_32_LIN: # %bb.0: ; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512DQVL_32_LIN-NEXT: vcvttpd2uqq %ymm0, %ymm0 +; AVX512DQVL_32_LIN-NEXT: vcvttpd2uqq %xmm0, %xmm0 ; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax ; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx -; AVX512DQVL_32_LIN-NEXT: vzeroupper ; AVX512DQVL_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: d_to_u64: @@ -819,19 +813,17 @@ define i64 @d_to_s64(double %a) nounwind { ; AVX512DQVL_32_WIN-LABEL: d_to_s64: ; AVX512DQVL_32_WIN: # %bb.0: ; AVX512DQVL_32_WIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512DQVL_32_WIN-NEXT: vcvttpd2qq %ymm0, %ymm0 +; AVX512DQVL_32_WIN-NEXT: vcvttpd2qq %xmm0, %xmm0 ; AVX512DQVL_32_WIN-NEXT: vmovd %xmm0, %eax ; AVX512DQVL_32_WIN-NEXT: vpextrd $1, %xmm0, %edx -; AVX512DQVL_32_WIN-NEXT: vzeroupper ; AVX512DQVL_32_WIN-NEXT: retl ; ; AVX512DQVL_32_LIN-LABEL: d_to_s64: ; AVX512DQVL_32_LIN: # %bb.0: ; AVX512DQVL_32_LIN-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero -; AVX512DQVL_32_LIN-NEXT: vcvttpd2qq %ymm0, %ymm0 +; AVX512DQVL_32_LIN-NEXT: vcvttpd2qq %xmm0, %xmm0 ; AVX512DQVL_32_LIN-NEXT: vmovd %xmm0, %eax ; AVX512DQVL_32_LIN-NEXT: vpextrd $1, %xmm0, %edx -; AVX512DQVL_32_LIN-NEXT: vzeroupper ; AVX512DQVL_32_LIN-NEXT: retl ; ; AVX512_64-LABEL: d_to_s64: -- 2.7.4