From cb7e4e8193f3504073932f9e9337fec6ab7675df Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Mon, 3 Jun 2019 13:02:07 +0000 Subject: [PATCH] [SelectionDAG] Add [us]itofp(undef) --> 0 constant fold (PR39205) We were missing this fold in the DAG, which I've copied directly from llvm::ConstantFoldCastInstruction Differential Revision: https://reviews.llvm.org/D62807 llvm-svn: 362397 --- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 8 ++ llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 6 ++ llvm/test/CodeGen/X86/avx512-cvt-widen.ll | 28 ++++-- llvm/test/CodeGen/X86/avx512-cvt.ll | 28 ++++-- llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll | 130 ++++++++----------------- llvm/test/CodeGen/X86/vec_int_to_fp.ll | 130 ++++++++----------------- 6 files changed, 126 insertions(+), 204 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 27e56a3..a4c1e98 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -12440,6 +12440,10 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); + // [us]itofp(undef) = 0, because the result value is bounded. + if (N0.isUndef()) + return DAG.getConstantFP(0.0, SDLoc(N), VT); + // fold (sint_to_fp c1) -> c1fp if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values @@ -12497,6 +12501,10 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { EVT VT = N->getValueType(0); EVT OpVT = N0.getValueType(); + // [us]itofp(undef) = 0, because the result value is bounded. + if (N0.isUndef()) + return DAG.getConstantFP(0.0, SDLoc(N), VT); + // fold (uint_to_fp c1) -> c1fp if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && // ...but only if the target supports immediate floating-point values diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d6d8cf5..1dc9d74 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -4440,6 +4440,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT, if (Operand.isUndef()) return getUNDEF(VT); break; + case ISD::SINT_TO_FP: + case ISD::UINT_TO_FP: + // [us]itofp(undef) = 0, because the result value is bounded. + if (Operand.isUndef()) + return getConstantFP(0.0, DL, VT); + break; case ISD::SIGN_EXTEND: assert(VT.isInteger() && Operand.getValueType().isInteger() && "Invalid SIGN_EXTEND!"); diff --git a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll index 90631ba..eddd003 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt-widen.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt-widen.ll @@ -120,22 +120,30 @@ define <2 x double> @slto2f64(<2 x i64> %a) { } define <2 x float> @sltof2f32(<2 x i64> %a) { -; NODQ-LABEL: sltof2f32: -; NODQ: # %bb.0: -; NODQ-NEXT: vpextrq $1, %xmm0, %rax -; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 -; NODQ-NEXT: vmovq %xmm0, %rax -; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; NODQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] -; NODQ-NEXT: retq +; NOVLDQ-LABEL: sltof2f32: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: vpextrq $1, %xmm0, %rax +; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 +; NOVLDQ-NEXT: vmovq %xmm0, %rax +; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 +; NOVLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sltof2f32: ; VLDQ: # %bb.0: ; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0 ; VLDQ-NEXT: retq ; +; VLNODQ-LABEL: sltof2f32: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax +; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 +; VLNODQ-NEXT: vmovq %xmm0, %rax +; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 +; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; VLNODQ-NEXT: retq +; ; DQNOVL-LABEL: sltof2f32: ; DQNOVL: # %bb.0: ; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 diff --git a/llvm/test/CodeGen/X86/avx512-cvt.ll b/llvm/test/CodeGen/X86/avx512-cvt.ll index 2b51126..c42be0d 100644 --- a/llvm/test/CodeGen/X86/avx512-cvt.ll +++ b/llvm/test/CodeGen/X86/avx512-cvt.ll @@ -120,22 +120,30 @@ define <2 x double> @slto2f64(<2 x i64> %a) { } define <2 x float> @sltof2f32(<2 x i64> %a) { -; NODQ-LABEL: sltof2f32: -; NODQ: # %bb.0: -; NODQ-NEXT: vpextrq $1, %xmm0, %rax -; NODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 -; NODQ-NEXT: vmovq %xmm0, %rax -; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; NODQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; NODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; NODQ-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] -; NODQ-NEXT: retq +; NOVLDQ-LABEL: sltof2f32: +; NOVLDQ: # %bb.0: +; NOVLDQ-NEXT: vpextrq $1, %xmm0, %rax +; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 +; NOVLDQ-NEXT: vmovq %xmm0, %rax +; NOVLDQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 +; NOVLDQ-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; NOVLDQ-NEXT: retq ; ; VLDQ-LABEL: sltof2f32: ; VLDQ: # %bb.0: ; VLDQ-NEXT: vcvtqq2ps %xmm0, %xmm0 ; VLDQ-NEXT: retq ; +; VLNODQ-LABEL: sltof2f32: +; VLNODQ: # %bb.0: +; VLNODQ-NEXT: vpextrq $1, %xmm0, %rax +; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 +; VLNODQ-NEXT: vmovq %xmm0, %rax +; VLNODQ-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 +; VLNODQ-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; VLNODQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero +; VLNODQ-NEXT: retq +; ; DQNOVL-LABEL: sltof2f32: ; DQNOVL: # %bb.0: ; DQNOVL-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll b/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll index 78bc214..26027bc 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp-widen.ll @@ -1186,9 +1186,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_2i64_to_4f32: @@ -1197,9 +1195,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_2i64_to_4f32: @@ -1208,9 +1204,8 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_2i64_to_4f32: @@ -1309,11 +1304,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0] -; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_4i64_to_4f32_undef: @@ -1323,10 +1315,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0 -; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: cvtsi2ss %rax, %xmm1 -; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; SSE41-NEXT: retq ; ; VEX-LABEL: sitofp_4i64_to_4f32_undef: @@ -1335,9 +1324,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_4i64_to_4f32_undef: @@ -1346,9 +1333,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef: @@ -1357,9 +1342,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef: @@ -1918,7 +1902,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; VEX-NEXT: js .LBB39_4 ; VEX-NEXT: # %bb.5: ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VEX-NEXT: jmp .LBB39_6 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; VEX-NEXT: retq ; VEX-NEXT: .LBB39_4: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx @@ -1926,15 +1911,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; VEX-NEXT: orq %rcx, %rax ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; VEX-NEXT: .LBB39_6: -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; VEX-NEXT: js .LBB39_8 -; VEX-NEXT: # %bb.7: -; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; VEX-NEXT: .LBB39_8: -; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i64_to_4f32: @@ -1943,9 +1920,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1 -; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_2i64_to_4f32: @@ -1954,9 +1929,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1 -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_2i64_to_4f32: @@ -2126,49 +2100,39 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) { define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE2-LABEL: uitofp_4i64_to_4f32_undef: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB41_1 ; SSE2-NEXT: # %bb.2: -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm1 ; SSE2-NEXT: jmp .LBB41_3 ; SSE2-NEXT: .LBB41_1: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: addss %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm1 +; SSE2-NEXT: addss %xmm1, %xmm1 ; SSE2-NEXT: .LBB41_3: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; SSE2-NEXT: movq %xmm1, %rax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB41_4 ; SSE2-NEXT: # %bb.5: -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 ; SSE2-NEXT: jmp .LBB41_6 ; SSE2-NEXT: .LBB41_4: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 -; SSE2-NEXT: addss %xmm1, %xmm1 +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 +; SSE2-NEXT: addss %xmm0, %xmm0 ; SSE2-NEXT: .LBB41_6: -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: js .LBB41_8 -; SSE2-NEXT: # %bb.7: -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 -; SSE2-NEXT: .LBB41_8: -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero ; SSE2-NEXT: retq ; ; SSE41-LABEL: uitofp_4i64_to_4f32_undef: @@ -2193,7 +2157,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE41-NEXT: # %bb.5: ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0 -; SSE41-NEXT: jmp .LBB41_6 +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; SSE41-NEXT: retq ; SSE41-NEXT: .LBB41_4: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx @@ -2202,16 +2167,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0 ; SSE41-NEXT: addss %xmm0, %xmm0 -; SSE41-NEXT: .LBB41_6: -; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: js .LBB41_8 -; SSE41-NEXT: # %bb.7: -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: cvtsi2ss %rax, %xmm1 -; SSE41-NEXT: .LBB41_8: -; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; SSE41-NEXT: retq ; ; VEX-LABEL: uitofp_4i64_to_4f32_undef: @@ -2235,7 +2191,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; VEX-NEXT: js .LBB41_4 ; VEX-NEXT: # %bb.5: ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VEX-NEXT: jmp .LBB41_6 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; VEX-NEXT: retq ; VEX-NEXT: .LBB41_4: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx @@ -2243,15 +2200,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; VEX-NEXT: orq %rcx, %rax ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; VEX-NEXT: .LBB41_6: -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; VEX-NEXT: js .LBB41_8 -; VEX-NEXT: # %bb.7: -; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; VEX-NEXT: .LBB41_8: -; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: uitofp_4i64_to_4f32_undef: @@ -2260,9 +2209,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1 -; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef: @@ -2271,9 +2218,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1 -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef: diff --git a/llvm/test/CodeGen/X86/vec_int_to_fp.ll b/llvm/test/CodeGen/X86/vec_int_to_fp.ll index 9b54307..f7d8216 100644 --- a/llvm/test/CodeGen/X86/vec_int_to_fp.ll +++ b/llvm/test/CodeGen/X86/vec_int_to_fp.ll @@ -1186,9 +1186,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_2i64_to_4f32: @@ -1197,9 +1195,7 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_2i64_to_4f32: @@ -1208,9 +1204,8 @@ define <4 x float> @sitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_2i64_to_4f32: @@ -1309,11 +1304,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: xorps %xmm0, %xmm0 ; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[0,0] -; SSE2-NEXT: movaps %xmm1, %xmm0 +; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero ; SSE2-NEXT: retq ; ; SSE41-LABEL: sitofp_4i64_to_4f32_undef: @@ -1323,10 +1315,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE41-NEXT: movq %xmm0, %rax ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0 -; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: cvtsi2ss %rax, %xmm1 -; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; SSE41-NEXT: retq ; ; VEX-LABEL: sitofp_4i64_to_4f32_undef: @@ -1335,9 +1324,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; VEX-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; VEX-NEXT: vmovq %xmm0, %rax ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: sitofp_4i64_to_4f32_undef: @@ -1346,9 +1333,7 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512F-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: sitofp_4i64_to_4f32_undef: @@ -1357,9 +1342,8 @@ define <4 x float> @sitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512VL-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: sitofp_4i64_to_4f32_undef: @@ -1918,7 +1902,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; VEX-NEXT: js .LBB39_4 ; VEX-NEXT: # %bb.5: ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VEX-NEXT: jmp .LBB39_6 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; VEX-NEXT: retq ; VEX-NEXT: .LBB39_4: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx @@ -1926,15 +1911,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; VEX-NEXT: orq %rcx, %rax ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; VEX-NEXT: .LBB39_6: -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; VEX-NEXT: js .LBB39_8 -; VEX-NEXT: # %bb.7: -; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; VEX-NEXT: .LBB39_8: -; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: uitofp_2i64_to_4f32: @@ -1943,9 +1920,7 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1 -; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_2i64_to_4f32: @@ -1954,9 +1929,8 @@ define <4 x float> @uitofp_2i64_to_4f32(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1 -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_2i64_to_4f32: @@ -2126,49 +2100,39 @@ define <4 x float> @uitofp_2i64_to_2f32(<2 x i64> %a) { define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE2-LABEL: uitofp_4i64_to_4f32_undef: ; SSE2: # %bb.0: -; SSE2-NEXT: movdqa %xmm0, %xmm1 ; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB41_1 ; SSE2-NEXT: # %bb.2: -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm1 ; SSE2-NEXT: jmp .LBB41_3 ; SSE2-NEXT: .LBB41_1: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm0, %xmm0 -; SSE2-NEXT: cvtsi2ss %rax, %xmm0 -; SSE2-NEXT: addss %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm1 +; SSE2-NEXT: addss %xmm1, %xmm1 ; SSE2-NEXT: .LBB41_3: -; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] -; SSE2-NEXT: movq %xmm1, %rax +; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] +; SSE2-NEXT: movq %xmm0, %rax ; SSE2-NEXT: testq %rax, %rax ; SSE2-NEXT: js .LBB41_4 ; SSE2-NEXT: # %bb.5: -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 ; SSE2-NEXT: jmp .LBB41_6 ; SSE2-NEXT: .LBB41_4: ; SSE2-NEXT: movq %rax, %rcx ; SSE2-NEXT: shrq %rcx ; SSE2-NEXT: andl $1, %eax ; SSE2-NEXT: orq %rcx, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 -; SSE2-NEXT: addss %xmm1, %xmm1 +; SSE2-NEXT: xorps %xmm0, %xmm0 +; SSE2-NEXT: cvtsi2ss %rax, %xmm0 +; SSE2-NEXT: addss %xmm0, %xmm0 ; SSE2-NEXT: .LBB41_6: -; SSE2-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] -; SSE2-NEXT: testq %rax, %rax -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: js .LBB41_8 -; SSE2-NEXT: # %bb.7: -; SSE2-NEXT: xorps %xmm1, %xmm1 -; SSE2-NEXT: cvtsi2ss %rax, %xmm1 -; SSE2-NEXT: .LBB41_8: -; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; SSE2-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] +; SSE2-NEXT: movq {{.*#+}} xmm0 = xmm1[0],zero ; SSE2-NEXT: retq ; ; SSE41-LABEL: uitofp_4i64_to_4f32_undef: @@ -2193,7 +2157,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE41-NEXT: # %bb.5: ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0 -; SSE41-NEXT: jmp .LBB41_6 +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; SSE41-NEXT: retq ; SSE41-NEXT: .LBB41_4: ; SSE41-NEXT: movq %rax, %rcx ; SSE41-NEXT: shrq %rcx @@ -2202,16 +2167,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; SSE41-NEXT: xorps %xmm0, %xmm0 ; SSE41-NEXT: cvtsi2ss %rax, %xmm0 ; SSE41-NEXT: addss %xmm0, %xmm0 -; SSE41-NEXT: .LBB41_6: -; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; SSE41-NEXT: testq %rax, %rax -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: js .LBB41_8 -; SSE41-NEXT: # %bb.7: -; SSE41-NEXT: xorps %xmm1, %xmm1 -; SSE41-NEXT: cvtsi2ss %rax, %xmm1 -; SSE41-NEXT: .LBB41_8: -; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; SSE41-NEXT: insertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; SSE41-NEXT: retq ; ; VEX-LABEL: uitofp_4i64_to_4f32_undef: @@ -2235,7 +2191,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; VEX-NEXT: js .LBB41_4 ; VEX-NEXT: # %bb.5: ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 -; VEX-NEXT: jmp .LBB41_6 +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero +; VEX-NEXT: retq ; VEX-NEXT: .LBB41_4: ; VEX-NEXT: movq %rax, %rcx ; VEX-NEXT: shrq %rcx @@ -2243,15 +2200,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; VEX-NEXT: orq %rcx, %rax ; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm0 ; VEX-NEXT: vaddss %xmm0, %xmm0, %xmm0 -; VEX-NEXT: .LBB41_6: -; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; VEX-NEXT: testq %rax, %rax -; VEX-NEXT: vxorps %xmm1, %xmm1, %xmm1 -; VEX-NEXT: js .LBB41_8 -; VEX-NEXT: # %bb.7: -; VEX-NEXT: vcvtsi2ss %rax, %xmm2, %xmm1 -; VEX-NEXT: .LBB41_8: -; VEX-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; VEX-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; VEX-NEXT: retq ; ; AVX512F-LABEL: uitofp_4i64_to_4f32_undef: @@ -2260,9 +2209,7 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512F-NEXT: vmovq %xmm0, %rax ; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512F-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1 -; AVX512F-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: uitofp_4i64_to_4f32_undef: @@ -2271,9 +2218,8 @@ define <4 x float> @uitofp_4i64_to_4f32_undef(<2 x i64> %a) { ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm1, %xmm1 ; AVX512VL-NEXT: vmovq %xmm0, %rax ; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm0 -; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[2,3] -; AVX512VL-NEXT: vcvtusi2ss %rax, %xmm2, %xmm1 -; AVX512VL-NEXT: vshufps {{.*#+}} xmm0 = xmm0[0,1],xmm1[0,0] +; AVX512VL-NEXT: vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] +; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero ; AVX512VL-NEXT: retq ; ; AVX512DQ-LABEL: uitofp_4i64_to_4f32_undef: -- 2.7.4