From 8238580aae31a222133fa52d3d9f945b64a3e2ec Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Tue, 11 Sep 2018 17:57:27 +0000
Subject: [PATCH] [X86] Prefer unpckhpd over movhlps in isel for fake unary cases

In r337348, I changed lowering to prefer X86ISD::UNPCKL/UNPCKH opcodes
over MOVLHPS/MOVHLPS for v2f64 {0,0} and {1,1} shuffles when we have
SSE2. This enabled the removal of a bunch of weirdly bitcasted isel
patterns in r337349. To avoid changing the tests, I placed a gross hack
in isel to still emit movhlps instructions for fake unary unpckh nodes.
A similar hack was not needed for unpckl and movlhps because we do
execution domain switching for those. But unpckh and movhlps have
swapped operand order.

This patch removes the hack.

This is a code size increase since unpckhpd requires a 0x66 prefix and
movhlps does not. But if that's a big concern we should be using
movhlps for all unpckhpd opcodes and let commuteInstruction turn it
into unpckhpd when it's advantageous.

Differential Revision: https://reviews.llvm.org/D49499

llvm-svn: 341973
---
 llvm/lib/Target/X86/X86InstrSSE.td               | 13 ---
 llvm/test/CodeGen/X86/buildvec-insertvec.ll      |  2 +-
 llvm/test/CodeGen/X86/combine-fcopysign.ll       |  6 +-
 llvm/test/CodeGen/X86/complex-fastmath.ll        | 16 ++--
 llvm/test/CodeGen/X86/ftrunc.ll                  | 26 +++---
 llvm/test/CodeGen/X86/haddsub-2.ll               | 12 +--
 llvm/test/CodeGen/X86/haddsub-3.ll               |  4 +-
 llvm/test/CodeGen/X86/haddsub-undef.ll           |  6 +-
 llvm/test/CodeGen/X86/pr11334.ll                 |  2 +-
 llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll       |  4 +-
 llvm/test/CodeGen/X86/vec_fp_to_int.ll           |  6 +-
 .../X86/vector-constrained-fp-intrinsics.ll      | 10 +-
 llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll | 66 +++++++--------
 llvm/test/CodeGen/X86/vector-reduce-fadd.ll      | 84 +++++++++----------
 llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll | 22 ++---
 llvm/test/CodeGen/X86/vector-reduce-fmax.ll      | 22 ++---
 llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll | 22 ++---
 llvm/test/CodeGen/X86/vector-reduce-fmin.ll      | 22 ++---
 llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll | 66 +++++++--------
 llvm/test/CodeGen/X86/vector-reduce-fmul.ll      | 96 +++++++++-----
 llvm/test/CodeGen/X86/widen_conv-3.ll            |  2 +-
 llvm/test/CodeGen/X86/widen_conv-4.ll            |  4 +-
 22 files changed, 250 insertions(+), 263 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index db5cfff..359089a 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -820,19 +820,6 @@ let Constraints = "$src1 = $dst" in {
                       Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
 }
 
-// TODO: This is largely to trick fastisel into ignoring the pattern.
-def UnpckhUnary : PatFrag<(ops node:$src1, node:$src2),
-                          (X86Unpckh node:$src1, node:$src2), [{
-  return N->getOperand(0) == N->getOperand(1);
-}]>;
-
-let Predicates = [UseSSE2] in {
-  // TODO: This is a hack pattern to allow lowering to emit unpckh instead of
-  // movhlps for sse2 without changing a bunch of tests.
- def : Pat<(v2f64 (UnpckhUnary VR128:$src, VR128:$src)), - (MOVHLPSrr VR128:$src, VR128:$src)>; -} - //===----------------------------------------------------------------------===// // SSE 1 & 2 - Conversion Instructions //===----------------------------------------------------------------------===// diff --git a/llvm/test/CodeGen/X86/buildvec-insertvec.ll b/llvm/test/CodeGen/X86/buildvec-insertvec.ll index 882814b..ce7614c 100644 --- a/llvm/test/CodeGen/X86/buildvec-insertvec.ll +++ b/llvm/test/CodeGen/X86/buildvec-insertvec.ll @@ -38,7 +38,7 @@ define <4 x float> @test_negative_zero_1(<4 x float> %A) { ; SSE2-LABEL: test_negative_zero_1: ; SSE2: # %bb.0: # %entry ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: xorps %xmm2, %xmm2 ; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3] ; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero diff --git a/llvm/test/CodeGen/X86/combine-fcopysign.ll b/llvm/test/CodeGen/X86/combine-fcopysign.ll index 72b7f76..2c781de 100644 --- a/llvm/test/CodeGen/X86/combine-fcopysign.ll +++ b/llvm/test/CodeGen/X86/combine-fcopysign.ll @@ -197,7 +197,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float ; SSE-NEXT: cvtss2sd %xmm2, %xmm4 ; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3] ; SSE-NEXT: movaps %xmm2, %xmm6 -; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm2[1],xmm6[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1],xmm2[1] ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[2,3] ; SSE-NEXT: movaps {{.*#+}} xmm7 ; SSE-NEXT: movaps %xmm0, %xmm2 @@ -213,7 +213,7 @@ define <4 x double> @combine_vec_fcopysign_fpext_sgn(<4 x double> %x, <4 x float ; SSE-NEXT: orps %xmm0, %xmm4 ; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0] ; SSE-NEXT: movaps %xmm1, %xmm0 -; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE-NEXT: andps %xmm7, %xmm0 ; SSE-NEXT: cvtss2sd %xmm3, %xmm3 ; SSE-NEXT: andps %xmm8, %xmm3 @@ -260,7 +260,7 @@ define <4 x float> @combine_vec_fcopysign_fptrunc_sgn(<4 x float> %x, <4 x doubl ; SSE-NEXT: orps %xmm6, %xmm1 ; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] ; SSE-NEXT: movaps %xmm3, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE-NEXT: andps %xmm5, %xmm1 ; SSE-NEXT: xorps %xmm6, %xmm6 ; SSE-NEXT: cvtsd2ss %xmm2, %xmm6 diff --git a/llvm/test/CodeGen/X86/complex-fastmath.ll b/llvm/test/CodeGen/X86/complex-fastmath.ll index bae1ac3..d61c124 100644 --- a/llvm/test/CodeGen/X86/complex-fastmath.ll +++ b/llvm/test/CodeGen/X86/complex-fastmath.ll @@ -57,9 +57,9 @@ define <2 x float> @complex_square_f32(<2 x float>) #0 { define <2 x double> @complex_square_f64(<2 x double>) #0 { ; SSE-LABEL: complex_square_f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] -; SSE-NEXT: movaps %xmm0, %xmm2 +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE-NEXT: movapd %xmm0, %xmm2 ; SSE-NEXT: addsd %xmm0, %xmm2 ; SSE-NEXT: mulsd %xmm1, %xmm2 ; SSE-NEXT: mulsd %xmm0, %xmm0 @@ -160,11 +160,11 @@ define <2 x float> @complex_mul_f32(<2 x float>, <2 x float>) #0 { define <2 x double> @complex_mul_f64(<2 x double>, <2 x double>) #0 { ; SSE-LABEL: complex_mul_f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] -; SSE-NEXT: movaps %xmm1, %xmm3 -; 
SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] -; SSE-NEXT: movaps %xmm3, %xmm4 +; SSE-NEXT: movapd %xmm0, %xmm2 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] +; SSE-NEXT: movapd %xmm1, %xmm3 +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] +; SSE-NEXT: movapd %xmm3, %xmm4 ; SSE-NEXT: mulsd %xmm0, %xmm4 ; SSE-NEXT: mulsd %xmm1, %xmm0 ; SSE-NEXT: mulsd %xmm2, %xmm1 diff --git a/llvm/test/CodeGen/X86/ftrunc.ll b/llvm/test/CodeGen/X86/ftrunc.ll index 1b0bcbf..c0e1674 100644 --- a/llvm/test/CodeGen/X86/ftrunc.ll +++ b/llvm/test/CodeGen/X86/ftrunc.ll @@ -67,7 +67,7 @@ define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 { ; SSE2-NEXT: cvttss2si %xmm1, %rax ; SSE2-NEXT: movd %eax, %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: cvttss2si %xmm2, %rax ; SSE2-NEXT: movd %eax, %xmm2 ; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] @@ -105,10 +105,10 @@ define <4 x float> @trunc_unsigned_v4f32(<4 x float> %x) #0 { define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 { ; SSE2-LABEL: trunc_unsigned_v2f64: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: movapd %xmm1, %xmm3 ; SSE2-NEXT: subsd %xmm2, %xmm3 ; SSE2-NEXT: cvttsd2si %xmm3, %rax ; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000 @@ -116,7 +116,7 @@ define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 { ; SSE2-NEXT: cvttsd2si %xmm1, %rdx ; SSE2-NEXT: ucomisd %xmm2, %xmm1 ; SSE2-NEXT: cmovaeq %rax, %rdx -; SSE2-NEXT: movaps %xmm0, %xmm1 +; SSE2-NEXT: movapd %xmm0, %xmm1 ; SSE2-NEXT: subsd %xmm2, %xmm1 ; SSE2-NEXT: cvttsd2si %xmm1, %rax ; SSE2-NEXT: xorq %rcx, %rax @@ -155,10 +155,10 @@ define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 { define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 { ; SSE2-LABEL: trunc_unsigned_v4f64: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE2-NEXT: movapd %xmm1, %xmm3 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero -; SSE2-NEXT: movaps %xmm3, %xmm4 +; SSE2-NEXT: movapd %xmm3, %xmm4 ; SSE2-NEXT: subsd %xmm2, %xmm4 ; SSE2-NEXT: cvttsd2si %xmm4, %rcx ; SSE2-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000 @@ -166,23 +166,23 @@ define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 { ; SSE2-NEXT: cvttsd2si %xmm3, %rax ; SSE2-NEXT: ucomisd %xmm2, %xmm3 ; SSE2-NEXT: cmovaeq %rcx, %rax -; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: movapd %xmm1, %xmm3 ; SSE2-NEXT: subsd %xmm2, %xmm3 ; SSE2-NEXT: cvttsd2si %xmm3, %rsi ; SSE2-NEXT: xorq %rdx, %rsi ; SSE2-NEXT: cvttsd2si %xmm1, %rcx ; SSE2-NEXT: ucomisd %xmm2, %xmm1 ; SSE2-NEXT: cmovaeq %rsi, %rcx -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] -; SSE2-NEXT: movaps %xmm1, %xmm3 +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] +; SSE2-NEXT: movapd %xmm1, %xmm3 ; SSE2-NEXT: subsd %xmm2, %xmm3 ; SSE2-NEXT: cvttsd2si %xmm3, %rsi ; SSE2-NEXT: xorq %rdx, %rsi ; SSE2-NEXT: cvttsd2si %xmm1, %rdi ; SSE2-NEXT: ucomisd %xmm2, %xmm1 ; SSE2-NEXT: cmovaeq %rsi, %rdi -; SSE2-NEXT: movaps %xmm0, %xmm1 +; 
SSE2-NEXT: movapd %xmm0, %xmm1 ; SSE2-NEXT: subsd %xmm2, %xmm1 ; SSE2-NEXT: cvttsd2si %xmm1, %rsi ; SSE2-NEXT: xorq %rdx, %rsi diff --git a/llvm/test/CodeGen/X86/haddsub-2.ll b/llvm/test/CodeGen/X86/haddsub-2.ll index 92a393b..a75fe12 100644 --- a/llvm/test/CodeGen/X86/haddsub-2.ll +++ b/llvm/test/CodeGen/X86/haddsub-2.ll @@ -902,7 +902,7 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) { ; SSE-LABEL: not_a_hsub_2: ; SSE: # %bb.0: ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: movaps %xmm0, %xmm3 ; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3] ; SSE-NEXT: subss %xmm3, %xmm2 @@ -912,7 +912,7 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: movaps %xmm1, %xmm2 ; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3] ; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE-NEXT: subss %xmm3, %xmm2 ; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] ; SSE-NEXT: subss %xmm3, %xmm1 @@ -958,11 +958,11 @@ define <4 x float> @not_a_hsub_2(<4 x float> %A, <4 x float> %B) { define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) { ; SSE-LABEL: not_a_hsub_3: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm1, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE-NEXT: movapd %xmm1, %xmm2 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE-NEXT: subsd %xmm2, %xmm1 -; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: movapd %xmm0, %xmm2 +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: subsd %xmm0, %xmm2 ; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0] ; SSE-NEXT: movapd %xmm2, %xmm0 diff --git a/llvm/test/CodeGen/X86/haddsub-3.ll b/llvm/test/CodeGen/X86/haddsub-3.ll index 1a2863f..b1406fc 100644 --- a/llvm/test/CodeGen/X86/haddsub-3.ll +++ b/llvm/test/CodeGen/X86/haddsub-3.ll @@ -10,7 +10,7 @@ define float @pr26491(<4 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -19,7 +19,7 @@ define float @pr26491(<4 x float> %a0) { ; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSSE3-NEXT: addps %xmm0, %xmm1 ; SSSE3-NEXT: movaps %xmm1, %xmm0 -; SSSE3-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSSE3-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSSE3-NEXT: addss %xmm1, %xmm0 ; SSSE3-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/haddsub-undef.ll b/llvm/test/CodeGen/X86/haddsub-undef.ll index 004fec5..c26bf13 100644 --- a/llvm/test/CodeGen/X86/haddsub-undef.ll +++ b/llvm/test/CodeGen/X86/haddsub-undef.ll @@ -102,8 +102,8 @@ define <4 x float> @test4_undef(<4 x float> %a, <4 x float> %b) { define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) { ; SSE-LABEL: test5_undef: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: addsd %xmm0, %xmm1 ; SSE-NEXT: movapd %xmm1, %xmm0 ; SSE-NEXT: retq @@ -168,7 +168,7 @@ define <4 x float> @test8_undef(<4 x float> %a, <4 x float> %b) { ; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE-NEXT: addss %xmm0, %xmm1 ; 
SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE-NEXT: addss %xmm2, %xmm0 ; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0] diff --git a/llvm/test/CodeGen/X86/pr11334.ll b/llvm/test/CodeGen/X86/pr11334.ll index d0b965a1..ea40dd3 100644 --- a/llvm/test/CodeGen/X86/pr11334.ll +++ b/llvm/test/CodeGen/X86/pr11334.ll @@ -25,7 +25,7 @@ define <3 x double> @v3f2d_ext_vec(<3 x float> %v1) nounwind { ; SSE-NEXT: cvtps2pd %xmm0, %xmm0 ; SSE-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm2, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE-NEXT: fldl -{{[0-9]+}}(%rsp) ; SSE-NEXT: movaps %xmm2, %xmm0 ; SSE-NEXT: retq diff --git a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll index d563180..8041647 100644 --- a/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll +++ b/llvm/test/CodeGen/X86/sse3-avx-addsub-2.ll @@ -432,9 +432,9 @@ define <4 x float> @test16(<4 x float> %A, <4 x float> %B) { ; SSE-NEXT: movaps %xmm0, %xmm2 ; SSE-NEXT: subss %xmm3, %xmm2 ; SSE-NEXT: movaps %xmm0, %xmm4 -; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] ; SSE-NEXT: movaps %xmm1, %xmm5 -; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE-NEXT: subss %xmm5, %xmm4 ; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] ; SSE-NEXT: addss %xmm3, %xmm5 diff --git a/llvm/test/CodeGen/X86/vec_fp_to_int.ll b/llvm/test/CodeGen/X86/vec_fp_to_int.ll index 0d25fde..e09cd0a 100644 --- a/llvm/test/CodeGen/X86/vec_fp_to_int.ll +++ b/llvm/test/CodeGen/X86/vec_fp_to_int.ll @@ -1575,7 +1575,7 @@ define <4 x i32> @fptoui_4f32_to_4i32(<4 x float> %a) { ; SSE-NEXT: cvttss2si %xmm1, %rax ; SSE-NEXT: movd %eax, %xmm1 ; SSE-NEXT: movaps %xmm0, %xmm2 -; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] @@ -1858,7 +1858,7 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) { ; SSE-NEXT: cvttss2si %xmm0, %rax ; SSE-NEXT: movd %eax, %xmm0 ; SSE-NEXT: movaps %xmm2, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm2[1],xmm3[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1] ; SSE-NEXT: cvttss2si %xmm3, %rax ; SSE-NEXT: movd %eax, %xmm3 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] @@ -1874,7 +1874,7 @@ define <8 x i32> @fptoui_8f32_to_8i32(<8 x float> %a) { ; SSE-NEXT: cvttss2si %xmm2, %rax ; SSE-NEXT: movd %eax, %xmm2 ; SSE-NEXT: movaps %xmm1, %xmm3 -; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE-NEXT: cvttss2si %xmm3, %rax ; SSE-NEXT: movd %eax, %xmm3 ; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1] diff --git a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll index f998cbd..1e7b594 100644 --- a/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll +++ b/llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll @@ -88,7 +88,7 @@ define <3 x double> @constrained_vector_fdiv_v3f64() { ; NO-FMA-NEXT: divsd {{.*}}(%rip), %xmm1 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; 
NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; @@ -473,7 +473,7 @@ define <3 x double> @constrained_vector_fmul_v3f64() { ; NO-FMA-NEXT: mulsd {{.*}}(%rip), %xmm1 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; @@ -604,7 +604,7 @@ define <3 x double> @constrained_vector_fadd_v3f64() { ; NO-FMA-NEXT: addsd {{.*}}(%rip), %xmm1 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; @@ -737,7 +737,7 @@ define <3 x double> @constrained_vector_fsub_v3f64() { ; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; @@ -1210,7 +1210,7 @@ define <3 x double> @constrained_vector_sqrt_v3f64() { ; NO-FMA-NEXT: sqrtpd {{.*}}(%rip), %xmm0 ; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: movapd %xmm0, %xmm1 -; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp) ; NO-FMA-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll index 46769d0..e126d90 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fadd-fast.ll @@ -41,7 +41,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) { ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -51,7 +51,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) { ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm1, %xmm0 -; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: haddps %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -78,7 +78,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) { ; SSE2: # %bb.0: ; SSE2-NEXT: addps %xmm2, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -89,7 +89,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) { ; SSE41: # %bb.0: ; SSE41-NEXT: addps %xmm2, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 -; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: haddps %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -126,7 +126,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE2-NEXT: addps %xmm3, %xmm1 ; SSE2-NEXT: addps %xmm2, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; 
SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addps %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3] @@ -139,7 +139,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE41-NEXT: addps %xmm3, %xmm1 ; SSE41-NEXT: addps %xmm2, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 -; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: haddps %xmm0, %xmm0 ; SSE41-NEXT: retq @@ -208,7 +208,7 @@ define float @test_v4f32_zero(<4 x float> %a0) { ; SSE2-LABEL: test_v4f32_zero: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -218,7 +218,7 @@ define float @test_v4f32_zero(<4 x float> %a0) { ; SSE41-LABEL: test_v4f32_zero: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -246,7 +246,7 @@ define float @test_v8f32_zero(<8 x float> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: addps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -257,7 +257,7 @@ define float @test_v8f32_zero(<8 x float> %a0) { ; SSE41: # %bb.0: ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -295,7 +295,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE2-NEXT: addps %xmm2, %xmm0 ; SSE2-NEXT: addps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -308,7 +308,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE41-NEXT: addps %xmm2, %xmm0 ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -378,7 +378,7 @@ define float @test_v4f32_undef(<4 x float> %a0) { ; SSE2-LABEL: test_v4f32_undef: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -388,7 +388,7 @@ define float @test_v4f32_undef(<4 x float> %a0) { ; SSE41-LABEL: test_v4f32_undef: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 
; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -416,7 +416,7 @@ define float @test_v8f32_undef(<8 x float> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: addps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -427,7 +427,7 @@ define float @test_v8f32_undef(<8 x float> %a0) { ; SSE41: # %bb.0: ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -465,7 +465,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; SSE2-NEXT: addps %xmm2, %xmm0 ; SSE2-NEXT: addps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addps %xmm0, %xmm1 ; SSE2-NEXT: movaps %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3] @@ -478,7 +478,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; SSE41-NEXT: addps %xmm2, %xmm0 ; SSE41-NEXT: addps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addps %xmm0, %xmm1 ; SSE41-NEXT: haddps %xmm1, %xmm1 ; SSE41-NEXT: movaps %xmm1, %xmm0 @@ -520,8 +520,8 @@ define float @test_v16f32_undef(<16 x float> %a0) { define double @test_v2f64(double %a0, <2 x double> %a1) { ; SSE2-LABEL: test_v2f64: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: movapd %xmm1, %xmm0 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -549,7 +549,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) { ; SSE2: # %bb.0: ; SSE2-NEXT: addpd %xmm2, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -588,7 +588,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) { ; SSE2-NEXT: addpd %xmm3, %xmm1 ; SSE2-NEXT: addpd %xmm2, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -637,7 +637,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) { ; SSE2-NEXT: addpd %xmm2, %xmm4 ; SSE2-NEXT: addpd %xmm1, %xmm4 ; SSE2-NEXT: movapd %xmm4, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1] ; SSE2-NEXT: addpd %xmm4, %xmm0 ; SSE2-NEXT: retq ; @@ -689,8 +689,8 @@ define double @test_v16f64(double %a0, <16 x double> %a1) { define double @test_v2f64_zero(<2 x double> %a0) { ; SSE2-LABEL: test_v2f64_zero: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -718,7 +718,7 @@ define double @test_v4f64_zero(<4 x double> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: addpd %xmm1, 
%xmm0 ; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -757,7 +757,7 @@ define double @test_v8f64_zero(<8 x double> %a0) { ; SSE2-NEXT: addpd %xmm2, %xmm0 ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -806,7 +806,7 @@ define double @test_v16f64_zero(<16 x double> %a0) { ; SSE2-NEXT: addpd %xmm3, %xmm1 ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; @@ -858,8 +858,8 @@ define double @test_v16f64_zero(<16 x double> %a0) { define double @test_v2f64_undef(<2 x double> %a0) { ; SSE2-LABEL: test_v2f64_undef: ; SSE2: # %bb.0: -; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: movapd %xmm0, %xmm1 +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -887,7 +887,7 @@ define double @test_v4f64_undef(<4 x double> %a0) { ; SSE2: # %bb.0: ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -926,7 +926,7 @@ define double @test_v8f64_undef(<8 x double> %a0) { ; SSE2-NEXT: addpd %xmm2, %xmm0 ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: movapd %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 ; SSE2-NEXT: retq @@ -975,7 +975,7 @@ define double @test_v16f64_undef(<16 x double> %a0) { ; SSE2-NEXT: addpd %xmm3, %xmm1 ; SSE2-NEXT: addpd %xmm0, %xmm1 ; SSE2-NEXT: movapd %xmm1, %xmm0 -; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1] ; SSE2-NEXT: addpd %xmm1, %xmm0 ; SSE2-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll index 24ef206..f025c57 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fadd.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fadd.ll @@ -50,7 +50,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) { ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -62,7 +62,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) { ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -101,7 +101,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) { ; SSE2-NEXT: 
shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE2-NEXT: addss %xmm3, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -110,7 +110,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -122,7 +122,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) { ; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm3, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1] ; SSE41-NEXT: addss %xmm3, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -130,7 +130,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) { ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -187,7 +187,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm5, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm5 -; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE2-NEXT: addss %xmm5, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -196,7 +196,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -205,7 +205,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -214,7 +214,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm4, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] ; SSE2-NEXT: addss %xmm4, %xmm0 @@ -226,7 +226,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm5, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm5 -; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1] +; SSE41-NEXT: 
unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1] ; SSE41-NEXT: addss %xmm5, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -234,7 +234,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -242,7 +242,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 @@ -250,7 +250,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) { ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm4, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3] ; SSE41-NEXT: addss %xmm4, %xmm0 @@ -379,7 +379,7 @@ define float @test_v4f32_zero(<4 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] ; SSE2-NEXT: addss %xmm1, %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: addss %xmm2, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -392,7 +392,7 @@ define float @test_v4f32_zero(<4 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: addss %xmm2, %xmm1 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -434,7 +434,7 @@ define float @test_v8f32_zero(<8 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3] ; SSE2-NEXT: addss %xmm2, %xmm3 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: addss %xmm3, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -443,7 +443,7 @@ define float @test_v8f32_zero(<8 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -456,7 +456,7 @@ define float @test_v8f32_zero(<8 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3] ; SSE41-NEXT: addss %xmm2, %xmm3 ; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE41-NEXT: addss %xmm3, %xmm2 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -464,7 +464,7 @@ define float 
@test_v8f32_zero(<8 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -524,7 +524,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[2,3] ; SSE2-NEXT: addss %xmm4, %xmm5 ; SSE2-NEXT: movaps %xmm0, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] ; SSE2-NEXT: addss %xmm5, %xmm4 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm4, %xmm0 @@ -533,7 +533,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -542,7 +542,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -551,7 +551,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -564,7 +564,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3] ; SSE41-NEXT: addss %xmm4, %xmm5 ; SSE41-NEXT: movaps %xmm0, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1] ; SSE41-NEXT: addss %xmm5, %xmm4 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm4, %xmm0 @@ -572,7 +572,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -580,7 +580,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -588,7 +588,7 @@ define float @test_v16f32_zero(<16 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; 
SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 @@ -709,7 +709,7 @@ define float @test_v4f32_undef(<4 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] ; SSE2-NEXT: addss {{.*}}(%rip), %xmm1 ; SSE2-NEXT: movaps %xmm0, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE2-NEXT: addss %xmm1, %xmm2 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -720,7 +720,7 @@ define float @test_v4f32_undef(<4 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: addss {{.*}}(%rip), %xmm1 ; SSE41-NEXT: movaps %xmm0, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1] ; SSE41-NEXT: addss %xmm1, %xmm2 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -756,7 +756,7 @@ define float @test_v8f32_undef(<8 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3] ; SSE2-NEXT: addss {{.*}}(%rip), %xmm2 ; SSE2-NEXT: movaps %xmm0, %xmm3 -; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE2-NEXT: addss %xmm2, %xmm3 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -765,7 +765,7 @@ define float @test_v8f32_undef(<8 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm2 -; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE2-NEXT: addss %xmm2, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -776,7 +776,7 @@ define float @test_v8f32_undef(<8 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3] ; SSE41-NEXT: addss {{.*}}(%rip), %xmm2 ; SSE41-NEXT: movaps %xmm0, %xmm3 -; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1] ; SSE41-NEXT: addss %xmm2, %xmm3 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 @@ -784,7 +784,7 @@ define float @test_v8f32_undef(<8 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm2 -; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1] ; SSE41-NEXT: addss %xmm2, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -838,7 +838,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3] ; SSE2-NEXT: addss {{.*}}(%rip), %xmm4 ; SSE2-NEXT: movaps %xmm0, %xmm5 -; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE2-NEXT: addss %xmm4, %xmm5 ; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE2-NEXT: addss %xmm5, %xmm0 @@ -847,7 +847,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: movaps %xmm1, %xmm4 -; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE2-NEXT: addss %xmm4, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 @@ -856,7 +856,7 @@ 
define float @test_v16f32_undef(<16 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm2, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE2-NEXT: addss %xmm2, %xmm0 @@ -865,7 +865,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm3, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE2-NEXT: addss %xmm1, %xmm0 ; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE2-NEXT: addss %xmm3, %xmm0 @@ -876,7 +876,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] ; SSE41-NEXT: addss {{.*}}(%rip), %xmm4 ; SSE41-NEXT: movaps %xmm0, %xmm5 -; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1] ; SSE41-NEXT: addss %xmm4, %xmm5 ; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3] ; SSE41-NEXT: addss %xmm5, %xmm0 @@ -884,7 +884,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: movaps %xmm1, %xmm4 -; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1] ; SSE41-NEXT: addss %xmm4, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3] ; SSE41-NEXT: addss %xmm1, %xmm0 @@ -892,7 +892,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm2, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3] ; SSE41-NEXT: addss %xmm2, %xmm0 @@ -900,7 +900,7 @@ define float @test_v16f32_undef(<16 x float> %a0) { ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm3, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1] ; SSE41-NEXT: addss %xmm1, %xmm0 ; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3] ; SSE41-NEXT: addss %xmm3, %xmm0 diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll index 2155081..a133a8c 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll @@ -43,7 +43,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -83,7 +83,7 @@ define float @test_v8f32(<8 x float> %a0) { ; 
SSE2: # %bb.0: ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -94,7 +94,7 @@ define float @test_v8f32(<8 x float> %a0) { ; SSE41: # %bb.0: ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -134,7 +134,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE2-NEXT: maxps %xmm2, %xmm0 ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -147,7 +147,7 @@ define float @test_v16f32(<16 x float> %a0) { ; SSE41-NEXT: maxps %xmm2, %xmm0 ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE41-NEXT: maxps %xmm1, %xmm0 ; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] ; SSE41-NEXT: maxps %xmm1, %xmm0 @@ -190,8 +190,8 @@ define float @test_v16f32(<16 x float> %a0) { define double @test_v2f64(<2 x double> %a0) { ; SSE-LABEL: test_v2f64: ; SSE: # %bb.0: -; SSE-NEXT: movaps %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: movapd %xmm0, %xmm1 +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -215,7 +215,7 @@ define double @test_v4f64(<4 x double> %a0) { ; SSE: # %bb.0: ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -249,7 +249,7 @@ define double @test_v8f64(<8 x double> %a0) { ; SSE-NEXT: maxpd %xmm2, %xmm0 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; @@ -290,7 +290,7 @@ define double @test_v16f64(<16 x double> %a0) { ; SSE-NEXT: maxpd %xmm3, %xmm1 ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: movapd %xmm0, %xmm1 -; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE-NEXT: maxpd %xmm1, %xmm0 ; SSE-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll index 5f5e34c..c2a06cf 100644 --- a/llvm/test/CodeGen/X86/vector-reduce-fmax.ll +++ b/llvm/test/CodeGen/X86/vector-reduce-fmax.ll @@ -43,7 +43,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE2-LABEL: test_v4f32: ; SSE2: # %bb.0: ; SSE2-NEXT: movaps %xmm0, %xmm1 -; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1] +; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1] ; SSE2-NEXT: maxps %xmm1, %xmm0 ; SSE2-NEXT: movaps %xmm0, %xmm1 ; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3] @@ -53,7 +53,7 @@ define float @test_v4f32(<4 x float> %a0) { ; SSE41-LABEL: test_v4f32: ; SSE41: # %bb.0: ; SSE41-NEXT: movaps %xmm0, %xmm1 -; SSE41-NEXT: movhlps 
{{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
@@ -83,7 +83,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@@ -94,7 +94,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
@@ -134,7 +134,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE2-NEXT: maxps %xmm2, %xmm0
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@@ -147,7 +147,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE41-NEXT: maxps %xmm2, %xmm0
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
@@ -190,8 +190,8 @@ define float @test_v16f32(<16 x float> %a0) {
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -215,7 +215,7 @@ define double @test_v4f64(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -249,7 +249,7 @@ define double @test_v8f64(<8 x double> %a0) {
; SSE-NEXT: maxpd %xmm2, %xmm0
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -290,7 +290,7 @@ define double @test_v16f64(<16 x double> %a0) {
; SSE-NEXT: maxpd %xmm3, %xmm1
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmax-nnan.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
index 28dd742..f0711af 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin-nnan.ll
@@ -43,7 +43,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@@ -53,7 +53,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@@ -83,7 +83,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@@ -94,7 +94,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@@ -134,7 +134,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE2-NEXT: minps %xmm2, %xmm0
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@@ -147,7 +147,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE41-NEXT: minps %xmm2, %xmm0
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@@ -190,8 +190,8 @@ define float @test_v16f32(<16 x float> %a0) {
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -215,7 +215,7 @@ define double @test_v4f64(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -249,7 +249,7 @@ define double @test_v8f64(<8 x double> %a0) {
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -290,7 +290,7 @@ define double @test_v16f64(<16 x double> %a0) {
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll
index 9ef289e..bd95bba 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmin.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmin.ll
@@ -43,7 +43,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@@ -53,7 +53,7 @@ define float @test_v4f32(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@@ -83,7 +83,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@@ -94,7 +94,7 @@ define float @test_v8f32(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@@ -134,7 +134,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE2-NEXT: minps %xmm2, %xmm0
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
@@ -147,7 +147,7 @@ define float @test_v16f32(<16 x float> %a0) {
; SSE41-NEXT: minps %xmm2, %xmm0
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
@@ -190,8 +190,8 @@ define float @test_v16f32(<16 x float> %a0) {
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -215,7 +215,7 @@ define double @test_v4f64(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -249,7 +249,7 @@ define double @test_v8f64(<8 x double> %a0) {
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -290,7 +290,7 @@ define double @test_v16f64(<16 x double> %a0) {
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
index f9dd20c..4c09356 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul-fast.ll
@@ -43,7 +43,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@@ -53,7 +53,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulps %xmm1, %xmm2
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
@@ -83,7 +83,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2: # %bb.0:
; SSE2-NEXT: mulps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@@ -94,7 +94,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE41: # %bb.0:
; SSE41-NEXT: mulps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulps %xmm1, %xmm2
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
@@ -134,7 +134,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: mulps %xmm3, %xmm1
; SSE2-NEXT: mulps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
@@ -147,7 +147,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: mulps %xmm3, %xmm1
; SSE41-NEXT: mulps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulps %xmm1, %xmm2
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
@@ -221,7 +221,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_zero:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -231,7 +231,7 @@ define float @test_v4f32_zero(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32_zero:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@@ -262,7 +262,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -273,7 +273,7 @@ define float @test_v8f32_zero(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@@ -314,7 +314,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE2-NEXT: mulps %xmm2, %xmm0
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -327,7 +327,7 @@ define float @test_v16f32_zero(<16 x float> %a0) {
; SSE41-NEXT: mulps %xmm2, %xmm0
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@@ -402,7 +402,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE2-LABEL: test_v4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -412,7 +412,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE41-LABEL: test_v4f32_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@@ -443,7 +443,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2: # %bb.0:
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -454,7 +454,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE41: # %bb.0:
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@@ -495,7 +495,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: mulps %xmm2, %xmm0
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
@@ -508,7 +508,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: mulps %xmm2, %xmm0
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
@@ -552,8 +552,8 @@ define float @test_v16f32_undef(<16 x float> %a0) {
define double @test_v2f64(double %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -577,7 +577,7 @@ define double @test_v4f64(double %a0, <4 x double> %a1) {
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm2, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -611,7 +611,7 @@ define double @test_v8f64(double %a0, <8 x double> %a1) {
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm2, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -652,7 +652,7 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
; SSE-NEXT: mulpd %xmm2, %xmm4
; SSE-NEXT: mulpd %xmm1, %xmm4
; SSE-NEXT: movapd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1]
; SSE-NEXT: mulpd %xmm4, %xmm0
; SSE-NEXT: retq
;
@@ -692,8 +692,8 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
define double @test_v2f64_zero(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_zero:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -718,7 +718,7 @@ define double @test_v4f64_zero(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -753,7 +753,7 @@ define double @test_v8f64_zero(<8 x double> %a0) {
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -795,7 +795,7 @@ define double @test_v16f64_zero(<16 x double> %a0) {
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
@@ -835,8 +835,8 @@ define double @test_v16f64_zero(<16 x double> %a0) {
define double @test_v2f64_undef(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -861,7 +861,7 @@ define double @test_v4f64_undef(<4 x double> %a0) {
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -896,7 +896,7 @@ define double @test_v8f64_undef(<8 x double> %a0) {
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -938,7 +938,7 @@ define double @test_v16f64_undef(<16 x double> %a0) {
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
diff --git a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
index 411acd8..a4b255c 100644
--- a/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
+++ b/llvm/test/CodeGen/X86/vector-reduce-fmul.ll
@@ -50,7 +50,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -62,7 +62,7 @@ define float @test_v4f32(float %a0, <4 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -101,7 +101,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -110,7 +110,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -122,7 +122,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -130,7 +130,7 @@ define float @test_v8f32(float %a0, <8 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -187,7 +187,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -196,7 +196,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -205,7 +205,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -214,7 +214,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
@@ -226,7 +226,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -234,7 +234,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -242,7 +242,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -250,7 +250,7 @@ define float @test_v16f32(float %a0, <16 x float> %a1) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm4, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
@@ -371,7 +371,7 @@ define float @test_v4f32_one(<4 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -382,7 +382,7 @@ define float @test_v4f32_one(<4 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -418,7 +418,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -427,7 +427,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -438,7 +438,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -446,7 +446,7 @@ define float @test_v8f32_one(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -500,7 +500,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
@@ -509,7 +509,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -518,7 +518,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -527,7 +527,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -538,7 +538,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
@@ -546,7 +546,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
-; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -554,7 +554,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -562,7 +562,7 @@ define float @test_v16f32_one(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -679,7 +679,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -690,7 +690,7 @@ define float @test_v4f32_undef(<4 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -726,7 +726,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -735,7 +735,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -746,7 +746,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -754,7 +754,7 @@ define float @test_v8f32_undef(<8 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -808,7 +808,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
@@ -817,7 +817,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
@@ -826,7 +826,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
@@ -835,7 +835,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
@@ -846,7 +846,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
@@ -854,7 +854,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
-; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
@@ -862,7 +862,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
@@ -870,7 +870,7 @@ define float @test_v16f32_undef(<16 x float> %a0) {
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
@@ -1182,8 +1182,8 @@ define double @test_v16f64(double %a0, <16 x double> %a1) {
define double @test_v2f64_one(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_one:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
@@ -1206,8 +1206,8 @@ define double @test_v2f64_one(<2 x double> %a0) {
define double @test_v4f64_one(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_one:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm2
; SSE-NEXT: mulsd %xmm1, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
@@ -1243,8 +1243,8 @@ define double @test_v4f64_one(<4 x double> %a0) {
define double @test_v8f64_one(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_one:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE-NEXT: movapd %xmm0, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm4
; SSE-NEXT: mulsd %xmm1, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
diff --git a/llvm/test/CodeGen/X86/widen_conv-3.ll b/llvm/test/CodeGen/X86/widen_conv-3.ll
index 57caab4..1eb76b2 100644
--- a/llvm/test/CodeGen/X86/widen_conv-3.ll
+++ b/llvm/test/CodeGen/X86/widen_conv-3.ll
@@ -74,7 +74,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: movaps %xmm0, %xmm1
-; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; X86-SSE2-NEXT: movss %xmm1, 8(%eax)
; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-SSE2-NEXT: movss %xmm0, 4(%eax)
diff --git a/llvm/test/CodeGen/X86/widen_conv-4.ll b/llvm/test/CodeGen/X86/widen_conv-4.ll
index c771643..3f8e203 100644
--- a/llvm/test/CodeGen/X86/widen_conv-4.ll
+++ b/llvm/test/CodeGen/X86/widen_conv-4.ll
@@ -19,7 +19,7 @@ define void @convert_v7i16_v7f32(<7 x float>* %dst.addr, <7 x i16> %src) nounwin
; X86-SSE2-NEXT: movups %xmm0, (%eax)
; X86-SSE2-NEXT: movss %xmm2, 16(%eax)
; X86-SSE2-NEXT: movaps %xmm2, %xmm0
-; X86-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm2[1],xmm0[1]
+; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; X86-SSE2-NEXT: movss %xmm0, 24(%eax)
; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movss %xmm2, 20(%eax)
@@ -100,7 +100,7 @@ define void @convert_v3i8_to_v3f32(<3 x float>* %dst.addr, <3 x i8>* %src.addr)
; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: movaps %xmm0, %xmm1
-; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; X86-SSE2-NEXT: movss %xmm1, 8(%eax)
; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-SSE2-NEXT: movss %xmm0, 4(%eax)
-- 
2.7.4