In r337348, I changed lowering to prefer X86ISD::UNPCKL/UNPCKH opcodes over MOVLHPS/MOVHLPS for v2f64 {0,0} and {1,1} shuffles when we have SSE2. This enabled the removal of a bunch of weirdly bitcasted isel patterns in r337349. To avoid changing the tests I placed a gross hack in isel to still emit movhlps instructions for fake unary unpckh nodes. A similar hack was not needed for unpckl and movlhps because we do execution domain switching for those. But unpckh and movhlps have swapped operand order.
This patch removes the hack.
This is a code size increase since unpckhpd requires a 0x66 prefix and movhlps does not. But if that's a big concern we should be using movhlps for all unpckhpd opcodes and let commuteInstruction turn it into unpckhpd when it's an advantage.
Differential Revision: https://reviews.llvm.org/D49499
llvm-svn: 341973
Sched<[SchedWriteFShuffle.XMM]>, NotMemoryFoldable;
}
-// TODO: This is largely to trick fastisel into ignoring the pattern.
-def UnpckhUnary : PatFrag<(ops node:$src1, node:$src2),
- (X86Unpckh node:$src1, node:$src2), [{
- return N->getOperand(0) == N->getOperand(1);
-}]>;
-
-let Predicates = [UseSSE2] in {
- // TODO: This is a hack pattern to allow lowering to emit unpckh instead of
- // movhlps for sse2 without changing a bunch of tests.
- def : Pat<(v2f64 (UnpckhUnary VR128:$src, VR128:$src)),
- (MOVHLPSrr VR128:$src, VR128:$src)>;
-}
-
//===----------------------------------------------------------------------===//
// SSE 1 & 2 - Conversion Instructions
//===----------------------------------------------------------------------===//
; SSE2-LABEL: test_negative_zero_1:
; SSE2: # %bb.0: # %entry
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: xorps %xmm2, %xmm2
; SSE2-NEXT: movss {{.*#+}} xmm2 = xmm1[0],xmm2[1,2,3]
; SSE2-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: cvtss2sd %xmm2, %xmm4
; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm2[1,1,3,3]
; SSE-NEXT: movaps %xmm2, %xmm6
-; SSE-NEXT: movhlps {{.*#+}} xmm6 = xmm2[1],xmm6[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm6 = xmm6[1],xmm2[1]
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm2[2,3]
; SSE-NEXT: movaps {{.*#+}} xmm7
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: orps %xmm0, %xmm4
; SSE-NEXT: movlhps {{.*#+}} xmm2 = xmm2[0],xmm4[0]
; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: andps %xmm7, %xmm0
; SSE-NEXT: cvtss2sd %xmm3, %xmm3
; SSE-NEXT: andps %xmm8, %xmm3
; SSE-NEXT: orps %xmm6, %xmm1
; SSE-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE-NEXT: movaps %xmm3, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE-NEXT: andps %xmm5, %xmm1
; SSE-NEXT: xorps %xmm6, %xmm6
; SSE-NEXT: cvtsd2ss %xmm2, %xmm6
define <2 x double> @complex_square_f64(<2 x double>) #0 {
; SSE-LABEL: complex_square_f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
-; SSE-NEXT: movaps %xmm0, %xmm2
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE-NEXT: movapd %xmm0, %xmm2
; SSE-NEXT: addsd %xmm0, %xmm2
; SSE-NEXT: mulsd %xmm1, %xmm2
; SSE-NEXT: mulsd %xmm0, %xmm0
define <2 x double> @complex_mul_f64(<2 x double>, <2 x double>) #0 {
; SSE-LABEL: complex_mul_f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
-; SSE-NEXT: movaps %xmm1, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
-; SSE-NEXT: movaps %xmm3, %xmm4
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
+; SSE-NEXT: movapd %xmm1, %xmm3
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
+; SSE-NEXT: movapd %xmm3, %xmm4
; SSE-NEXT: mulsd %xmm0, %xmm4
; SSE-NEXT: mulsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm2, %xmm1
; SSE2-NEXT: cvttss2si %xmm1, %rax
; SSE2-NEXT: movd %eax, %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: cvttss2si %xmm2, %rax
; SSE2-NEXT: movd %eax, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
define <2 x double> @trunc_unsigned_v2f64(<2 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v2f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE2-NEXT: movaps %xmm1, %xmm3
+; SSE2-NEXT: movapd %xmm1, %xmm3
; SSE2-NEXT: subsd %xmm2, %xmm3
; SSE2-NEXT: cvttsd2si %xmm3, %rax
; SSE2-NEXT: movabsq $-9223372036854775808, %rcx # imm = 0x8000000000000000
; SSE2-NEXT: cvttsd2si %xmm1, %rdx
; SSE2-NEXT: ucomisd %xmm2, %xmm1
; SSE2-NEXT: cmovaeq %rax, %rdx
-; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: subsd %xmm2, %xmm1
; SSE2-NEXT: cvttsd2si %xmm1, %rax
; SSE2-NEXT: xorq %rcx, %rax
define <4 x double> @trunc_unsigned_v4f64(<4 x double> %x) #0 {
; SSE2-LABEL: trunc_unsigned_v4f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE2-NEXT: movapd %xmm1, %xmm3
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE2-NEXT: movsd {{.*#+}} xmm2 = mem[0],zero
-; SSE2-NEXT: movaps %xmm3, %xmm4
+; SSE2-NEXT: movapd %xmm3, %xmm4
; SSE2-NEXT: subsd %xmm2, %xmm4
; SSE2-NEXT: cvttsd2si %xmm4, %rcx
; SSE2-NEXT: movabsq $-9223372036854775808, %rdx # imm = 0x8000000000000000
; SSE2-NEXT: cvttsd2si %xmm3, %rax
; SSE2-NEXT: ucomisd %xmm2, %xmm3
; SSE2-NEXT: cmovaeq %rcx, %rax
-; SSE2-NEXT: movaps %xmm1, %xmm3
+; SSE2-NEXT: movapd %xmm1, %xmm3
; SSE2-NEXT: subsd %xmm2, %xmm3
; SSE2-NEXT: cvttsd2si %xmm3, %rsi
; SSE2-NEXT: xorq %rdx, %rsi
; SSE2-NEXT: cvttsd2si %xmm1, %rcx
; SSE2-NEXT: ucomisd %xmm2, %xmm1
; SSE2-NEXT: cmovaeq %rsi, %rcx
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
-; SSE2-NEXT: movaps %xmm1, %xmm3
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm3
; SSE2-NEXT: subsd %xmm2, %xmm3
; SSE2-NEXT: cvttsd2si %xmm3, %rsi
; SSE2-NEXT: xorq %rdx, %rsi
; SSE2-NEXT: cvttsd2si %xmm1, %rdi
; SSE2-NEXT: ucomisd %xmm2, %xmm1
; SSE2-NEXT: cmovaeq %rsi, %rdi
-; SSE2-NEXT: movaps %xmm0, %xmm1
+; SSE2-NEXT: movapd %xmm0, %xmm1
; SSE2-NEXT: subsd %xmm2, %xmm1
; SSE2-NEXT: cvttsd2si %xmm1, %rsi
; SSE2-NEXT: xorq %rdx, %rsi
; SSE-LABEL: not_a_hsub_2:
; SSE: # %bb.0:
; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: movaps %xmm0, %xmm3
; SSE-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1],xmm0[2,3]
; SSE-NEXT: subss %xmm3, %xmm2
; SSE-NEXT: movaps %xmm1, %xmm2
; SSE-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1],xmm1[2,3]
; SSE-NEXT: movaps %xmm1, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE-NEXT: subss %xmm3, %xmm2
; SSE-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE-NEXT: subss %xmm3, %xmm1
define <2 x double> @not_a_hsub_3(<2 x double> %A, <2 x double> %B) {
; SSE-LABEL: not_a_hsub_3:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm1, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE-NEXT: movapd %xmm1, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE-NEXT: subsd %xmm2, %xmm1
-; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: subsd %xmm0, %xmm2
; SSE-NEXT: unpcklpd {{.*#+}} xmm2 = xmm2[0],xmm1[0]
; SSE-NEXT: movapd %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[3,3]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSSE3-NEXT: addps %xmm0, %xmm1
; SSSE3-NEXT: movaps %xmm1, %xmm0
-; SSSE3-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSSE3-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSSE3-NEXT: addss %xmm1, %xmm0
; SSSE3-NEXT: retq
;
define <2 x double> @test5_undef(<2 x double> %a, <2 x double> %b) {
; SSE-LABEL: test5_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: addsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
; SSE-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE-NEXT: addss %xmm0, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: movlhps {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; SSE-NEXT: cvtps2pd %xmm0, %xmm0
; SSE-NEXT: movlps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm2, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE-NEXT: fldl -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm2, %xmm0
; SSE-NEXT: retq
; SSE-NEXT: movaps %xmm0, %xmm2
; SSE-NEXT: subss %xmm3, %xmm2
; SSE-NEXT: movaps %xmm0, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE-NEXT: movaps %xmm1, %xmm5
-; SSE-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE-NEXT: subss %xmm5, %xmm4
; SSE-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE-NEXT: addss %xmm3, %xmm5
; SSE-NEXT: cvttss2si %xmm1, %rax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE-NEXT: cvttss2si %xmm0, %rax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movaps %xmm2, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm2[1],xmm3[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm2[1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1]
; SSE-NEXT: cvttss2si %xmm2, %rax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: movaps %xmm1, %xmm3
-; SSE-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE-NEXT: cvttss2si %xmm3, %rax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; NO-FMA-NEXT: divsd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
-; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;
; NO-FMA-NEXT: mulsd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
-; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;
; NO-FMA-NEXT: addsd {{.*}}(%rip), %xmm1
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
-; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;
; NO-FMA-NEXT: subpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
-; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;
; NO-FMA-NEXT: sqrtpd {{.*}}(%rip), %xmm0
; NO-FMA-NEXT: movsd %xmm1, -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: movapd %xmm0, %xmm1
-; NO-FMA-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; NO-FMA-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; NO-FMA-NEXT: fldl -{{[0-9]+}}(%rsp)
; NO-FMA-NEXT: retq
;
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm1, %xmm0
-; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: haddps %xmm0, %xmm0
; SSE41-NEXT: retq
; SSE2: # %bb.0:
; SSE2-NEXT: addps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: addps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
-; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: haddps %xmm0, %xmm0
; SSE41-NEXT: retq
; SSE2-NEXT: addps %xmm3, %xmm1
; SSE2-NEXT: addps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
; SSE41-NEXT: addps %xmm3, %xmm1
; SSE41-NEXT: addps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
-; SSE41-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: haddps %xmm0, %xmm0
; SSE41-NEXT: retq
; SSE2-LABEL: test_v4f32_zero:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41-LABEL: test_v4f32_zero:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE2: # %bb.0:
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: addps %xmm2, %xmm0
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41-NEXT: addps %xmm2, %xmm0
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE2-LABEL: test_v4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41-LABEL: test_v4f32_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE2: # %bb.0:
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: addps %xmm2, %xmm0
; SSE2-NEXT: addps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41-NEXT: addps %xmm2, %xmm0
; SSE41-NEXT: addps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addps %xmm0, %xmm1
; SSE41-NEXT: haddps %xmm1, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm0
define double @test_v2f64(double %a0, <2 x double> %a1) {
; SSE2-LABEL: test_v2f64:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: movapd %xmm1, %xmm0
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE2: # %bb.0:
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE2-NEXT: addpd %xmm3, %xmm1
; SSE2-NEXT: addpd %xmm2, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE2-NEXT: addpd %xmm2, %xmm4
; SSE2-NEXT: addpd %xmm1, %xmm4
; SSE2-NEXT: movapd %xmm4, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1]
; SSE2-NEXT: addpd %xmm4, %xmm0
; SSE2-NEXT: retq
;
define double @test_v2f64_zero(<2 x double> %a0) {
; SSE2-LABEL: test_v2f64_zero:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE2: # %bb.0:
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE2-NEXT: addpd %xmm2, %xmm0
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE2-NEXT: addpd %xmm3, %xmm1
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
define double @test_v2f64_undef(<2 x double> %a0) {
; SSE2-LABEL: test_v2f64_undef:
; SSE2: # %bb.0:
-; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: movapd %xmm0, %xmm1
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE2: # %bb.0:
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE2-NEXT: addpd %xmm2, %xmm0
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: movapd %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
; SSE2-NEXT: retq
; SSE2-NEXT: addpd %xmm3, %xmm1
; SSE2-NEXT: addpd %xmm0, %xmm1
; SSE2-NEXT: movapd %xmm1, %xmm0
-; SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE2-NEXT: addpd %xmm1, %xmm0
; SSE2-NEXT: retq
;
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE2-NEXT: addss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm3, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE41-NEXT: addss %xmm3, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm5, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE2-NEXT: addss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm5, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE41-NEXT: addss %xmm5, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: addss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm4, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: addss %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: addss %xmm2, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: addss %xmm2, %xmm1
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm0[2,3]
; SSE2-NEXT: addss %xmm2, %xmm3
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: addss %xmm3, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm0[1,1,3,3]
; SSE41-NEXT: addss %xmm2, %xmm3
; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: addss %xmm3, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm0[2,3]
; SSE2-NEXT: addss %xmm4, %xmm5
; SSE2-NEXT: movaps %xmm0, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE2-NEXT: addss %xmm5, %xmm4
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm0[1,1,3,3]
; SSE41-NEXT: addss %xmm4, %xmm5
; SSE41-NEXT: movaps %xmm0, %xmm4
-; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE41-NEXT: addss %xmm5, %xmm4
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
-; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: addss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: addss {{.*}}(%rip), %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: addss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: addss {{.*}}(%rip), %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: addss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: addss {{.*}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: addss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: addss {{.*}}(%rip), %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: addss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: addss {{.*}}(%rip), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: addss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: addss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: addss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: addss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: addss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: addss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: addss {{.*}}(%rip), %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: addss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: addss %xmm5, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
-; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: addss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: addss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: addss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: addss %xmm3, %xmm0
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: maxps %xmm2, %xmm0
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41-NEXT: maxps %xmm2, %xmm0
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE: # %bb.0:
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: maxpd %xmm2, %xmm0
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: maxpd %xmm3, %xmm1
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE2: # %bb.0:
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: maxps %xmm2, %xmm0
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: maxps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41-NEXT: maxps %xmm2, %xmm0
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: maxps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: maxps %xmm1, %xmm0
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE: # %bb.0:
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: maxpd %xmm2, %xmm0
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: maxpd %xmm3, %xmm1
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: maxpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: minps %xmm2, %xmm0
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41-NEXT: minps %xmm2, %xmm0
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE2: # %bb.0:
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: minps %xmm2, %xmm0
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: minps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE41-NEXT: minps %xmm2, %xmm0
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: minps %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: minps %xmm1, %xmm0
define double @test_v2f64(<2 x double> %a0) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE: # %bb.0:
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: minpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: minpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE2-LABEL: test_v4f32:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
; SSE41-LABEL: test_v4f32:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulps %xmm1, %xmm2
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
; SSE2: # %bb.0:
; SSE2-NEXT: mulps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: mulps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulps %xmm1, %xmm2
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
; SSE2-NEXT: mulps %xmm3, %xmm1
; SSE2-NEXT: mulps %xmm2, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulps %xmm1, %xmm2
; SSE2-NEXT: movaps %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm2[2,3]
; SSE41-NEXT: mulps %xmm3, %xmm1
; SSE41-NEXT: mulps %xmm2, %xmm1
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulps %xmm1, %xmm2
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm2[1,1,3,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
; SSE2-LABEL: test_v4f32_zero:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41-LABEL: test_v4f32_zero:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE2: # %bb.0:
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: mulps %xmm2, %xmm0
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE2-LABEL: test_v4f32_undef:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41-LABEL: test_v4f32_undef:
; SSE41: # %bb.0:
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE2: # %bb.0:
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41: # %bb.0:
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: mulps %xmm2, %xmm0
; SSE2-NEXT: mulps %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm0, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE2-NEXT: mulps %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[2,3]
; SSE41-NEXT: mulps %xmm2, %xmm0
; SSE41-NEXT: mulps %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm0, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE41-NEXT: mulps %xmm0, %xmm1
; SSE41-NEXT: movshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; SSE41-NEXT: mulps %xmm0, %xmm1
define double @test_v2f64(double %a0, <2 x double> %a1) {
; SSE-LABEL: test_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm2, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm2, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE-NEXT: mulpd %xmm2, %xmm4
; SSE-NEXT: mulpd %xmm1, %xmm4
; SSE-NEXT: movapd %xmm4, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm4[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm4[1]
; SSE-NEXT: mulpd %xmm4, %xmm0
; SSE-NEXT: retq
;
define double @test_v2f64_zero(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_zero:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
define double @test_v2f64_undef(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_undef:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: movapd %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
-; SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm1[1],xmm0[1]
+; SSE-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: retq
;
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm3 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm1[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm5 = xmm5[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm4[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm4, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm5 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm1[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm1[1]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm4[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm4, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm4[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm4[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm4 = xmm4[3,1,2,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss %xmm0, %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss %xmm0, %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
-; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE2-NEXT: movaps %xmm0, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE2-NEXT: mulss %xmm1, %xmm2
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm1
; SSE41-NEXT: movaps %xmm0, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE41-NEXT: mulss %xmm1, %xmm2
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE2-NEXT: movaps %xmm0, %xmm3
-; SSE2-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE2-NEXT: mulss %xmm2, %xmm3
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm2
-; SSE2-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm2
; SSE41-NEXT: movaps %xmm0, %xmm3
-; SSE41-NEXT: movhlps {{.*#+}} xmm3 = xmm0[1],xmm3[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm0[1]
; SSE41-NEXT: mulss %xmm2, %xmm3
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm2
-; SSE41-NEXT: movhlps {{.*#+}} xmm2 = xmm1[1],xmm2[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm0[2,3]
; SSE2-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE2-NEXT: movaps %xmm0, %xmm5
-; SSE2-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE2-NEXT: mulss %xmm4, %xmm5
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: mulss %xmm5, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm4 = xmm4[1,1],xmm1[2,3]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: movaps %xmm1, %xmm4
-; SSE2-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE2-NEXT: mulss %xmm4, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm2[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm2, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE2-NEXT: mulss %xmm2, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm3[2,3]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: movaps %xmm3, %xmm1
-; SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE2-NEXT: mulss %xmm1, %xmm0
; SSE2-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE2-NEXT: mulss %xmm3, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; SSE41-NEXT: mulss {{.*}}(%rip), %xmm4
; SSE41-NEXT: movaps %xmm0, %xmm5
-; SSE41-NEXT: movhlps {{.*#+}} xmm5 = xmm0[1],xmm5[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm5 = xmm5[1],xmm0[1]
; SSE41-NEXT: mulss %xmm4, %xmm5
; SSE41-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: mulss %xmm5, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm4 = xmm1[1,1,3,3]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: movaps %xmm1, %xmm4
-; SSE41-NEXT: movhlps {{.*#+}} xmm4 = xmm1[1],xmm4[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm1[1]
; SSE41-NEXT: mulss %xmm4, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm1 = xmm1[3,1,2,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm2, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm2[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm2[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm2 = xmm2[3,1,2,3]
; SSE41-NEXT: mulss %xmm2, %xmm0
; SSE41-NEXT: movshdup {{.*#+}} xmm1 = xmm3[1,1,3,3]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: movaps %xmm3, %xmm1
-; SSE41-NEXT: movhlps {{.*#+}} xmm1 = xmm3[1],xmm1[1]
+; SSE41-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm3[1]
; SSE41-NEXT: mulss %xmm1, %xmm0
; SSE41-NEXT: shufps {{.*#+}} xmm3 = xmm3[3,1,2,3]
; SSE41-NEXT: mulss %xmm3, %xmm0
define double @test_v2f64_one(<2 x double> %a0) {
; SSE-LABEL: test_v2f64_one:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm1
-; SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; SSE-NEXT: movapd %xmm0, %xmm1
+; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm1
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
define double @test_v4f64_one(<4 x double> %a0) {
; SSE-LABEL: test_v4f64_one:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm2
-; SSE-NEXT: movhlps {{.*#+}} xmm2 = xmm0[1],xmm2[1]
+; SSE-NEXT: movapd %xmm0, %xmm2
+; SSE-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm2
; SSE-NEXT: mulsd %xmm1, %xmm2
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
define double @test_v8f64_one(<8 x double> %a0) {
; SSE-LABEL: test_v8f64_one:
; SSE: # %bb.0:
-; SSE-NEXT: movaps %xmm0, %xmm4
-; SSE-NEXT: movhlps {{.*#+}} xmm4 = xmm0[1],xmm4[1]
+; SSE-NEXT: movapd %xmm0, %xmm4
+; SSE-NEXT: unpckhpd {{.*#+}} xmm4 = xmm4[1],xmm0[1]
; SSE-NEXT: mulsd %xmm0, %xmm4
; SSE-NEXT: mulsd %xmm1, %xmm4
; SSE-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1,1]
; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: movaps %xmm0, %xmm1
-; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; X86-SSE2-NEXT: movss %xmm1, 8(%eax)
; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-SSE2-NEXT: movss %xmm0, 4(%eax)
; X86-SSE2-NEXT: movups %xmm0, (%eax)
; X86-SSE2-NEXT: movss %xmm2, 16(%eax)
; X86-SSE2-NEXT: movaps %xmm2, %xmm0
-; X86-SSE2-NEXT: movhlps {{.*#+}} xmm0 = xmm2[1],xmm0[1]
+; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm0 = xmm0[1],xmm2[1]
; X86-SSE2-NEXT: movss %xmm0, 24(%eax)
; X86-SSE2-NEXT: shufps {{.*#+}} xmm2 = xmm2[1,1,2,3]
; X86-SSE2-NEXT: movss %xmm2, 20(%eax)
; X86-SSE2-NEXT: cvtdq2ps %xmm0, %xmm0
; X86-SSE2-NEXT: movss %xmm0, (%eax)
; X86-SSE2-NEXT: movaps %xmm0, %xmm1
-; X86-SSE2-NEXT: movhlps {{.*#+}} xmm1 = xmm0[1],xmm1[1]
+; X86-SSE2-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
; X86-SSE2-NEXT: movss %xmm1, 8(%eax)
; X86-SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X86-SSE2-NEXT: movss %xmm0, 4(%eax)