; TODO (v)movhps - the expectations for test_movhps below currently match (v)movhpd rather than (v)movhps.
; test_movhps - scheduling test for a 64-bit high-half merge load and store.
; The IR loads a <2 x float> through %a2, adds the vector %4 to %a0, and
; stores elements 2 and 3 of the sum back through the same pointer.
; In the '+' side of this diff the function returns %4 instead of void, and
; every updated expectation block gains a (v)movaps copy of xmm1 into xmm0
; ahead of retq (the Atom block additionally copies xmm0 into xmm2 first).
; NOTE(review) - %3 and %4 are defined on lines not visible in this excerpt;
; presumably %4 is %a1 with the loaded pair merged into its upper half, as the
; movhpd expectations suggest. Confirm against the full file.
-define void @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
+define <4 x float> @test_movhps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
; GENERIC-LABEL: test_movhps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; GENERIC-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movhps:
; ATOM: # %bb.0:
+; ATOM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; ATOM-NEXT: addps %xmm1, %xmm2 # sched: [5:5.00]
+; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movhlps {{.*#+}} xmm2 = xmm2[1,1] sched: [1:1.00]
+; ATOM-NEXT: movlps %xmm2, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movhps:
; SLM: # %bb.0:
; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; SLM-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movhps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; SANDY-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movhps:
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:1.00]
+; SANDY-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movhps:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; HASWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movhps:
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movhps:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movhps:
; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; BROADWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movhps:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movhps:
; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKYLAKE-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movhps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:1.00]
-; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:1.00]
+; SKX-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movhps:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [2:1.00]
+; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movhps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
-; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:0.50]
+; BTVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movhps:
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [3:1.00]
+; BTVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movhps:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm1 = xmm1[1,1] sched: [1:0.50]
-; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movhps:
; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vpextrq $1, %xmm0, (%rdi) # sched: [5:3.00]
+; ZNVER1-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test. The same pointer %1 is used for both the load and the store.
%1 = bitcast x86_mmx* %a2 to <2 x float>*
%2 = load <2 x float>, <2 x float> *%1, align 8
; %4 is produced by lines not shown in this excerpt.
%5 = fadd <4 x float> %a0, %4
; Lanes 2 and 3 select the upper 64 bits of the sum for the store.
%6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 2, i32 3>
store <2 x float> %6, <2 x float>* %1
- ret void
+ ret <4 x float> %4
}
; TODO (v)movlhps
ret <4 x float> %2
}
; test_movlps - scheduling test for a 64-bit low-half merge load and store.
; The IR loads a <2 x float> through %a2, adds the vector %4 to %a0, and
; stores elements 0 and 1 of the sum back through the same pointer.
; In the '+' side of this diff the function returns %4 instead of void, and
; every updated expectation block gains a (v)movaps copy of xmm1 into xmm0
; ahead of retq (the Atom block additionally copies xmm0 into xmm2 first).
; NOTE(review) - %3 and %4 are defined on lines not visible in this excerpt;
; presumably %4 is %a1 with the loaded pair merged into its lower half, as the
; movlpd expectations suggest. Confirm against the full file.
-define void @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
+define <4 x float> @test_movlps(<4 x float> %a0, <4 x float> %a1, x86_mmx *%a2) {
; GENERIC-LABEL: test_movlps:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; GENERIC-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movlps:
; ATOM: # %bb.0:
+; ATOM-NEXT: movaps %xmm0, %xmm2 # sched: [1:0.50]
; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: addps %xmm0, %xmm1 # sched: [5:5.00]
-; ATOM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; ATOM-NEXT: addps %xmm1, %xmm2 # sched: [5:5.00]
+; ATOM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movlps %xmm2, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movlps:
; SLM: # %bb.0:
; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
-; SLM-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movlps:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movlps:
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; SANDY-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movlps:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movlps:
; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; HASWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movlps:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movlps:
; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BROADWELL-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movlps:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movlps:
; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKYLAKE-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movlps:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-SSE-NEXT: addps %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: addps %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlps:
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movlps:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movlps:
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BTVER2-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovlps %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movlps:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addps %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movlps %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addps %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movlps %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movlps:
; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
; ZNVER1-NEXT: vaddps %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmovlps %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-NEXT: vmovaps %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test. The same pointer %1 is used for both the load and the store.
%1 = bitcast x86_mmx* %a2 to <2 x float>*
%2 = load <2 x float>, <2 x float> *%1, align 8
; %4 is produced by lines not shown in this excerpt.
%5 = fadd <4 x float> %a0, %4
; Lanes 0 and 1 select the lower 64 bits of the sum for the store.
%6 = shufflevector <4 x float> %5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
store <2 x float> %6, <2 x float>* %1
- ret void
+ ret <4 x float> %4
}
define i32 @test_movmskps(<4 x float> %a0) {
ret i64 %7
}
; test_movhpd - scheduling test for a high-element double merge load and store.
; The IR loads a double through %a2, adds the vector %3 to %a0, and stores
; element 1 of the sum back through the same pointer.
; In the '+' side of this diff the function returns %3 instead of void, and
; every updated expectation block gains a (v)movapd copy of xmm1 into xmm0
; ahead of retq (the Atom block additionally copies xmm0 into xmm2 first).
; NOTE(review) - %3 is defined on a line not visible in this excerpt;
; presumably it is %a1 with the loaded double inserted as element 1, as the
; movhpd load expectations suggest. Confirm against the full file.
-define void @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
+define <2 x double> @test_movhpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; GENERIC-LABEL: test_movhpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movhpd:
; ATOM: # %bb.0:
+; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
; ATOM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00]
+; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movhpd %xmm2, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movhpd:
; SLM: # %bb.0:
; SLM-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [4:1.00]
-; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movhpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movhpd:
; SANDY-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movhpd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movhpd:
; HASWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movhpd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movhpd:
; BROADWELL-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movhpd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movhpd:
; SKYLAKE-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movhpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movhpd:
; SKX-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movhpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movhpd:
; BTVER2-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [6:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovhpd %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movhpd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movhpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movhpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movhpd:
; ZNVER1-NEXT: vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0] sched: [8:0.50]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmovhpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test. The same pointer %1 is used for both the load and the store.
%1 = bitcast x86_mmx* %a2 to double*
%2 = load double, double *%1, align 8
; %3 is produced by a line not shown in this excerpt.
%4 = fadd <2 x double> %a0, %3
; Element 1 is the upper double of the sum.
%5 = extractelement <2 x double> %4, i32 1
store double %5, double* %1
- ret void
+ ret <2 x double> %3
}
; test_movlpd - scheduling test for a low-element double merge load and store.
; The IR loads a double through %a2, adds the vector %3 to %a0, and stores
; element 0 of the sum back through the same pointer.
; In the '+' side of this diff the function returns %3 instead of void, and
; every updated expectation block gains a (v)movapd copy of xmm1 into xmm0
; ahead of retq (the Atom block additionally copies xmm0 into xmm2 first).
; NOTE(review) - %3 is defined on a line not visible in this excerpt;
; presumably it is %a1 with the loaded double inserted as element 0, as the
; movlpd load expectations suggest. Confirm against the full file.
-define void @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
+define <2 x double> @test_movlpd(<2 x double> %a0, <2 x double> %a1, x86_mmx *%a2) {
; GENERIC-LABEL: test_movlpd:
; GENERIC: # %bb.0:
; GENERIC-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; GENERIC-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; GENERIC-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; GENERIC-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; GENERIC-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; GENERIC-NEXT: retq # sched: [1:1.00]
;
; ATOM-LABEL: test_movlpd:
; ATOM: # %bb.0:
+; ATOM-NEXT: movapd %xmm0, %xmm2 # sched: [1:0.50]
; ATOM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [1:1.00]
-; ATOM-NEXT: addpd %xmm0, %xmm1 # sched: [6:3.00]
-; ATOM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; ATOM-NEXT: addpd %xmm1, %xmm2 # sched: [6:3.00]
+; ATOM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
+; ATOM-NEXT: movlpd %xmm2, (%rdi) # sched: [1:1.00]
; ATOM-NEXT: retq # sched: [79:39.50]
;
; SLM-LABEL: test_movlpd:
; SLM: # %bb.0:
; SLM-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [4:1.00]
-; SLM-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SLM-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SLM-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SLM-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; SLM-NEXT: retq # sched: [4:1.00]
;
; SANDY-SSE-LABEL: test_movlpd:
; SANDY-SSE: # %bb.0:
; SANDY-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
-; SANDY-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; SANDY-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; SANDY-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-SSE-NEXT: retq # sched: [1:1.00]
;
; SANDY-LABEL: test_movlpd:
; SANDY-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [7:1.00]
; SANDY-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; SANDY-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SANDY-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; SANDY-NEXT: retq # sched: [1:1.00]
;
; HASWELL-SSE-LABEL: test_movlpd:
; HASWELL-SSE: # %bb.0:
; HASWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; HASWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; HASWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; HASWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; HASWELL-LABEL: test_movlpd:
; HASWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; HASWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; HASWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; HASWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; HASWELL-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-SSE-LABEL: test_movlpd:
; BROADWELL-SSE: # %bb.0:
; BROADWELL-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BROADWELL-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BROADWELL-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BROADWELL-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-SSE-NEXT: retq # sched: [7:1.00]
;
; BROADWELL-LABEL: test_movlpd:
; BROADWELL-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BROADWELL-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BROADWELL-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; BROADWELL-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:1.00]
; BROADWELL-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-SSE-LABEL: test_movlpd:
; SKYLAKE-SSE: # %bb.0:
; SKYLAKE-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKYLAKE-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKYLAKE-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKYLAKE-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-SSE-NEXT: retq # sched: [7:1.00]
;
; SKYLAKE-LABEL: test_movlpd:
; SKYLAKE-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKYLAKE-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKYLAKE-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKYLAKE-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKYLAKE-NEXT: retq # sched: [7:1.00]
;
; SKX-SSE-LABEL: test_movlpd:
; SKX-SSE: # %bb.0:
; SKX-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; SKX-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [4:0.50]
-; SKX-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [4:0.50]
+; SKX-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-SSE-NEXT: retq # sched: [7:1.00]
;
; SKX-LABEL: test_movlpd:
; SKX-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; SKX-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [4:0.50]
; SKX-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:1.00]
+; SKX-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.33]
; SKX-NEXT: retq # sched: [7:1.00]
;
; BTVER2-SSE-LABEL: test_movlpd:
; BTVER2-SSE: # %bb.0:
; BTVER2-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
-; BTVER2-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; BTVER2-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; BTVER2-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-SSE-NEXT: retq # sched: [4:1.00]
;
; BTVER2-LABEL: test_movlpd:
; BTVER2-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [6:1.00]
; BTVER2-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; BTVER2-NEXT: vmovlpd %xmm0, (%rdi) # sched: [2:1.00]
+; BTVER2-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.50]
; BTVER2-NEXT: retq # sched: [4:1.00]
;
; ZNVER1-SSE-LABEL: test_movlpd:
; ZNVER1-SSE: # %bb.0:
; ZNVER1-SSE-NEXT: movlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
-; ZNVER1-SSE-NEXT: addpd %xmm0, %xmm1 # sched: [3:1.00]
-; ZNVER1-SSE-NEXT: movlpd %xmm1, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: addpd %xmm1, %xmm0 # sched: [3:1.00]
+; ZNVER1-SSE-NEXT: movlpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-SSE-NEXT: movapd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-SSE-NEXT: retq # sched: [1:0.50]
;
; ZNVER1-LABEL: test_movlpd:
; ZNVER1-NEXT: vmovlpd {{.*#+}} xmm1 = mem[0],xmm1[1] sched: [8:0.50]
; ZNVER1-NEXT: vaddpd %xmm1, %xmm0, %xmm0 # sched: [3:1.00]
; ZNVER1-NEXT: vmovlpd %xmm0, (%rdi) # sched: [1:0.50]
+; ZNVER1-NEXT: vmovapd %xmm1, %xmm0 # sched: [1:0.25]
; ZNVER1-NEXT: retq # sched: [1:0.50]
; IR under test. The same pointer %1 is used for both the load and the store.
%1 = bitcast x86_mmx* %a2 to double*
%2 = load double, double *%1, align 8
; %3 is produced by a line not shown in this excerpt.
%4 = fadd <2 x double> %a0, %3
; Element 0 is the lower double of the sum.
%5 = extractelement <2 x double> %4, i32 0
store double %5, double* %1
- ret void
+ ret <2 x double> %3
}
define i32 @test_movmskpd(<2 x double> %a0) {