; SSSE3_SLOW: # %bb.0:
; SSSE3_SLOW-NEXT: movapd %xmm0, %xmm1
; SSSE3_SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSSE3_SLOW-NEXT: addpd %xmm0, %xmm1
+; SSSE3_SLOW-NEXT: addsd %xmm0, %xmm1
; SSSE3_SLOW-NEXT: movddup {{.*#+}} xmm0 = xmm1[0,0]
; SSSE3_SLOW-NEXT: retq
;
; AVX1_SLOW-LABEL: hadd_v2f64:
; AVX1_SLOW: # %bb.0:
; AVX1_SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX1_SLOW-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX1_SLOW-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX1_SLOW-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1_SLOW-NEXT: retq
;
; AVX2_SLOW-LABEL: hadd_v2f64:
; AVX2_SLOW: # %bb.0:
; AVX2_SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX2_SLOW-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX2_SLOW-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX2_SLOW-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX2_SLOW-NEXT: retq
;
; SSSE3_SLOW: # %bb.0:
; SSSE3_SLOW-NEXT: movapd %xmm0, %xmm2
; SSSE3_SLOW-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
-; SSSE3_SLOW-NEXT: movapd %xmm1, %xmm3
-; SSSE3_SLOW-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
-; SSSE3_SLOW-NEXT: addpd %xmm1, %xmm3
-; SSSE3_SLOW-NEXT: addpd %xmm0, %xmm2
+; SSSE3_SLOW-NEXT: addsd %xmm0, %xmm2
; SSSE3_SLOW-NEXT: movddup {{.*#+}} xmm0 = xmm2[0,0]
-; SSSE3_SLOW-NEXT: movddup {{.*#+}} xmm1 = xmm3[0,0]
+; SSSE3_SLOW-NEXT: movapd %xmm1, %xmm2
+; SSSE3_SLOW-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; SSSE3_SLOW-NEXT: addsd %xmm1, %xmm2
+; SSSE3_SLOW-NEXT: movddup {{.*#+}} xmm1 = xmm2[0,0]
; SSSE3_SLOW-NEXT: retq
;
; SSSE3_FAST-LABEL: hadd_v4f64:
; SSSE3_SLOW: # %bb.0:
; SSSE3_SLOW-NEXT: movapd %xmm0, %xmm1
; SSSE3_SLOW-NEXT: unpckhpd {{.*#+}} xmm1 = xmm1[1],xmm0[1]
-; SSSE3_SLOW-NEXT: subpd %xmm1, %xmm0
+; SSSE3_SLOW-NEXT: subsd %xmm1, %xmm0
; SSSE3_SLOW-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
; SSSE3_SLOW-NEXT: retq
;
; AVX1_SLOW-LABEL: hsub_v2f64:
; AVX1_SLOW: # %bb.0:
; AVX1_SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX1_SLOW-NEXT: vsubpd %xmm1, %xmm0, %xmm0
+; AVX1_SLOW-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX1_SLOW-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1_SLOW-NEXT: retq
;
; AVX2_SLOW-LABEL: hsub_v2f64:
; AVX2_SLOW: # %bb.0:
; AVX2_SLOW-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX2_SLOW-NEXT: vsubpd %xmm1, %xmm0, %xmm0
+; AVX2_SLOW-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX2_SLOW-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX2_SLOW-NEXT: retq
;
; SSSE3_SLOW: # %bb.0:
; SSSE3_SLOW-NEXT: movapd %xmm0, %xmm2
; SSSE3_SLOW-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm0[1]
-; SSSE3_SLOW-NEXT: movapd %xmm1, %xmm3
-; SSSE3_SLOW-NEXT: unpckhpd {{.*#+}} xmm3 = xmm3[1],xmm1[1]
-; SSSE3_SLOW-NEXT: subpd %xmm3, %xmm1
-; SSSE3_SLOW-NEXT: subpd %xmm2, %xmm0
+; SSSE3_SLOW-NEXT: subsd %xmm2, %xmm0
; SSSE3_SLOW-NEXT: movddup {{.*#+}} xmm0 = xmm0[0,0]
+; SSSE3_SLOW-NEXT: movapd %xmm1, %xmm2
+; SSSE3_SLOW-NEXT: unpckhpd {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; SSSE3_SLOW-NEXT: subsd %xmm2, %xmm1
; SSSE3_SLOW-NEXT: movddup {{.*#+}} xmm1 = xmm1[0,0]
; SSSE3_SLOW-NEXT: retq
;
define <2 x double> @fadd_splat_splat_v2f64(<2 x double> %vx, <2 x double> %vy) {
; SSE-LABEL: fadd_splat_splat_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: fadd_splat_splat_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
%splatx = shufflevector <2 x double> %vx, <2 x double> undef, <2 x i32> zeroinitializer
define <4 x double> @fsub_splat_splat_v4f64(double %x, double %y) {
; SSE-LABEL: fsub_splat_splat_v4f64:
; SSE: # %bb.0:
-; SSE-NEXT: subpd %xmm1, %xmm0
+; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: fsub_splat_splat_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
define <4 x float> @fmul_splat_splat_v4f32(<4 x float> %vx, <4 x float> %vy) {
; SSE-LABEL: fmul_splat_splat_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: mulps %xmm1, %xmm0
+; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: fmul_splat_splat_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
%splatx = shufflevector <4 x float> %vx, <4 x float> undef, <4 x i32> zeroinitializer
define <8 x float> @fdiv_splat_splat_v8f32(<8 x float> %vx, <8 x float> %vy) {
; SSE-LABEL: fdiv_splat_splat_v8f32:
; SSE: # %bb.0:
-; SSE-NEXT: rcpps %xmm2, %xmm3
-; SSE-NEXT: mulps %xmm3, %xmm2
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; SSE-NEXT: subps %xmm2, %xmm1
-; SSE-NEXT: mulps %xmm3, %xmm1
-; SSE-NEXT: addps %xmm3, %xmm1
-; SSE-NEXT: mulps %xmm0, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: divss %xmm2, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: fdiv_splat_splat_v8f32:
; AVX: # %bb.0:
-; AVX-NEXT: vrcpps %ymm1, %ymm2
-; AVX-NEXT: vmulps %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX-NEXT: vsubps %xmm1, %xmm3, %xmm1
-; AVX-NEXT: vmulps %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
; SSE-LABEL: fsub_const_op0_splat_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: subpd %xmm0, %xmm1
+; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
; AVX-LABEL: fsub_const_op0_splat_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
define <2 x double> @splat0_fadd_v2f64(<2 x double> %vx, <2 x double> %vy) {
; SSE-LABEL: splat0_fadd_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: addpd %xmm1, %xmm0
+; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: splat0_fadd_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
%b = fadd <2 x double> %vx, %vy
define <4 x double> @splat0_fsub_v4f64(double %x, double %y) {
; SSE-LABEL: splat0_fsub_v4f64:
; SSE: # %bb.0:
-; SSE-NEXT: subpd %xmm1, %xmm0
+; SSE-NEXT: subsd %xmm1, %xmm0
; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT: movapd %xmm0, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: splat0_fsub_v4f64:
; AVX: # %bb.0:
-; AVX-NEXT: vsubpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vsubsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
define <4 x float> @splat0_fmul_v4f32(<4 x float> %vx, <4 x float> %vy) {
; SSE-LABEL: splat0_fmul_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: mulps %xmm1, %xmm0
+; SSE-NEXT: mulss %xmm1, %xmm0
; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: splat0_fmul_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
%b = fmul fast <4 x float> %vx, %vy
define <8 x float> @splat0_fdiv_v8f32(<8 x float> %vx, <8 x float> %vy) {
; SSE-LABEL: splat0_fdiv_v8f32:
; SSE: # %bb.0:
-; SSE-NEXT: rcpps %xmm2, %xmm3
-; SSE-NEXT: mulps %xmm3, %xmm2
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; SSE-NEXT: subps %xmm2, %xmm1
-; SSE-NEXT: mulps %xmm3, %xmm1
-; SSE-NEXT: addps %xmm3, %xmm1
-; SSE-NEXT: mulps %xmm0, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: divss %xmm2, %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
+; SSE-NEXT: movaps %xmm0, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: splat0_fdiv_v8f32:
; AVX: # %bb.0:
-; AVX-NEXT: vrcpps %ymm1, %ymm2
-; AVX-NEXT: vmulps %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX-NEXT: vsubps %xmm1, %xmm3, %xmm1
-; AVX-NEXT: vmulps %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vaddps %xmm1, %xmm2, %xmm1
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
define <2 x double> @splat0_fadd_const_op1_v2f64(<2 x double> %vx) {
; SSE-LABEL: splat0_fadd_const_op1_v2f64:
; SSE: # %bb.0:
-; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: addpd %xmm0, %xmm1
-; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
-; SSE-NEXT: movapd %xmm1, %xmm0
+; SSE-NEXT: addsd {{.*}}(%rip), %xmm0
+; SSE-NEXT: unpcklpd {{.*#+}} xmm0 = xmm0[0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: splat0_fadd_const_op1_v2f64:
; AVX: # %bb.0:
-; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vaddsd {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: retq
%b = fadd <2 x double> %vx, <double 42.0, double 12.0>
; SSE-LABEL: splat0_fsub_const_op0_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
-; SSE-NEXT: subpd %xmm0, %xmm1
+; SSE-NEXT: subsd %xmm0, %xmm1
; SSE-NEXT: unpcklpd {{.*#+}} xmm1 = xmm1[0,0]
; SSE-NEXT: movapd %xmm1, %xmm0
; SSE-NEXT: retq
; AVX-LABEL: splat0_fsub_const_op0_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
-; AVX-NEXT: vsubpd %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vsubsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
define <4 x float> @splat0_fmul_const_op1_v4f32(<4 x float> %vx) {
; SSE-LABEL: splat0_fmul_const_op1_v4f32:
; SSE: # %bb.0:
-; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; SSE-NEXT: mulps %xmm0, %xmm1
-; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
-; SSE-NEXT: movaps %xmm1, %xmm0
+; SSE-NEXT: mulss {{.*}}(%rip), %xmm0
+; SSE-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; SSE-NEXT: retq
;
; AVX-LABEL: splat0_fmul_const_op1_v4f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vmulss {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: retq
%b = fmul fast <4 x float> %vx, <float 6.0, float -1.0, float 1.0, float 7.0>
;
; AVX-LABEL: splat0_fdiv_const_op1_v8f32:
; AVX: # %bb.0:
-; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX-NEXT: vrcpps %ymm1, %ymm1
-; AVX-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX-NEXT: vsubps %xmm1, %xmm2, %xmm2
-; AVX-NEXT: vmulps %xmm2, %xmm1, %xmm2
-; AVX-NEXT: vaddps %xmm2, %xmm1, %xmm1
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq
define <8 x float> @splat0_fdiv_const_op0_v8f32(<8 x float> %vx) {
; SSE-LABEL: splat0_fdiv_const_op0_v8f32:
; SSE: # %bb.0:
-; SSE-NEXT: rcpps %xmm0, %xmm2
-; SSE-NEXT: mulps %xmm2, %xmm0
-; SSE-NEXT: movaps {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; SSE-NEXT: subps %xmm0, %xmm1
-; SSE-NEXT: mulps %xmm2, %xmm1
-; SSE-NEXT: addps %xmm2, %xmm1
+; SSE-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; SSE-NEXT: divss %xmm0, %xmm1
; SSE-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0,0,0]
; SSE-NEXT: movaps %xmm1, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: splat0_fdiv_const_op0_v8f32:
; AVX: # %bb.0:
-; AVX-NEXT: vrcpps %ymm0, %ymm1
-; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
-; AVX-NEXT: vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
-; AVX-NEXT: vsubps %xmm0, %xmm2, %xmm0
-; AVX-NEXT: vmulps %xmm0, %xmm1, %xmm0
-; AVX-NEXT: vaddps %xmm0, %xmm1, %xmm0
+; AVX-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
+; AVX-NEXT: vdivss %xmm0, %xmm1, %xmm0
; AVX-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT: retq