; AVX-FAST-LABEL: fadd_reduce_v8f32:
; AVX-FAST: # %bb.0:
; AVX-FAST-NEXT: vextractf128 $1, %ymm1, %xmm0
-; AVX-FAST-NEXT: vaddps %ymm0, %ymm1, %ymm0
+; AVX-FAST-NEXT: vaddps %xmm0, %xmm1, %xmm0
; AVX-FAST-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
-; AVX-FAST-NEXT: vaddps %ymm1, %ymm0, %ymm0
-; AVX-FAST-NEXT: vhaddps %ymm0, %ymm0, %ymm0
-; AVX-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-FAST-NEXT: vaddps %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT: vhaddps %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vzeroupper
; AVX-FAST-NEXT: retq
%r = call fast float @llvm.experimental.vector.reduce.fadd.f32.f32.v8f32(float %a0, <8 x float> %a1)
; AVX-FAST-LABEL: fadd_reduce_v4f64:
; AVX-FAST: # %bb.0:
; AVX-FAST-NEXT: vextractf128 $1, %ymm1, %xmm0
-; AVX-FAST-NEXT: vaddpd %ymm0, %ymm1, %ymm0
-; AVX-FAST-NEXT: vhaddpd %ymm0, %ymm0, %ymm0
-; AVX-FAST-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX-FAST-NEXT: vaddpd %xmm0, %xmm1, %xmm0
+; AVX-FAST-NEXT: vhaddpd %xmm0, %xmm0, %xmm0
; AVX-FAST-NEXT: vzeroupper
; AVX-FAST-NEXT: retq
%r = call fast double @llvm.experimental.vector.reduce.fadd.f64.f64.v4f64(double %a0, <4 x double> %a1)
; AVX-SLOW-NEXT: vzeroupper
; AVX-SLOW-NEXT: retq
;
-; AVX1-FAST-LABEL: partial_reduction_add_v8i32:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: vmovd %xmm0, %eax
-; AVX1-FAST-NEXT: vzeroupper
-; AVX1-FAST-NEXT: retq
-;
-; AVX2-FAST-LABEL: partial_reduction_add_v8i32:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-FAST-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX2-FAST-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX2-FAST-NEXT: vmovd %xmm0, %eax
-; AVX2-FAST-NEXT: vzeroupper
-; AVX2-FAST-NEXT: retq
-;
-; AVX512-FAST-LABEL: partial_reduction_add_v8i32:
-; AVX512-FAST: # %bb.0:
-; AVX512-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-FAST-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX512-FAST-NEXT: vphaddd %ymm0, %ymm0, %ymm0
-; AVX512-FAST-NEXT: vmovd %xmm0, %eax
-; AVX512-FAST-NEXT: vzeroupper
-; AVX512-FAST-NEXT: retq
+; AVX-FAST-LABEL: partial_reduction_add_v8i32:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-FAST-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT: vphaddd %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vmovd %xmm0, %eax
+; AVX-FAST-NEXT: vzeroupper
+; AVX-FAST-NEXT: retq
%x23 = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x0213 = add <8 x i32> %x, %x23
%x13 = shufflevector <8 x i32> %x0213, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
; AVX-SLOW-NEXT: vzeroupper
; AVX-SLOW-NEXT: retq
;
-; AVX1-FAST-LABEL: partial_reduction_sub_v8i32:
-; AVX1-FAST: # %bb.0:
-; AVX1-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX1-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
-; AVX1-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
-; AVX1-FAST-NEXT: vmovd %xmm0, %eax
-; AVX1-FAST-NEXT: vzeroupper
-; AVX1-FAST-NEXT: retq
-;
-; AVX2-FAST-LABEL: partial_reduction_sub_v8i32:
-; AVX2-FAST: # %bb.0:
-; AVX2-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX2-FAST-NEXT: vpsubd %ymm1, %ymm0, %ymm0
-; AVX2-FAST-NEXT: vphsubd %ymm0, %ymm0, %ymm0
-; AVX2-FAST-NEXT: vmovd %xmm0, %eax
-; AVX2-FAST-NEXT: vzeroupper
-; AVX2-FAST-NEXT: retq
-;
-; AVX512-FAST-LABEL: partial_reduction_sub_v8i32:
-; AVX512-FAST: # %bb.0:
-; AVX512-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
-; AVX512-FAST-NEXT: vpsubd %ymm1, %ymm0, %ymm0
-; AVX512-FAST-NEXT: vphsubd %ymm0, %ymm0, %ymm0
-; AVX512-FAST-NEXT: vmovd %xmm0, %eax
-; AVX512-FAST-NEXT: vzeroupper
-; AVX512-FAST-NEXT: retq
+; AVX-FAST-LABEL: partial_reduction_sub_v8i32:
+; AVX-FAST: # %bb.0:
+; AVX-FAST-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
+; AVX-FAST-NEXT: vpsubd %xmm1, %xmm0, %xmm0
+; AVX-FAST-NEXT: vphsubd %xmm0, %xmm0, %xmm0
+; AVX-FAST-NEXT: vmovd %xmm0, %eax
+; AVX-FAST-NEXT: vzeroupper
+; AVX-FAST-NEXT: retq
%x23 = shufflevector <8 x i32> %x, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
%x0213 = sub <8 x i32> %x, %x23
%x13 = shufflevector <8 x i32> %x0213, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>