; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; SSE-32: # %bb.0:
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-32-NEXT: packssdw %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; SSE-64: # %bb.0:
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-64-NEXT: packssdw %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict") #0
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f64(<2 x double> %a,
metadata !"fpexcept.strict") #0
; SSE-32: # %bb.0:
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-32-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-32-NEXT: packssdw %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-64-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-64-NEXT: packssdw %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VL-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptosi.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict") #0
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VL-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VL-NEXT: ret{{[l|q]}}
;
; AVX512DQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i16:
; AVX512VLDQ: # %bb.0:
; AVX512VLDQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512VLDQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512VLDQ-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512VLDQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX512VLDQ-NEXT: ret{{[l|q]}}
%ret = call <2 x i16> @llvm.experimental.constrained.fptoui.v2i16.v2f32(<2 x float> %a,
metadata !"fpexcept.strict") #0
; SSE-32-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; SSE-32: # %bb.0:
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; SSE-32-NEXT: packuswb %xmm0, %xmm0
-; SSE-32-NEXT: packuswb %xmm0, %xmm0
+; SSE-32-NEXT: packssdw %xmm0, %xmm0
+; SSE-32-NEXT: packsswb %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; SSE-64: # %bb.0:
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-64-NEXT: packuswb %xmm0, %xmm0
-; SSE-64-NEXT: packuswb %xmm0, %xmm0
+; SSE-64-NEXT: packssdw %xmm0, %xmm0
+; SSE-64-NEXT: packsswb %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; AVX-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512DQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f64_to_v2i8:
; SSE-32-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; SSE-32: # %bb.0:
; SSE-32-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-32-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: retl
; SSE-64-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; SSE-64: # %bb.0:
; SSE-64-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-64-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: retq
; AVX-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512DQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f64_to_v2i8:
; SSE-32: # %bb.0:
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
-; SSE-32-NEXT: packuswb %xmm0, %xmm0
-; SSE-32-NEXT: packuswb %xmm0, %xmm0
+; SSE-32-NEXT: packssdw %xmm0, %xmm0
+; SSE-32-NEXT: packsswb %xmm0, %xmm0
; SSE-32-NEXT: retl
;
; SSE-64-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-64-NEXT: packuswb %xmm0, %xmm0
-; SSE-64-NEXT: packuswb %xmm0, %xmm0
+; SSE-64-NEXT: packssdw %xmm0, %xmm0
+; SSE-64-NEXT: packsswb %xmm0, %xmm0
; SSE-64-NEXT: retq
;
; AVX-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptosi_v2f32_to_v2i8:
; SSE-32: # %bb.0:
; SSE-32-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-32-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-32-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: packuswb %xmm0, %xmm0
; SSE-32-NEXT: retl
; SSE-64: # %bb.0:
; SSE-64-NEXT: movq {{.*#+}} xmm0 = xmm0[0],zero
; SSE-64-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-64-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: packuswb %xmm0, %xmm0
; SSE-64-NEXT: retq
; AVX: # %bb.0:
; AVX-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: ret{{[l|q]}}
;
; AVX512F-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: ret{{[l|q]}}
;
; AVX512VL-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vmovq {{.*#+}} xmm0 = xmm0[0],zero
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: ret{{[l|q]}}
;
; AVX512VLDQ-LABEL: strict_vector_fptoui_v2f32_to_v2i8:
; SSE-LABEL: fptosi_2f32_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f32_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
-; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_2f32_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_2f32_to_2i8:
; AVX512DQ-LABEL: fptosi_2f32_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_2f32_to_2i8:
; SSE-LABEL: fptosi_2f32_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f32_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = fptosi <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
; SSE-LABEL: fptoui_2f32_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttps2dq %xmm0, %xmm0
-; SSE-NEXT: pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
; VEX-LABEL: fptoui_2f32_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttps2dq %xmm0, %xmm0
-; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f32_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f32_to_2i8:
; AVX512DQ-LABEL: fptoui_2f32_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f32_to_2i8:
; AVX-LABEL: fptoui_2f32_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttps2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = fptoui <2 x float> %a to <2 x i16>
ret <2 x i16> %cvt
; SSE-LABEL: fptosi_2f64_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
-; SSE-NEXT: packuswb %xmm0, %xmm0
+; SSE-NEXT: packssdw %xmm0, %xmm0
+; SSE-NEXT: packsswb %xmm0, %xmm0
; SSE-NEXT: retq
;
; VEX-LABEL: fptosi_2f64_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; VEX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; VEX-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptosi_2f64_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptosi_2f64_to_2i8:
; AVX512DQ-LABEL: fptosi_2f64_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpacksswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptosi_2f64_to_2i8:
; SSE-LABEL: fptosi_2f64_to_2i16:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; SSE-NEXT: packssdw %xmm0, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: fptosi_2f64_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackssdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = fptosi <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt
; SSE-LABEL: fptoui_2f64_to_2i8:
; SSE: # %bb.0:
; SSE-NEXT: cvttpd2dq %xmm0, %xmm0
-; SSE-NEXT: andpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: packuswb %xmm0, %xmm0
; SSE-NEXT: retq
; VEX-LABEL: fptoui_2f64_to_2i8:
; VEX: # %bb.0:
; VEX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; VEX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; VEX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; VEX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; VEX-NEXT: retq
;
; AVX512F-LABEL: fptoui_2f64_to_2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: fptoui_2f64_to_2i8:
; AVX512DQ-LABEL: fptoui_2f64_to_2i8:
; AVX512DQ: # %bb.0:
; AVX512DQ-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX512DQ-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512DQ-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512DQ-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX512DQ-NEXT: retq
;
; AVX512VLDQ-LABEL: fptoui_2f64_to_2i8:
; AVX-LABEL: fptoui_2f64_to_2i16:
; AVX: # %bb.0:
; AVX-NEXT: vcvttpd2dq %xmm0, %xmm0
-; AVX-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
%cvt = fptoui <2 x double> %a to <2 x i16>
ret <2 x i16> %cvt