return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones);
}
-/// Check if truncation with saturation form type \p SrcVT to \p DstVT
-/// is valid for the given \p Subtarget.
-static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT,
- const X86Subtarget &Subtarget) {
- if (!Subtarget.hasAVX512())
- return false;
-
- // FIXME: Scalar type may be supported if we move it to vector register.
- if (!SrcVT.isVector())
- return false;
-
- EVT SrcElVT = SrcVT.getScalarType();
- EVT DstElVT = DstVT.getScalarType();
- if (DstElVT != MVT::i8 && DstElVT != MVT::i16 && DstElVT != MVT::i32)
- return false;
- if (SrcVT.is512BitVector() || Subtarget.hasVLX())
- return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI();
- return false;
-}
-
/// Detect patterns of truncation with unsigned saturation:
///
/// 1. (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type).
static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
SelectionDAG &DAG,
const X86Subtarget &Subtarget) {
- if (!Subtarget.hasSSE2())
+ if (!Subtarget.hasSSE2() || !VT.isVector())
return SDValue();
- EVT SVT = VT.getScalarType();
+ EVT SVT = VT.getVectorElementType();
EVT InVT = In.getValueType();
- EVT InSVT = InVT.getScalarType();
- const TargetLowering &TLI = DAG.getTargetLoweringInfo();
- if (TLI.isTypeLegal(InVT) && TLI.isTypeLegal(VT) &&
- isSATValidOnAVX512Subtarget(InVT, VT, Subtarget)) {
- if (auto SSatVal = detectSSatPattern(In, VT))
- return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal);
- if (auto USatVal = detectUSatPattern(In, VT, DAG, DL))
- return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
- }
+ EVT InSVT = InVT.getVectorElementType();
// If we're clamping a signed 32-bit vector to 0-255 and the 32-bit vector is
// split across two registers. We can use a packusdw+perm to clamp to 0-65535
(Subtarget.hasVLX() || InVT.getSizeInBits() > 256) &&
!(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256);
- if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
- !PreferAVX512 &&
+ if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 &&
+ VT.getSizeInBits() >= 64 &&
(SVT == MVT::i8 || SVT == MVT::i16) &&
(InSVT == MVT::i16 || InSVT == MVT::i32)) {
if (auto USatVal = detectSSatPattern(In, VT, true)) {
// vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW).
// Only do this when the result is at least 64 bits or we'll leaving
// dangling PACKSSDW nodes.
- if (SVT == MVT::i8 && InSVT == MVT::i32 &&
- VT.getVectorNumElements() >= 8) {
+ if (SVT == MVT::i8 && InSVT == MVT::i32) {
EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16,
VT.getVectorNumElements());
SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL,
return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG,
Subtarget);
}
+
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 &&
+ Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI())) {
+ unsigned TruncOpc;
+ SDValue SatVal;
+ if (auto SSatVal = detectSSatPattern(In, VT)) {
+ SatVal = SSatVal;
+ TruncOpc = X86ISD::VTRUNCS;
+ } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) {
+ SatVal = USatVal;
+ TruncOpc = X86ISD::VTRUNCUS;
+ }
+ if (SatVal) {
+ unsigned ResElts = VT.getVectorNumElements();
+ // If the input type is less than 512 bits and we don't have VLX, we need
+ // to widen to 512 bits.
+ if (!Subtarget.hasVLX() && !InVT.is512BitVector()) {
+ unsigned NumConcats = 512 / InVT.getSizeInBits();
+ ResElts *= NumConcats;
+ SmallVector<SDValue, 4> ConcatOps(NumConcats, DAG.getUNDEF(InVT));
+ ConcatOps[0] = SatVal;
+ InVT = EVT::getVectorVT(*DAG.getContext(), InSVT,
+ NumConcats * InVT.getVectorNumElements());
+ SatVal = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, ConcatOps);
+ }
+ // Widen the result if its narrower than 128 bits.
+ if (ResElts * SVT.getSizeInBits() < 128)
+ ResElts = 128 / SVT.getSizeInBits();
+ EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), SVT, ResElts);
+ SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal);
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getIntPtrConstant(0, DL));
+ }
+ }
+
return SDValue();
}
}
define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) {
-; ALL-LABEL: usat_trunc_wb_128:
-; ALL: ## %bb.0:
-; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
-; ALL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; ALL-NEXT: retq
+; KNL-LABEL: usat_trunc_wb_128:
+; KNL: ## %bb.0:
+; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
+; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; KNL-NEXT: retq
+;
+; SKX-LABEL: usat_trunc_wb_128:
+; SKX: ## %bb.0:
+; SKX-NEXT: vpmovuswb %xmm0, %xmm0
+; SKX-NEXT: retq
%x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%x6 = trunc <8 x i16> %x5 to <8 x i8>
define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) {
; KNL-LABEL: usat_trunc_db_256:
; KNL: ## %bb.0:
-; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; KNL-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; KNL-NEXT: vpmovdb %zmm0, %xmm0
+; KNL-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0
+; KNL-NEXT: vpmovusdb %zmm0, %xmm0
; KNL-NEXT: vzeroupper
; KNL-NEXT: retq
;
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $12, %k0, %k0
; AVX512F-NEXT: kshiftrw $12, %k0, %k1
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0
; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB4_1
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftld $28, %k0, %k0
; AVX512BW-NEXT: kshiftrd $28, %k0, %k1
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB5_1
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $60, %k0, %k0
; AVX512BW-NEXT: kshiftrq $60, %k0, %k1
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k1
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
-; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0
; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlw $14, %k0, %k0
; AVX512BW-NEXT: kshiftrw $14, %k0, %k1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
-; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
-; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB7_1
; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftld $30, %k0, %k0
; AVX512BW-NEXT: kshiftrd $30, %k0, %k1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
-; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512BW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
-; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB8_1
; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $62, %k0, %k0
; AVX512BW-NEXT: kshiftrq $62, %k0, %k1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
-; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: truncstore_v4i32_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB14_1
; AVX512BW-LABEL: truncstore_v4i32_v4i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $60, %k0, %k0
; AVX512BW-NEXT: kshiftrq $60, %k0, %k1
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $12, %k0, %k0
; AVX512F-NEXT: kshiftrw $12, %k0, %k1
-; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlw $12, %k0, %k0
; AVX512BW-NEXT: kshiftrw $12, %k0, %k1
-; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB4_1
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftld $28, %k0, %k0
; AVX512BW-NEXT: kshiftrd $28, %k0, %k1
-; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB5_1
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $60, %k0, %k0
; AVX512BW-NEXT: kshiftrq $60, %k0, %k1
-; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
; AVX512F-NEXT: kshiftlw $14, %k0, %k0
; AVX512F-NEXT: kshiftrw $14, %k0, %k1
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlw $14, %k0, %k0
; AVX512BW-NEXT: kshiftrw $14, %k0, %k1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB7_1
; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftld $30, %k0, %k0
; AVX512BW-NEXT: kshiftrd $30, %k0, %k1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512BW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB8_1
; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $62, %k0, %k0
; AVX512BW-NEXT: kshiftrq $62, %k0, %k1
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: truncstore_v8i32_v8i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB11_1
; AVX512BW-LABEL: truncstore_v8i32_v8i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftld $24, %k0, %k0
; AVX512BW-NEXT: kshiftrd $24, %k0, %k1
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: truncstore_v8i32_v8i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB12_1
; AVX512BW-LABEL: truncstore_v8i32_v8i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $56, %k0, %k0
; AVX512BW-NEXT: kshiftrq $56, %k0, %k1
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: truncstore_v4i32_v4i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB13_1
; AVX512BW-LABEL: truncstore_v4i32_v4i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftld $28, %k0, %k0
; AVX512BW-NEXT: kshiftrd $28, %k0, %k1
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: truncstore_v4i32_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: kmovw %k0, %eax
; AVX512F-NEXT: testb $1, %al
; AVX512F-NEXT: jne .LBB14_1
; AVX512BW-LABEL: truncstore_v4i32_v4i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $60, %k0, %k0
; AVX512BW-NEXT: kshiftrq $60, %k0, %k1
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-LABEL: truncstore_v16i16_v16i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kmovw %k0, %k1
-; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512BW-LABEL: truncstore_v8i16_v8i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; AVX512BW-NEXT: vptestmw %zmm1, %zmm1, %k0
; AVX512BW-NEXT: kshiftlq $56, %k0, %k0
; AVX512BW-NEXT: kshiftrq $56, %k0, %k1
-; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1}
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_packus_v2i64_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_packus_v2i64_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512VL-NEXT: vpmovusqd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_packus_v2i64_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_packus_v2i64_v2i32:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BWVL-NEXT: vpmovusqd %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_packus_v2i64_v2i32:
; SKX: # %bb.0:
-; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: vpmovusqd %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <2 x i64> %a0, <i64 4294967295, i64 4294967295>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
; AVX512F-LABEL: trunc_packus_v2i64_v2i32_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_packus_v2i64_v2i32_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_packus_v4i64_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_packus_v4i64_v4i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_packus_v2i64_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_packus_v2i64_v2i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpmovusqw %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_packus_v2i64_v2i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_packus_v2i64_v2i16:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BWVL-NEXT: vpmovusqw %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_packus_v2i64_v2i16:
; SKX: # %bb.0:
-; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; SKX-NEXT: vpmovusqw %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <2 x i64> %a0, <i64 65535, i64 65535>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
; AVX512F-LABEL: trunc_packus_v2i64_v2i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_packus_v2i64_v2i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_packus_v4i64_v4i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_packus_v4i64_v4i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_packus_v4i64_v4i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_packus_v4i64_v4i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_packus_v2i64_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_packus_v2i64_v2i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpmovusqb %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_packus_v2i64_v2i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_packus_v2i64_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BWVL-NEXT: vpmovusqb %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_packus_v2i64_v2i8:
; SKX: # %bb.0:
-; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SKX-NEXT: vpmovusqb %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <2 x i64> %a0, <i64 255, i64 255>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255>
; AVX512F-LABEL: trunc_packus_v2i64_v2i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_packus_v2i64_v2i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_packus_v4i64_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_packus_v4i64_v4i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_packus_v4i64_v4i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_packus_v4i64_v4i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_packus_v4i32_v4i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_packus_v4i32_v4i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpmovusdb %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_packus_v4i32_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BWVL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BWVL-NEXT: vpmovusdb %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_packus_v4i32_v4i8:
; SKX: # %bb.0:
-; SKX-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; SKX-NEXT: vpmovusdb %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <4 x i32> %a0, <i32 255, i32 255, i32 255, i32 255>
%2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 255, i32 255, i32 255, i32 255>
;
; AVX512F-LABEL: trunc_packus_v4i32_v4i8_store:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_packus_v4i32_v4i8_store:
;
; AVX512BW-LABEL: trunc_packus_v4i32_v4i8_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i8_store:
; AVX512F-LABEL: trunc_ssat_v2i64_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
-; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512VL-NEXT: vpmovsqd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
-; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BWVL-NEXT: vpmovsqd %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_ssat_v2i64_v2i32:
; SKX: # %bb.0:
-; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: vpmovsqd %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <2 x i64> %a0, <i64 2147483647, i64 2147483647>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 2147483647, i64 2147483647>
; AVX512F-LABEL: trunc_ssat_v2i64_v2i32_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
-; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968]
-; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_ssat_v4i64_v4i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_ssat_v4i64_v4i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0
+; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_ssat_v2i64_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
-; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpmovsqw %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
-; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BWVL-NEXT: vpmovsqw %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_ssat_v2i64_v2i16:
; SKX: # %bb.0:
-; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; SKX-NEXT: vpmovsqw %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <2 x i64> %a0, <i64 32767, i64 32767>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 32767, i64 32767>
; AVX512F-LABEL: trunc_ssat_v2i64_v2i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
-; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848]
-; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_ssat_v4i64_v4i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_ssat_v4i64_v4i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_ssat_v2i64_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
-; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpmovsqb %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
-; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BWVL-NEXT: vpmovsqb %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_ssat_v2i64_v2i8:
; SKX: # %bb.0:
-; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SKX-NEXT: vpmovsqb %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <2 x i64> %a0, <i64 127, i64 127>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 127, i64 127>
; AVX512F-LABEL: trunc_ssat_v2i64_v2i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127]
-; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
-; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127]
-; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488]
-; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_ssat_v4i64_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_ssat_v4i64_v4i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_ssat_v4i32_v4i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512VL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpmovsdb %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BWVL-NEXT: vpmovsdb %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_ssat_v4i32_v4i8:
; SKX: # %bb.0:
-; SKX-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; SKX-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; SKX-NEXT: vpmovsdb %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp slt <4 x i32> %a0, <i32 127, i32 127, i32 127, i32 127>
%2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 127, i32 127, i32 127, i32 127>
;
; AVX512F-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8_store:
;
; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127]
-; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168]
-; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8_store:
; AVX512F-LABEL: trunc_usat_v2i64_v2i32:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i32:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512VL-NEXT: vpmovusqd %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i32:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BWVL-NEXT: vpmovusqd %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i32:
; SKX: # %bb.0:
-; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; SKX-NEXT: vpmovusqd %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp ult <2 x i64> %a0, <i64 4294967295, i64 4294967295>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 4294967295, i64 4294967295>
; AVX512F-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_usat_v2i64_v2i32_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295]
-; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_usat_v2i64_v2i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512VL-NEXT: vpmovusqw %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BWVL-NEXT: vpmovusqw %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i16:
; SKX: # %bb.0:
-; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; SKX-NEXT: vpmovusqw %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp ult <2 x i64> %a0, <i64 65535, i64 65535>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 65535, i64 65535>
; AVX512F-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
-; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_usat_v2i64_v2i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535]
-; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15]
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_usat_v4i64_v4i16:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i64_v4i16:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i64_v4i16_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_usat_v4i64_v4i16_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i32_v4i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i32_v4i16:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: vpmovusdw %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i32_v4i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i16:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512BWVL-NEXT: vpmovusdw %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v4i32_v4i16:
; SKX: # %bb.0:
-; SKX-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; SKX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; SKX-NEXT: vpmovusdw %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp ult <4 x i32> %a0, <i32 65535, i32 65535, i32 65535, i32 65535>
%2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
;
; AVX512F-LABEL: trunc_usat_v4i32_v4i16_store:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i32_v4i16_store:
;
; AVX512BW-LABEL: trunc_usat_v4i32_v4i16_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535]
-; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i16_store:
;
; AVX512F-LABEL: trunc_usat_v8i32_v8i16:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v8i32_v8i16:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535]
-; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_usat_v2i64_v2i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v2i64_v2i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpmovusqb %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v2i64_v2i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BWVL-NEXT: vpmovusqb %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v2i64_v2i8:
; SKX: # %bb.0:
-; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; SKX-NEXT: vpmovusqb %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp ult <2 x i64> %a0, <i64 255, i64 255>
%2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> <i64 255, i64 255>
; AVX512F-LABEL: trunc_usat_v2i64_v2i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_usat_v2i64_v2i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255]
-; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX512F-LABEL: trunc_usat_v4i64_v4i8:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i64_v4i8:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i64_v4i8_store:
; AVX512F: # %bb.0:
; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512F-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
; AVX512BW-LABEL: trunc_usat_v4i64_v4i8_store:
; AVX512BW: # %bb.0:
; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0
-; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0
+; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512F-LABEL: trunc_usat_v4i32_v4i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i32_v4i8:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512VL-NEXT: vpmovusdb %xmm0, %xmm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v4i32_v4i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i8:
; AVX512BWVL: # %bb.0:
-; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BWVL-NEXT: vpmovusdb %xmm0, %xmm0
; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v4i32_v4i8:
; SKX: # %bb.0:
-; SKX-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0
-; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; SKX-NEXT: vpmovusdb %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp ult <4 x i32> %a0, <i32 255, i32 255, i32 255, i32 255>
%2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> <i32 255, i32 255, i32 255, i32 255>
;
; AVX512F-LABEL: trunc_usat_v4i32_v4i8_store:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: vmovd %xmm0, (%rdi)
+; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512VL-LABEL: trunc_usat_v4i32_v4i8_store:
;
; AVX512BW-LABEL: trunc_usat_v4i32_v4i8_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255]
-; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0
-; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u]
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovd %xmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i8_store:
;
; AVX512F-LABEL: trunc_usat_v8i32_v8i8:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
;
; AVX512BW-LABEL: trunc_usat_v8i32_v8i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
;
; AVX512F-LABEL: trunc_usat_v8i32_v8i8_store:
; AVX512F: # %bb.0:
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, (%rdi)
; AVX512F-NEXT: vzeroupper
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: trunc_usat_v8i32_v8i8_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255]
-; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
; AVX-NEXT: retq
;
-; AVX512-LABEL: trunc_usat_v8i16_v8i8:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
-; AVX512-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX512F-LABEL: trunc_usat_v8i16_v8i8:
+; AVX512F: # %bb.0:
+; AVX512F-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX512F-NEXT: retq
+;
+; AVX512VL-LABEL: trunc_usat_v8i16_v8i8:
+; AVX512VL: # %bb.0:
+; AVX512VL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
+; AVX512VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX512VL-NEXT: retq
+;
+; AVX512BW-LABEL: trunc_usat_v8i16_v8i8:
+; AVX512BW: # %bb.0:
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
+; AVX512BW-NEXT: vzeroupper
+; AVX512BW-NEXT: retq
+;
+; AVX512BWVL-LABEL: trunc_usat_v8i16_v8i8:
+; AVX512BWVL: # %bb.0:
+; AVX512BWVL-NEXT: vpmovuswb %xmm0, %xmm0
+; AVX512BWVL-NEXT: retq
;
; SKX-LABEL: trunc_usat_v8i16_v8i8:
; SKX: # %bb.0:
-; SKX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
-; SKX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; SKX-NEXT: vpmovuswb %xmm0, %xmm0
; SKX-NEXT: retq
%1 = icmp ult <8 x i16> %a0, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
%2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>
;
; AVX512BW-LABEL: trunc_usat_v8i16_v8i8_store:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0
-; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0
+; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
+; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT: vmovq %xmm0, (%rdi)
+; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq
;
; AVX512BWVL-LABEL: trunc_usat_v8i16_v8i8_store:
;
; AVX512BW-LABEL: trunc_usat_v16i16_v16i8:
; AVX512BW: # %bb.0:
-; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0
-; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
+; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
+; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0
; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; AVX512BW-NEXT: vzeroupper
; AVX512BW-NEXT: retq