From 25eb219959f7750e896e9ffab279cd2bc77478b9 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Sun, 13 Oct 2019 19:07:28 +0000 Subject: [PATCH] [X86] Enable use of avx512 saturating truncate instructions in more cases. This enables use of the saturating truncate instructions when the result type is less than 128 bits. It also enables the use of saturating truncate instructions on KNL when the input is less than 512 bits. We can do this by widening the input and then extracting the result. llvm-svn: 374731 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 77 ++++---- llvm/test/CodeGen/X86/avx512-trunc.ll | 20 +- llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll | 76 ++----- llvm/test/CodeGen/X86/masked_store_trunc_usat.ll | 92 +++------ llvm/test/CodeGen/X86/vector-trunc-packus.ll | 138 +++++-------- llvm/test/CodeGen/X86/vector-trunc-ssat.ll | 196 +++++------------- llvm/test/CodeGen/X86/vector-trunc-usat.ll | 242 ++++++++++------------- 7 files changed, 299 insertions(+), 542 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b992b68..52d6f67 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -39713,26 +39713,6 @@ static SDValue foldVectorXorShiftIntoCmp(SDNode *N, SelectionDAG &DAG, return DAG.getNode(X86ISD::PCMPGT, SDLoc(N), VT, Shift.getOperand(0), Ones); } -/// Check if truncation with saturation form type \p SrcVT to \p DstVT -/// is valid for the given \p Subtarget. -static bool isSATValidOnAVX512Subtarget(EVT SrcVT, EVT DstVT, - const X86Subtarget &Subtarget) { - if (!Subtarget.hasAVX512()) - return false; - - // FIXME: Scalar type may be supported if we move it to vector register. 
- if (!SrcVT.isVector()) - return false; - - EVT SrcElVT = SrcVT.getScalarType(); - EVT DstElVT = DstVT.getScalarType(); - if (DstElVT != MVT::i8 && DstElVT != MVT::i16 && DstElVT != MVT::i32) - return false; - if (SrcVT.is512BitVector() || Subtarget.hasVLX()) - return SrcElVT.getSizeInBits() >= 32 || Subtarget.hasBWI(); - return false; -} - /// Detect patterns of truncation with unsigned saturation: /// /// 1. (truncate (umin (x, unsigned_max_of_dest_type)) to dest_type). @@ -39833,20 +39813,12 @@ static SDValue detectSSatPattern(SDValue In, EVT VT, bool MatchPackUS = false) { static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, SelectionDAG &DAG, const X86Subtarget &Subtarget) { - if (!Subtarget.hasSSE2()) + if (!Subtarget.hasSSE2() || !VT.isVector()) return SDValue(); - EVT SVT = VT.getScalarType(); + EVT SVT = VT.getVectorElementType(); EVT InVT = In.getValueType(); - EVT InSVT = InVT.getScalarType(); - const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - if (TLI.isTypeLegal(InVT) && TLI.isTypeLegal(VT) && - isSATValidOnAVX512Subtarget(InVT, VT, Subtarget)) { - if (auto SSatVal = detectSSatPattern(In, VT)) - return DAG.getNode(X86ISD::VTRUNCS, DL, VT, SSatVal); - if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) - return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal); - } + EVT InSVT = InVT.getVectorElementType(); // If we're clamping a signed 32-bit vector to 0-255 and the 32-bit vector is // split across two registers. 
We can use a packusdw+perm to clamp to 0-65535 @@ -39875,16 +39847,15 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, (Subtarget.hasVLX() || InVT.getSizeInBits() > 256) && !(!Subtarget.useAVX512Regs() && VT.getSizeInBits() >= 256); - if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) && - !PreferAVX512 && + if (isPowerOf2_32(VT.getVectorNumElements()) && !PreferAVX512 && + VT.getSizeInBits() >= 64 && (SVT == MVT::i8 || SVT == MVT::i16) && (InSVT == MVT::i16 || InSVT == MVT::i32)) { if (auto USatVal = detectSSatPattern(In, VT, true)) { // vXi32 -> vXi8 must be performed as PACKUSWB(PACKSSDW,PACKSSDW). // Only do this when the result is at least 64 bits or we'll leaving // dangling PACKSSDW nodes. - if (SVT == MVT::i8 && InSVT == MVT::i32 && - VT.getVectorNumElements() >= 8) { + if (SVT == MVT::i8 && InSVT == MVT::i32) { EVT MidVT = EVT::getVectorVT(*DAG.getContext(), MVT::i16, VT.getVectorNumElements()); SDValue Mid = truncateVectorWithPACK(X86ISD::PACKSS, MidVT, USatVal, DL, @@ -39902,6 +39873,42 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL, return truncateVectorWithPACK(X86ISD::PACKSS, VT, SSatVal, DL, DAG, Subtarget); } + + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (TLI.isTypeLegal(InVT) && InVT.isVector() && SVT != MVT::i1 && + Subtarget.hasAVX512() && (InSVT != MVT::i16 || Subtarget.hasBWI())) { + unsigned TruncOpc; + SDValue SatVal; + if (auto SSatVal = detectSSatPattern(In, VT)) { + SatVal = SSatVal; + TruncOpc = X86ISD::VTRUNCS; + } else if (auto USatVal = detectUSatPattern(In, VT, DAG, DL)) { + SatVal = USatVal; + TruncOpc = X86ISD::VTRUNCUS; + } + if (SatVal) { + unsigned ResElts = VT.getVectorNumElements(); + // If the input type is less than 512 bits and we don't have VLX, we need + // to widen to 512 bits. 
+ if (!Subtarget.hasVLX() && !InVT.is512BitVector()) { + unsigned NumConcats = 512 / InVT.getSizeInBits(); + ResElts *= NumConcats; + SmallVector<SDValue, 4> ConcatOps(NumConcats, DAG.getUNDEF(InVT)); + ConcatOps[0] = SatVal; + InVT = EVT::getVectorVT(*DAG.getContext(), InSVT, + NumConcats * InVT.getVectorNumElements()); + SatVal = DAG.getNode(ISD::CONCAT_VECTORS, DL, InVT, ConcatOps); + } + // Widen the result if its narrower than 128 bits. + if (ResElts * SVT.getSizeInBits() < 128) + ResElts = 128 / SVT.getSizeInBits(); + EVT TruncVT = EVT::getVectorVT(*DAG.getContext(), SVT, ResElts); + SDValue Res = DAG.getNode(TruncOpc, DL, TruncVT, SatVal); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); + } + } + return SDValue(); } diff --git a/llvm/test/CodeGen/X86/avx512-trunc.ll b/llvm/test/CodeGen/X86/avx512-trunc.ll index 1cce2a9..bf49987 100644 --- a/llvm/test/CodeGen/X86/avx512-trunc.ll +++ b/llvm/test/CodeGen/X86/avx512-trunc.ll @@ -713,11 +713,16 @@ define <16 x i16> @usat_trunc_dw_512(<16 x i32> %i) { } define <8 x i8> @usat_trunc_wb_128(<8 x i16> %i) { -; ALL-LABEL: usat_trunc_wb_128: -; ALL: ## %bb.0: -; ALL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 -; ALL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 -; ALL-NEXT: retq +; KNL-LABEL: usat_trunc_wb_128: +; KNL: ## %bb.0: +; KNL-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; KNL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; KNL-NEXT: retq +; +; SKX-LABEL: usat_trunc_wb_128: +; SKX: ## %bb.0: +; SKX-NEXT: vpmovuswb %xmm0, %xmm0 +; SKX-NEXT: retq %x3 = icmp ult <8 x i16> %i, <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> %x5 = select <8 x i1> %x3, <8 x i16> %i, <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255> %x6 = trunc <8 x i16> %x5 to <8 x i8> @@ -740,9 +745,8 @@ define <16 x i16> @usat_trunc_qw_1024(<16 x i64> %i) { define <16 x i8> @usat_trunc_db_256(<8 x i32> %x) { ; KNL-LABEL: usat_trunc_db_256: ; KNL: ## %bb.0: -; KNL-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; KNL-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; KNL-NEXT: vpmovdb %zmm0, %xmm0 +; KNL-NEXT: ## kill: 
def $ymm0 killed $ymm0 def $zmm0 +; KNL-NEXT: vpmovusdb %zmm0, %xmm0 ; KNL-NEXT: vzeroupper ; KNL-NEXT: retq ; diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll index c170e07..cc73f56 100644 --- a/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll @@ -1717,9 +1717,7 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %p, <4 x i32> %mask ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512F-NEXT: kshiftlw $12, %k0, %k0 ; AVX512F-NEXT: kshiftrw $12, %k0, %k1 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 ; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1740,9 +1738,7 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %p, <4 x i32> %mask ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k1 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 ; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2021,9 +2017,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %p, <4 x i32> %mask ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB4_1 @@ -2063,9 +2057,7 @@ define void @truncstore_v4i64_v4i16(<4 
x i64> %x, <4 x i16>* %p, <4 x i32> %mask ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftld $28, %k0, %k0 ; AVX512BW-NEXT: kshiftrd $28, %k0, %k1 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2351,9 +2343,7 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %p, <4 x i32> %mask) ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB5_1 @@ -2393,9 +2383,7 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %p, <4 x i32> %mask) ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $60, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $60, %k0, %k1 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2544,11 +2532,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, <2 x i32>* %p, <2 x i64> %mask ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512F-NEXT: kshiftlw $14, %k0, %k0 ; AVX512F-NEXT: kshiftrw $14, %k0, %k1 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; 
AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 ; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2568,11 +2552,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, <2 x i32>* %p, <2 x i64> %mask ; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlw $14, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $14, %k0, %k1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 ; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2708,12 +2688,7 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, <2 x i16>* %p, <2 x i64> %mask ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] -; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB7_1 @@ -2739,12 +2714,7 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, <2 x i16>* %p, <2 x i64> %mask ; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftld $30, %k0, %k0 ; AVX512BW-NEXT: kshiftrd $30, %k0, %k1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = 
[18446744073709518848,18446744073709518848] -; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512BW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2887,11 +2857,7 @@ define void @truncstore_v2i64_v2i8(<2 x i64> %x, <2 x i8>* %p, <2 x i64> %mask) ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] -; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB8_1 @@ -2917,11 +2883,7 @@ define void @truncstore_v2i64_v2i8(<2 x i64> %x, <2 x i8>* %p, <2 x i64> %mask) ; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $62, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $62, %k0, %k1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] -; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5417,12 +5379,9 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, <4 x i8>* %p, <4 x i32> %mask) ; AVX512F-LABEL: truncstore_v4i32_v4i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; 
AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] -; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] -; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB14_1 @@ -5458,14 +5417,11 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, <4 x i8>* %p, <4 x i32> %mask) ; AVX512BW-LABEL: truncstore_v4i32_v4i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $60, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $60, %k0, %k1 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] -; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] -; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll index 00fdbf2..ab22ba3b 100644 --- a/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll +++ b/llvm/test/CodeGen/X86/masked_store_trunc_usat.ll @@ -1464,8 +1464,7 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %p, <4 x i32> %mask ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512F-NEXT: kshiftlw $12, %k0, %k0 ; AVX512F-NEXT: kshiftrw $12, %k0, %k1 -; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0 ; 
AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1485,8 +1484,7 @@ define void @truncstore_v4i64_v4i32(<4 x i64> %x, <4 x i32>* %p, <4 x i32> %mask ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlw $12, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $12, %k0, %k1 -; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0 ; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1731,8 +1729,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %p, <4 x i32> %mask ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB4_1 @@ -1772,8 +1769,7 @@ define void @truncstore_v4i64_v4i16(<4 x i64> %x, <4 x i16>* %p, <4 x i32> %mask ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftld $28, %k0, %k0 ; AVX512BW-NEXT: kshiftrd $28, %k0, %k1 -; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2023,8 +2019,7 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %p, <4 x i32> %mask) ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB5_1 @@ -2064,8 
+2059,7 @@ define void @truncstore_v4i64_v4i8(<4 x i64> %x, <4 x i8>* %p, <4 x i32> %mask) ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $60, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $60, %k0, %k1 -; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2193,9 +2187,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, <2 x i32>* %p, <2 x i64> %mask ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512F-NEXT: kshiftlw $14, %k0, %k0 ; AVX512F-NEXT: kshiftrw $14, %k0, %k1 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0 ; AVX512F-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2214,9 +2206,7 @@ define void @truncstore_v2i64_v2i32(<2 x i64> %x, <2 x i32>* %p, <2 x i64> %mask ; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlw $14, %k0, %k0 ; AVX512BW-NEXT: kshiftrw $14, %k0, %k1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0 ; AVX512BW-NEXT: vmovdqu32 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2333,10 +2323,7 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, <2 x i16>* %p, <2 x i64> %mask ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; 
AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB7_1 @@ -2362,10 +2349,7 @@ define void @truncstore_v2i64_v2i16(<2 x i64> %x, <2 x i16>* %p, <2 x i64> %mask ; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftld $30, %k0, %k0 ; AVX512BW-NEXT: kshiftrd $30, %k0, %k1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512BW-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2488,9 +2472,7 @@ define void @truncstore_v2i64_v2i8(<2 x i64> %x, <2 x i8>* %p, <2 x i64> %mask) ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vptestmq %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB8_1 @@ -2516,9 +2498,7 @@ define void @truncstore_v2i64_v2i8(<2 x i64> %x, <2 x i8>* %p, <2 x i64> %mask) ; AVX512BW-NEXT: vptestmq %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $62, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $62, %k0, %k1 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -4279,10 +4259,9 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask ; AVX512F-LABEL: truncstore_v8i32_v8i16: ; AVX512F: # %bb.0: ; 
AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535] -; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB11_1 @@ -4346,12 +4325,11 @@ define void @truncstore_v8i32_v8i16(<8 x i32> %x, <8 x i16>* %p, <8 x i32> %mask ; AVX512BW-LABEL: truncstore_v8i32_v8i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftld $24, %k0, %k0 ; AVX512BW-NEXT: kshiftrd $24, %k0, %k1 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535] -; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -4684,10 +4662,9 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask) ; AVX512F-LABEL: truncstore_v8i32_v8i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB12_1 @@ -4751,12 +4728,11 @@ define void @truncstore_v8i32_v8i8(<8 x i32> %x, <8 x i8>* %p, <8 x i32> %mask) ; AVX512BW-LABEL: truncstore_v8i32_v8i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # 
kill: def $ymm1 killed $ymm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $56, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $56, %k0, %k1 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -4941,10 +4917,9 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, <4 x i16>* %p, <4 x i32> %mask ; AVX512F-LABEL: truncstore_v4i32_v4i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] -; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB13_1 @@ -4980,12 +4955,11 @@ define void @truncstore_v4i32_v4i16(<4 x i32> %x, <4 x i16>* %p, <4 x i32> %mask ; AVX512BW-LABEL: truncstore_v4i32_v4i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftld $28, %k0, %k0 ; AVX512BW-NEXT: kshiftrd $28, %k0, %k1 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] -; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0 ; AVX512BW-NEXT: vmovdqu16 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5169,10 +5143,9 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, <4 x i8>* %p, <4 x i32> %mask) ; AVX512F-LABEL: truncstore_v4i32_v4i8: 
; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512F-NEXT: vptestmd %zmm1, %zmm1, %k0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512F-NEXT: kmovw %k0, %eax ; AVX512F-NEXT: testb $1, %al ; AVX512F-NEXT: jne .LBB14_1 @@ -5208,12 +5181,11 @@ define void @truncstore_v4i32_v4i8(<4 x i32> %x, <4 x i8>* %p, <4 x i32> %mask) ; AVX512BW-LABEL: truncstore_v4i32_v4i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vptestmd %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $60, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $60, %k0, %k1 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -7070,10 +7042,10 @@ define void @truncstore_v16i16_v16i8(<16 x i16> %x, <16 x i8>* %p, <16 x i8> %ma ; AVX512BW-LABEL: truncstore_v16i16_v16i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; AVX512BW-NEXT: vptestmb %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kmovw %k0, %k1 -; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -7370,11 +7342,11 @@ define void @truncstore_v8i16_v8i8(<8 x i16> %x, <8 x i8>* %p, <8 x i16> %mask) ; AVX512BW-LABEL: truncstore_v8i16_v8i8: ; AVX512BW: # %bb.0: ; 
AVX512BW-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; AVX512BW-NEXT: vptestmw %zmm1, %zmm1, %k0 ; AVX512BW-NEXT: kshiftlq $56, %k0, %k0 ; AVX512BW-NEXT: kshiftrq $56, %k0, %k1 -; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0 ; AVX512BW-NEXT: vmovdqu8 %zmm0, (%rdi) {%k1} ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq diff --git a/llvm/test/CodeGen/X86/vector-trunc-packus.ll b/llvm/test/CodeGen/X86/vector-trunc-packus.ll index a95a81f..d382e0d 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-packus.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-packus.ll @@ -119,47 +119,42 @@ define <2 x i32> @trunc_packus_v2i64_v2i32(<2 x i64> %a0) { ; AVX512F-LABEL: trunc_packus_v2i64_v2i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_packus_v2i64_v2i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VL-NEXT: vpmovusqd %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_packus_v2i64_v2i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd 
{{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_packus_v2i64_v2i32: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BWVL-NEXT: vpmovusqd %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_packus_v2i64_v2i32: ; SKX: # %bb.0: -; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SKX-NEXT: vpmovusqd %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <2 x i64> %a0, %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> @@ -277,11 +272,9 @@ define void @trunc_packus_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) { ; AVX512F-LABEL: trunc_packus_v2i64_v2i32_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -296,11 +289,9 @@ define void @trunc_packus_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) { ; AVX512BW-LABEL: trunc_packus_v2i64_v2i32_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BW-NEXT: 
vpmovusqd %zmm0, %ymm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -536,10 +527,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) { ; AVX512F-LABEL: trunc_packus_v4i64_v4i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -555,10 +545,9 @@ define <4 x i32> @trunc_packus_v4i64_v4i32(<4 x i64> %a0) { ; AVX512BW-LABEL: trunc_packus_v4i64_v4i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1132,48 +1121,40 @@ define <2 x i16> @trunc_packus_v2i64_v2i16(<2 x i64> %a0) { ; AVX512F-LABEL: trunc_packus_v2i64_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_packus_v2i64_v2i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 -; 
AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512VL-NEXT: vpmovusqw %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_packus_v2i64_v2i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_packus_v2i64_v2i16: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512BWVL-NEXT: vpmovusqw %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_packus_v2i64_v2i16: ; SKX: # %bb.0: -; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; SKX-NEXT: vpmovusqw %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <2 x i64> %a0, %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> @@ -1320,12 +1301,9 @@ define void @trunc_packus_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16> *%p1) { ; AVX512F-LABEL: trunc_packus_v2i64_v2i16_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; 
AVX512F-NEXT: vmovd %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1340,11 +1318,9 @@ define void @trunc_packus_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16> *%p1) { ; AVX512BW-LABEL: trunc_packus_v2i64_v2i16_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1602,10 +1578,9 @@ define <4 x i16> @trunc_packus_v4i64_v4i16(<4 x i64> %a0) { ; AVX512F-LABEL: trunc_packus_v4i64_v4i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -1620,10 +1595,9 @@ define <4 x i16> @trunc_packus_v4i64_v4i16(<4 x i64> %a0) { ; AVX512BW-LABEL: trunc_packus_v4i64_v4i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -1887,10 +1861,9 @@ define void @trunc_packus_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; AVX512F-LABEL: trunc_packus_v4i64_v4i16_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, 
%xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1906,10 +1879,9 @@ define void @trunc_packus_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; AVX512BW-LABEL: trunc_packus_v4i64_v4i16_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2878,47 +2850,40 @@ define <2 x i8> @trunc_packus_v2i64_v2i8(<2 x i64> %a0) { ; AVX512F-LABEL: trunc_packus_v2i64_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_packus_v2i64_v2i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vpmovusqb %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_packus_v2i64_v2i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} 
xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_packus_v2i64_v2i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-NEXT: vpmovusqb %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_packus_v2i64_v2i8: ; SKX: # %bb.0: -; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 ; SKX-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; SKX-NEXT: vpmaxsq %xmm1, %xmm0, %xmm0 -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; SKX-NEXT: vpmovusqb %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <2 x i64> %a0, %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> @@ -3041,11 +3006,9 @@ define void @trunc_packus_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) { ; AVX512F-LABEL: trunc_packus_v2i64_v2i8_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -3060,11 +3023,9 @@ define void @trunc_packus_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) { ; AVX512BW-LABEL: trunc_packus_v2i64_v2i8_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; 
AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -3303,10 +3264,9 @@ define <4 x i8> @trunc_packus_v4i64_v4i8(<4 x i64> %a0) { ; AVX512F-LABEL: trunc_packus_v4i64_v4i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -3321,10 +3281,9 @@ define <4 x i8> @trunc_packus_v4i64_v4i8(<4 x i64> %a0) { ; AVX512BW-LABEL: trunc_packus_v4i64_v4i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -3567,10 +3526,9 @@ define void @trunc_packus_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { ; AVX512F-LABEL: trunc_packus_v4i64_v4i8_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512F-NEXT: vmovd %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -3586,10 +3544,9 @@ define void @trunc_packus_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { ; AVX512BW-LABEL: trunc_packus_v4i64_v4i8_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; 
AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5279,44 +5236,39 @@ define <4 x i8> @trunc_packus_v4i32_v4i8(<4 x i32> %a0) "min-legal-vector-width" ; ; AVX512F-LABEL: trunc_packus_v4i32_v4i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_packus_v4i32_v4i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512VL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vpmovusdb %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_packus_v4i32_v4i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; AVX512BWVL-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BWVL-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-NEXT: vpmovusdb %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_packus_v4i32_v4i8: ; SKX: # %bb.0: -; SKX-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 ; SKX-NEXT: vpxor %xmm1, 
%xmm1, %xmm1 ; SKX-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SKX-NEXT: vpmovusdb %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <4 x i32> %a0, %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> @@ -5391,12 +5343,11 @@ define void @trunc_packus_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { ; ; AVX512F-LABEL: trunc_packus_v4i32_v4i8_store: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ; AVX512F-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512F-NEXT: vmovd %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_packus_v4i32_v4i8_store: @@ -5408,12 +5359,11 @@ define void @trunc_packus_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { ; ; AVX512BW-LABEL: trunc_packus_v4i32_v4i8_store: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 ; AVX512BW-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_packus_v4i32_v4i8_store: diff --git a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll index d9d9c36..543708c 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-ssat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-ssat.ll @@ -123,44 +123,32 @@ define <2 x i32> @trunc_ssat_v2i64_v2i32(<2 x i64> %a0) { ; AVX512F-LABEL: trunc_ssat_v2i64_v2i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = 
[2147483647,2147483647] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_ssat_v2i64_v2i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VL-NEXT: vpmovsqd %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i32: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BWVL-NEXT: vpmovsqd %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_ssat_v2i64_v2i32: ; SKX: # %bb.0: -; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SKX-NEXT: vpmovsqd %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <2 x i64> %a0, %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> @@ -282,11 +270,7 @@ define void @trunc_ssat_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) { ; 
AVX512F-LABEL: trunc_ssat_v2i64_v2i32_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -299,11 +283,7 @@ define void @trunc_ssat_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) { ; AVX512BW-LABEL: trunc_ssat_v2i64_v2i32_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [2147483647,2147483647] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744071562067968,18446744071562067968] -; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -545,9 +525,7 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) { ; AVX512F-LABEL: trunc_ssat_v4i64_v4i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512F-NEXT: vpmovsqd %zmm0, %ymm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -561,9 +539,7 @@ define <4 x i32> @trunc_ssat_v4i64_v4i32(<4 x i64> %a0) { ; AVX512BW-LABEL: trunc_ssat_v4i64_v4i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, 
%zmm0 -; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 +; AVX512BW-NEXT: vpmovsqd %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1153,45 +1129,30 @@ define <2 x i16> @trunc_ssat_v2i64_v2i16(<2 x i64> %a0) { ; AVX512F-LABEL: trunc_ssat_v2i64_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] -; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_ssat_v2i64_v2i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512VL-NEXT: vpmovsqw %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] -; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i16: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512BWVL-NEXT: vpmovsqw %xmm0, %xmm0 
; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_ssat_v2i64_v2i16: ; SKX: # %bb.0: -; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; SKX-NEXT: vpmovsqw %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <2 x i64> %a0, %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> @@ -1342,12 +1303,7 @@ define void @trunc_ssat_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16> *%p1) { ; AVX512F-LABEL: trunc_ssat_v2i64_v2i16_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] -; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512F-NEXT: vmovd %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1360,11 +1316,7 @@ define void @trunc_ssat_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16> *%p1) { ; AVX512BW-LABEL: trunc_ssat_v2i64_v2i16_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [32767,32767] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709518848,18446744073709518848] -; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1628,9 +1580,7 @@ define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) { ; AVX512F-LABEL: trunc_ssat_v4i64_v4i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, 
%zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -1643,9 +1593,7 @@ define <4 x i16> @trunc_ssat_v4i64_v4i16(<4 x i64> %a0) { ; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -1915,9 +1863,7 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; AVX512F-LABEL: trunc_ssat_v4i64_v4i16_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1931,9 +1877,7 @@ define void @trunc_ssat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; AVX512BW-LABEL: trunc_ssat_v4i64_v4i16_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovsqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2666,44 +2610,30 @@ define <2 x i8> @trunc_ssat_v2i64_v2i8(<2 x i64> %a0) { ; AVX512F-LABEL: trunc_ssat_v2i64_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = 
[18446744073709551488,18446744073709551488] -; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_ssat_v2i64_v2i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vpmovsqb %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] -; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_ssat_v2i64_v2i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-NEXT: vpmovsqb %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_ssat_v2i64_v2i8: ; SKX: # %bb.0: -; SKX-NEXT: vpminsq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpmaxsq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; SKX-NEXT: vpmovsqb %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <2 x i64> %a0, %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> @@ -2830,11 +2760,7 @@ define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) { ; AVX512F-LABEL: trunc_ssat_v2i64_v2i8_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; 
AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX512F-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] -; AVX512F-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2847,11 +2773,7 @@ define void @trunc_ssat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8> *%p1) { ; AVX512BW-LABEL: trunc_ssat_v2i64_v2i8_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [127,127] -; AVX512BW-NEXT: vpminsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [18446744073709551488,18446744073709551488] -; AVX512BW-NEXT: vpmaxsq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -3097,9 +3019,7 @@ define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) { ; AVX512F-LABEL: trunc_ssat_v4i64_v4i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -3112,9 +3032,7 @@ define <4 x i8> @trunc_ssat_v4i64_v4i8(<4 x i64> %a0) { ; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -3364,9 +3282,7 
@@ define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { ; AVX512F-LABEL: trunc_ssat_v4i64_v4i8_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512F-NEXT: vmovd %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -3380,9 +3296,7 @@ define void @trunc_ssat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { ; AVX512BW-LABEL: trunc_ssat_v4i64_v4i8_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmaxsq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovsqb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -5189,41 +5103,31 @@ define <4 x i8> @trunc_ssat_v4i32_v4i8(<4 x i32> %a0) { ; ; AVX512F-LABEL: trunc_ssat_v4i32_v4i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] -; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] -; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vpmovsdb %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8: ; AVX512BW: # %bb.0: -; 
AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] -; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] -; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-NEXT: vpmovsdb %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_ssat_v4i32_v4i8: ; SKX: # %bb.0: -; SKX-NEXT: vpminsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; SKX-NEXT: vpmaxsd {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SKX-NEXT: vpmovsdb %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp slt <4 x i32> %a0, %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> @@ -5300,12 +5204,10 @@ define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { ; ; AVX512F-LABEL: trunc_ssat_v4i32_v4i8_store: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] -; AVX512F-NEXT: vpminsd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] -; AVX512F-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vpmovsdb %zmm0, %xmm0 ; AVX512F-NEXT: vmovd %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_ssat_v4i32_v4i8_store: @@ -5315,12 +5217,10 @@ define void @trunc_ssat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> 
*%p1) { ; ; AVX512BW-LABEL: trunc_ssat_v4i32_v4i8_store: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [127,127,127,127] -; AVX512BW-NEXT: vpminsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [4294967168,4294967168,4294967168,4294967168] -; AVX512BW-NEXT: vpmaxsd %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vpmovsdb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_ssat_v4i32_v4i8_store: diff --git a/llvm/test/CodeGen/X86/vector-trunc-usat.ll b/llvm/test/CodeGen/X86/vector-trunc-usat.ll index 82488d7..7afdb04 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-usat.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-usat.ll @@ -84,37 +84,32 @@ define <2 x i32> @trunc_usat_v2i64_v2i32(<2 x i64> %a0) { ; AVX512F-LABEL: trunc_usat_v2i64_v2i32: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_usat_v2i64_v2i32: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512VL-NEXT: vpmovusqd %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_usat_v2i64_v2i32: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed 
$xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i32: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BWVL-NEXT: vpmovusqd %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_usat_v2i64_v2i32: ; SKX: # %bb.0: -; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; SKX-NEXT: vpmovusqd %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp ult <2 x i64> %a0, %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> @@ -195,9 +190,7 @@ define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) { ; AVX512F-LABEL: trunc_usat_v2i64_v2i32_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512F-NEXT: vpmovusqd %zmm0, %ymm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -210,9 +203,7 @@ define void @trunc_usat_v2i64_v2i32_store(<2 x i64> %a0, <2 x i32>* %p1) { ; AVX512BW-LABEL: trunc_usat_v2i64_v2i32_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [4294967295,4294967295] -; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] +; AVX512BW-NEXT: vpmovusqd %zmm0, %ymm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -791,38 +782,30 @@ define <2 x i16> @trunc_usat_v2i64_v2i16(<2 x i64> %a0) { ; AVX512F-LABEL: trunc_usat_v2i64_v2i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512F-NEXT: 
vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_usat_v2i64_v2i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512VL-NEXT: vpmovusqw %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_usat_v2i64_v2i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i16: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512BWVL-NEXT: vpmovusqw %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_usat_v2i64_v2i16: ; SKX: # %bb.0: -; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; SKX-NEXT: vpmovusqw %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp ult <2 x i64> %a0, %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> @@ -930,10 +913,7 @@ define void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16>* %p1) { ; AVX512F-LABEL: trunc_usat_v2i64_v2i16_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] -; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] +; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512F-NEXT: vmovd %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -946,9 +926,7 @@ define 
void @trunc_usat_v2i64_v2i16_store(<2 x i64> %a0, <2 x i16>* %p1) { ; AVX512BW-LABEL: trunc_usat_v2i64_v2i16_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [65535,65535] -; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,8,9,8,9,10,11,8,9,10,11,12,13,14,15] +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1131,8 +1109,7 @@ define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) { ; AVX512F-LABEL: trunc_usat_v4i64_v4i16: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -1145,8 +1122,7 @@ define <4 x i16> @trunc_usat_v4i64_v4i16(<4 x i64> %a0) { ; AVX512BW-LABEL: trunc_usat_v4i64_v4i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -1335,8 +1311,7 @@ define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; AVX512F-LABEL: trunc_usat_v4i64_v4i16_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1350,8 +1325,7 @@ define void @trunc_usat_v4i64_v4i16_store(<4 x i64> %a0, <4 x i16> *%p1) { ; AVX512BW-LABEL: trunc_usat_v4i64_v4i16_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, 
%zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqw %zmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -1691,34 +1665,33 @@ define <4 x i16> @trunc_usat_v4i32_v4i16(<4 x i32> %a0) { ; ; AVX512F-LABEL: trunc_usat_v4i32_v4i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] -; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_usat_v4i32_v4i16: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: vpmovusdw %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_usat_v4i32_v4i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] -; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i16: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BWVL-NEXT: vpmovusdw %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_usat_v4i32_v4i16: ; SKX: # %bb.0: -; SKX-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; SKX-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vpmovusdw %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp ult <4 x i32> %a0, %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> @@ -1779,10 +1752,10 @@ define void @trunc_usat_v4i32_v4i16_store(<4 x i32> %a0, <4 x i16> 
*%p1) { ; ; AVX512F-LABEL: trunc_usat_v4i32_v4i16_store: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] -; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_usat_v4i32_v4i16_store: @@ -1792,10 +1765,10 @@ define void @trunc_usat_v4i32_v4i16_store(<4 x i32> %a0, <4 x i16> *%p1) { ; ; AVX512BW-LABEL: trunc_usat_v4i32_v4i16_store: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [65535,65535,65535,65535] -; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpackusdw %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vpmovusdw %zmm0, %ymm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i16_store: @@ -1891,9 +1864,8 @@ define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) { ; ; AVX512F-LABEL: trunc_usat_v8i32_v8i16: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535] -; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovusdw %zmm0, %ymm0 ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -1906,9 +1878,8 @@ define <8 x i16> @trunc_usat_v8i32_v8i16(<8 x i32> %a0) { ; ; AVX512BW-LABEL: trunc_usat_v8i32_v8i16: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [65535,65535,65535,65535,65535,65535,65535,65535] -; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdw %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovusdw %zmm0, 
%ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2156,37 +2127,30 @@ define <2 x i8> @trunc_usat_v2i64_v2i8(<2 x i64> %a0) { ; AVX512F-LABEL: trunc_usat_v2i64_v2i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_usat_v2i64_v2i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vpmovusqb %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_usat_v2i64_v2i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_usat_v2i64_v2i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-NEXT: vpmovusqb %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_usat_v2i64_v2i8: ; SKX: # %bb.0: -; SKX-NEXT: vpminuq {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; SKX-NEXT: vpmovusqb %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp ult <2 x i64> %a0, %2 = select <2 x i1> %1, <2 x i64> %a0, <2 x i64> @@ -2272,9 +2236,7 @@ define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8>* %p1) { ; AVX512F-LABEL: trunc_usat_v2i64_v2i8_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def 
$xmm0 killed $xmm0 def $zmm0 -; AVX512F-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512F-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512F-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2287,9 +2249,7 @@ define void @trunc_usat_v2i64_v2i8_store(<2 x i64> %a0, <2 x i8>* %p1) { ; AVX512BW-LABEL: trunc_usat_v2i64_v2i8_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm1 = [255,255] -; AVX512BW-NEXT: vpminuq %zmm1, %zmm0, %zmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,8,u,u,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vpextrw $0, %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -2453,8 +2413,7 @@ define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) { ; AVX512F-LABEL: trunc_usat_v4i64_v4i8: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -2467,8 +2426,7 @@ define <4 x i8> @trunc_usat_v4i64_v4i8(<4 x i64> %a0) { ; AVX512BW-LABEL: trunc_usat_v4i64_v4i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -2636,8 +2594,7 @@ define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { ; AVX512F-LABEL: trunc_usat_v4i64_v4i8_store: ; AVX512F: # %bb.0: ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512F-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512F-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512F-NEXT: vpmovusqb %zmm0, %xmm0 ; 
AVX512F-NEXT: vmovd %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -2651,8 +2608,7 @@ define void @trunc_usat_v4i64_v4i8_store(<4 x i64> %a0, <4 x i8> *%p1) { ; AVX512BW-LABEL: trunc_usat_v4i64_v4i8_store: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512BW-NEXT: vpminuq {{.*}}(%rip){1to8}, %zmm0, %zmm0 -; AVX512BW-NEXT: vpmovqb %zmm0, %xmm0 +; AVX512BW-NEXT: vpmovusqb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -3758,34 +3714,31 @@ define <4 x i8> @trunc_usat_v4i32_v4i8(<4 x i32> %a0) { ; ; AVX512F-LABEL: trunc_usat_v4i32_v4i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0 +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_usat_v4i32_v4i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512VL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512VL-NEXT: vpmovusdb %xmm0, %xmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: trunc_usat_v4i32_v4i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0 +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i8: ; AVX512BWVL: # %bb.0: -; AVX512BWVL-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; AVX512BWVL-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BWVL-NEXT: vpmovusdb %xmm0, %xmm0 ; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_usat_v4i32_v4i8: ; 
SKX: # %bb.0: -; SKX-NEXT: vpminud {{.*}}(%rip){1to4}, %xmm0, %xmm0 -; SKX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; SKX-NEXT: vpmovusdb %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp ult <4 x i32> %a0, %2 = select <4 x i1> %1, <4 x i32> %a0, <4 x i32> @@ -3846,10 +3799,10 @@ define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { ; ; AVX512F-LABEL: trunc_usat_v4i32_v4i8_store: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512F-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512F-NEXT: vmovd %xmm0, (%rdi) +; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; ; AVX512VL-LABEL: trunc_usat_v4i32_v4i8_store: @@ -3859,10 +3812,10 @@ define void @trunc_usat_v4i32_v4i8_store(<4 x i32> %a0, <4 x i8> *%p1) { ; ; AVX512BW-LABEL: trunc_usat_v4i32_v4i8_store: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} xmm1 = [255,255,255,255] -; AVX512BW-NEXT: vpminud %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,4,8,12,u,u,u,u,u,u,u,u,u,u,u,u] +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovd %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_usat_v4i32_v4i8_store: @@ -3965,9 +3918,8 @@ define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) { ; ; AVX512F-LABEL: trunc_usat_v8i32_v8i8: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq ; @@ -3979,9 +3931,8 @@ define <8 x i8> @trunc_usat_v8i32_v8i8(<8 x i32> %a0) { ; ; 
AVX512BW-LABEL: trunc_usat_v8i32_v8i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; @@ -4090,9 +4041,8 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512F-LABEL: trunc_usat_v8i32_v8i8_store: ; AVX512F: # %bb.0: -; AVX512F-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512F-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512F-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512F-NEXT: vmovq %xmm0, (%rdi) ; AVX512F-NEXT: vzeroupper ; AVX512F-NEXT: retq @@ -4105,9 +4055,8 @@ define void @trunc_usat_v8i32_v8i8_store(<8 x i32> %a0, <8 x i8> *%p1) { ; ; AVX512BW-LABEL: trunc_usat_v8i32_v8i8_store: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpbroadcastd {{.*#+}} ymm1 = [255,255,255,255,255,255,255,255] -; AVX512BW-NEXT: vpminud %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovusdb %zmm0, %xmm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq @@ -4444,16 +4393,34 @@ define <8 x i8> @trunc_usat_v8i16_v8i8(<8 x i16> %a0) { ; AVX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512-LABEL: trunc_usat_v8i16_v8i8: -; AVX512: # %bb.0: -; AVX512-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 -; AVX512-NEXT: retq +; AVX512F-LABEL: trunc_usat_v8i16_v8i8: +; AVX512F: # %bb.0: +; AVX512F-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 +; AVX512F-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512F-NEXT: retq +; +; AVX512VL-LABEL: trunc_usat_v8i16_v8i8: +; AVX512VL: # %bb.0: +; AVX512VL-NEXT: vpminuw 
{{.*}}(%rip), %xmm0, %xmm0 +; AVX512VL-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512VL-NEXT: retq +; +; AVX512BW-LABEL: trunc_usat_v8i16_v8i8: +; AVX512BW: # %bb.0: +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; AVX512BW-NEXT: vzeroupper +; AVX512BW-NEXT: retq +; +; AVX512BWVL-LABEL: trunc_usat_v8i16_v8i8: +; AVX512BWVL: # %bb.0: +; AVX512BWVL-NEXT: vpmovuswb %xmm0, %xmm0 +; AVX512BWVL-NEXT: retq ; ; SKX-LABEL: trunc_usat_v8i16_v8i8: ; SKX: # %bb.0: -; SKX-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 -; SKX-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; SKX-NEXT: vpmovuswb %xmm0, %xmm0 ; SKX-NEXT: retq %1 = icmp ult <8 x i16> %a0, %2 = select <8 x i1> %1, <8 x i16> %a0, <8 x i16> @@ -4509,9 +4476,10 @@ define void @trunc_usat_v8i16_v8i8_store(<8 x i16> %a0, <8 x i8> *%p1) { ; ; AVX512BW-LABEL: trunc_usat_v8i16_v8i8_store: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpackuswb %xmm0, %xmm0, %xmm0 +; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 +; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0 ; AVX512BW-NEXT: vmovq %xmm0, (%rdi) +; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq ; ; AVX512BWVL-LABEL: trunc_usat_v8i16_v8i8_store: @@ -4601,8 +4569,8 @@ define <16 x i8> @trunc_usat_v16i16_v16i8(<16 x i16> %a0) { ; ; AVX512BW-LABEL: trunc_usat_v16i16_v16i8: ; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpminuw {{.*}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 +; AVX512BW-NEXT: vpmovuswb %zmm0, %ymm0 ; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512BW-NEXT: vzeroupper ; AVX512BW-NEXT: retq -- 2.7.4