From: Craig Topper
Date: Sat, 5 Jan 2019 18:48:11 +0000 (+0000)
Subject: [X86] Allow LowerTRUNCATE to use PACKUS/PACKSS for v16i16->v16i8 truncate when -mpref...
X-Git-Tag: llvmorg-8.0.0-rc1~864
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3f48dbf72e2267b85bab1d5924f264569c4db09f;p=platform%2Fupstream%2Fllvm.git

[X86] Allow LowerTRUNCATE to use PACKUS/PACKSS for v16i16->v16i8 truncate when -mprefer-vector-width=256 is in effect and BWI is not available.

llvm-svn: 350473
---

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 25a93e9..4056b49 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -17949,9 +17949,10 @@ static SDValue truncateVectorWithPACK(unsigned Opcode, EVT DstVT, SDValue In,
                                       const X86Subtarget &Subtarget) {
   assert((Opcode == X86ISD::PACKSS || Opcode == X86ISD::PACKUS) &&
          "Unexpected PACK opcode");
+  assert(DstVT.isVector() && "VT not a vector?");
 
   // Requires SSE2 but AVX512 has fast vector truncate.
-  if (!Subtarget.hasSSE2() || Subtarget.hasAVX512() || !DstVT.isVector())
+  if (!Subtarget.hasSSE2())
     return SDValue();
 
   EVT SrcVT = In.getValueType();
@@ -36899,6 +36900,7 @@ static SDValue combineTruncateWithSat(SDValue In, EVT VT, const SDLoc &DL,
       return DAG.getNode(X86ISD::VTRUNCUS, DL, VT, USatVal);
     }
     if (VT.isVector() && isPowerOf2_32(VT.getVectorNumElements()) &&
+        !Subtarget.hasAVX512() &&
         (SVT == MVT::i8 || SVT == MVT::i16) &&
         (InSVT == MVT::i16 || InSVT == MVT::i32)) {
       if (auto USatVal = detectSSatPattern(In, VT, true)) {
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll b/llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll
index b4f8e5b..b4d452f 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-mask-extend.ll
@@ -48,11 +48,9 @@ define <16 x i8> @testv16i1_sext_v16i8(<8 x i32>* %p, <8 x i32>* %q) {
 ; AVX256-NEXT:    vpcmpeqd %ymm0, %ymm0, %ymm0
 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
 ; AVX256-NEXT:    vpmovdw %ymm1, %xmm1
-; AVX256-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX256-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX256-NEXT:    vpmovdw %ymm0, %xmm0
-; AVX256-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; AVX256-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX256-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX256-NEXT:    vzeroupper
 ; AVX256-NEXT:    retq
 ;
diff --git a/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll b/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
index 92f6e27..7f4480c 100644
--- a/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
+++ b/llvm/test/CodeGen/X86/prefer-avx256-mask-shuffle.ll
@@ -34,11 +34,9 @@ define <16 x i1> @shuf16i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0(<8 x i32>* %a, <8
 ; AVX256VL-NEXT:    kshiftrw $8, %k0, %k2
 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
 ; AVX256VL-NEXT:    vpmovdw %ymm1, %xmm1
-; AVX256VL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX256VL-NEXT:    vpmovdw %ymm0, %xmm0
-; AVX256VL-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; AVX256VL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX256VL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX256VL-NEXT:    vzeroupper
 ; AVX256VL-NEXT:    retq
 ;
@@ -169,11 +167,9 @@ define <32 x i1> @shuf32i1_3_6_22_12_3_7_7_0_3_6_1_13_3_21_7_0_3_6_22_12_3_7_7_0
 ; AVX256VL-NEXT:    kshiftrw $8, %k0, %k2
 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm1 {%k2} {z}
 ; AVX256VL-NEXT:    vpmovdw %ymm1, %xmm1
-; AVX256VL-NEXT:    vpsrlw $8, %xmm1, %xmm1
 ; AVX256VL-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
 ; AVX256VL-NEXT:    vpmovdw %ymm0, %xmm0
-; AVX256VL-NEXT:    vpsrlw $8, %xmm0, %xmm0
-; AVX256VL-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
+; AVX256VL-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
 ; AVX256VL-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
 ; AVX256VL-NEXT:    retq
 ;
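
Why the saturating pack is exact in these tests: the values being packed are mask words produced by vpmovdw from zero-masked compare results, so every 16-bit lane is 0x0000 or 0xFFFF (-1). Signed saturation maps those to 0x00 and 0xFF, exactly the bytes a plain truncation would produce, which is what lets the vpsrlw $8 fix-ups disappear from the checked output. A minimal scalar C++ sketch of the per-lane PACKSSWB semantics follows; it is illustrative only, not LLVM code, and the helper name is made up:

// Scalar model of one PACKSSWB lane: clamp a signed 16-bit word to the
// signed 8-bit range. For mask lanes (0 or -1) the result equals plain
// byte truncation, so the pack alone performs the v16i16->v16i8 truncate.
#include <cassert>
#include <cstdint>

static int8_t packss_lane(int16_t w) {
  if (w < INT8_MIN) return INT8_MIN;
  if (w > INT8_MAX) return INT8_MAX;
  return static_cast<int8_t>(w);
}

int main() {
  assert(packss_lane(0) == 0);                                   // 0x0000 -> 0x00
  assert(packss_lane(int16_t(-1)) == static_cast<int8_t>(0xFF)); // 0xFFFF -> 0xFF
  return 0;
}

For arbitrary (non-mask) words the same pack would saturate rather than truncate, which is why this lowering is only used when a signed-saturation pattern is proven (detectSSatPattern in the hunk above).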