From: Igor Breger Date: Wed, 14 Sep 2016 08:04:28 +0000 (+0000) Subject: [AVX512BW] Change truncStore action (v16i16->v16i8). It can be legal only with AVX512VL. X-Git-Tag: llvmorg-4.0.0-rc1~9837 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=74813fc19c41160d320587d6b816bace70c4a6bf;p=platform%2Fupstream%2Fllvm.git [AVX512BW] Change truncStore action (v16i16->v16i8). It can be legal only with AVX512VL. Differential Revision: http://reviews.llvm.org/D24547 llvm-svn: 281445 --- diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 987447d..5f7dbb6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -1500,9 +1500,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::UMIN, MVT::v32i16, Legal); setTruncStoreAction(MVT::v32i16, MVT::v32i8, Legal); - setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); - if (Subtarget.hasVLX()) + if (Subtarget.hasVLX()) { + setTruncStoreAction(MVT::v16i16, MVT::v16i8, Legal); setTruncStoreAction(MVT::v8i16, MVT::v8i8, Legal); + } LegalizeAction Action = Subtarget.hasVLX() ? 
Legal : Custom; for (auto VT : { MVT::v32i8, MVT::v16i8, MVT::v16i16, MVT::v8i16 }) { diff --git a/llvm/test/CodeGen/X86/vector-trunc.ll b/llvm/test/CodeGen/X86/vector-trunc.ll index cd71e65..7180006 100644 --- a/llvm/test/CodeGen/X86/vector-trunc.ll +++ b/llvm/test/CodeGen/X86/vector-trunc.ll @@ -470,12 +470,74 @@ entry: } ;PR25684 -;define void @trunc16i16_16i8(<16 x i16> %a) { -;entry: -; %0 = trunc <16 x i16> %a to <16 x i8> -; store <16 x i8> %0, <16 x i8>* undef, align 4 -; ret void -;} +define void @trunc16i16_16i8(<16 x i16> %a) { +; SSE2-LABEL: trunc16i16_16i8: +; SSE2: # BB#0: # %entry +; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] +; SSE2-NEXT: pand %xmm2, %xmm1 +; SSE2-NEXT: pand %xmm2, %xmm0 +; SSE2-NEXT: packuswb %xmm1, %xmm0 +; SSE2-NEXT: movdqu %xmm0, (%rax) +; SSE2-NEXT: retq +; +; SSSE3-LABEL: trunc16i16_16i8: +; SSSE3: # BB#0: # %entry +; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; SSSE3-NEXT: pshufb %xmm2, %xmm1 +; SSSE3-NEXT: pshufb %xmm2, %xmm0 +; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSSE3-NEXT: movdqu %xmm0, (%rax) +; SSSE3-NEXT: retq +; +; SSE41-LABEL: trunc16i16_16i8: +; SSE41: # BB#0: # %entry +; SSE41-NEXT: movdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; SSE41-NEXT: pshufb %xmm2, %xmm1 +; SSE41-NEXT: pshufb %xmm2, %xmm0 +; SSE41-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; SSE41-NEXT: movdqu %xmm0, (%rax) +; SSE41-NEXT: retq +; +; AVX1-LABEL: trunc16i16_16i8: +; AVX1: # BB#0: # %entry +; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 +; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; AVX1-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX1-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX1-NEXT: vmovdqu %xmm0, (%rax) +; AVX1-NEXT: vzeroupper +; AVX1-NEXT: retq +; +; AVX2-LABEL: trunc16i16_16i8: +; AVX2: # BB#0: # %entry +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm1 +; AVX2-NEXT: 
vmovdqa {{.*#+}} xmm2 = <0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u> +; AVX2-NEXT: vpshufb %xmm2, %xmm1, %xmm1 +; AVX2-NEXT: vpshufb %xmm2, %xmm0, %xmm0 +; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] +; AVX2-NEXT: vmovdqu %xmm0, (%rax) +; AVX2-NEXT: vzeroupper +; AVX2-NEXT: retq +; +; AVX512F-LABEL: trunc16i16_16i8: +; AVX512F: # BB#0: # %entry +; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0 +; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 +; AVX512F-NEXT: vmovdqu %xmm0, (%rax) +; AVX512F-NEXT: retq +; +; AVX512BW-LABEL: trunc16i16_16i8: +; AVX512BW: # BB#0: # %entry +; AVX512BW-NEXT: # kill: %YMM0 %YMM0 %ZMM0 +; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0 +; AVX512BW-NEXT: vmovdqu %xmm0, (%rax) +; AVX512BW-NEXT: retq +entry: + %0 = trunc <16 x i16> %a to <16 x i8> + store <16 x i8> %0, <16 x i8>* undef, align 4 + ret void +} define void @trunc32i16_32i8(<32 x i16> %a) { ; SSE2-LABEL: trunc32i16_32i8: @@ -1010,3 +1072,4 @@ entry: %1 = shufflevector <16 x i8> %0, <16 x i8> %0, <16 x i32> ret <16 x i8> %1 } +