From f7b978a71bc057d19966b16945eb5fd5068b309e Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Wed, 1 May 2019 10:58:38 +0000 Subject: [PATCH] [X86][SSE] Move extract_subvector(pshufb) fold to SimplifyDemandedVectorEltsForTargetNode This lets us hit more cases than combineExtractSubvector and allows us to reuse more code. llvm-svn: 359669 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 15 +++------------ llvm/test/CodeGen/X86/var-permute-128.ll | 31 +++++++++++++------------------ 2 files changed, 16 insertions(+), 30 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 53823e2..c4f4b03 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -33406,6 +33406,9 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( // TODO: Handle 512-bit -> 128/256-bit ops as well. if (VT.is256BitVector() && DemandedElts.lshr(NumElts / 2) == 0) { switch (Opc) { + // Target Shuffles. + case X86ISD::PSHUFB: + // Horizontal Ops. case X86ISD::HADD: case X86ISD::HSUB: case X86ISD::FHADD: @@ -42817,18 +42820,6 @@ static SDValue combineExtractSubvector(SDNode *N, SelectionDAG &DAG, InVec.getOperand(0).getSimpleValueType().is128BitVector()) { return DAG.getNode(InOpcode, SDLoc(N), VT, InVec.getOperand(0)); } - if (InOpcode == ISD::BITCAST) { - // TODO - do this for target shuffles in general. 
- SDValue InVecBC = peekThroughOneUseBitcasts(InVec); - if (InVecBC.getOpcode() == X86ISD::PSHUFB && VT.is128BitVector()) { - SDLoc DL(N); - SDValue SubPSHUFB = - DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, - extract128BitVector(InVecBC.getOperand(0), 0, DAG, DL), - extract128BitVector(InVecBC.getOperand(1), 0, DAG, DL)); - return DAG.getBitcast(VT, SubPSHUFB); - } - } } return SDValue(); diff --git a/llvm/test/CodeGen/X86/var-permute-128.ll b/llvm/test/CodeGen/X86/var-permute-128.ll index e557b7c..4d48527 100644 --- a/llvm/test/CodeGen/X86/var-permute-128.ll +++ b/llvm/test/CodeGen/X86/var-permute-128.ll @@ -1027,13 +1027,11 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in ; AVX2-LABEL: var_shuffle_v16i8_from_v32i8_v16i8: ; AVX2: # %bb.0: ; AVX2-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 -; AVX2-NEXT: vpshufb %ymm1, %ymm2, %ymm2 -; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm3 -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] -; AVX2-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX2-NEXT: vpshufb %xmm1, %xmm2, %xmm2 +; AVX2-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX2-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1 -; AVX2-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 +; AVX2-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX2-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq @@ -1041,13 +1039,11 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in ; AVX512-LABEL: var_shuffle_v16i8_from_v32i8_v16i8: ; AVX512: # %bb.0: ; AVX512-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; AVX512-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 -; AVX512-NEXT: vpshufb %ymm1, %ymm2, %ymm2 -; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm3 -; AVX512-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] -; AVX512-NEXT: vpshufb %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm2 +; 
AVX512-NEXT: vpshufb %xmm1, %xmm2, %xmm2 +; AVX512-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX512-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %ymm1 -; AVX512-NEXT: vpblendvb %ymm1, %ymm0, %ymm2, %ymm0 +; AVX512-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX512-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512-NEXT: vzeroupper ; AVX512-NEXT: retq @@ -1055,13 +1051,12 @@ define <16 x i8> @var_shuffle_v16i8_from_v32i8_v16i8(<32 x i8> %v, <16 x i8> %in ; AVX512VLBW-LABEL: var_shuffle_v16i8_from_v32i8_v16i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 -; AVX512VLBW-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm2 -; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm2, %ymm2 -; AVX512VLBW-NEXT: vextracti128 $1, %ymm0, %xmm3 -; AVX512VLBW-NEXT: vpblendd {{.*#+}} ymm0 = ymm3[0,1,2,3],ymm0[4,5,6,7] +; AVX512VLBW-NEXT: vextracti128 $1, %ymm0, %xmm2 +; AVX512VLBW-NEXT: vpshufb %xmm1, %xmm2, %xmm2 +; AVX512VLBW-NEXT: vpshufb %xmm1, %xmm0, %xmm0 ; AVX512VLBW-NEXT: vpcmpgtb {{.*}}(%rip), %ymm1, %k1 -; AVX512VLBW-NEXT: vpshufb %ymm1, %ymm0, %ymm2 {%k1} -; AVX512VLBW-NEXT: vmovdqa %xmm2, %xmm0 +; AVX512VLBW-NEXT: vmovdqu8 %ymm2, %ymm0 {%k1} +; AVX512VLBW-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 ; AVX512VLBW-NEXT: vzeroupper ; AVX512VLBW-NEXT: retq ; -- 2.7.4