From ebb7ddc6330b28b8c7e53d4c68d3684c127a41c1 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 19 Aug 2019 05:45:39 +0000 Subject: [PATCH] [X86] Teach lower1BitShuffle to match right shifts with upper zero elements on types that don't natively support KSHIFT. We can support these by widening to a supported type, then shifting all the way to the left and then back to the right to ensure that we shift in zeroes. llvm-svn: 369232 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 39 +++++++++++++++++---------------- llvm/test/CodeGen/X86/kshift.ll | 23 +++++++------------ 2 files changed, 28 insertions(+), 34 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index ae3aed0..f0a4cf2 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -16640,25 +16640,26 @@ static SDValue lower1BitShuffle(const SDLoc &DL, ArrayRef Mask, unsigned Opcode; int ShiftAmt = match1BitShuffleAsKSHIFT(Opcode, Mask, Offset, Zeroable); if (ShiftAmt >= 0) { - // FIXME: We can't easily widen an illegal right shift if we need to shift - // in zeroes. - if (Opcode == X86ISD::KSHIFTR && - (NumElts >= 16 || (Subtarget.hasDQI() && NumElts == 8))) - return DAG.getNode(Opcode, DL, VT, V, - DAG.getConstant(ShiftAmt, DL, MVT::i8)); - if (Opcode == X86ISD::KSHIFTL) { - // If this is a shift left we can widen the VT to a suported kshiftl. - MVT WideVT = VT; - if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) - WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; - SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, - DAG.getUNDEF(WideVT), V, - DAG.getIntPtrConstant(0, DL)); - Res = DAG.getNode(Opcode, DL, WideVT, Res, - DAG.getConstant(ShiftAmt, DL, MVT::i8)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, - DAG.getIntPtrConstant(0, DL)); - } + MVT WideVT = VT; + if ((!Subtarget.hasDQI() && NumElts == 8) || NumElts < 8) + WideVT = Subtarget.hasDQI() ? MVT::v8i1 : MVT::v16i1; + SDValue Res = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, WideVT, + DAG.getUNDEF(WideVT), V, + DAG.getIntPtrConstant(0, DL)); + // Widened right shifts need two shifts to ensure we shift in zeroes. + if (Opcode == X86ISD::KSHIFTR && WideVT != VT) { + int WideElts = WideVT.getVectorNumElements(); + // Shift left to put the original vector in the MSBs of the new size. + Res = DAG.getNode(X86ISD::KSHIFTL, DL, WideVT, Res, + DAG.getConstant(WideElts - NumElts, DL, MVT::i8)); + // Increase the shift amount to account for the left shift. + ShiftAmt += WideElts - NumElts; + } + + Res = DAG.getNode(Opcode, DL, WideVT, Res, + DAG.getConstant(ShiftAmt, DL, MVT::i8)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res, + DAG.getIntPtrConstant(0, DL)); } Offset += NumElts; // Increment for next iteration. } diff --git a/llvm/test/CodeGen/X86/kshift.ll b/llvm/test/CodeGen/X86/kshift.ll index 01db683..eced654 100644 --- a/llvm/test/CodeGen/X86/kshift.ll +++ b/llvm/test/CodeGen/X86/kshift.ll @@ -295,13 +295,10 @@ define i64 @kshiftl_v64i1_63(<64 x i8> %x, <64 x i8> %y) { define i8 @kshiftr_v8i1_1(<8 x i64> %x, <8 x i64> %y) { ; KNL-LABEL: kshiftr_v8i1_1: ; KNL: # %bb.0: -; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1 -; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,2,3,4,5,6,7,15] -; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm3 -; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1 -; KNL-NEXT: vptestmq %zmm3, %zmm3, %k0 {%k1} +; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftlw $8, %k0, %k0 +; KNL-NEXT: kshiftrw $9, %k0, %k1 +; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1} ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: # kill: def $al killed $al killed $eax ; KNL-NEXT: vzeroupper @@ -643,14 +640,10 @@ define i8 @kshiftl_v8i1_u0123456(<8 x i64> %x, <8 x i64> %y) { define i8 @kshiftr_v8i1_1u3u567z(<8 x i64> %x, <8 x i64> %y) { ; KNL-LABEL: kshiftr_v8i1_1u3u567z: ; KNL: # %bb.0: -; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k1 -; KNL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; KNL-NEXT: vpternlogq $255, %zmm2, %zmm2, %zmm2 {%k1} {z} -; KNL-NEXT: vmovdqa64 {{.*#+}} zmm3 = <1,u,3,u,5,6,7,15> -; KNL-NEXT: vpermi2q %zmm0, %zmm2, %zmm3 -; KNL-NEXT: vpsllq $63, %zmm3, %zmm0 -; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k1 -; KNL-NEXT: vptestmq %zmm0, %zmm0, %k0 {%k1} +; KNL-NEXT: vptestnmq %zmm0, %zmm0, %k0 +; KNL-NEXT: kshiftlw $8, %k0, %k0 +; KNL-NEXT: kshiftrw $9, %k0, %k1 +; KNL-NEXT: vptestnmq %zmm1, %zmm1, %k0 {%k1} ; KNL-NEXT: kmovw %k0, %eax ; KNL-NEXT: # kill: def $al killed $al killed $eax ; KNL-NEXT: vzeroupper -- 2.7.4