From e855efe42407dd67f6a513927d0669cb7a66f448 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Sat, 27 Jun 2020 15:06:49 +0100 Subject: [PATCH] [X86][AVX] SimplifyDemandedVectorEltsForTargetNode - reduce width of X86ISD::VPERMIL2 If we don't need the elements of the upper lanes, reduce the width of the X86ISD::VPERMIL2 node. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 20 +++++++++++++++++++- llvm/test/CodeGen/X86/var-permute-256.ll | 5 +---- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 2e23bcb..fd69791 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -37165,7 +37165,25 @@ bool X86TargetLowering::SimplifyDemandedVectorEltsForTargetNode( SDValue Insert = insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); return TLO.CombineTo(Op, Insert); - } + } + case X86ISD::VPERMIL2: { + SDLoc DL(Op); + MVT ExtVT = VT.getSimpleVT(); + ExtVT = MVT::getVectorVT(ExtVT.getScalarType(), + ExtSizeInBits / ExtVT.getScalarSizeInBits()); + SDValue Ext0 = + extractSubVector(Op.getOperand(0), 0, TLO.DAG, DL, ExtSizeInBits); + SDValue Ext1 = + extractSubVector(Op.getOperand(1), 0, TLO.DAG, DL, ExtSizeInBits); + SDValue Ext2 = + extractSubVector(Op.getOperand(2), 0, TLO.DAG, DL, ExtSizeInBits); + SDValue ExtOp = + TLO.DAG.getNode(Opc, DL, ExtVT, Ext0, Ext1, Ext2, Op.getOperand(3)); + SDValue UndefVec = TLO.DAG.getUNDEF(VT); + SDValue Insert = + insertSubVector(UndefVec, ExtOp, 0, TLO.DAG, DL, ExtSizeInBits); + return TLO.CombineTo(Op, Insert); + } } } diff --git a/llvm/test/CodeGen/X86/var-permute-256.ll b/llvm/test/CodeGen/X86/var-permute-256.ll index 6a9f47d..ff099c1 100644 --- a/llvm/test/CodeGen/X86/var-permute-256.ll +++ b/llvm/test/CodeGen/X86/var-permute-256.ll @@ -1104,11 +1104,8 @@ entry: define <4 x i32> @var_shuffle_v4i32_from_v8i32(<8 x i32> %v, <4 x i32> %indices) unnamed_addr nounwind { ; XOP-LABEL: var_shuffle_v4i32_from_v8i32: ; XOP: # %bb.0: # %entry -; XOP-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1 ; XOP-NEXT: vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,2,3] -; XOP-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 -; XOP-NEXT: vpermil2ps $0, %ymm1, %ymm2, %ymm0, %ymm0 -; XOP-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 +; XOP-NEXT: vpermil2ps $0, %xmm1, %xmm2, %xmm0, %xmm0 ; XOP-NEXT: vzeroupper ; XOP-NEXT: retq ; -- 2.7.4