From 6736096ac3efd149c34dc95a8ab3a8105349a535 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 17 Sep 2016 18:50:54 +0000
Subject: [PATCH] [X86][SSE] Improve target shuffle mask extraction

Add ability to extract vXi64 'vzext_movl' masks on 32-bit targets

llvm-svn: 281834
---
 llvm/lib/Target/X86/X86ISelLowering.cpp            | 24 +++++++++++++---------
 .../CodeGen/X86/vector-shuffle-combining-avx.ll    |  9 +-------
 .../CodeGen/X86/vector-shuffle-combining-xop.ll    |  5 +----
 3 files changed, 16 insertions(+), 22 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b757290..a905188 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -4763,6 +4763,7 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
                                         SmallVectorImpl<uint64_t> &RawMask) {
   MVT VT = MaskNode.getSimpleValueType();
   assert(VT.isVector() && "Can't produce a non-vector with a build_vector!");
+  unsigned NumMaskElts = VT.getSizeInBits() / MaskEltSizeInBits;
 
   // Split an APInt element into MaskEltSizeInBits sized pieces and
   // insert into the shuffle mask.
@@ -4794,17 +4795,20 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
 
   if (MaskNode.getOpcode() == X86ISD::VZEXT_MOVL &&
       MaskNode.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR) {
-
-    // TODO: Handle (MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0
-    if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) != 0)
-      return false;
-    unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
-
     SDValue MaskOp = MaskNode.getOperand(0).getOperand(0);
     if (auto *CN = dyn_cast<ConstantSDNode>(MaskOp)) {
-      SplitElementToMask(CN->getAPIntValue());
-      RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
-      return true;
+      if ((MaskEltSizeInBits % VT.getScalarSizeInBits()) == 0) {
+        RawMask.push_back(CN->getZExtValue());
+        RawMask.append(NumMaskElts - 1, 0);
+        return true;
+      }
+
+      if ((VT.getScalarSizeInBits() % MaskEltSizeInBits) == 0) {
+        unsigned ElementSplit = VT.getScalarSizeInBits() / MaskEltSizeInBits;
+        SplitElementToMask(CN->getAPIntValue());
+        RawMask.append((VT.getVectorNumElements() - 1) * ElementSplit, 0);
+        return true;
+      }
     }
     return false;
   }
@@ -4815,7 +4819,7 @@ static bool getTargetShuffleMaskIndices(SDValue MaskNode,
   // We can always decode if the buildvector is all zero constants,
   // but can't use isBuildVectorAllZeros as it might contain UNDEFs.
   if (all_of(MaskNode->ops(), X86::isZeroNode)) {
-    RawMask.append(VT.getSizeInBits() / MaskEltSizeInBits, 0);
+    RawMask.append(NumMaskElts, 0);
     return true;
   }
 
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
index 9112fea..2d1bf08 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-avx.ll
@@ -258,10 +258,6 @@ define <8 x float> @combine_vpermilvar_8f32_movsldup(<8 x float> %a0) {
 define <2 x double> @combine_vpermilvar_2f64_identity(<2 x double> %a0) {
 ; X32-LABEL: combine_vpermilvar_2f64_identity:
 ; X32:       # BB#0:
-; X32-NEXT:    movl $2, %eax
-; X32-NEXT:    vmovd %eax, %xmm1
-; X32-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
-; X32-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermilvar_2f64_identity:
@@ -365,10 +361,7 @@ define <4 x float> @combine_vpermilvar_4f32_as_insertps(<4 x float> %a0) {
 define <2 x double> @constant_fold_vpermilvar_pd() {
 ; X32-LABEL: constant_fold_vpermilvar_pd:
 ; X32:       # BB#0:
-; X32-NEXT:    movl $2, %eax
-; X32-NEXT:    vmovd %eax, %xmm0
-; X32-NEXT:    vmovapd {{.*#+}} xmm1 = [1.000000e+00,2.000000e+00]
-; X32-NEXT:    vpermilpd %xmm0, %xmm1, %xmm0
+; X32-NEXT:    vpermilpd {{.*#+}} xmm0 = mem[1,0]
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: constant_fold_vpermilvar_pd:
diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
index 4b760d1..121fafc 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining-xop.ll
@@ -15,10 +15,7 @@ declare <16 x i8> @llvm.x86.xop.vpperm(<16 x i8>, <16 x i8>, <16 x i8>) nounwind
 define <2 x double> @combine_vpermil2pd_identity(<2 x double> %a0, <2 x double> %a1) {
 ; X32-LABEL: combine_vpermil2pd_identity:
 ; X32:       # BB#0:
-; X32-NEXT:    movl $2, %eax
-; X32-NEXT:    vmovd %eax, %xmm2
-; X32-NEXT:    vpermil2pd $0, %xmm2, %xmm0, %xmm1, %xmm0
-; X32-NEXT:    vpermil2pd $0, %xmm2, %xmm0, %xmm0, %xmm0
+; X32-NEXT:    vmovaps %xmm1, %xmm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: combine_vpermil2pd_identity:
-- 
2.7.4
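For reference, the sketch below restates the two constant-mask extraction rules from the X86ISelLowering.cpp hunk as a standalone function. It is illustrative only, not LLVM code: extractMovlMask is a hypothetical name, plain uint64_t/std::vector stand in for APInt and SmallVectorImpl, and power-of-two element widths are assumed.

#include <cstdint>
#include <optional>
#include <vector>

// Sketch of extracting a shuffle mask from vzext_movl(scalar_to_vector(C)):
// the scalar constant C lands in element 0 and the movl zeros the rest.
std::optional<std::vector<uint64_t>>
extractMovlMask(uint64_t C, unsigned ScalarSizeInBits,
                unsigned MaskEltSizeInBits, unsigned VecSizeInBits) {
  unsigned NumMaskElts = VecSizeInBits / MaskEltSizeInBits;
  std::vector<uint64_t> RawMask;

  // New case added by the patch: mask elements at least as wide as the
  // materialized scalar (e.g. a vXi64 mask built with a 32-bit vmovd on
  // x86-32). C becomes mask element 0; the remaining elements are zero.
  if ((MaskEltSizeInBits % ScalarSizeInBits) == 0) {
    RawMask.push_back(C);
    RawMask.resize(NumMaskElts, 0);
    return RawMask;
  }

  // Pre-existing case: one scalar splits into several narrower mask
  // elements, emitted low bits first; the rest of the mask is zero.
  if ((ScalarSizeInBits % MaskEltSizeInBits) == 0) {
    unsigned ElementSplit = ScalarSizeInBits / MaskEltSizeInBits;
    uint64_t EltMask =
        MaskEltSizeInBits >= 64 ? ~0ULL : (1ULL << MaskEltSizeInBits) - 1;
    for (unsigned I = 0; I != ElementSplit; ++I)
      RawMask.push_back((C >> (I * MaskEltSizeInBits)) & EltMask);
    RawMask.resize(NumMaskElts, 0);
    return RawMask;
  }

  return std::nullopt; // Neither element width divides the other.
}

With the operands from the combine_vpermilvar_2f64_identity test above (C = 2 materialized by vmovd, 32-bit scalar, 64-bit mask elements, 128-bit vector), this returns {2, 0}: the vXi64 mask that 32-bit targets previously failed to extract, which is why the X32 vpermilpd pair could not be folded away before this change.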