From 0dcb366268eebecaad88dcc6ae8166b0f61e9b7f Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 7 Jul 2014 09:06:58 +0000 Subject: [PATCH] [x86] Teach the new vector shuffle lowering code to handle what is essentially a DAG combine that never gets a chance to run. We might typically expect DAG combining to remove shuffles-of-splats and other similar patterns, but we don't get a chance to run the DAG combiner when we recursively form sub-shuffles during the lowering of a shuffle. So instead hand-roll a really important combine directly into the lowering code to detect shuffles-of-splats, especially shuffles of an all-zero splat which needn't even have the same element width, etc. This lets the new vector shuffle lowering handle shuffles which implement things like zero-extension really nicely. This will become even more important when I wire the legalization of zero-extension to vector shuffles with the new widening legalization strategy. llvm-svn: 212444 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 41 +++++++++++++++++++++++++ llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll | 17 ++++++++++ 2 files changed, 58 insertions(+) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b372950..61accea 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7914,6 +7914,47 @@ static SDValue lowerVectorShuffle(SDValue Op, const X86Subtarget *Subtarget, return DAG.getVectorShuffle(VT, dl, V1, V2, NewMask); } + // Check for a shuffle of a splat, and return just the splat. While DAG + // combining will do a similar transformation, this shows up with the + // internally created shuffles and so we handle it specially here as we won't + // have another chance to DAG-combine the generic shuffle instructions. + if (V2IsUndef) { + SDValue V = V1; + + // Look through any bitcasts. These can't change the size, just the number + // of elements which we check later. 
+ while (V.getOpcode() == ISD::BITCAST) + V = V->getOperand(0); + + // A splat should always show up as a build vector node. + if (V.getOpcode() == ISD::BUILD_VECTOR) { + SDValue Base; + bool AllSame = true; + for (unsigned i = 0; i != V->getNumOperands(); ++i) + if (V->getOperand(i).getOpcode() != ISD::UNDEF) { + Base = V->getOperand(i); + break; + } + // Splat of <undef>, return <undef> + if (!Base) + return V1; + for (unsigned i = 0; i != V->getNumOperands(); ++i) + if (V->getOperand(i) != Base) { + AllSame = false; + break; + } + // Splat of <x, x, x, x>, return <x, x, x, x>, provided that the + // number of elements match or the value splatted is a zero constant. + if (AllSame) { + if (V.getValueType().getVectorNumElements() == (unsigned)NumElements) + return V1; + if (auto *C = dyn_cast<ConstantSDNode>(Base)) + if (C->isNullValue()) + return V1; + } + } + } + // For integer vector shuffles, try to collapse them into a shuffle of fewer // lanes but wider integers. We cap this to not form integers larger than i64 // but it might be interesting to form i128 integers to handle flipping the diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll index f392a62..0c317c3 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -172,3 +172,20 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_31_30_29_28_11_10_09_08_23_22_21_20( %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 31, i32 30, i32 29, i32 28, i32 11, i32 10, i32 9, i32 8, i32 23, i32 22, i32 21, i32 20> ret <16 x i8> %shuffle } + +define <16 x i8> @zext_to_v8i16_shuffle(<16 x i8> %a) { +; CHECK-SSE2-LABEL: @zext_to_v8i16_shuffle +; CHECK-SSE2: pxor %xmm1, %xmm1 +; CHECK-SSE2-NEXT: punpcklbw %xmm1, %xmm0 + %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 1, i32 19, i32 2, i32 21, i32 3, i32 23, i32 4, i32 25, i32 5, i32 27, i32 6, i32 29, i32 7, i32 31> + ret <16 x i8> %shuffle +} + +define <16 x i8> @zext_to_v4i32_shuffle(<16 x i8> %a) { +; CHECK-SSE2-LABEL: @zext_to_v4i32_shuffle +; CHECK-SSE2: pxor %xmm1, %xmm1 +; CHECK-SSE2-NEXT: punpcklbw %xmm1, 
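%xmm0 + %shuffle = shufflevector <16 x i8> %a, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 17, i32 18, i32 19, i32 1, i32 21, i32 22, i32 23, i32 2, i32 25, i32 26, i32 27, i32 3, i32 29, i32 30, i32 31> + ret <16 x i8> %shuffle +} -- 2.7.4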