From 37a18821cd8b0df5ac877659d2d397f0e268407c Mon Sep 17 00:00:00 2001 From: Chandler Carruth Date: Mon, 4 Aug 2014 01:14:24 +0000 Subject: [PATCH] [x86] Handle single input shuffles in the SSSE3 case more intelligently. I spent some time looking into a better or more principled way to handle this. For example, by detecting arbitrary "unneeded" ORs... But really, there wasn't any point. We should simply not build blatantly wrong code so late in the pipeline, instead of adding more stages and logic later on to fix it. Avoiding this is just too simple. llvm-svn: 214680 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++++ llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll | 4 ---- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 5ae752c..94c19fb 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -7931,6 +7931,10 @@ static SDValue lowerV16I8VectorShuffle(SDValue Op, SDValue V1, SDValue V2, } V1 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V1, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V1Mask)); + if (isSingleInputShuffleMask(Mask)) + return V1; // Single inputs are easy. + + // Otherwise, blend the two. 
V2 = DAG.getNode(X86ISD::PSHUFB, DL, MVT::v16i8, V2, DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v16i8, V2Mask)); return DAG.getNode(ISD::OR, DL, MVT::v16i8, V1, V2); diff --git a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll index 49620e9..693a276 100644 --- a/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll +++ b/llvm/test/CodeGen/X86/vector-shuffle-128-v16.ll @@ -179,9 +179,7 @@ define <16 x i8> @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12( ; ; SSSE3-LABEL: @shuffle_v16i8_03_02_01_00_07_06_05_04_11_10_09_08_15_14_13_12 ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12] -; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12> ret <16 x i8> %shuffle @@ -275,9 +273,7 @@ define <16 x i8> @trunc_v4i32_shuffle(<16 x i8> %a) { ; ; SSSE3-LABEL: @trunc_v4i32_shuffle ; SSSE3: # BB#0: -; SSSE3-NEXT: pshufb {{.*}} # xmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero ; SSSE3-NEXT: pshufb {{.*}} # xmm0 = xmm0[0,4,8,12],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero -; SSSE3-NEXT: por %xmm1, %xmm0 ; SSSE3-NEXT: retq %shuffle = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 4, i32 8, i32 12, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ret <16 x i8> %shuffle -- 2.7.4