From ecb04335996898caaa3f87cde5d797bde8da2a4d Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Sat, 20 Feb 2016 14:39:45 +0000
Subject: [PATCH] [X86][SSE] Fixed issue with commutation of 'faux unary'
 target shuffles (PR26667)

Fixed a bug introduced by D16683 when a binary shuffle is simplified to a
unary shuffle (with undef/zero sentinel mask indices) - if this resulted in
only the second input being used, combineX86ShuffleChain failed to take this
into account and still referenced the first input.

llvm-svn: 261434
---
 llvm/lib/Target/X86/X86ISelLowering.cpp           | 9 ++++-----
 llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll | 2 ++
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 49ded80..3d63d01 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23506,15 +23506,15 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
 /// into either a single instruction if there is a special purpose instruction
 /// for this operation, or into a PSHUFB instruction which is a fully general
 /// instruction but should only be used to replace chains over a certain depth.
-static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
-                                   int Depth, bool HasPSHUFB, SelectionDAG &DAG,
+static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
+                                   ArrayRef<int> Mask, int Depth,
+                                   bool HasPSHUFB, SelectionDAG &DAG,
                                    TargetLowering::DAGCombinerInfo &DCI,
                                    const X86Subtarget &Subtarget) {
   assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
 
   // Find the operand that enters the chain. Note that multiple uses are OK
   // here, we're not going to remove the operand we find.
-  SDValue Input = Op.getOperand(0);
   while (Input.getOpcode() == ISD::BITCAST)
     Input = Input.getOperand(0);
 
@@ -23814,7 +23814,6 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
                                 DAG, DCI, Subtarget))
     return true;
 
-
   // Minor canonicalization of the accumulated shuffle mask to make it easier
   // to match below. All this does is detect masks with sequential pairs of
   // elements, and shrink them to the half-width mask. It does this in a loop
@@ -23826,7 +23825,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
     WidenedMask.clear();
   }
 
-  return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
+  return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
                                 Subtarget);
 }
 
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
index 61e2e4c..4efec2c 100644
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -391,10 +391,12 @@ declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
 define <8 x float> @test_mm256_permute2f128_ps(<8 x float> %a0, <8 x float> %a1) {
 ; X32-LABEL: test_mm256_permute2f128_ps:
 ; X32:       # BB#0:
+; X32-NEXT:    vmovaps %ymm1, %ymm0
 ; X32-NEXT:    retl
 ;
 ; X64-LABEL: test_mm256_permute2f128_ps:
 ; X64:       # BB#0:
+; X64-NEXT:    vmovaps %ymm1, %ymm0
 ; X64-NEXT:    retq
   %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 50)
   ret <8 x float> %res
--
2.7.4
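
For context, the updated test above already demonstrates the fixed behaviour: an immediate of 50 (0x32) tells vperm2f128 to take both 128-bit lanes of the result from the second source, so the nominally binary shuffle only ever reads %a1 and should lower to a plain register move. A minimal standalone reproducer in the same spirit is sketched below; the RUN line, function name and CHECK lines are illustrative and are not part of the patch:

    ; Sketch only - assumes an llc built with r261434; not part of the committed test.
    ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s

    define <8 x float> @faux_unary_vperm2f128(<8 x float> %a0, <8 x float> %a1) {
    ; CHECK-LABEL: faux_unary_vperm2f128:
    ; CHECK: vmovaps %ymm1, %ymm0
      ; imm 50 = 0x32: bits [1:0] = 2 and bits [5:4] = 3 select both result
      ; lanes from the second operand, so only %a1 is actually used.
      %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 50)
      ret <8 x float> %res
    }

    declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone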