[X86][SSE] Fixed issue with commutation of 'faux unary' target shuffles (PR26667)

author Simon Pilgrim <llvm-dev@redking.me.uk>

Sat, 20 Feb 2016 14:39:45 +0000 (14:39 +0000)

committer Simon Pilgrim <llvm-dev@redking.me.uk>

Sat, 20 Feb 2016 14:39:45 +0000 (14:39 +0000)
author Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 20 Feb 2016 14:39:45 +0000 (14:39 +0000)
committer Simon Pilgrim <llvm-dev@redking.me.uk>
Sat, 20 Feb 2016 14:39:45 +0000 (14:39 +0000)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp

index 49ded80..3d63d01 100644 (file)
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23506,15 +23506,15 @@ static SDValue PerformShuffleCombine256(SDNode *N, SelectionDAG &DAG,
  /// into either a single instruction if there is a special purpose instruction
  /// for this operation, or into a PSHUFB instruction which is a fully general
  /// instruction but should only be used to replace chains over a certain depth.
-static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
-                                   int Depth, bool HasPSHUFB, SelectionDAG &DAG,
+static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
+                                   ArrayRef<int> Mask, int Depth,
+                                   bool HasPSHUFB, SelectionDAG &DAG,
                                     TargetLowering::DAGCombinerInfo &DCI,
                                     const X86Subtarget &Subtarget) {
    assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
  
    // Find the operand that enters the chain. Note that multiple uses are OK
    // here, we're not going to remove the operand we find.
-  SDValue Input = Op.getOperand(0);
    while (Input.getOpcode() == ISD::BITCAST)
      Input = Input.getOperand(0);
  
@@ -23814,7 +23814,6 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
                                      DAG, DCI, Subtarget))
      return true;
  
-
    // Minor canonicalization of the accumulated shuffle mask to make it easier
    // to match below. All this does is detect masks with sequential pairs of
    // elements, and shrink them to the half-width mask. It does this in a loop
@@ -23826,7 +23825,7 @@ static bool combineX86ShufflesRecursively(SDValue Op, SDValue Root,
      WidenedMask.clear();
    }
  
-  return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
+  return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
                                  Subtarget);
  }
  
diff --git a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll

index 61e2e4c..4efec2c 100644 (file)
--- a/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
+++ b/llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll
@@ -391,10 +391,12 @@ declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>,
  define <8 x float> @test_mm256_permute2f128_ps(<8 x float> %a0, <8 x float> %a1) {
  ; X32-LABEL: test_mm256_permute2f128_ps:
  ; X32:       # BB#0:
+; X32-NEXT:    vmovaps %ymm1, %ymm0
  ; X32-NEXT:    retl
  ;
  ; X64-LABEL: test_mm256_permute2f128_ps:
  ; X64:       # BB#0:
+; X64-NEXT:    vmovaps %ymm1, %ymm0
  ; X64-NEXT:    retq
    %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 50)
    ret <8 x float> %res
author	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sat, 20 Feb 2016 14:39:45 +0000 (14:39 +0000)
committer	Simon Pilgrim <llvm-dev@redking.me.uk>
	Sat, 20 Feb 2016 14:39:45 +0000 (14:39 +0000)
llvm/lib/Target/X86/X86ISelLowering.cpp		patch | blob | history
llvm/test/CodeGen/X86/avx-intrinsics-fast-isel.ll		patch | blob | history