/// into either a single instruction if there is a special purpose instruction
/// for this operation, or into a PSHUFB instruction which is a fully general
/// instruction but should only be used to replace chains over a certain depth.
-static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
- int Depth, bool HasPSHUFB, SelectionDAG &DAG,
+static bool combineX86ShuffleChain(SDValue Input, SDValue Root,
+ ArrayRef<int> Mask, int Depth,
+ bool HasPSHUFB, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI,
const X86Subtarget &Subtarget) {
assert(!Mask.empty() && "Cannot combine an empty shuffle mask!");
// Find the operand that enters the chain. Note that multiple uses are OK
// here, we're not going to remove the operand we find.
- SDValue Input = Op.getOperand(0);
while (Input.getOpcode() == ISD::BITCAST)
Input = Input.getOperand(0);
DAG, DCI, Subtarget))
return true;
-
// Minor canonicalization of the accumulated shuffle mask to make it easier
// to match below. All this does is detect masks with sequential pairs of
// elements, and shrink them to the half-width mask. It does this in a loop
WidenedMask.clear();
}
- return combineX86ShuffleChain(Op, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
+ return combineX86ShuffleChain(Input0, Root, Mask, Depth, HasPSHUFB, DAG, DCI,
Subtarget);
}
define <8 x float> @test_mm256_permute2f128_ps(<8 x float> %a0, <8 x float> %a1) {
; X32-LABEL: test_mm256_permute2f128_ps:
; X32: # BB#0:
+; X32-NEXT: vmovaps %ymm1, %ymm0
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_permute2f128_ps:
; X64: # BB#0:
+; X64-NEXT: vmovaps %ymm1, %ymm0
; X64-NEXT: retq
%res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 50)
ret <8 x float> %res