From cc316f013a72e49ec97ad7f50164be88f611b941 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Tue, 16 Aug 2016 12:52:06 +0000
Subject: [PATCH] [X86][SSE] Add support for combining v2f64 target shuffles
 to VZEXT_MOVL byte rotations

The combine was only matching v2i64 as it assumed lowering to MOVQ - but we
have v2f64 patterns that match in a similar fashion

llvm-svn: 278794
---
 llvm/lib/Target/X86/X86ISelLowering.cpp           | 6 +++---
 llvm/test/CodeGen/X86/vector-shuffle-combining.ll | 6 +-----
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 04b068f..5014680 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24884,11 +24884,11 @@ static bool matchUnaryVectorShuffle(MVT MaskVT, ArrayRef<int> Mask,
   bool FloatDomain = MaskVT.isFloatingPoint() ||
                      (!Subtarget.hasAVX2() && MaskVT.is256BitVector());

-  // Match a 128-bit integer vector against a VZEXT_MOVL (MOVQ) instruction.
-  if (!FloatDomain && MaskVT.is128BitVector() &&
+  // Match a 128-bit vector against a VZEXT_MOVL instruction.
+  if (MaskVT.is128BitVector() && Subtarget.hasSSE2() &&
       isTargetShuffleEquivalent(Mask, {0, SM_SentinelZero})) {
     Shuffle = X86ISD::VZEXT_MOVL;
-    ShuffleVT = MVT::v2i64;
+    ShuffleVT = MaskVT;
     return true;
   }

diff --git a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
index 128ead3..6e8fc5e 100644
--- a/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
+++ b/llvm/test/CodeGen/X86/vector-shuffle-combining.ll
@@ -2837,16 +2837,12 @@ define void @combine_scalar_load_with_blend_with_zero(double* %a0, <4 x float>*
 ; SSE41-LABEL: combine_scalar_load_with_blend_with_zero:
 ; SSE41:       # BB#0:
 ; SSE41-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
-; SSE41-NEXT:    xorpd %xmm1, %xmm1
-; SSE41-NEXT:    blendpd {{.*#+}} xmm1 = xmm0[0],xmm1[1]
-; SSE41-NEXT:    movapd %xmm1, (%rsi)
+; SSE41-NEXT:    movapd %xmm0, (%rsi)
 ; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: combine_scalar_load_with_blend_with_zero:
 ; AVX:       # BB#0:
 ; AVX-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; AVX-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
 ; AVX-NEXT:    vmovapd %xmm0, (%rsi)
 ; AVX-NEXT:    retq
   %1 = load double, double* %a0, align 8
--
2.7.4
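
Illustrative note (not part of the original patch): a minimal LLVM IR sketch
of the float-domain pattern the updated combine can now match - a v2f64
shuffle that keeps lane 0 and zeroes lane 1, i.e. the {0, SM_SentinelZero}
mask that previously only matched in the integer domain as MOVQ. The function
name is hypothetical.

define <2 x double> @zero_upper_lane_v2f64(<2 x double> %x) {
  ; Lane 0 comes from %x, lane 1 comes from the zero vector - the {0, zero}
  ; shuffle mask that matchUnaryVectorShuffle now maps to VZEXT_MOVL for
  ; v2f64 as well as v2i64.
  %r = shufflevector <2 x double> %x, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ret <2 x double> %r
}

In the test updated above, the zero-filling movsd load already supplies the
VZEXT_MOVL semantics, so once the combine fires in the float domain the
xorpd + blendpd pair (and the AVX equivalents) becomes redundant, as the
SSE41 and AVX check-line deltas show.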