From 822d2e35e7729ea844580152ff635262d2cf44e9 Mon Sep 17 00:00:00 2001 From: Simon Pilgrim Date: Tue, 5 Feb 2019 17:02:49 +0000 Subject: [PATCH] [X86][AVX] Attempt to combine shuffles to subvector broadcast load llvm-svn: 353189 --- llvm/lib/Target/X86/X86ISelLowering.cpp | 18 ++++++++++++++++++ llvm/test/CodeGen/X86/subvector-broadcast.ll | 6 ++---- 2 files changed, 20 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index b55db2b..2cfc931 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -31070,6 +31070,24 @@ static SDValue combineX86ShuffleChain(ArrayRef<SDValue> Inputs, SDValue Root, bool IsEVEXShuffle = RootSizeInBits == 512 || (Subtarget.hasVLX() && RootSizeInBits >= 128); + // Attempt to match a subvector broadcast. + // shuffle(insert_subvector(undef, sub, 0), undef, 0, 0, 0, 0) + if (UnaryShuffle && + (BaseMaskEltSizeInBits == 128 || BaseMaskEltSizeInBits == 256)) { + SmallVector<int, 64> BroadcastMask(NumBaseMaskElts, 0); + if (isTargetShuffleEquivalent(BaseMask, BroadcastMask)) { + SDValue Src = Inputs[0]; + if (Src.getOpcode() == ISD::INSERT_SUBVECTOR && + Src.getOperand(0).isUndef() && + Src.getOperand(1).getValueSizeInBits() == BaseMaskEltSizeInBits && + MayFoldLoad(Src.getOperand(1)) && isNullConstant(Src.getOperand(2))) { + return DAG.getBitcast(RootVT, DAG.getNode(X86ISD::SUBV_BROADCAST, DL, + Src.getValueType(), + Src.getOperand(1))); + } + } + } + // TODO - handle 128/256-bit lane shuffles of 512-bit vectors. // Handle 128-bit lane shuffles of 256-bit vectors. 
diff --git a/llvm/test/CodeGen/X86/subvector-broadcast.ll b/llvm/test/CodeGen/X86/subvector-broadcast.ll index c2aeb07..0e66f60 100644 --- a/llvm/test/CodeGen/X86/subvector-broadcast.ll +++ b/llvm/test/CodeGen/X86/subvector-broadcast.ll @@ -1726,8 +1726,7 @@ define <8 x double> @broadcast_v8f64_v2f64_u1u10101(<2 x double>* %vp) { ; X32-AVX512-LABEL: broadcast_v8f64_v2f64_u1u10101: ; X32-AVX512: # %bb.0: ; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax -; X32-AVX512-NEXT: vmovapd (%eax), %xmm0 -; X32-AVX512-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; X32-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; X32-AVX512-NEXT: retl ; ; X64-AVX1-LABEL: broadcast_v8f64_v2f64_u1u10101: @@ -1745,8 +1744,7 @@ define <8 x double> @broadcast_v8f64_v2f64_u1u10101(<2 x double>* %vp) { ; ; X64-AVX512-LABEL: broadcast_v8f64_v2f64_u1u10101: ; X64-AVX512: # %bb.0: -; X64-AVX512-NEXT: vmovapd (%rdi), %xmm0 -; X64-AVX512-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,0,1,0,1] +; X64-AVX512-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3] ; X64-AVX512-NEXT: retq %vec = load <2 x double>, <2 x double>* %vp %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 1, i32 0, i32 1, i32 0, i32 1> -- 2.7.4