From 68aeaab888094fa192bf9257ffd5929b93778a70 Mon Sep 17 00:00:00 2001 From: Craig Topper Date: Mon, 2 Mar 2020 22:28:43 -0800 Subject: [PATCH] [X86] Don't count the chain uses when forming broadcast loads in lowerBuildVectorAsBroadcast. The build_vector needs to be the only user of the data, but the chain will likely have another use. So we can't make sure the build_vector is the only user of the node. --- llvm/lib/Target/X86/X86ISelLowering.cpp | 10 +++++++--- llvm/test/CodeGen/X86/pr35443.ll | 3 +-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 4c3fd0d..cef768e 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -8621,10 +8621,12 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, bool ConstSplatVal = (Ld.getOpcode() == ISD::Constant || Ld.getOpcode() == ISD::ConstantFP); + bool IsLoad = ISD::isNormalLoad(Ld.getNode()); // Make sure that all of the users of a non-constant load are from the // BUILD_VECTOR node. - if (!ConstSplatVal && !BVOp->isOnlyUserOf(Ld.getNode())) + // FIXME: Is the use count needed for non-constant, non-load case? + if (!ConstSplatVal && !IsLoad && !BVOp->isOnlyUserOf(Ld.getNode())) return SDValue(); unsigned ScalarSize = Ld.getValueSizeInBits(); @@ -8674,8 +8676,6 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, } } - bool IsLoad = ISD::isNormalLoad(Ld.getNode()); - // Handle AVX2 in-register broadcasts. if (!IsLoad && Subtarget.hasInt256() && (ScalarSize == 32 || (IsGE256 && ScalarSize == 64))) @@ -8685,6 +8685,10 @@ static SDValue lowerBuildVectorAsBroadcast(BuildVectorSDNode *BVOp, if (!IsLoad) return SDValue(); + // Make sure the non-chain result is only used by this build vector. + if (!Ld->hasNUsesOfValue(NumElts - NumUndefElts, 0)) + return SDValue(); + if (ScalarSize == 32 || (IsGE256 && ScalarSize == 64) || (Subtarget.hasVLX() && ScalarSize == 64)) return DAG.getNode(X86ISD::VBROADCAST, dl, VT, Ld); diff --git a/llvm/test/CodeGen/X86/pr35443.ll b/llvm/test/CodeGen/X86/pr35443.ll index a42a14b..01001b0 100644 --- a/llvm/test/CodeGen/X86/pr35443.ll +++ b/llvm/test/CodeGen/X86/pr35443.ll @@ -8,8 +8,7 @@ define void @pr35443() { ; CHECK-LABEL: pr35443: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: movzbl ac+{{.*}}(%rip), %eax -; CHECK-NEXT: vmovd %eax, %xmm0 +; CHECK-NEXT: vpbroadcastb ac+{{.*}}(%rip), %xmm0 ; CHECK-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero ; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1 ; CHECK-NEXT: vpsubq %ymm0, %ymm1, %ymm0 -- 2.7.4