From 2d5e281b0fa0b5babcd48e47d64e94224aea4a1f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim
Date: Mon, 27 Jan 2020 16:43:59 +0000
Subject: [PATCH] [X86][AVX] Add a more aggressive
 SimplifyMultipleUseDemandedBits to simplify masked store masks.

Fixes a poor codegen issue noticed in PR11210.
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 11 +++++++++--
 llvm/test/CodeGen/X86/masked_store.ll   |  2 --
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 26a11dc..153a22e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -41716,9 +41716,16 @@ static SDValue combineMaskedStore(SDNode *N, SelectionDAG &DAG,
   // simplify ops leading up to it. We only demand the MSB of each lane.
   SDValue Mask = Mst->getMask();
   if (Mask.getScalarValueSizeInBits() != 1) {
-    APInt DemandedMask(APInt::getSignMask(VT.getScalarSizeInBits()));
-    if (TLI.SimplifyDemandedBits(Mask, DemandedMask, DCI))
+    APInt DemandedBits(APInt::getSignMask(VT.getScalarSizeInBits()));
+    if (TLI.SimplifyDemandedBits(Mask, DemandedBits, DCI))
       return SDValue(N, 0);
+    APInt DemandedElts = APInt::getAllOnesValue(VT.getVectorNumElements());
+    if (SDValue NewMask = TLI.SimplifyMultipleUseDemandedBits(
+            Mask, DemandedBits, DemandedElts, DAG, 0))
+      return DAG.getMaskedStore(Mst->getChain(), SDLoc(N), Mst->getValue(),
+                                Mst->getBasePtr(), Mst->getOffset(), NewMask,
+                                Mst->getMemoryVT(), Mst->getMemOperand(),
+                                Mst->getAddressingMode());
   }
 
   SDValue Value = Mst->getValue();
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index efb061f..dc78006 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -5126,8 +5126,6 @@ define void @PR11210(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <2 x i64
 ;
 ; AVX1OR2-LABEL: PR11210:
 ; AVX1OR2:       ## %bb.0:
-; AVX1OR2-NEXT:    vpxor %xmm3, %xmm3, %xmm3
-; AVX1OR2-NEXT:    vpcmpgtd %xmm2, %xmm3, %xmm2
 ; AVX1OR2-NEXT:    vmaskmovps %xmm0, %xmm2, (%rdi)
 ; AVX1OR2-NEXT:    vmaskmovps %xmm1, %xmm2, (%rdi)
 ; AVX1OR2-NEXT:    retq
-- 
2.7.4
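
For context, a minimal IR reproducer in the spirit of the PR11210 test: the
function signature matches the hunk header above, but the body below is an
assumed reduction, not copied from masked_store.ll.

  ; Assumed reduction of the PR11210 pattern. Both masked stores share %cmp,
  ; so the mask node has multiple uses and the single-use SimplifyDemandedBits
  ; path cannot rewrite it in place.
  declare void @llvm.masked.store.v4f32.p0v4f32(<4 x float>, <4 x float>*, i32 immarg, <4 x i1>)

  define void @PR11210(<4 x float> %x, <4 x float>* %ptr, <4 x float> %y, <2 x i64> %mask) {
    %bc = bitcast <2 x i64> %mask to <4 x i32>
    ; Each i1 lane below is exactly the sign bit of the corresponding i32 lane.
    %cmp = icmp slt <4 x i32> %bc, zeroinitializer
    call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %x, <4 x float>* %ptr, i32 1, <4 x i1> %cmp)
    call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %y, <4 x float>* %ptr, i32 1, <4 x i1> %cmp)
    ret void
  }

vmaskmovps reads only the MSB of each 32-bit mask lane, and the sign-extended
compare against zero has the same sign bits as %bc itself. Unlike
SimplifyDemandedBits, SimplifyMultipleUseDemandedBits does not modify the
multi-use compare node; it hands back %bc as an alternative mask operand,
which is why the vpxor/vpcmpgtd pair disappears from the CHECK lines above.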