From 50b35caf30513174640493c72047d9f9bdc4d985 Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sun, 2 Jun 2019 22:52:38 +0000
Subject: [PATCH] [DAGCombiner][X86] Fold away masked store and scatter with
 all zeroes mask.

Similar to what was done for masked load and gather.

llvm-svn: 362342
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp  | 29 +++++++++++++--------
 llvm/test/CodeGen/X86/masked_gather_scatter.ll | 35 +++-----------------------
 llvm/test/CodeGen/X86/masked_store.ll          | 22 +++-------------
 3 files changed, 24 insertions(+), 62 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index a866dbb..5da66eb 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -8006,14 +8006,19 @@ static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
 }
 
 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   SDValue Mask = MSC->getMask();
-  SDValue Data  = MSC->getValue();
+  SDValue Data = MSC->getValue();
+  SDValue Chain = MSC->getChain();
   SDLoc DL(N);
 
+  // Zap scatters with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return Chain;
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MSCATTER data type requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -8031,8 +8036,6 @@ SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
     EVT LoVT, HiVT;
     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
 
-    SDValue Chain = MSC->getChain();
-
     EVT MemoryVT = MSC->getMemoryVT();
     unsigned Alignment = MSC->getOriginalAlignment();
 
@@ -8065,15 +8068,20 @@
 }
 
 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
-  if (Level >= AfterLegalizeTypes)
-    return SDValue();
-
   MaskedStoreSDNode *MST = cast<MaskedStoreSDNode>(N);
   SDValue Mask = MST->getMask();
-  SDValue Data  = MST->getValue();
+  SDValue Data = MST->getValue();
+  SDValue Chain = MST->getChain();
   EVT VT = Data.getValueType();
   SDLoc DL(N);
 
+  // Zap masked stores with a zero mask.
+  if (ISD::isBuildVectorAllZeros(Mask.getNode()))
+    return Chain;
+
+  if (Level >= AfterLegalizeTypes)
+    return SDValue();
+
   // If the MSTORE data type requires splitting and the mask is provided by a
   // SETCC, then split both nodes and its operands before legalization. This
   // prevents the type legalizer from unrolling SETCC into scalar comparisons
@@ -8087,7 +8095,6 @@ SDValue DAGCombiner::visitMSTORE(SDNode *N) {
     SDValue MaskLo, MaskHi, Lo, Hi;
     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
 
-    SDValue Chain = MST->getChain();
     SDValue Ptr   = MST->getBasePtr();
 
     EVT MemoryVT = MST->getMemoryVT();
diff --git a/llvm/test/CodeGen/X86/masked_gather_scatter.ll b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
index 2d6b19b..2c4294e 100644
--- a/llvm/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/llvm/test/CodeGen/X86/masked_gather_scatter.ll
@@ -2966,38 +2966,9 @@ define <16 x float> @test_sext_cse(float* %base, <16 x i32> %ind, <16 x i32>* %f
 }
 
 define void @zero_mask(<2 x double>%a1, <2 x double*> %ptr) {
-; KNL_64-LABEL: zero_mask:
-; KNL_64:       # %bb.0:
-; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
-; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL_64-NEXT:    kxorw %k0, %k0, %k1
-; KNL_64-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
-; KNL_64-NEXT:    vzeroupper
-; KNL_64-NEXT:    retq
-;
-; KNL_32-LABEL: zero_mask:
-; KNL_32:       # %bb.0:
-; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
-; KNL_32-NEXT:    vpsllq $32, %xmm1, %xmm1
-; KNL_32-NEXT:    vpsraq $32, %zmm1, %zmm1
-; KNL_32-NEXT:    kxorw %k0, %k0, %k1
-; KNL_32-NEXT:    vscatterqpd %zmm0, (,%zmm1) {%k1}
-; KNL_32-NEXT:    vzeroupper
-; KNL_32-NEXT:    retl
-;
-; SKX-LABEL: zero_mask:
-; SKX:       # %bb.0:
-; SKX-NEXT:    kxorw %k0, %k0, %k1
-; SKX-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
-; SKX-NEXT:    retq
-;
-; SKX_32-LABEL: zero_mask:
-; SKX_32:       # %bb.0:
-; SKX_32-NEXT:    vpsllq $32, %xmm1, %xmm1
-; SKX_32-NEXT:    vpsraq $32, %xmm1, %xmm1
-; SKX_32-NEXT:    kxorw %k0, %k0, %k1
-; SKX_32-NEXT:    vscatterqpd %xmm0, (,%xmm1) {%k1}
-; SKX_32-NEXT:    retl
+; ALL-LABEL: zero_mask:
+; ALL:       # %bb.0:
+; ALL-NEXT:    ret{{[l|q]}}
   call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %ptr, i32 4, <2 x i1> zeroinitializer)
   ret void
 }
diff --git a/llvm/test/CodeGen/X86/masked_store.ll b/llvm/test/CodeGen/X86/masked_store.ll
index 180197c..984b995 100644
--- a/llvm/test/CodeGen/X86/masked_store.ll
+++ b/llvm/test/CodeGen/X86/masked_store.ll
@@ -5510,25 +5510,9 @@ define void @zero_mask(<2 x double>* %addr, <2 x double> %val) {
 ; SSE:       ## %bb.0:
 ; SSE-NEXT:    retq
 ;
-; AVX1OR2-LABEL: zero_mask:
-; AVX1OR2:       ## %bb.0:
-; AVX1OR2-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
-; AVX1OR2-NEXT:    vmaskmovpd %xmm0, %xmm1, (%rdi)
-; AVX1OR2-NEXT:    retq
-;
-; AVX512F-LABEL: zero_mask:
-; AVX512F:       ## %bb.0:
-; AVX512F-NEXT:    ## kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT:    kxorw %k0, %k0, %k1
-; AVX512F-NEXT:    vmovupd %zmm0, (%rdi) {%k1}
-; AVX512F-NEXT:    vzeroupper
-; AVX512F-NEXT:    retq
-;
-; AVX512VL-LABEL: zero_mask:
-; AVX512VL:       ## %bb.0:
-; AVX512VL-NEXT:    kxorw %k0, %k0, %k1
-; AVX512VL-NEXT:    vmovupd %xmm0, (%rdi) {%k1}
-; AVX512VL-NEXT:    retq
+; AVX-LABEL: zero_mask:
+; AVX:       ## %bb.0:
+; AVX-NEXT:    retq
   call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %addr, i32 4, <2 x i1> zeroinitializer)
   ret void
 }
-- 
2.7.4
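
Note: the commit message says this mirrors the earlier fold for masked load and gather. For context, the load-side combine has the same shape but must replace two results (the loaded value and the chain) instead of one. The sketch below is a hedged paraphrase of that pre-existing code in DAGCombiner::visitMLOAD, not a verbatim quote from the prior commit, so treat the exact lines as approximate:

    // Sketch of the analogous load-side fold: a masked load whose mask is
    // all zeroes reads no lanes, so its value result is just the passthru
    // operand and its chain result is the incoming chain.
    SDValue DAGCombiner::visitMLOAD(SDNode *N) {
      MaskedLoadSDNode *MLD = cast<MaskedLoadSDNode>(N);
      SDValue Mask = MLD->getMask();

      // Zap masked loads with a zero mask. Loads produce two results, so the
      // replacement goes through CombineTo() rather than returning a single
      // replacement value.
      if (ISD::isBuildVectorAllZeros(Mask.getNode()))
        return CombineTo(N, MLD->getPassThru(), MLD->getChain());

      return SDValue(); // remaining load combines elided
    }

Stores and scatters produce only a chain result, which is why visitMSTORE and visitMSCATTER in the patch above can simply return Chain to delete the node.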