From 1e63dd0b44998721fefae9f690882af927ca3c2a Mon Sep 17 00:00:00 2001 From: Sanjay Patel Date: Tue, 4 Jun 2019 15:15:59 +0000 Subject: [PATCH] [SelectionDAG][x86] limit post-legalization store merging by type The proposal in D62498 showed that x86 would benefit from vector store splitting, but that may conflict with the generic DAG combiner's store merging transforms. Add memory type to the existing TLI hook that enables the merging transforms, so we can limit those changes to scalars only for x86. llvm-svn: 362507 --- llvm/include/llvm/CodeGen/TargetLowering.h | 10 ++++++---- llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2 +- llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h | 2 +- llvm/lib/Target/X86/X86ISelLowering.h | 6 +++++- llvm/test/CodeGen/X86/vector-trunc-widen.ll | 4 ++-- 5 files changed, 15 insertions(+), 9 deletions(-) diff --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h index d00cc16..ad17fd8 100644 --- a/llvm/include/llvm/CodeGen/TargetLowering.h +++ b/llvm/include/llvm/CodeGen/TargetLowering.h @@ -435,10 +435,12 @@ public: return false; } - /// Allow store merging after legalization in addition to before legalization. - /// This may catch stores that do not exist earlier (eg, stores created from - /// intrinsics). - virtual bool mergeStoresAfterLegalization() const { return true; } + /// Allow store merging for the specified type after legalization in addition + /// to before legalization. This may transform stores that do not exist + /// earlier (for example, stores created from intrinsics). + virtual bool mergeStoresAfterLegalization(EVT MemVT) const { + return true; + } /// Returns if it's reasonable to merge stores to MemVT size. virtual bool canMergeStoresTo(unsigned AS, EVT MemVT, diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 1d1699c..33ef68c 100644 --- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -16085,7 +16085,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Always perform this optimization before types are legal. If the target // prefers, also try this after legalization to catch stores that were created // by intrinsics or other nodes. - if (!LegalTypes || (TLI.mergeStoresAfterLegalization())) { + if (!LegalTypes || (TLI.mergeStoresAfterLegalization(ST->getMemoryVT()))) { while (true) { // There can be multiple store sequences on the same chain. // Keep trying to merge store sequences until we are unable to do so diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h index 74d5d80..a17f5da 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h +++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.h @@ -234,7 +234,7 @@ public: // MergeConsecutiveStores() merges two stores; LegalizeStoreOps() un-merges; // MergeConsecutiveStores() re-merges, etc. ) to warrant turning it off for // now. - bool mergeStoresAfterLegalization() const override { return false; } + bool mergeStoresAfterLegalization(EVT) const override { return false; } bool isFsqrtCheap(SDValue Operand, SelectionDAG &DAG) const override { return true; diff --git a/llvm/lib/Target/X86/X86ISelLowering.h b/llvm/lib/Target/X86/X86ISelLowering.h index 7eed866..42b5b06 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.h +++ b/llvm/lib/Target/X86/X86ISelLowering.h @@ -799,7 +799,11 @@ namespace llvm { /// This method returns the name of a target specific DAG node. const char *getTargetNodeName(unsigned Opcode) const override; - bool mergeStoresAfterLegalization() const override { return true; } + /// Do not merge vector stores after legalization because that may conflict + /// with x86-specific store splitting optimizations. + bool mergeStoresAfterLegalization(EVT MemVT) const override { + return !MemVT.isVector(); + } bool canMergeStoresTo(unsigned AddressSpace, EVT MemVT, const SelectionDAG &DAG) const override; diff --git a/llvm/test/CodeGen/X86/vector-trunc-widen.ll b/llvm/test/CodeGen/X86/vector-trunc-widen.ll index 1eff810..54ebdbe 100644 --- a/llvm/test/CodeGen/X86/vector-trunc-widen.ll +++ b/llvm/test/CodeGen/X86/vector-trunc-widen.ll @@ -2076,8 +2076,8 @@ define void @store_merge_split(<8 x i32> %w1, <8 x i32> %w2, i64 %idx, <8 x i16> ; AVX2-NEXT: vpshufb %ymm2, %ymm1, %ymm1 ; AVX2-NEXT: vpermq {{.*#+}} ymm1 = ymm1[0,2,2,3] ; AVX2-NEXT: shlq $4, %rdi -; AVX2-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 -; AVX2-NEXT: vmovdqu %ymm0, (%rsi,%rdi) +; AVX2-NEXT: vmovdqu %xmm0, (%rsi,%rdi) +; AVX2-NEXT: vmovdqu %xmm1, 16(%rsi,%rdi) ; AVX2-NEXT: vzeroupper ; AVX2-NEXT: retq ; -- 2.7.4