From 8f997b4f0118a8f9cd5c1071b61bfbc35a7e7e85 Mon Sep 17 00:00:00 2001 From: Philip Reames Date: Wed, 11 Mar 2020 11:12:28 -0700 Subject: [PATCH] [GC] Loosen ordering on statepoint reloads to allow CSE We just removed a broken duplicate elimination algorithm in D75964, and after landed that it occurred to me that duplicate elimination is simply CSE. SelectionDAG has a build in CSE, so why wasn't that triggering? Well, it turns out we were overly conservative in the memory states for our reloads and CSE (rightly) considers the incoming memory state for a load part of the identity of the load. By loosening the chain and allowing reordering, we also allow CSE. As shown in the test case, doing iterative CSE as we go is enough to eliminate duplicate stores in later statepoints as well. We key our (block local) slot map by SDValue, so commoning a previous pair of loads at construction time means we also common following stores. Differential Revision: https://reviews.llvm.org/D76013 --- llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp | 14 ++++++++------ llvm/test/CodeGen/X86/statepoint-duplicates-export.ll | 13 +++++-------- 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp index 8301489..40033db 100644 --- a/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/StatepointLowering.cpp @@ -973,10 +973,13 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { unsigned Index = *DerivedPtrLocation; SDValue SpillSlot = DAG.getTargetFrameIndex(Index, getFrameIndexTy()); - // Note: We know all of these reloads are independent, but don't bother to - // exploit that chain wise. DAGCombine will happily do so as needed, so - // doing it here would be a small compile time win at most. - SDValue Chain = getRoot(); + // All the reloads are independent and are reading memory only modified by + // statepoints (i.e. no other aliasing stores); informing SelectionDAG of + // this this let's CSE kick in for free and allows reordering of instructions + // if possible. The lowering for statepoint sets the root, so this is + // ordering all reloads with the either a) the statepoint node itself, or b) + // the entry of the current block for an invoke statepoint. + const SDValue Chain = DAG.getRoot(); // != Builder.getRoot() auto &MF = DAG.getMachineFunction(); auto &MFI = MF.getFrameInfo(); @@ -991,8 +994,7 @@ void SelectionDAGBuilder::visitGCRelocate(const GCRelocateInst &Relocate) { SDValue SpillLoad = DAG.getLoad(LoadVT, getCurSDLoc(), Chain, SpillSlot, LoadMMO); - - DAG.setRoot(SpillLoad.getValue(1)); + PendingLoads.push_back(SpillLoad.getValue(1)); assert(SpillLoad.getNode()); setValue(&Relocate, SpillLoad); diff --git a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll index ccc0d61..f61604b 100644 --- a/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll +++ b/llvm/test/CodeGen/X86/statepoint-duplicates-export.ll @@ -44,19 +44,16 @@ next: define i1 @test2(i32 addrspace(1)* %arg) gc "statepoint-example" { ; CHECK-LABEL: test2: ; CHECK: # %bb.0: # %entry -; CHECK-NEXT: subq $24, %rsp -; CHECK-NEXT: .cfi_def_cfa_offset 32 -; CHECK-NEXT: movq %rdi, {{[0-9]+}}(%rsp) +; CHECK-NEXT: pushq %rax +; CHECK-NEXT: .cfi_def_cfa_offset 16 +; CHECK-NEXT: movq %rdi, (%rsp) ; CHECK-NEXT: callq func ; CHECK-NEXT: .Ltmp2: -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: movq %rax, {{[0-9]+}}(%rsp) ; CHECK-NEXT: callq func ; CHECK-NEXT: .Ltmp3: -; CHECK-NEXT: movq {{[0-9]+}}(%rsp), %rax -; CHECK-NEXT: orq {{[0-9]+}}(%rsp), %rax +; CHECK-NEXT: cmpq $0, (%rsp) ; CHECK-NEXT: sete %al -; CHECK-NEXT: addq $24, %rsp +; CHECK-NEXT: popq %rcx ; CHECK-NEXT: .cfi_def_cfa_offset 8 ; CHECK-NEXT: retq entry: -- 2.7.4