From e996cf7dce2c86d40fec263d82545dd363d9f445 Mon Sep 17 00:00:00 2001 From: Jay Foad Date: Thu, 7 Oct 2021 15:11:31 +0100 Subject: [PATCH] [AMDGPU] Preserve MachineDominatorTree in SILowerControlFlow Updating the MachineDominatorTree is easy since SILowerControlFlow only splits and removes basic blocks. This should save a bit of compile time because previously we would recompute the dominator tree from scratch after this pass. Another reason for doing this is that SILowerControlFlow preserves LiveIntervals which transitively requires MachineDominatorTree. I think that means that SILowerControlFlow is obliged to preserve MachineDominatorTree too as explained here: https://lists.llvm.org/pipermail/llvm-dev/2020-November/146923.html although it does not seem to have caused any problems in practice yet. Differential Revision: https://reviews.llvm.org/D111313 --- llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp | 14 ++++++++++++++ llvm/test/CodeGen/AMDGPU/llc-pipeline.ll | 4 ---- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp index 0f2836e..7461ac7 100644 --- a/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp +++ b/llvm/lib/Target/AMDGPU/SILowerControlFlow.cpp @@ -52,6 +52,7 @@ #include "MCTargetDesc/AMDGPUMCTargetDesc.h" #include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/LiveIntervals.h" +#include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" using namespace llvm; @@ -69,6 +70,7 @@ private: const SIRegisterInfo *TRI = nullptr; const SIInstrInfo *TII = nullptr; LiveIntervals *LIS = nullptr; + MachineDominatorTree *MDT = nullptr; MachineRegisterInfo *MRI = nullptr; SetVector LoweredEndCf; DenseSet LoweredIf; @@ -141,6 +143,7 @@ public: void getAnalysisUsage(AnalysisUsage &AU) const override { // Should preserve the same set that TwoAddressInstructions does. + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); AU.addPreservedID(LiveVariablesID); @@ -471,6 +474,14 @@ MachineBasicBlock *SILowerControlFlow::emitEndCf(MachineInstr &MI) { MachineBasicBlock *SplitBB = &MBB; if (NeedBlockSplit) { SplitBB = MBB.splitAt(MI, /*UpdateLiveIns*/true, LIS); + if (MDT && SplitBB != &MBB) { + MachineDomTreeNode *MBBNode = (*MDT)[&MBB]; + SmallVector Children(MBBNode->begin(), + MBBNode->end()); + MachineDomTreeNode *SplitBBNode = MDT->addNewBlock(SplitBB, &MBB); + for (MachineDomTreeNode *Child : Children) + MDT->changeImmediateDominator(Child, SplitBBNode); + } Opcode = OrTermrOpc; InsPt = MI; } @@ -757,6 +768,8 @@ bool SILowerControlFlow::removeMBBifRedundant(MachineBasicBlock &MBB) { for (auto &I : MBB.instrs()) LIS->RemoveMachineInstrFromMaps(I); } + if (MDT) + MDT->eraseNode(&MBB); MBB.clear(); MBB.eraseFromParent(); if (FallThrough && !FallThrough->isLayoutSuccessor(Succ)) { @@ -780,6 +793,7 @@ bool SILowerControlFlow::runOnMachineFunction(MachineFunction &MF) { // This doesn't actually need LiveIntervals, but we can preserve them. LIS = getAnalysisIfAvailable(); + MDT = getAnalysisIfAvailable(); MRI = &MF.getRegInfo(); BoolRC = TRI->getBoolRC(); diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 3e19a86..d137f8a 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -316,7 +316,6 @@ ; GCN-O1-NEXT: Eliminate PHI nodes for register allocation ; GCN-O1-NEXT: SI Lower control flow pseudo instructions ; GCN-O1-NEXT: Two-Address instruction pass -; GCN-O1-NEXT: MachineDominator Tree Construction ; GCN-O1-NEXT: Slot index numbering ; GCN-O1-NEXT: Live Interval Analysis ; GCN-O1-NEXT: Machine Natural Loop Construction @@ -600,7 +599,6 @@ ; GCN-O1-OPTS-NEXT: Eliminate PHI nodes for register allocation ; GCN-O1-OPTS-NEXT: SI Lower control flow pseudo instructions ; GCN-O1-OPTS-NEXT: Two-Address instruction pass -; GCN-O1-OPTS-NEXT: MachineDominator Tree Construction ; GCN-O1-OPTS-NEXT: Slot index numbering ; GCN-O1-OPTS-NEXT: Live Interval Analysis ; GCN-O1-OPTS-NEXT: Machine Natural Loop Construction @@ -886,7 +884,6 @@ ; GCN-O2-NEXT: Eliminate PHI nodes for register allocation ; GCN-O2-NEXT: SI Lower control flow pseudo instructions ; GCN-O2-NEXT: Two-Address instruction pass -; GCN-O2-NEXT: MachineDominator Tree Construction ; GCN-O2-NEXT: Slot index numbering ; GCN-O2-NEXT: Live Interval Analysis ; GCN-O2-NEXT: Machine Natural Loop Construction @@ -1186,7 +1183,6 @@ ; GCN-O3-NEXT: Eliminate PHI nodes for register allocation ; GCN-O3-NEXT: SI Lower control flow pseudo instructions ; GCN-O3-NEXT: Two-Address instruction pass -; GCN-O3-NEXT: MachineDominator Tree Construction ; GCN-O3-NEXT: Slot index numbering ; GCN-O3-NEXT: Live Interval Analysis ; GCN-O3-NEXT: Machine Natural Loop Construction -- 2.7.4