From 349b5943b475cf87d6dbf8a190e33814a0d8199c Mon Sep 17 00:00:00 2001 From: Daniil Fukalov Date: Tue, 25 Sep 2018 18:37:38 +0000 Subject: [PATCH] [RegAllocGreedy] avoid using physreg candidates that cannot be correctly spilled For the AMDGPU target if a MBB contains exec mask restore preamble, SplitEditor may get state when it cannot insert a spill instruction. E.g. for a MIR bb.100: %1 = S_OR_SAVEEXEC_B64 %2, implicit-def $exec, implicit-def $scc, implicit $exec and if the regalloc will try to allocate a virtreg to the physreg already assigned to virtreg %1, it should insert spill instruction before the S_OR_SAVEEXEC_B64 instruction. But it is not possible since can generate incorrect code in terms of exec mask. The change makes regalloc to ignore such physreg candidates. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D52052 llvm-svn: 343004 --- llvm/lib/CodeGen/RegAllocGreedy.cpp | 41 +++++++--- llvm/lib/CodeGen/SplitKit.h | 17 +++++ llvm/test/CodeGen/AMDGPU/spill-before-exec.mir | 100 +++++++++++++++++++++++++ 3 files changed, 149 insertions(+), 9 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/spill-before-exec.mir diff --git a/llvm/lib/CodeGen/RegAllocGreedy.cpp b/llvm/lib/CodeGen/RegAllocGreedy.cpp index 54f8008..64da848 100644 --- a/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -449,8 +449,8 @@ private: BlockFrequency calcSpillCost(); bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); - void addThroughConstraints(InterferenceCache::Cursor, ArrayRef); - void growRegion(GlobalSplitCandidate &Cand); + bool addThroughConstraints(InterferenceCache::Cursor, ArrayRef); + bool growRegion(GlobalSplitCandidate &Cand); bool splitCanCauseEvictionChain(unsigned Evictee, GlobalSplitCandidate &Cand, unsigned BBNumber, const AllocationOrder &Order); @@ -1203,6 +1203,13 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, } else if (Intf.first() < BI.LastInstr) { ++Ins; } + + // Abort if the spill cannot be inserted at the MBB' start + if (((BC.Entry == SpillPlacement::MustSpill) || + (BC.Entry == SpillPlacement::PrefSpill)) && + SlotIndex::isEarlierInstr(BI.FirstInstr, + SA->getFirstSplitPoint(BC.Number))) + return false; } // Interference for the live-out value. @@ -1232,7 +1239,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, /// addThroughConstraints - Add constraints and links to SpillPlacer from the /// live-through blocks in Blocks. -void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, +bool RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, ArrayRef Blocks) { const unsigned GroupSize = 8; SpillPlacement::BlockConstraint BCS[GroupSize]; @@ -1256,6 +1263,12 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, assert(B < GroupSize && "Array overflow"); BCS[B].Number = Number; + // Abort if the spill cannot be inserted at the MBB' start + MachineBasicBlock *MBB = MF->getBlockNumbered(Number); + if (!MBB->empty() && + SlotIndex::isEarlierInstr(LIS->getInstructionIndex(MBB->instr_front()), + SA->getFirstSplitPoint(Number))) + return false; // Interference for the live-in value. if (Intf.first() <= Indexes->getMBBStartIdx(Number)) BCS[B].Entry = SpillPlacement::MustSpill; @@ -1276,9 +1289,10 @@ void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, SpillPlacer->addConstraints(makeArrayRef(BCS, B)); SpillPlacer->addLinks(makeArrayRef(TBS, T)); + return true; } -void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { +bool RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Keep track of through blocks that have not been added to SpillPlacer. BitVector Todo = SA->getThroughBlocks(); SmallVectorImpl &ActiveBlocks = Cand.ActiveBlocks; @@ -1314,9 +1328,10 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { // Compute through constraints from the interference, or assume that all // through blocks prefer spilling when forming compact regions. auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); - if (Cand.PhysReg) - addThroughConstraints(Cand.Intf, NewBlocks); - else + if (Cand.PhysReg) { + if (!addThroughConstraints(Cand.Intf, NewBlocks)) + return false; + } else // Provide a strong negative bias on through blocks to prevent unwanted // liveness on loop backedges. SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); @@ -1326,6 +1341,7 @@ void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { SpillPlacer->iterate(); } LLVM_DEBUG(dbgs() << ", v=" << Visited); + return true; } /// calcCompactRegion - Compute the set of edge bundles that should be live @@ -1356,7 +1372,11 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { return false; } - growRegion(Cand); + if (!growRegion(Cand)) { + LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); + return false; + } + SpillPlacer->finish(); if (!Cand.LiveBundles.any()) { @@ -1886,7 +1906,10 @@ unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, }); continue; } - growRegion(Cand); + if (!growRegion(Cand)) { + LLVM_DEBUG(dbgs() << ", cannot spill all interferences.\n"); + continue; + } SpillPlacer->finish(); diff --git a/llvm/lib/CodeGen/SplitKit.h b/llvm/lib/CodeGen/SplitKit.h index 8fbe724..bcc8f8c 100644 --- a/llvm/lib/CodeGen/SplitKit.h +++ b/llvm/lib/CodeGen/SplitKit.h @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveInterval.h" +#include "llvm/CodeGen/LiveIntervals.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/SlotIndexes.h" @@ -76,6 +77,18 @@ public: /// Returns the last insert point as an iterator for \pCurLI in \pMBB. MachineBasicBlock::iterator getLastInsertPointIter(const LiveInterval &CurLI, MachineBasicBlock &MBB); + + /// Return the base index of the first insert point in \pMBB. + SlotIndex getFirstInsertPoint(MachineBasicBlock &MBB) { + SlotIndex Res = LIS.getMBBStartIdx(&MBB); + if (!MBB.empty()) { + MachineBasicBlock::iterator MII = MBB.SkipPHIsLabelsAndDebug(MBB.begin()); + if (MII != MBB.end()) + Res = LIS.getInstructionIndex(*MII); + } + return Res; + } + }; /// SplitAnalysis - Analyze a LiveInterval, looking for live range splitting @@ -225,6 +238,10 @@ public: MachineBasicBlock::iterator getLastSplitPointIter(MachineBasicBlock *BB) { return IPA.getLastInsertPointIter(*CurLI, *BB); } + + SlotIndex getFirstSplitPoint(unsigned Num) { + return IPA.getFirstInsertPoint(*MF.getBlockNumbered(Num)); + } }; /// SplitEditor - Edit machine code and LiveIntervals for live range diff --git a/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir new file mode 100644 index 0000000..0726a25 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/spill-before-exec.mir @@ -0,0 +1,100 @@ +# REQUIRES: asserts +# RUN: llc -mtriple=amdgcn--- -verify-machineinstrs -debug-only=regalloc -run-pass=greedy -o /dev/null %s 2>&1 | FileCheck %s + +--- +# Check that physreg candidate is not used since cannot be spilled in a block, +# e.g. before exec mask preamble +# CHECK: , cannot spill all interferences. + +name: foo +tracksRegLiveness: true +registers: + - { id: 0, class: sreg_64 } + - { id: 1100, class: sreg_128 } + - { id: 1101, class: sreg_128 } + - { id: 1102, class: sreg_128 } + - { id: 1103, class: sreg_128 } + - { id: 1104, class: sreg_128 } + - { id: 1105, class: sreg_128 } + - { id: 1106, class: sreg_128 } + - { id: 1107, class: sreg_128 } + - { id: 1108, class: sreg_128 } + - { id: 1109, class: sreg_128 } + - { id: 1110, class: sreg_128 } + - { id: 1111, class: sreg_128 } + - { id: 1112, class: sreg_128 } + - { id: 1113, class: sreg_128 } + - { id: 1114, class: sreg_128 } + - { id: 1115, class: sreg_128 } + - { id: 1116, class: sreg_128 } + - { id: 1117, class: sreg_128 } + - { id: 1118, class: sreg_128 } + - { id: 1119, class: sreg_128 } + - { id: 1120, class: sreg_128 } + - { id: 1121, class: sreg_128 } +body: | + bb.0: + successors: %bb.1 + liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr100_sgpr101, $sgpr102_sgpr103 + %0:sreg_64 = COPY $sgpr102_sgpr103 + %1100 = COPY $sgpr100_sgpr101_sgpr102_sgpr103 + %1101 = COPY %1100 + %1102 = COPY %1100 + %1103 = COPY %1100 + %1104 = COPY %1100 + %1105 = COPY %1100 + %1106 = COPY %1100 + %1107 = COPY %1100 + %1108 = COPY %1100 + %1109 = COPY %1100 + %1110 = COPY %1100 + %1111 = COPY %1100 + %1112 = COPY %1100 + %1113 = COPY %1100 + %1114 = COPY %1100 + %1115 = COPY %1100 + %1116 = COPY %1100 + %1117 = COPY %1100 + %1118 = COPY %1100 + %1119 = COPY %1100 + %1120 = COPY %1100 + %1121 = COPY %1100 + S_BRANCH %bb.1 + + bb.1: + liveins: $sgpr96_sgpr97, $sgpr98_sgpr99, $sgpr102_sgpr103 + %0 = S_OR_SAVEEXEC_B64 $sgpr96_sgpr97, implicit-def $exec, implicit-def $scc, implicit $exec + $exec = S_XOR_B64_term $exec, %0, implicit-def $scc + SI_MASK_BRANCH %bb.100, implicit $exec + S_BRANCH %bb.2 + + bb.2: + liveins: $sgpr98_sgpr99, $sgpr102_sgpr103 + %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr98_sgpr99, implicit-def $exec, implicit-def $scc, implicit $exec + $exec = S_XOR_B64_term $exec, %0, implicit-def $scc + SI_MASK_BRANCH %bb.100, implicit $exec + S_BRANCH %bb.200 + + bb.100: + liveins: $sgpr102_sgpr103 + %0:sreg_64 = S_OR_SAVEEXEC_B64 $sgpr102_sgpr103, implicit-def $exec, implicit-def $scc, implicit $exec + $exec = S_XOR_B64_term $exec, %0, implicit-def $scc + S_BRANCH %bb.200 + + bb.200: + S_CMP_EQ_U64 %1100.sub0_sub1, %1101.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1102.sub0_sub1, %1103.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1104.sub0_sub1, %1105.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1106.sub0_sub1, %1107.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1108.sub0_sub1, %1109.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1110.sub0_sub1, %1111.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1112.sub0_sub1, %1113.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1114.sub0_sub1, %1115.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1116.sub0_sub1, %1117.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1118.sub0_sub1, %1119.sub2_sub3, implicit-def $scc + S_CMP_EQ_U64 %1120.sub0_sub1, %1121.sub2_sub3, implicit-def $scc + + $vgpr0 = V_MOV_B32_e32 0, implicit $exec + S_SETPC_B64_return %0, implicit $vgpr0 + +... -- 2.7.4