From a27007eb4fd0854cc2861c25c082f3cb64d8593e Mon Sep 17 00:00:00 2001
From: Tom Stellard
Date: Mon, 2 May 2016 16:23:09 +0000
Subject: [PATCH] AMDGPU/SI: Use hazard recognizer to detect DPP hazards

Reviewers: arsenm

Subscribers: arsenm, llvm-commits

Differential Revision: http://reviews.llvm.org/D18603

llvm-svn: 268247
---
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp  | 26 ++++++++++++
 llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h    |  1 +
 llvm/lib/Target/AMDGPU/SIInsertWaits.cpp        | 55 -------------------------
 llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll |  8 +++-
 4 files changed, 33 insertions(+), 57 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
index 7257279..58a9a28 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp
@@ -47,6 +47,9 @@ GCNHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
   if (SIInstrInfo::isVMEM(*MI) && checkVMEMHazards(MI) > 0)
     return NoopHazard;
 
+  if (SIInstrInfo::isDPP(*MI) && checkDPPHazards(MI) > 0)
+    return NoopHazard;
+
   return NoHazard;
 }
 
@@ -61,6 +64,9 @@ unsigned GCNHazardRecognizer::PreEmitNoops(MachineInstr *MI) {
   if (SIInstrInfo::isVMEM(*MI))
     return std::max(0, checkVMEMHazards(MI));
 
+  if (SIInstrInfo::isDPP(*MI))
+    return std::max(0, checkDPPHazards(MI));
+
   return 0;
 }
 
@@ -175,3 +181,23 @@ int GCNHazardRecognizer::checkVMEMHazards(MachineInstr* VMEM) {
   }
   return WaitStatesNeeded;
 }
+
+int GCNHazardRecognizer::checkDPPHazards(MachineInstr *DPP) {
+  const AMDGPUSubtarget &ST = MF.getSubtarget<AMDGPUSubtarget>();
+  const SIRegisterInfo *TRI =
+      static_cast<const SIRegisterInfo *>(ST.getRegisterInfo());
+
+  // Check for DPP VGPR read after VALU VGPR write.
+  int DppVgprWaitStates = 2;
+  int WaitStatesNeeded = 0;
+
+  for (const MachineOperand &Use : DPP->uses()) {
+    if (!Use.isReg() || !TRI->isVGPR(MF.getRegInfo(), Use.getReg()))
+      continue;
+    int WaitStatesNeededForUse =
+        DppVgprWaitStates - getWaitStatesSinceDef(Use.getReg());
+    WaitStatesNeeded = std::max(WaitStatesNeeded, WaitStatesNeededForUse);
+  }
+
+  return WaitStatesNeeded;
+}
diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
index e75c350..4ab2480 100644
--- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
+++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.h
@@ -40,6 +40,7 @@ class GCNHazardRecognizer final : public ScheduleHazardRecognizer {
 
   int checkSMRDHazards(MachineInstr *SMRD);
   int checkVMEMHazards(MachineInstr* VMEM);
+  int checkDPPHazards(MachineInstr *DPP);
 public:
   GCNHazardRecognizer(const MachineFunction &MF);
   // We can only issue one instruction per cycle.
diff --git a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
index ead4c9e..75adb2b 100644
--- a/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInsertWaits.cpp
@@ -127,18 +127,6 @@ private:
   /// \brief Insert S_NOP between an instruction writing M0 and S_SENDMSG.
   void handleSendMsg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I);
 
-  /// \param DPP The DPP instruction
-  /// \param SearchI The iterator to start look for hazards.
-  /// \param SearchMBB The basic block we are operating on.
-  /// \param WaitStates Then number of wait states that need to be inserted
-  /// When a hazard is detected.
-  void insertDPPWaitStates(MachineBasicBlock::iterator DPP,
-                           MachineBasicBlock::reverse_iterator SearchI,
-                           MachineBasicBlock *SearchMBB,
-                           unsigned WaitStates);
-
-  void insertDPPWaitStates(MachineBasicBlock::iterator DPP);
-
   /// Return true if there are LGKM instrucitons that haven't been waited on
   /// yet.
   bool hasOutstandingLGKM() const;
@@ -522,45 +510,6 @@ void SIInsertWaits::handleSendMsg(MachineBasicBlock &MBB,
   }
 }
 
-void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP,
-                                  MachineBasicBlock::reverse_iterator SearchI,
-                                  MachineBasicBlock *SearchMBB,
-                                  unsigned WaitStates) {
-
-  MachineBasicBlock::reverse_iterator E = SearchMBB->rend();
-
-  for (; WaitStates > 0; --WaitStates, ++SearchI) {
-
-    // If we have reached the start of the block, we need to check predecessors.
-    if (SearchI == E) {
-      for (MachineBasicBlock *Pred : SearchMBB->predecessors()) {
-        // We only need to check fall-through blocks. Branch instructions
-        // give us enough wait states.
-        if (Pred->getFirstTerminator() == Pred->end()) {
-          insertDPPWaitStates(DPP, Pred->rbegin(), Pred, WaitStates);
-          break;
-        }
-      }
-      return;
-    }
-
-    for (MachineOperand &Op : SearchI->operands()) {
-      if (!Op.isReg() || !Op.isDef())
-        continue;
-
-      if (DPP->readsRegister(Op.getReg(), TRI)) {
-        TII->insertWaitStates(*DPP->getParent(), DPP, WaitStates);
-        return;
-      }
-    }
-  }
-}
-
-void SIInsertWaits::insertDPPWaitStates(MachineBasicBlock::iterator DPP) {
-  MachineBasicBlock::reverse_iterator I(DPP);
-  insertDPPWaitStates(DPP, I, DPP->getParent(), 2);
-}
-
 // FIXME: Insert waits listed in Table 4.2 "Required User-Inserted Wait States"
 // around other non-memory instructions.
 bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
@@ -630,10 +579,6 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
       }
     }
 
-    if (TII->isDPP(*I)) {
-      insertDPPWaitStates(I);
-    }
-
     // Record pre-existing, explicitly requested waits
    if (I->getOpcode() == AMDGPU::S_WAITCNT) {
       handleExistingWait(*I);
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
index 9b47775..a85fc7e 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.mov.dpp.ll
@@ -1,4 +1,5 @@
-; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI %s
+; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-OPT %s
+; RUN: llc -O0 -march=amdgcn -mcpu=tonga -verify-machineinstrs -show-mc-encoding < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOOPT %s
 
 ; FIXME: The register allocator / scheduler should be able to avoid these hazards.
 
@@ -26,7 +27,10 @@ define void @dpp_wait_states(i32 addrspace(1)* %out, i32 %in) {
 }
 
 ; VI-LABEL: {{^}}dpp_first_in_bb:
-; VI: s_nop 1
+; VI: ; %endif
+; VI-OPT: s_mov_b32
+; VI-OPT: s_mov_b32
+; VI-NOOPT: s_nop 1
 ; VI: v_mov_b32_dpp [[VGPR0:v[0-9]+]], v{{[0-9]+}} quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
 ; VI: s_nop 1
 ; VI: v_mov_b32_dpp [[VGPR1:v[0-9]+]], [[VGPR0]] quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1 bound_ctrl:0
-- 
2.7.4
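
Note on the hazard rule the patch encodes: checkDPPHazards() above is a small max-over-uses computation. A DPP instruction must see two wait states after any VALU write to a VGPR it reads, so the recognizer requests the shortfall between that requirement and the wait states that have already elapsed since the defining instruction. The standalone C++ sketch below (not part of the patch; the helper name and the hard-coded elapsed counts are illustrative assumptions) shows the same arithmetic in isolation:

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  // Hypothetical helper mirroring the wait-state math in checkDPPHazards():
  // given, for each VGPR the DPP instruction reads, how many wait states have
  // passed since that register was last written, return how many NOP wait
  // states still need to be emitted before the DPP instruction issues.
  static int dppWaitStatesNeeded(const std::vector<int> &ElapsedSinceDef) {
    const int DppVgprWaitStates = 2; // required gap after a VALU VGPR write
    int Needed = 0;
    for (int Elapsed : ElapsedSinceDef)
      Needed = std::max(Needed, DppVgprWaitStates - Elapsed);
    return std::max(0, Needed); // never request a negative number of NOPs
  }

  int main() {
    // One source register written by the immediately preceding instruction
    // (0 elapsed wait states) and one written long ago (3): the recognizer
    // would request 2 wait states, which is what the "s_nop 1" lines checked
    // in llvm.amdgcn.mov.dpp.ll provide (s_nop N yields N+1 wait states).
    std::printf("%d\n", dppWaitStatesNeeded({0, 3})); // prints 2
    return 0;
  }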