From f898edd117daec42762c19dd733189307414d88a Mon Sep 17 00:00:00 2001 From: Carl Ritson Date: Mon, 10 Sep 2018 10:14:48 +0000 Subject: [PATCH] [AMDGPU] Prevent sequences of non-instructions disrupting GCNHazardRecognizer wait state counting Summary: This fixes a bug where a large number of implicit def instructions can fill the GCNHazardRecognizer lookahead buffer causing required NOPs to not be inserted. Reviewers: nhaehnle, arsenm Reviewed By: arsenm Subscribers: sheredom, kzhuravl, jvesely, wdng, yaxunl, dstuttard, tpr, t-tye, llvm-commits Differential Revision: https://reviews.llvm.org/D51726 Change-Id: Ie75338f94de704ee5816b05afd0c922c6748a95b llvm-svn: 341798 --- llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp | 11 ++++- llvm/test/CodeGen/AMDGPU/hazard.mir | 67 ++++++++++++++++++++++++++ 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp index f236f10..c6396de 100644 --- a/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp +++ b/llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp @@ -215,6 +215,14 @@ void GCNHazardRecognizer::AdvanceCycle() { if (!CurrCycleInstr) return; + // Do not track non-instructions which do not affect the wait states. + // If included, these instructions can lead to buffer overflow such that + // detectable hazards are missed. + if (CurrCycleInstr->getOpcode() == AMDGPU::IMPLICIT_DEF) + return; + else if (CurrCycleInstr->isDebugInstr()) + return; + unsigned NumWaitStates = TII.getNumWaitStates(*CurrCycleInstr); // Keep track of emitted instructions @@ -253,8 +261,7 @@ int GCNHazardRecognizer::getWaitStatesSince( return WaitStates; unsigned Opcode = MI->getOpcode(); - if (Opcode == AMDGPU::DBG_VALUE || Opcode == AMDGPU::IMPLICIT_DEF || - Opcode == AMDGPU::INLINEASM) + if (Opcode == AMDGPU::INLINEASM) continue; } ++WaitStates; diff --git a/llvm/test/CodeGen/AMDGPU/hazard.mir b/llvm/test/CodeGen/AMDGPU/hazard.mir index 82e5c6d..605231d 100644 --- a/llvm/test/CodeGen/AMDGPU/hazard.mir +++ b/llvm/test/CodeGen/AMDGPU/hazard.mir @@ -1,4 +1,5 @@ # RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN -check-prefix=VI %s +# RUN: llc -march=amdgcn -mcpu=gfx803 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN -check-prefix=GFX8 %s # RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -run-pass post-RA-hazard-rec %s -o - | FileCheck -check-prefix=GCN -check-prefix=GFX9 %s # GCN-LABEL: name: hazard_implicit_def @@ -58,3 +59,69 @@ body: | $vgpr0 = V_INTERP_P1_F32 killed $vgpr4, 0, 0, implicit $m0, implicit $exec SI_RETURN_TO_EPILOG killed $vgpr5, killed $vgpr0 ... + +# IMPLICIT_DEF/DBG_VALUE instructions should not prevent the hazard recognizer +# from adding s_nop instructions between m0 update and s_sendmsg. + +# GCN-LABEL: name: hazard-lookahead-implicit-def +# GCN: $vgpr6 = IMPLICIT_DEF +# GFX8-NEXT: S_NOP 0 +# GFX9-NEXT: S_NOP 0 +# GCN: S_SENDMSG 3, implicit $exec, implicit $m0 +--- +name: hazard-lookahead-implicit-def +body: | + bb.0: + $m0 = S_MOV_B32 killed $sgpr12 + $vgpr0 = IMPLICIT_DEF + $vgpr1 = IMPLICIT_DEF + $vgpr2 = IMPLICIT_DEF + $vgpr3 = IMPLICIT_DEF + $vgpr4 = IMPLICIT_DEF + $vgpr5 = IMPLICIT_DEF + $vgpr6 = IMPLICIT_DEF + S_SENDMSG 3, implicit $exec, implicit $m0 + S_ENDPGM +... + +# GCN-LABEL: name: hazard-lookahead-dbg-value +# GCN: DBG_VALUE 6 +# GFX8-NEXT: S_NOP 0 +# GFX9-NEXT: S_NOP 0 +# GCN: S_SENDMSG 3, implicit $exec, implicit $m0 +--- +name: hazard-lookahead-dbg-value +body: | + bb.0: + $m0 = S_MOV_B32 killed $sgpr12 + DBG_VALUE 0 + DBG_VALUE 1 + DBG_VALUE 2 + DBG_VALUE 3 + DBG_VALUE 4 + DBG_VALUE 5 + DBG_VALUE 6 + S_SENDMSG 3, implicit $exec, implicit $m0 + S_ENDPGM +... + +# GCN-LABEL: name: hazard-lookahead-dbg-label +# GCN: DBG_LABEL 6 +# GFX8-NEXT: S_NOP 0 +# GFX9-NEXT: S_NOP 0 +# GCN: S_SENDMSG 3, implicit $exec, implicit $m0 +--- +name: hazard-lookahead-dbg-label +body: | + bb.0: + $m0 = S_MOV_B32 killed $sgpr12 + DBG_LABEL 0 + DBG_LABEL 1 + DBG_LABEL 2 + DBG_LABEL 3 + DBG_LABEL 4 + DBG_LABEL 5 + DBG_LABEL 6 + S_SENDMSG 3, implicit $exec, implicit $m0 + S_ENDPGM +... -- 2.7.4