From 913837eaa3d14a5fb68179f8210dc097383a048c Mon Sep 17 00:00:00 2001 From: Austin Kerbow Date: Sun, 22 Jan 2023 21:26:08 -0800 Subject: [PATCH] [ScheduleDAG] Fix removing edges with weak deps In SUnit::removePred edges are removed from the Preds and Succs lists before updating the bookkeeping. This could result in incorrect values for NumPreds/SuccsLeft and cause WeakPreds/SuccsLeft to underflow, since the incorrect SDep will be used to update these values. Reviewed By: rampitec Differential Revision: https://reviews.llvm.org/D142325 --- llvm/lib/CodeGen/ScheduleDAG.cpp | 14 ++++---- .../CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir | 42 ++++++++++++++++++++++ 2 files changed, 50 insertions(+), 6 deletions(-) create mode 100644 llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir diff --git a/llvm/lib/CodeGen/ScheduleDAG.cpp b/llvm/lib/CodeGen/ScheduleDAG.cpp index 696b290..ef886ad 100644 --- a/llvm/lib/CodeGen/ScheduleDAG.cpp +++ b/llvm/lib/CodeGen/ScheduleDAG.cpp @@ -183,8 +183,6 @@ void SUnit::removePred(const SDep &D) { SUnit *N = D.getSUnit(); SmallVectorImpl::iterator Succ = llvm::find(N->Succs, P); assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); - N->Succs.erase(Succ); - Preds.erase(I); // Update the bookkeeping. if (P.getKind() == SDep::Data) { assert(NumPreds > 0 && "NumPreds will underflow!"); @@ -193,21 +191,25 @@ void SUnit::removePred(const SDep &D) { --N->NumSuccs; } if (!N->isScheduled) { - if (D.isWeak()) + if (D.isWeak()) { + assert(WeakPredsLeft > 0 && "WeakPredsLeft will underflow!"); --WeakPredsLeft; - else { + } else { assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); --NumPredsLeft; } } if (!isScheduled) { - if (D.isWeak()) + if (D.isWeak()) { + assert(WeakSuccsLeft > 0 && "WeakSuccsLeft will underflow!"); --N->WeakSuccsLeft; - else { + } else { assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); --N->NumSuccsLeft; } } + N->Succs.erase(Succ); + Preds.erase(I); if (P.getLatency() != 0) { this->setDepthDirty(); N->setHeightDirty(); diff --git a/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir new file mode 100644 index 0000000..9ef4cf5 --- /dev/null +++ b/llvm/test/CodeGen/AMDGPU/sched-barrier-hang-weak-dep.mir @@ -0,0 +1,42 @@ +# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +# RUN: llc -march=amdgcn -mcpu=gfx908 -run-pass=machine-scheduler -verify-misched -o - %s | FileCheck %s + +# This would hang after removing edges from the SCHED_BARRIER since the number +# of Preds/Succs would be left in an inconsistent state. + +--- +name: sched_barrier_cluster_dep_hang +tracksRegLiveness: true +body: | + ; CHECK-LABEL: name: sched_barrier_cluster_dep_hang + ; CHECK: bb.0: + ; CHECK-NEXT: successors: %bb.1(0x80000000) + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF + ; CHECK-NEXT: [[DEF1:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF + ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: bb.1: + ; CHECK-NEXT: SCHED_BARRIER 128 + ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 512, 0, implicit $exec :: (load (s32)) + ; CHECK-NEXT: [[GLOBAL_LOAD_DWORD_SADDR1:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[DEF]], [[DEF1]], 0, 0, implicit $exec :: (load (s32)) + ; CHECK-NEXT: SCHED_BARRIER 128 + ; CHECK-NEXT: [[V_MUL_LO_U32_e64_:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR]], [[GLOBAL_LOAD_DWORD_SADDR]], implicit $exec + ; CHECK-NEXT: [[V_MUL_LO_U32_e64_1:%[0-9]+]]:vgpr_32 = nsw V_MUL_LO_U32_e64 [[GLOBAL_LOAD_DWORD_SADDR1]], [[GLOBAL_LOAD_DWORD_SADDR1]], implicit $exec + ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_]], [[DEF]], 512, 0, implicit $exec :: (store (s32)) + ; CHECK-NEXT: GLOBAL_STORE_DWORD_SADDR [[DEF1]], [[V_MUL_LO_U32_e64_1]], [[DEF]], 0, 0, implicit $exec :: (store (s32)) + ; CHECK-NEXT: S_ENDPGM 0 + bb.0: + %0:sreg_64 = IMPLICIT_DEF + %1:vgpr_32 = IMPLICIT_DEF + + bb.1: + SCHED_BARRIER 128 + %3:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 0, 0, implicit $exec :: (load (s32)) + %5:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %0, %1, 512, 0, implicit $exec :: (load (s32)) + SCHED_BARRIER 128 + %6:vgpr_32 = nsw V_MUL_LO_U32_e64 %5, %5, implicit $exec + %4:vgpr_32 = nsw V_MUL_LO_U32_e64 %3, %3, implicit $exec + GLOBAL_STORE_DWORD_SADDR %1, %6, %0, 512, 0, implicit $exec :: (store (s32)) + GLOBAL_STORE_DWORD_SADDR %1, %4, %0, 0, 0, implicit $exec :: (store (s32)) + S_ENDPGM 0 +... -- 2.7.4