From 635993f07bd617818097286eca0bdacb91a9afb5 Mon Sep 17 00:00:00 2001 From: Stanislav Mekhanoshin Date: Tue, 23 Feb 2021 15:26:12 -0800 Subject: [PATCH] [AMDGPU] Skip unclustered rescheduling w/o ld/st We are attempting rescheduling without load/store clustering if occupancy limits were not met with clustering. Skip this for regions which do not have any loads or stores at all. In a set of kernels I am experimenting with, this improves scheduling time by ~30%. Differential Revision: https://reviews.llvm.org/D97342 --- llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp | 22 +++++++++++++++++++--- llvm/lib/Target/AMDGPU/GCNSchedStrategy.h | 4 ++++ 2 files changed, 23 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp index 0477856..9b6ec89 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.cpp @@ -20,7 +20,8 @@ using namespace llvm; GCNMaxOccupancySchedStrategy::GCNMaxOccupancySchedStrategy( const MachineSchedContext *C) : - GenericScheduler(C), TargetOccupancy(0), MF(nullptr) { } + GenericScheduler(C), TargetOccupancy(0), HasClusteredNodes(false), + MF(nullptr) { } void GCNMaxOccupancySchedStrategy::initialize(ScheduleDAGMI *DAG) { GenericScheduler::initialize(DAG); @@ -279,6 +280,15 @@ SUnit *GCNMaxOccupancySchedStrategy::pickNode(bool &IsTopNode) { if (SU->isBottomReady()) Bot.removeReady(SU); + if (!HasClusteredNodes && SU->getInstr()->mayLoadOrStore()) { + for (SDep &Dep : SU->Preds) { + if (Dep.isCluster()) { + HasClusteredNodes = true; + break; + } + } + } + LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") " << *SU->getInstr()); return SU; @@ -320,6 +330,10 @@ void GCNScheduleDAGMILive::schedule() { PressureBefore.print(dbgs())); } + GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; + // Set HasClusteredNodes to true for late stages where we are not interested + // in it anymore. 
That way pickNode() will not scan SDep's when not needed. + S.HasClusteredNodes = Stage >= UnclusteredReschedule; ScheduleDAGMILive::schedule(); Regions[RegionIdx] = std::make_pair(RegionBegin, RegionEnd); RescheduleRegions[RegionIdx] = false; @@ -328,7 +342,6 @@ void GCNScheduleDAGMILive::schedule() { return; // Check the results of scheduling. - GCNMaxOccupancySchedStrategy &S = (GCNMaxOccupancySchedStrategy&)*SchedImpl; auto PressureAfter = getRealRegPressure(); LLVM_DEBUG(dbgs() << "Pressure after scheduling: "; @@ -379,6 +392,8 @@ void GCNScheduleDAGMILive::schedule() { PressureAfter.less(ST, PressureBefore) || !RescheduleRegions[RegionIdx]) { Pressure[RegionIdx] = PressureAfter; + if (!S.HasClusteredNodes && (Stage + 1) == UnclusteredReschedule) + RescheduleRegions[RegionIdx] = false; return; } else { LLVM_DEBUG(dbgs() << "New pressure will result in more spilling.\n"); @@ -386,7 +401,8 @@ void GCNScheduleDAGMILive::schedule() { } LLVM_DEBUG(dbgs() << "Attempting to revert scheduling.\n"); - RescheduleRegions[RegionIdx] = true; + RescheduleRegions[RegionIdx] = S.HasClusteredNodes || + (Stage + 1) != UnclusteredReschedule; RegionEnd = RegionBegin; for (MachineInstr *MI : Unsched) { if (MI->isDebugInstr()) diff --git a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h index 2d81d99..c96f73e 100644 --- a/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h +++ b/llvm/lib/Target/AMDGPU/GCNSchedStrategy.h @@ -50,6 +50,10 @@ class GCNMaxOccupancySchedStrategy final : public GenericScheduler { unsigned TargetOccupancy; + // schedule() has seen a clustered memory operation. Set it to false + // before scheduling a region to know if the region had such clusters. + bool HasClusteredNodes; + MachineFunction *MF; public: -- 2.7.4