From aba8983c9d86793b3388e7966389e51708fba9bd Mon Sep 17 00:00:00 2001 From: Francesco Petrogalli Date: Fri, 13 Jan 2023 11:16:37 +0100 Subject: [PATCH] Recommit [SchedBoundary] Add dump method for resource usage. Summary: As supporting information, I have added an example that describes how the indexes of the vector of resources SchedBoundary::ReservedCycles are tracked by the field SchedBoundary::ReservedCyclesIndex. This has a minor rework of https://github.com/llvm/llvm-project/commit/b39a9a94f420a25a239ae03097c255900cbd660e which was reverted in https://github.com/llvm/llvm-project/commit/df6ae1779fafd9984e144a27315d6dd65b32c325 because the llc invocation of the test was missing the argument `-mtriple`. See for example the failure at https://lab.llvm.org/buildbot#builders/231/builds/7245 that reported the following when targeting a non-aarch64 native build: 'cortex-a55' is not a recognized processor for this target (ignoring processor) Reviewers: jroelofs Subscribers: Differential Revision: https://reviews.llvm.org/D141367 --- llvm/include/llvm/CodeGen/MachineScheduler.h | 31 ++++++++++- llvm/lib/CodeGen/MachineScheduler.cpp | 27 ++++++++++ llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir | 60 ++++++++++++++++++++++ 3 files changed, 116 insertions(+), 2 deletions(-) create mode 100644 llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir diff --git a/llvm/include/llvm/CodeGen/MachineScheduler.h b/llvm/include/llvm/CodeGen/MachineScheduler.h index 8000c9d..997c3a4 100644 --- a/llvm/include/llvm/CodeGen/MachineScheduler.h +++ b/llvm/include/llvm/CodeGen/MachineScheduler.h @@ -674,8 +674,33 @@ private: // scheduled instruction. SmallVector ReservedCycles; - // For each PIdx, stores first index into ReservedCycles that corresponds to - // it. + /// For each PIdx, stores first index into ReservedCycles that corresponds to + /// it. 
+ /// + /// For example, consider the following 3 resources (ResourceCount = + /// 3): + /// + /// +------------+--------+ + /// |ResourceName|NumUnits| + /// +------------+--------+ + /// | X | 2 | + /// +------------+--------+ + /// | Y | 3 | + /// +------------+--------+ + /// | Z | 1 | + /// +------------+--------+ + /// + /// In this case, the total number of resource instances is 6. The + /// vector \ref ReservedCycles will have a slot for each instance. The + /// vector \ref ReservedCyclesIndex will track at what index the first + /// instance of the resource is found in the vector of \ref + /// ReservedCycles: + /// + /// Indexes of instances in ReservedCycles + /// 0 1 2 3 4 5 + /// ReservedCyclesIndex[0] = 0; [X0, X1, + /// ReservedCyclesIndex[1] = 2; Y0, Y1, Y2 + /// ReservedCyclesIndex[2] = 5; Z SmallVector ReservedCyclesIndex; // For each PIdx, stores the resource group IDs of its subunits @@ -802,6 +827,8 @@ public: /// available instruction, or NULL if there are multiple candidates. SUnit *pickOnlyChoice(); + /// Dump the state of the information that tracks resource usage. 
+ void dumpReservedCycles() const; void dumpScheduledState() const; }; diff --git a/llvm/lib/CodeGen/MachineScheduler.cpp b/llvm/lib/CodeGen/MachineScheduler.cpp index e5cd462..e8c9562 100644 --- a/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/llvm/lib/CodeGen/MachineScheduler.cpp @@ -95,6 +95,9 @@ cl::opt ViewMISchedDAGs( cl::desc("Pop up a window to show MISched dags after they are processed")); cl::opt PrintDAGs("misched-print-dags", cl::Hidden, cl::desc("Print schedule DAGs")); +cl::opt MISchedDumpReservedCycles( + "misched-dump-reserved-cycles", cl::Hidden, cl::init(false), + cl::desc("Dump resource usage at schedule boundary.")); #else const bool ViewMISchedDAGs = false; const bool PrintDAGs = false; @@ -2589,6 +2592,28 @@ SUnit *SchedBoundary::pickOnlyChoice() { } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) + +/// Dump the content of the \ref ReservedCycles vector for the +/// resources that are used in the basic block. +/// +LLVM_DUMP_METHOD void SchedBoundary::dumpReservedCycles() const { + if (!SchedModel->hasInstrSchedModel()) + return; + + unsigned ResourceCount = SchedModel->getNumProcResourceKinds(); + unsigned StartIdx = 0; + + for (unsigned ResIdx = 0; ResIdx < ResourceCount; ++ResIdx) { + const unsigned NumUnits = SchedModel->getProcResource(ResIdx)->NumUnits; + std::string ResName = SchedModel->getResourceName(ResIdx); + for (unsigned UnitIdx = 0; UnitIdx < NumUnits; ++UnitIdx) { + dbgs() << ResName << "(" << UnitIdx + << ") = " << ReservedCycles[StartIdx + UnitIdx] << "\n"; + } + StartIdx += NumUnits; + } +} + // This is useful information to dump after bumpNode. // Note that the Queue contents are more useful before pickNodeFromQueue. LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { @@ -2611,6 +2636,8 @@ LLVM_DUMP_METHOD void SchedBoundary::dumpScheduledState() const { << "\n ExpectedLatency: " << ExpectedLatency << "c\n" << (IsResourceLimited ? 
" - Resource" : " - Latency") << " limited.\n"; + if (MISchedDumpReservedCycles) + dumpReservedCycles(); } #endif diff --git a/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir b/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir new file mode 100644 index 0000000..cd35a20 --- /dev/null +++ b/llvm/test/CodeGen/AArch64/dump-reserved-cycles.mir @@ -0,0 +1,60 @@ +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 -misched-dump-reserved-cycles=true \ +# RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s + +# RUN: llc -mtriple=aarch64-none-linux-gnu -mcpu=cortex-a55 -misched-dump-reserved-cycles=false\ +# RUN: -run-pass=machine-scheduler -debug-only=machine-scheduler -o - %s 2>&1 | FileCheck %s --check-prefix=NODUMP + +# REQUIRES: asserts +--- +name: f +tracksRegLiveness: true +body: | + bb.0: + liveins: $x0, $x1, $x2 + $x3 = ADDXrr $x0, $x0 + $x4 = ADDXrr $x1, $x1 + $x5 = ADDXrr $x2, $x2 + +# It is enough to check the last one of the printout of the state of +# the schedule boundary, just before the final schedule is printed. + +# CHECK-LABEL: Ready @1c +# CHECK-NEXT: CortexA55UnitALU +1x1u +# CHECK-NEXT: BotQ.A @1c +# CHECK-NEXT: Retired: 3 +# CHECK-NEXT: Executed: 1c +# CHECK-NEXT: Critical: 1c, 3 MOps +# CHECK-NEXT: ExpectedLatency: 0c +# CHECK-NEXT: - Latency limited. 
+# CHECK-NEXT: CortexA55UnitALU(0) = 1 +# CHECK-NEXT: CortexA55UnitALU(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitB(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitDiv(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPALU(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPALU(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPDIV(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPMAC(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitFPMAC(1) = 4294967295 +# CHECK-NEXT: CortexA55UnitLd(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitMAC(0) = 4294967295 +# CHECK-NEXT: CortexA55UnitSt(0) = 4294967295 +# CHECK-NEXT: ** ScheduleDAGMILive::schedule picking next node +# CHECK-NEXT: *** Final schedule for %bb.0 *** +# CHECK-NEXT: SU(0): $x3 = ADDXrr $x0, $x0 +# CHECK-NEXT: SU(1): $x4 = ADDXrr $x1, $x1 +# CHECK-NEXT: SU(2): $x5 = ADDXrr $x2, $x2 + +# NODUMP-LABEL: Ready @1c +# NODUMP-NEXT: CortexA55UnitALU +1x1u +# NODUMP-NEXT: BotQ.A @1c +# NODUMP-NEXT: Retired: 3 +# NODUMP-NEXT: Executed: 1c +# NODUMP-NEXT: Critical: 1c, 3 MOps +# NODUMP-NEXT: ExpectedLatency: 0c +# NODUMP-NEXT: - Latency limited. +# NODUMP-NEXT: ** ScheduleDAGMILive::schedule picking next node +# NODUMP-NEXT: *** Final schedule for %bb.0 *** +# NODUMP-NEXT: SU(0): $x3 = ADDXrr $x0, $x0 +# NODUMP-NEXT: SU(1): $x4 = ADDXrr $x1, $x1 +# NODUMP-NEXT: SU(2): $x5 = ADDXrr $x2, $x2 + -- 2.7.4