and the register file.
Below is an example of ``-all-stats`` output generated by :program:`llvm-mca`
-for the dot-product example discussed in the previous sections.
+for 300 iterations of the dot-product example discussed in the previous
+sections.
.. code-block:: none
1, 306 (50.2%)
2, 297 (48.7%)
-
Scheduler's queue usage:
- JALU01, 0/20
- JFPU01, 18/18
- JLSAGU, 0/12
+ [1] Resource name.
+ [2] Average number of used buffer entries.
+ [3] Maximum number of used buffer entries.
+ [4] Total number of buffer entries.
+
+ [1] [2] [3] [4]
+ JALU01 0 0 20
+ JFPU01 17 18 18
+ JLSAGU 0 0 12
Retire Control Unit - number of cycles where we saw N instructions retired:
this case, of the 610 simulated cycles, single instructions were issued 306
times (50.2%) and there were 7 cycles where no instructions were issued.
-The *Scheduler's queue usage* table shows that the maximum number of buffer
-entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01
+The *Scheduler's queue usage* table shows the average and maximum number of
+buffer entries (i.e., scheduler queue entries) used at runtime. Resource JFPU01
reached its maximum (18 of 18 queue entries). Note that AMD Jaguar implements
three schedulers:
# CHECK-NEXT: 2, 1 (10.0%)
# CHECK: Scheduler's queue usage:
-# CHECK-NEXT: JALU01, 1/20
-# CHECK-NEXT: JFPU01, 1/18
-# CHECK-NEXT: JLSAGU, 1/12
+# CHECK-NEXT: [1] Resource name.
+# CHECK-NEXT: [2] Average number of used buffer entries.
+# CHECK-NEXT: [3] Maximum number of used buffer entries.
+# CHECK-NEXT: [4] Total number of buffer entries.
+
+# CHECK: [1] [2] [3] [4]
+# CHECK-NEXT: JALU01 0 1 20
+# CHECK-NEXT: JFPU01 0 1 18
+# CHECK-NEXT: JLSAGU 0 1 12
# CHECK: Resources:
# CHECK-NEXT: [0] - JALU0
# FULLREPORT-NEXT: 1, 100 (97.1%)
# FULLREPORT: Scheduler's queue usage:
-# FULLREPORT-NEXT: JALU01, 20/20
-# FULLREPORT-NEXT: JFPU01, 0/18
-# FULLREPORT-NEXT: JLSAGU, 0/12
+# FULLREPORT-NEXT: [1] Resource name.
+# FULLREPORT-NEXT: [2] Average number of used buffer entries.
+# FULLREPORT-NEXT: [3] Maximum number of used buffer entries.
+# FULLREPORT-NEXT: [4] Total number of buffer entries.
+
+# FULLREPORT: [1] [2] [3] [4]
+# FULLREPORT-NEXT: JALU01 15 20 20
+# FULLREPORT-NEXT: JFPU01 0 0 18
+# FULLREPORT-NEXT: JLSAGU 0 0 12
# FULLREPORT: Retire Control Unit - number of cycles where we saw N instructions retired:
# FULLREPORT-NEXT: [# retired], [# cycles]
# FULL-NEXT: 1, 100 (97.1%)
# FULL: Scheduler's queue usage:
-# FULL-NEXT: JALU01, 20/20
-# FULL-NEXT: JFPU01, 0/18
-# FULL-NEXT: JLSAGU, 0/12
+# FULL-NEXT: [1] Resource name.
+# FULL-NEXT: [2] Average number of used buffer entries.
+# FULL-NEXT: [3] Maximum number of used buffer entries.
+# FULL-NEXT: [4] Total number of buffer entries.
+
+# FULL: [1] [2] [3] [4]
+# FULL-NEXT: JALU01 15 20 20
+# FULL-NEXT: JFPU01 0 0 18
+# FULL-NEXT: JLSAGU 0 0 12
# FULL: Retire Control Unit - number of cycles where we saw N instructions retired:
# FULL-NEXT: [# retired], [# cycles]
# FULLREPORT-NEXT: 1, 100 (97.1%)
# FULLREPORT: Scheduler's queue usage:
-# FULLREPORT-NEXT: JALU01, 20/20
-# FULLREPORT-NEXT: JFPU01, 0/18
-# FULLREPORT-NEXT: JLSAGU, 0/12
+# FULLREPORT-NEXT: [1] Resource name.
+# FULLREPORT-NEXT: [2] Average number of used buffer entries.
+# FULLREPORT-NEXT: [3] Maximum number of used buffer entries.
+# FULLREPORT-NEXT: [4] Total number of buffer entries.
+
+# FULLREPORT: [1] [2] [3] [4]
+# FULLREPORT-NEXT: JALU01 15 20 20
+# FULLREPORT-NEXT: JFPU01 0 0 18
+# FULLREPORT-NEXT: JLSAGU 0 0 12
# FULLREPORT: Retire Control Unit - number of cycles where we saw N instructions retired:
# FULLREPORT-NEXT: [# retired], [# cycles]
# ALL-NEXT: 1, 100 (97.1%)
# ALL: Scheduler's queue usage:
-# ALL-NEXT: JALU01, 20/20
-# ALL-NEXT: JFPU01, 0/18
-# ALL-NEXT: JLSAGU, 0/12
+# ALL-NEXT: [1] Resource name.
+# ALL-NEXT: [2] Average number of used buffer entries.
+# ALL-NEXT: [3] Maximum number of used buffer entries.
+# ALL-NEXT: [4] Total number of buffer entries.
+
+# ALL: [1] [2] [3] [4]
+# ALL-NEXT: JALU01 15 20 20
+# ALL-NEXT: JFPU01 0 0 18
+# ALL-NEXT: JLSAGU 0 0 12
# ALL: Retire Control Unit - number of cycles where we saw N instructions retired:
# ALL-NEXT: [# retired], [# cycles]
# ALL-NEXT: 0, 3 (75.0%)
# ALL-NEXT: 1, 1 (25.0%)
-# BDW: Scheduler's queue usage:
-# BDW-NEXT: BWPortAny, 1/60
-
-# HSW: Scheduler's queue usage:
-# HSW-NEXT: HWPortAny, 1/60
+# SLM: Scheduler's queue usage:
+# SLM-NEXT: No scheduler resources used.
-# KNL: Scheduler's queue usage:
-# KNL-NEXT: HWPortAny, 1/60
+# BDW: Scheduler's queue usage:
+# BDW-NEXT: [1] Resource name.
+# BDW-NEXT: [2] Average number of used buffer entries.
+# BDW-NEXT: [3] Maximum number of used buffer entries.
+# BDW-NEXT: [4] Total number of buffer entries.
# BTVER2: Scheduler's queue usage:
-# BTVER2-NEXT: JALU01, 1/20
-# BTVER2-NEXT: JFPU01, 0/18
-# BTVER2-NEXT: JLSAGU, 0/12
+# BTVER2-NEXT: [1] Resource name.
+# BTVER2-NEXT: [2] Average number of used buffer entries.
+# BTVER2-NEXT: [3] Maximum number of used buffer entries.
+# BTVER2-NEXT: [4] Total number of buffer entries.
-# SLM: Scheduler's queue usage:
-# SLM-NEXT: No scheduler resources used.
+# HSW: Scheduler's queue usage:
+# HSW-NEXT: [1] Resource name.
+# HSW-NEXT: [2] Average number of used buffer entries.
+# HSW-NEXT: [3] Maximum number of used buffer entries.
+# HSW-NEXT: [4] Total number of buffer entries.
# IVB: Scheduler's queue usage:
-# IVB-NEXT: SBPortAny, 1/54
+# IVB-NEXT: [1] Resource name.
+# IVB-NEXT: [2] Average number of used buffer entries.
+# IVB-NEXT: [3] Maximum number of used buffer entries.
+# IVB-NEXT: [4] Total number of buffer entries.
-# SNB: Scheduler's queue usage:
-# SNB-NEXT: SBPortAny, 1/54
+# KNL: Scheduler's queue usage:
+# KNL-NEXT: [1] Resource name.
+# KNL-NEXT: [2] Average number of used buffer entries.
+# KNL-NEXT: [3] Maximum number of used buffer entries.
+# KNL-NEXT: [4] Total number of buffer entries.
# SKX: Scheduler's queue usage:
-# SKX-NEXT: SKLPortAny, 1/60
+# SKX-NEXT: [1] Resource name.
+# SKX-NEXT: [2] Average number of used buffer entries.
+# SKX-NEXT: [3] Maximum number of used buffer entries.
+# SKX-NEXT: [4] Total number of buffer entries.
# SKX-AVX512: Scheduler's queue usage:
-# SKX-AVX512-NEXT: SKXPortAny, 1/60
+# SKX-AVX512-NEXT: [1] Resource name.
+# SKX-AVX512-NEXT: [2] Average number of used buffer entries.
+# SKX-AVX512-NEXT: [3] Maximum number of used buffer entries.
+# SKX-AVX512-NEXT: [4] Total number of buffer entries.
+
+# SNB: Scheduler's queue usage:
+# SNB-NEXT: [1] Resource name.
+# SNB-NEXT: [2] Average number of used buffer entries.
+# SNB-NEXT: [3] Maximum number of used buffer entries.
+# SNB-NEXT: [4] Total number of buffer entries.
# ZNVER1: Scheduler's queue usage:
-# ZNVER1-NEXT: ZnAGU, 0/28
-# ZNVER1-NEXT: ZnALU, 1/56
-# ZNVER1-NEXT: ZnFPU, 0/36
+# ZNVER1-NEXT: [1] Resource name.
+# ZNVER1-NEXT: [2] Average number of used buffer entries.
+# ZNVER1-NEXT: [3] Maximum number of used buffer entries.
+# ZNVER1-NEXT: [4] Total number of buffer entries.
+
+# BDW: [1] [2] [3] [4]
+# BDW-NEXT: BWPortAny 0 1 60
+
+# HSW: [1] [2] [3] [4]
+# HSW-NEXT: HWPortAny 0 1 60
+
+# KNL: [1] [2] [3] [4]
+# KNL-NEXT: HWPortAny 0 1 60
+
+# BTVER2: [1] [2] [3] [4]
+# BTVER2-NEXT: JALU01 0 1 20
+# BTVER2-NEXT: JFPU01 0 0 18
+# BTVER2-NEXT: JLSAGU 0 0 12
+
+# IVB: [1] [2] [3] [4]
+# IVB-NEXT: SBPortAny 0 1 54
+
+# SNB: [1] [2] [3] [4]
+# SNB-NEXT: SBPortAny 0 1 54
+
+# SKX: [1] [2] [3] [4]
+# SKX-NEXT: SKLPortAny 0 1 60
+
+# SKX-AVX512: [1] [2] [3] [4]
+# SKX-AVX512-NEXT: SKXPortAny 0 1 60
+
+# ZNVER1: [1] [2] [3] [4]
+# ZNVER1-NEXT: ZnAGU 0 0 28
+# ZNVER1-NEXT: ZnALU 0 1 56
+# ZNVER1-NEXT: ZnFPU 0 0 36
#include "Views/SchedulerStatistics.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/FormattedStream.h"
using namespace llvm;
void SchedulerStatistics::onReservedBuffers(ArrayRef<unsigned> Buffers) {
for (const unsigned Buffer : Buffers) {
- if (BufferedResources.find(Buffer) != BufferedResources.end()) {
- BufferUsage &BU = BufferedResources[Buffer];
- BU.SlotsInUse++;
- BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
- continue;
- }
-
- BufferedResources.insert(
- std::pair<unsigned, BufferUsage>(Buffer, {1U, 1U}));
+ BufferUsage &BU = Usage[Buffer];
+ BU.SlotsInUse++;
+ BU.MaxUsedSlots = std::max(BU.MaxUsedSlots, BU.SlotsInUse);
}
}
void SchedulerStatistics::onReleasedBuffers(ArrayRef<unsigned> Buffers) {
- for (const unsigned Buffer : Buffers) {
- assert(BufferedResources.find(Buffer) != BufferedResources.end() &&
- "Buffered resource not in map?");
- BufferUsage &BU = BufferedResources[Buffer];
- BU.SlotsInUse--;
- }
+  // Give back one slot for every buffer listed in Buffers.
+  for (const unsigned Buffer : Buffers) {
+    // SlotsInUse is unsigned: releasing a slot that was never reserved would
+    // wrap around and corrupt the per-cycle cumulative usage that is derived
+    // from it. Catch that (and an out-of-range buffer index) in debug builds.
+    assert(Buffer < Usage.size() && Usage[Buffer].SlotsInUse &&
+           "Released a buffer slot that was never reserved?");
+    Usage[Buffer].SlotsInUse--;
+  }
}
-void SchedulerStatistics::printSchedulerStatistics(
- llvm::raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nSchedulers - number of cycles where we saw N instructions "
- "issued:\n";
- TempStream << "[# issued], [# cycles]\n";
- for (const std::pair<unsigned, unsigned> &Entry : IssuedPerCycle) {
- TempStream << " " << Entry.first << ", " << Entry.second << " ("
- << format("%.1f", ((double)Entry.second / NumCycles) * 100)
- << "%)\n";
- }
+/// Folds the current state into the running statistics: adds every buffer's
+/// SlotsInUse to its cumulative counter, records NumIssued in the
+/// IssuedPerCycle histogram, and resets NumIssued to zero.
+/// NOTE(review): presumably invoked once per simulated cycle - confirm
+/// against the caller.
+void SchedulerStatistics::updateHistograms() {
+  for (BufferUsage &BU : Usage)
+    BU.CumulativeNumUsedSlots += BU.SlotsInUse;
+
+  // IssuedPerCycle is sized from the number of processor resource kinds,
+  // which is unrelated to how many instructions were counted in NumIssued.
+  // Grow the histogram on demand so that a large NumIssued cannot index past
+  // the end of the vector.
+  if (NumIssued >= IssuedPerCycle.size())
+    IssuedPerCycle.resize(NumIssued + 1, 0);
+  IssuedPerCycle[NumIssued]++;
+  NumIssued = 0;
+}
+
+void SchedulerStatistics::printSchedulerStats(raw_ostream &OS) const {
+ OS << "\n\nSchedulers - "
+ << "number of cycles where we saw N instructions issued:\n";
+ OS << "[# issued], [# cycles]\n";
+
+ const auto It =
+ std::max_element(IssuedPerCycle.begin(), IssuedPerCycle.end());
+ unsigned Index = std::distance(IssuedPerCycle.begin(), It);
+
+ bool HasColors = OS.has_colors();
+ for (unsigned I = 0, E = IssuedPerCycle.size(); I < E; ++I) {
+ unsigned IPC = IssuedPerCycle[I];
+ if (!IPC)
+ continue;
+
+ if (I == Index && HasColors)
+ OS.changeColor(raw_ostream::SAVEDCOLOR, true, false);
- TempStream.flush();
- OS << Buffer;
+ OS << " " << I << ", " << IPC << " ("
+ << format("%.1f", ((double)IPC / NumCycles) * 100) << "%)\n";
+ if (HasColors)
+ OS.resetColor();
+ }
}
void SchedulerStatistics::printSchedulerUsage(raw_ostream &OS) const {
- std::string Buffer;
- raw_string_ostream TempStream(Buffer);
- TempStream << "\n\nScheduler's queue usage:\n";
- // Early exit if no buffered resources were consumed.
- if (BufferedResources.empty()) {
- TempStream << "No scheduler resources used.\n";
- TempStream.flush();
- OS << Buffer;
+ assert(NumCycles && "Unexpected number of cycles!");
+
+ OS << "\nScheduler's queue usage:\n";
+ if (all_of(Usage, [](const BufferUsage &BU) { return !BU.MaxUsedSlots; })) {
+ OS << "No scheduler resources used.\n";
return;
}
+ OS << "[1] Resource name.\n"
+ << "[2] Average number of used buffer entries.\n"
+ << "[3] Maximum number of used buffer entries.\n"
+ << "[4] Total number of buffer entries.\n\n"
+ << " [1] [2] [3] [4]\n";
+
+ formatted_raw_ostream FOS(OS);
+ bool HasColors = FOS.has_colors();
for (unsigned I = 0, E = SM.getNumProcResourceKinds(); I < E; ++I) {
const MCProcResourceDesc &ProcResource = *SM.getProcResource(I);
if (ProcResource.BufferSize <= 0)
continue;
- const auto It = BufferedResources.find(I);
- unsigned MaxUsedSlots =
- It == BufferedResources.end() ? 0 : It->second.MaxUsedSlots;
- TempStream << ProcResource.Name << ", " << MaxUsedSlots << '/'
- << ProcResource.BufferSize << '\n';
+ const BufferUsage &BU = Usage[I];
+ double AvgUsage = (double)BU.CumulativeNumUsedSlots / NumCycles;
+ double AlmostFullThreshold = (double)(ProcResource.BufferSize * 4) / 5;
+ unsigned NormalizedAvg = floor((AvgUsage * 10) + 0.5) / 10;
+ unsigned NormalizedThreshold = floor((AlmostFullThreshold * 10) + 0.5) / 10;
+
+ FOS << ProcResource.Name;
+ FOS.PadToColumn(17);
+ if (HasColors && NormalizedAvg >= NormalizedThreshold)
+ FOS.changeColor(raw_ostream::YELLOW, true, false);
+ FOS << NormalizedAvg;
+ if (HasColors)
+ FOS.resetColor();
+ FOS.PadToColumn(28);
+ if (HasColors &&
+ BU.MaxUsedSlots == static_cast<unsigned>(ProcResource.BufferSize))
+ FOS.changeColor(raw_ostream::RED, true, false);
+ FOS << BU.MaxUsedSlots;
+ if (HasColors)
+ FOS.resetColor();
+ FOS.PadToColumn(39);
+ FOS << ProcResource.BufferSize << '\n';
}
- TempStream.flush();
- OS << Buffer;
+ FOS.flush();
+}
+
+void SchedulerStatistics::printView(llvm::raw_ostream &OS) const {
+ printSchedulerStats(OS);
+ printSchedulerUsage(OS);
}
+
} // namespace mca
///
/// Schedulers - number of cycles where we saw N instructions issued:
/// [# issued], [# cycles]
-/// 0, 7 (5.4%)
-/// 1, 4 (3.1%)
-/// 2, 8 (6.2%)
+/// 0, 6 (2.9%)
+/// 1, 106 (50.7%)
+/// 2, 97 (46.4%)
///
/// Scheduler's queue usage:
-/// JALU01, 0/20
-/// JFPU01, 18/18
-/// JLSAGU, 0/12
+/// [1] Resource name.
+/// [2] Average number of used buffer entries.
+/// [3] Maximum number of used buffer entries.
+/// [4] Total number of buffer entries.
///
+/// [1] [2] [3] [4]
+/// JALU01 0 0 20
+/// JFPU01 15 18 18
+/// JLSAGU 0 0 12
+//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TOOLS_LLVM_MCA_SCHEDULERSTATISTICS_H
namespace mca {
-class SchedulerStatistics : public View {
+class SchedulerStatistics final : public View {
const llvm::MCSchedModel &SM;
-
- using Histogram = std::map<unsigned, unsigned>;
- Histogram IssuedPerCycle;
-
unsigned NumIssued;
unsigned NumCycles;
struct BufferUsage {
unsigned SlotsInUse;
unsigned MaxUsedSlots;
+ uint64_t CumulativeNumUsedSlots;
};
- std::map<unsigned, BufferUsage> BufferedResources;
-
- void updateHistograms() {
- IssuedPerCycle[NumIssued]++;
- NumIssued = 0;
- }
+ std::vector<unsigned> IssuedPerCycle;
+ std::vector<BufferUsage> Usage;
- void printSchedulerStatistics(llvm::raw_ostream &OS) const;
+ void updateHistograms();
+ void printSchedulerStats(llvm::raw_ostream &OS) const;
void printSchedulerUsage(llvm::raw_ostream &OS) const;
public:
SchedulerStatistics(const llvm::MCSubtargetInfo &STI)
- : SM(STI.getSchedModel()), NumIssued(0), NumCycles(0) {}
+ : SM(STI.getSchedModel()), NumIssued(0), NumCycles(0),
+ IssuedPerCycle(STI.getSchedModel().NumProcResourceKinds, 0),
+ Usage(STI.getSchedModel().NumProcResourceKinds, {0, 0, 0}) {}
void onEvent(const HWInstructionEvent &Event) override;
// buffered resource in the Buffers set.
void onReleasedBuffers(llvm::ArrayRef<unsigned> Buffers) override;
- void printView(llvm::raw_ostream &OS) const override {
- printSchedulerStatistics(OS);
- printSchedulerUsage(OS);
- }
+ void printView(llvm::raw_ostream &OS) const override;
};
} // namespace mca