[Exegesis] Naive clusterization: sub-partition by sched class id
author Roman Lebedev <lebedev.ri@gmail.com>
Tue, 7 Sep 2021 14:09:58 +0000 (17:09 +0300)
committer Roman Lebedev <lebedev.ri@gmail.com>
Tue, 7 Sep 2021 14:54:37 +0000 (17:54 +0300)
Currently naive clusterization simply groups all benchmarks
by the opcode of the key instruction, but that is suboptimal in certain cases,
e.g. where we can already tell that the particular instructions
resolve into different sched classes.

llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test
llvm/tools/llvm-exegesis/lib/Analysis.cpp
llvm/tools/llvm-exegesis/lib/Analysis.h
llvm/tools/llvm-exegesis/lib/Clustering.cpp
llvm/tools/llvm-exegesis/lib/Clustering.h
llvm/tools/llvm-exegesis/llvm-exegesis.cpp

index 8619082..9c8eec0 100644 (file)
@@ -1,9 +1,15 @@
 # RUN: llvm-exegesis -mcpu=znver3 -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s
 
+# Naive clusterization mainly groups by instruction opcode,
+# but it should also partition the benchmarks of the same opcode
+# by the sched class. For example, a regular `xor`, and same-operand `xor`
+# may have different characteristics, and it will be confusing/misleading
+# to group them.
+
 # CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,latency{{$}}
 # CHECK-CLUSTERS-NEXT: {{^}}0,
 # CHECK-CLUSTERS-SAME: ,1.00{{$}}
-# CHECK-CLUSTERS-NEXT: {{^}}0,
+# CHECK-CLUSTERS:      {{^}}1,
 # CHECK-CLUSTERS-SAME: ,0.20{{$}}
 
 ---
index be360b9..b12f872 100644 (file)
@@ -151,12 +151,15 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
   OS << "\n";
 }
 
-Analysis::Analysis(const Target &Target, std::unique_ptr<MCInstrInfo> InstrInfo,
+Analysis::Analysis(const Target &Target,
+                   std::unique_ptr<MCSubtargetInfo> SubtargetInfo,
+                   std::unique_ptr<MCInstrInfo> InstrInfo,
                    const InstructionBenchmarkClustering &Clustering,
                    double AnalysisInconsistencyEpsilon,
                    bool AnalysisDisplayUnstableOpcodes,
                    const std::string &ForceCpuName)
-    : Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)),
+    : Clustering_(Clustering), SubtargetInfo_(std::move(SubtargetInfo)),
+      InstrInfo_(std::move(InstrInfo)),
       AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
                                            AnalysisInconsistencyEpsilon),
       AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
index 9d8b04c..c52948a 100644 (file)
@@ -36,7 +36,8 @@ namespace exegesis {
 // A helper class to analyze benchmark results for a target.
 class Analysis {
 public:
-  Analysis(const Target &Target, std::unique_ptr<MCInstrInfo> InstrInfo,
+  Analysis(const Target &Target, std::unique_ptr<MCSubtargetInfo> SubtargetInfo,
+           std::unique_ptr<MCInstrInfo> InstrInfo,
            const InstructionBenchmarkClustering &Clustering,
            double AnalysisInconsistencyEpsilon,
            bool AnalysisDisplayUnstableOpcodes,
index c9070ef..08646aa 100644 (file)
@@ -8,13 +8,15 @@
 
 #include "Clustering.h"
 #include "Error.h"
+#include "SchedClassResolution.h"
+#include "llvm/ADT/MapVector.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include <algorithm>
+#include <deque>
 #include <string>
 #include <vector>
-#include <deque>
 
 namespace llvm {
 namespace exegesis {
@@ -183,46 +185,58 @@ void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts) {
   }
 }
 
-void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes) {
-  // Given an instruction Opcode, which are the benchmarks of this instruction?
-  std::vector<SmallVector<size_t, 1>> OpcodeToPoints;
-  OpcodeToPoints.resize(NumOpcodes);
-  size_t NumOpcodesSeen = 0;
+void InstructionBenchmarkClustering::clusterizeNaive(
+    const MCSubtargetInfo &SubtargetInfo, const MCInstrInfo &InstrInfo) {
+  // Given an instruction Opcode, which sched class ids are represented,
+  // and which are the benchmarks for each sched class?
+  std::vector<SmallMapVector<unsigned, SmallVector<size_t, 1>, 1>>
+      OpcodeToSchedClassesToPoints;
+  const unsigned NumOpcodes = InstrInfo.getNumOpcodes();
+  OpcodeToSchedClassesToPoints.resize(NumOpcodes);
+  size_t NumClusters = 0;
   for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
     const InstructionBenchmark &Point = Points_[P];
-    const unsigned Opcode = Point.keyInstruction().getOpcode();
+    const MCInst &MCI = Point.keyInstruction();
+    unsigned SchedClassId;
+    std::tie(SchedClassId, std::ignore) =
+        ResolvedSchedClass::resolveSchedClassId(SubtargetInfo, InstrInfo, MCI);
+    const unsigned Opcode = MCI.getOpcode();
     assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)");
-    SmallVectorImpl<size_t> &PointsOfOpcode = OpcodeToPoints[Opcode];
-    if (PointsOfOpcode.empty()) // If we previously have not seen any points of
-      ++NumOpcodesSeen; // this opcode, then naturally this is the new opcode.
-    PointsOfOpcode.emplace_back(P);
+    auto &Points = OpcodeToSchedClassesToPoints[Opcode][SchedClassId];
+    if (Points.empty()) // If we previously have not seen any points of
+      ++NumClusters;    // this opcode's sched class, then a new cluster begins.
+    Points.emplace_back(P);
   }
-  assert(OpcodeToPoints.size() == NumOpcodes && "sanity check");
-  assert(NumOpcodesSeen <= NumOpcodes &&
-         "can't see more opcodes than there are total opcodes");
-  assert(NumOpcodesSeen <= Points_.size() &&
-         "can't see more opcodes than there are total points");
+  // One opcode may now yield several clusters (one per sched class), so the
+  // cluster count is bounded by the number of points, not of opcodes.
+  assert(NumClusters <= Points_.size() &&
+         "can't see more clusters than there are total points");
 
-  Clusters_.reserve(NumOpcodesSeen); // One cluster per opcode.
-  for (ArrayRef<size_t> PointsOfOpcode :
-       make_filter_range(OpcodeToPoints, [](ArrayRef<size_t> PointsOfOpcode) {
-         return !PointsOfOpcode.empty(); // Ignore opcodes with no points.
-       })) {
-    // Create a new cluster.
-    Clusters_.emplace_back(ClusterId::makeValid(
-        Clusters_.size(), /*IsUnstable=*/!areAllNeighbours(PointsOfOpcode)));
-    Cluster &CurrentCluster = Clusters_.back();
-    // Mark points as belonging to the new cluster.
-    for_each(PointsOfOpcode, [this, &CurrentCluster](size_t P) {
-      ClusterIdForPoint_[P] = CurrentCluster.Id;
-    });
-    // And add all the points of this opcode to the new cluster.
-    CurrentCluster.PointIndices.reserve(PointsOfOpcode.size());
-    CurrentCluster.PointIndices.assign(PointsOfOpcode.begin(),
-                                       PointsOfOpcode.end());
-    assert(CurrentCluster.PointIndices.size() == PointsOfOpcode.size());
+  Clusters_.reserve(NumClusters); // The number of clusters is already known.
+  for (const auto &SchedClassesOfOpcode : OpcodeToSchedClassesToPoints) {
+    if (SchedClassesOfOpcode.empty())
+      continue;
+    for (ArrayRef<size_t> PointsOfSchedClass :
+         make_second_range(SchedClassesOfOpcode)) {
+      if (PointsOfSchedClass.empty())
+        continue;
+      // Create a new cluster.
+      Clusters_.emplace_back(ClusterId::makeValid(
+          Clusters_.size(),
+          /*IsUnstable=*/!areAllNeighbours(PointsOfSchedClass)));
+      Cluster &CurrentCluster = Clusters_.back();
+      // Mark points as belonging to the new cluster.
+      for_each(PointsOfSchedClass, [this, &CurrentCluster](size_t P) {
+        ClusterIdForPoint_[P] = CurrentCluster.Id;
+      });
+      // And add all the points of this opcode's sched class to the new cluster.
+      CurrentCluster.PointIndices.reserve(PointsOfSchedClass.size());
+      CurrentCluster.PointIndices.assign(PointsOfSchedClass.begin(),
+                                         PointsOfSchedClass.end());
+      assert(CurrentCluster.PointIndices.size() == PointsOfSchedClass.size());
+    }
   }
-  assert(Clusters_.size() == NumOpcodesSeen);
+  assert(Clusters_.size() == NumClusters);
 }
 
 // Given an instruction Opcode, we can make benchmarks (measurements) of the
@@ -317,7 +331,7 @@ void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) {
 Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create(
     const std::vector<InstructionBenchmark> &Points, const ModeE Mode,
     const size_t DbscanMinPts, const double AnalysisClusteringEpsilon,
-    Optional<unsigned> NumOpcodes) {
+    const MCSubtargetInfo *SubtargetInfo, const MCInstrInfo *InstrInfo) {
   InstructionBenchmarkClustering Clustering(
       Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon);
   if (auto Error = Clustering.validateAndSetup()) {
@@ -330,13 +344,13 @@ Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create(
   if (Mode == ModeE::Dbscan) {
     Clustering.clusterizeDbScan(DbscanMinPts);
 
-    if (NumOpcodes.hasValue())
-      Clustering.stabilize(NumOpcodes.getValue());
+    if (InstrInfo)
+      Clustering.stabilize(InstrInfo->getNumOpcodes());
   } else /*if(Mode == ModeE::Naive)*/ {
-    if (!NumOpcodes.hasValue())
-      return make_error<Failure>(
-          "'naive' clustering mode requires opcode count to be specified");
-    Clustering.clusterizeNaive(NumOpcodes.getValue());
+    if (!SubtargetInfo || !InstrInfo)
+      return make_error<Failure>("'naive' clustering mode requires "
+                                 "SubtargetInfo and InstrInfo to be present");
+    Clustering.clusterizeNaive(*SubtargetInfo, *InstrInfo);
   }
 
   return Clustering;
index 449ce40..a4da3af 100644 (file)
@@ -32,7 +32,8 @@ public:
   static Expected<InstructionBenchmarkClustering>
   create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
          size_t DbscanMinPts, double AnalysisClusteringEpsilon,
-         Optional<unsigned> NumOpcodes = None);
+         const MCSubtargetInfo *SubtargetInfo = nullptr,
+         const MCInstrInfo *InstrInfo = nullptr);
 
   class ClusterId {
   public:
@@ -126,7 +127,8 @@ private:
   Error validateAndSetup();
 
   void clusterizeDbScan(size_t MinPts);
-  void clusterizeNaive(unsigned NumOpcodes);
+  void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo,
+                       const MCInstrInfo &InstrInfo);
 
   // Stabilization is only needed if dbscan was used to clusterize.
   void stabilize(unsigned NumOpcodes);
index 220f404..178270c 100644 (file)
@@ -435,16 +435,19 @@ static void analysisMain() {
     return;
   }
 
+  std::unique_ptr<MCSubtargetInfo> SubtargetInfo(
+      TheTarget->createMCSubtargetInfo(Points[0].LLVMTriple, CpuName, ""));
+
   std::unique_ptr<MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
   assert(InstrInfo && "Unable to create instruction info!");
 
   const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
       Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
-      AnalysisClusteringEpsilon, InstrInfo->getNumOpcodes()));
+      AnalysisClusteringEpsilon, SubtargetInfo.get(), InstrInfo.get()));
 
-  const Analysis Analyzer(*TheTarget, std::move(InstrInfo), Clustering,
-                          AnalysisInconsistencyEpsilon,
-                          AnalysisDisplayUnstableOpcodes, CpuName);
+  const Analysis Analyzer(
+      *TheTarget, std::move(SubtargetInfo), std::move(InstrInfo), Clustering,
+      AnalysisInconsistencyEpsilon, AnalysisDisplayUnstableOpcodes, CpuName);
 
   maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
                                             AnalysisClustersOutputFile);