[Exegesis] Naive clusterization: sub-partition by sched class id

author Roman Lebedev <lebedev.ri@gmail.com>

Tue, 7 Sep 2021 14:09:58 +0000 (17:09 +0300)

committer Roman Lebedev <lebedev.ri@gmail.com>

Tue, 7 Sep 2021 14:54:37 +0000 (17:54 +0300)
author Roman Lebedev <lebedev.ri@gmail.com>
Tue, 7 Sep 2021 14:09:58 +0000 (17:09 +0300)
committer Roman Lebedev <lebedev.ri@gmail.com>
Tue, 7 Sep 2021 14:54:37 +0000 (17:54 +0300)
diff --git a/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test b/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test

index 8619082..9c8eec0 100644 (file)
--- a/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test
+++ b/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test
@@ -1,9 +1,15 @@
  # RUN: llvm-exegesis -mcpu=znver3 -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s
  
+# Naive clusterization mainly groups by instruction opcode,
+# but it should also partition the benchmarks of the same opcode
+# by sched class. For example, a regular `xor` and a same-operand `xor`
+# may have different characteristics, so grouping them together
+# would be confusing/misleading.
+
  # CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,latency{{$}}
  # CHECK-CLUSTERS-NEXT: {{^}}0,
  # CHECK-CLUSTERS-SAME: ,1.00{{$}}
-# CHECK-CLUSTERS-NEXT: {{^}}0,
+# CHECK-CLUSTERS:      {{^}}1,
  # CHECK-CLUSTERS-SAME: ,0.20{{$}}
  
  ---
diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.cpp b/llvm/tools/llvm-exegesis/lib/Analysis.cpp

index be360b9..b12f872 100644 (file)
--- a/llvm/tools/llvm-exegesis/lib/Analysis.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Analysis.cpp
@@ -151,12 +151,15 @@ void Analysis::printInstructionRowCsv(const size_t PointId,
    OS << "\n";
  }
  
-Analysis::Analysis(const Target &Target, std::unique_ptr<MCInstrInfo> InstrInfo,
+Analysis::Analysis(const Target &Target,
+                   std::unique_ptr<MCSubtargetInfo> SubtargetInfo,
+                   std::unique_ptr<MCInstrInfo> InstrInfo,
                     const InstructionBenchmarkClustering &Clustering,
                     double AnalysisInconsistencyEpsilon,
                     bool AnalysisDisplayUnstableOpcodes,
                     const std::string &ForceCpuName)
-    : Clustering_(Clustering), InstrInfo_(std::move(InstrInfo)),
+    : Clustering_(Clustering), SubtargetInfo_(std::move(SubtargetInfo)),
+      InstrInfo_(std::move(InstrInfo)),
        AnalysisInconsistencyEpsilonSquared_(AnalysisInconsistencyEpsilon *
                                             AnalysisInconsistencyEpsilon),
        AnalysisDisplayUnstableOpcodes_(AnalysisDisplayUnstableOpcodes) {
diff --git a/llvm/tools/llvm-exegesis/lib/Analysis.h b/llvm/tools/llvm-exegesis/lib/Analysis.h

index 9d8b04c..c52948a 100644 (file)
--- a/llvm/tools/llvm-exegesis/lib/Analysis.h
+++ b/llvm/tools/llvm-exegesis/lib/Analysis.h
@@ -36,7 +36,8 @@ namespace exegesis {
  // A helper class to analyze benchmark results for a target.
  class Analysis {
  public:
-  Analysis(const Target &Target, std::unique_ptr<MCInstrInfo> InstrInfo,
+  Analysis(const Target &Target, std::unique_ptr<MCSubtargetInfo> SubtargetInfo,
+           std::unique_ptr<MCInstrInfo> InstrInfo,
             const InstructionBenchmarkClustering &Clustering,
             double AnalysisInconsistencyEpsilon,
             bool AnalysisDisplayUnstableOpcodes,
diff --git a/llvm/tools/llvm-exegesis/lib/Clustering.cpp b/llvm/tools/llvm-exegesis/lib/Clustering.cpp

index c9070ef..08646aa 100644 (file)
--- a/llvm/tools/llvm-exegesis/lib/Clustering.cpp
+++ b/llvm/tools/llvm-exegesis/lib/Clustering.cpp
@@ -8,13 +8,15 @@
  
  #include "Clustering.h"
  #include "Error.h"
+#include "SchedClassResolution.h"
+#include "llvm/ADT/MapVector.h"
  #include "llvm/ADT/SetVector.h"
  #include "llvm/ADT/SmallSet.h"
  #include "llvm/ADT/SmallVector.h"
  #include <algorithm>
+#include <deque>
  #include <string>
  #include <vector>
-#include <deque>
  
  namespace llvm {
  namespace exegesis {
@@ -183,46 +185,58 @@ void InstructionBenchmarkClustering::clusterizeDbScan(const size_t MinPts) {
    }
  }
  
-void InstructionBenchmarkClustering::clusterizeNaive(unsigned NumOpcodes) {
-  // Given an instruction Opcode, which are the benchmarks of this instruction?
-  std::vector<SmallVector<size_t, 1>> OpcodeToPoints;
-  OpcodeToPoints.resize(NumOpcodes);
-  size_t NumOpcodesSeen = 0;
+void InstructionBenchmarkClustering::clusterizeNaive(
+    const MCSubtargetInfo &SubtargetInfo, const MCInstrInfo &InstrInfo) {
+  // Given an instruction Opcode, which sched class ids are represented,
+  // and which are the benchmarks for each sched class?
+  std::vector<SmallMapVector<unsigned, SmallVector<size_t, 1>, 1>>
+      OpcodeToSchedClassesToPoints;
+  const unsigned NumOpcodes = InstrInfo.getNumOpcodes();
+  OpcodeToSchedClassesToPoints.resize(NumOpcodes);
+  size_t NumClusters = 0;
    for (size_t P = 0, NumPoints = Points_.size(); P < NumPoints; ++P) {
      const InstructionBenchmark &Point = Points_[P];
-    const unsigned Opcode = Point.keyInstruction().getOpcode();
+    const MCInst &MCI = Point.keyInstruction();
+    unsigned SchedClassId;
+    std::tie(SchedClassId, std::ignore) =
+        ResolvedSchedClass::resolveSchedClassId(SubtargetInfo, InstrInfo, MCI);
+    const unsigned Opcode = MCI.getOpcode();
      assert(Opcode < NumOpcodes && "NumOpcodes is incorrect (too small)");
-    SmallVectorImpl<size_t> &PointsOfOpcode = OpcodeToPoints[Opcode];
-    if (PointsOfOpcode.empty()) // If we previously have not seen any points of
-      ++NumOpcodesSeen; // this opcode, then naturally this is the new opcode.
-    PointsOfOpcode.emplace_back(P);
+    auto &Points = OpcodeToSchedClassesToPoints[Opcode][SchedClassId];
+    if (Points.empty()) // If we previously have not seen any points of
+      ++NumClusters;    // this opcode's sched class, then new cluster begins.
+    Points.emplace_back(P);
    }
-  assert(OpcodeToPoints.size() == NumOpcodes && "sanity check");
-  assert(NumOpcodesSeen <= NumOpcodes &&
+  assert(NumClusters <= NumOpcodes &&
           "can't see more opcodes than there are total opcodes");
-  assert(NumOpcodesSeen <= Points_.size() &&
+  assert(NumClusters <= Points_.size() &&
           "can't see more opcodes than there are total points");
  
-  Clusters_.reserve(NumOpcodesSeen); // One cluster per opcode.
-  for (ArrayRef<size_t> PointsOfOpcode :
-       make_filter_range(OpcodeToPoints, [](ArrayRef<size_t> PointsOfOpcode) {
-         return !PointsOfOpcode.empty(); // Ignore opcodes with no points.
-       })) {
-    // Create a new cluster.
-    Clusters_.emplace_back(ClusterId::makeValid(
-        Clusters_.size(), /*IsUnstable=*/!areAllNeighbours(PointsOfOpcode)));
-    Cluster &CurrentCluster = Clusters_.back();
-    // Mark points as belonging to the new cluster.
-    for_each(PointsOfOpcode, [this, &CurrentCluster](size_t P) {
-      ClusterIdForPoint_[P] = CurrentCluster.Id;
-    });
-    // And add all the points of this opcode to the new cluster.
-    CurrentCluster.PointIndices.reserve(PointsOfOpcode.size());
-    CurrentCluster.PointIndices.assign(PointsOfOpcode.begin(),
-                                       PointsOfOpcode.end());
-    assert(CurrentCluster.PointIndices.size() == PointsOfOpcode.size());
+  Clusters_.reserve(NumClusters); // The number of clusters is already known.
+  for (const auto &SchedClassesOfOpcode : OpcodeToSchedClassesToPoints) {
+    if (SchedClassesOfOpcode.empty())
+      continue;
+    for (ArrayRef<size_t> PointsOfSchedClass :
+         make_second_range(SchedClassesOfOpcode)) {
+      if (PointsOfSchedClass.empty())
+        continue;
+      // Create a new cluster.
+      Clusters_.emplace_back(ClusterId::makeValid(
+          Clusters_.size(),
+          /*IsUnstable=*/!areAllNeighbours(PointsOfSchedClass)));
+      Cluster &CurrentCluster = Clusters_.back();
+      // Mark points as belonging to the new cluster.
+      for_each(PointsOfSchedClass, [this, &CurrentCluster](size_t P) {
+        ClusterIdForPoint_[P] = CurrentCluster.Id;
+      });
+      // And add all the points of this opcode's sched class to the new cluster.
+      CurrentCluster.PointIndices.reserve(PointsOfSchedClass.size());
+      CurrentCluster.PointIndices.assign(PointsOfSchedClass.begin(),
+                                         PointsOfSchedClass.end());
+      assert(CurrentCluster.PointIndices.size() == PointsOfSchedClass.size());
+    }
    }
-  assert(Clusters_.size() == NumOpcodesSeen);
+  assert(Clusters_.size() == NumClusters);
  }
  
  // Given an instruction Opcode, we can make benchmarks (measurements) of the
@@ -317,7 +331,7 @@ void InstructionBenchmarkClustering::stabilize(unsigned NumOpcodes) {
  Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create(
      const std::vector<InstructionBenchmark> &Points, const ModeE Mode,
      const size_t DbscanMinPts, const double AnalysisClusteringEpsilon,
-    Optional<unsigned> NumOpcodes) {
+    const MCSubtargetInfo *SubtargetInfo, const MCInstrInfo *InstrInfo) {
    InstructionBenchmarkClustering Clustering(
        Points, AnalysisClusteringEpsilon * AnalysisClusteringEpsilon);
    if (auto Error = Clustering.validateAndSetup()) {
@@ -330,13 +344,13 @@ Expected<InstructionBenchmarkClustering> InstructionBenchmarkClustering::create(
    if (Mode == ModeE::Dbscan) {
      Clustering.clusterizeDbScan(DbscanMinPts);
  
-    if (NumOpcodes.hasValue())
-      Clustering.stabilize(NumOpcodes.getValue());
+    if (InstrInfo)
+      Clustering.stabilize(InstrInfo->getNumOpcodes());
    } else /*if(Mode == ModeE::Naive)*/ {
-    if (!NumOpcodes.hasValue())
-      return make_error<Failure>(
-          "'naive' clustering mode requires opcode count to be specified");
-    Clustering.clusterizeNaive(NumOpcodes.getValue());
+    if (!SubtargetInfo || !InstrInfo)
+      return make_error<Failure>("'naive' clustering mode requires "
+                                 "SubtargetInfo and InstrInfo to be present");
+    Clustering.clusterizeNaive(*SubtargetInfo, *InstrInfo);
    }
  
    return Clustering;
diff --git a/llvm/tools/llvm-exegesis/lib/Clustering.h b/llvm/tools/llvm-exegesis/lib/Clustering.h

index 449ce40..a4da3af 100644 (file)
--- a/llvm/tools/llvm-exegesis/lib/Clustering.h
+++ b/llvm/tools/llvm-exegesis/lib/Clustering.h
@@ -32,7 +32,8 @@ public:
    static Expected<InstructionBenchmarkClustering>
    create(const std::vector<InstructionBenchmark> &Points, ModeE Mode,
           size_t DbscanMinPts, double AnalysisClusteringEpsilon,
-         Optional<unsigned> NumOpcodes = None);
+         const MCSubtargetInfo *SubtargetInfo = nullptr,
+         const MCInstrInfo *InstrInfo = nullptr);
  
    class ClusterId {
    public:
@@ -126,7 +127,8 @@ private:
    Error validateAndSetup();
  
    void clusterizeDbScan(size_t MinPts);
-  void clusterizeNaive(unsigned NumOpcodes);
+  void clusterizeNaive(const MCSubtargetInfo &SubtargetInfo,
+                       const MCInstrInfo &InstrInfo);
  
    // Stabilization is only needed if dbscan was used to clusterize.
    void stabilize(unsigned NumOpcodes);
diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp

index 220f404..178270c 100644 (file)
--- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
+++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp
@@ -435,16 +435,19 @@ static void analysisMain() {
      return;
    }
  
+  std::unique_ptr<MCSubtargetInfo> SubtargetInfo(
+      TheTarget->createMCSubtargetInfo(Points[0].LLVMTriple, CpuName, ""));
+
    std::unique_ptr<MCInstrInfo> InstrInfo(TheTarget->createMCInstrInfo());
    assert(InstrInfo && "Unable to create instruction info!");
  
    const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
        Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
-      AnalysisClusteringEpsilon, InstrInfo->getNumOpcodes()));
+      AnalysisClusteringEpsilon, SubtargetInfo.get(), InstrInfo.get()));
  
-  const Analysis Analyzer(*TheTarget, std::move(InstrInfo), Clustering,
-                          AnalysisInconsistencyEpsilon,
-                          AnalysisDisplayUnstableOpcodes, CpuName);
+  const Analysis Analyzer(
+      *TheTarget, std::move(SubtargetInfo), std::move(InstrInfo), Clustering,
+      AnalysisInconsistencyEpsilon, AnalysisDisplayUnstableOpcodes, CpuName);
  
    maybeRunAnalysis<Analysis::PrintClusters>(Analyzer, "analysis clusters",
                                              AnalysisClustersOutputFile);
author	Roman Lebedev <lebedev.ri@gmail.com>
	Tue, 7 Sep 2021 14:09:58 +0000 (17:09 +0300)
committer	Roman Lebedev <lebedev.ri@gmail.com>
	Tue, 7 Sep 2021 14:54:37 +0000 (17:54 +0300)
llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test		patch \| blob \| history
llvm/tools/llvm-exegesis/lib/Analysis.cpp		patch \| blob \| history
llvm/tools/llvm-exegesis/lib/Analysis.h		patch \| blob \| history
llvm/tools/llvm-exegesis/lib/Clustering.cpp		patch \| blob \| history
llvm/tools/llvm-exegesis/lib/Clustering.h		patch \| blob \| history
llvm/tools/llvm-exegesis/llvm-exegesis.cpp		patch \| blob \| history