STATIC
BenchmarkResult.cpp
BenchmarkRunner.cpp
+ Clustering.cpp
InMemoryAssembler.cpp
InstructionSnippetGenerator.cpp
Latency.cpp
--- /dev/null
+//===-- Clustering.cpp ------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Clustering.h"
+#include <string>
+#include <unordered_set>
+#include <utility>
+
+namespace exegesis {
+
+// The clustering problem has the following characteristics:
+// (A) - Low dimension (dimensions are typically proc resource units,
+// typically < 10).
+// (B) - Number of points : ~thousands (points are measurements of an MCInst)
+// (C) - Number of clusters: ~tens.
+// (D) - The number of clusters is not known /a priori/.
+// (E) - The amount of noise is relatively small.
+// The problem is rather small. In terms of algorithms, (D) disqualifies
+// k-means and makes algorithms such as DBSCAN[1] or OPTICS[2] more applicable.
+//
+// We've used DBSCAN here because it's simple to implement. This is a pretty
+// straightforward and inefficient implementation of the pseudocode in [1].
+//
+// [1] https://en.wikipedia.org/wiki/DBSCAN
+// [2] https://en.wikipedia.org/wiki/OPTICS_algorithm
+
+namespace {
+
+// Returns the indices of all usable points whose squared Euclidean distance to
+// Points[Q] is at most EpsilonSquared (the bound is inclusive). Q itself is
+// never part of the result.
+//
+// Points flagged with an error, or carrying no measurements, are skipped.
+// validateAndSetup() does not check the dimensions of error points, so reading
+// their measurements could run out of bounds, and they must not contribute to
+// the density of a region.
+//
+// Precondition: every remaining point has at least as many measurements as
+// Points[Q], listed in the same order.
+std::vector<size_t> rangeQuery(const std::vector<InstructionBenchmark> &Points,
+                               const size_t Q, const double EpsilonSquared) {
+  std::vector<size_t> Neighbors;
+  const auto &QMeasurements = Points[Q].Measurements;
+  const size_t NumDimensions = QMeasurements.size();
+  for (size_t P = 0, NumPoints = Points.size(); P < NumPoints; ++P) {
+    if (P == Q)
+      continue;
+    const auto &Candidate = Points[P];
+    // An error point is recognised by its non-empty Error, exactly as in
+    // validateAndSetup(); the empty-measurements test is kept as a safety net.
+    if (!Candidate.Error.empty() || Candidate.Measurements.empty())
+      continue;
+    const auto &PMeasurements = Candidate.Measurements;
+    double DistanceSquared = 0;
+    for (size_t I = 0; I < NumDimensions; ++I) {
+      const auto Diff = PMeasurements[I].Value - QMeasurements[I].Value;
+      DistanceSquared += Diff * Diff;
+    }
+    // Comparing squared distances avoids a sqrt per pair of points.
+    if (DistanceSquared <= EpsilonSquared)
+      Neighbors.push_back(P);
+  }
+  return Neighbors;
+}
+
+} // namespace
+
+InstructionBenchmarkClustering::InstructionBenchmarkClustering()
+ : NoiseCluster_(ClusterId::noise()), ErrorCluster_(ClusterId::error()) {}
+
+// Prepares the per-point labels and checks that the input can be clustered.
+//
+// Every point with a non-empty Error is labelled as an error point and added
+// to the error cluster. All other points must expose the same number of
+// measurements with identical keys in identical order; otherwise an
+// llvm::StringError is returned. On success NumDimensions_ holds the number of
+// measurements of the non-error points (it is left untouched if there are
+// none).
+llvm::Error InstructionBenchmarkClustering::validateAndSetup(
+    const std::vector<InstructionBenchmark> &Points) {
+  const size_t NumPoints = Points.size();
+  ClusterIdForPoint_.resize(NumPoints);
+
+  // Measurements of the most recent non-error point. Each new non-error point
+  // is compared against it, which transitively compares all of them.
+  const std::vector<BenchmarkMeasure> *Reference = nullptr;
+  for (size_t Index = 0; Index != NumPoints; ++Index) {
+    const InstructionBenchmark &Benchmark = Points[Index];
+    const bool HasError = !Benchmark.Error.empty();
+    if (HasError) {
+      ClusterIdForPoint_[Index] = ClusterId::error();
+      ErrorCluster_.PointIndices.push_back(Index);
+      continue;
+    }
+
+    const std::vector<BenchmarkMeasure> *Current = &Benchmark.Measurements;
+    if (Reference != nullptr) {
+      const size_t NumKeys = Reference->size();
+      if (Current->size() != NumKeys)
+        return llvm::make_error<llvm::StringError>(
+            "inconsistent measurement dimensions",
+            llvm::inconvertibleErrorCode());
+      for (size_t Dim = 0; Dim != NumKeys; ++Dim) {
+        if (Reference->at(Dim).Key != Current->at(Dim).Key)
+          return llvm::make_error<llvm::StringError>(
+              "inconsistent measurement dimensions keys",
+              llvm::inconvertibleErrorCode());
+      }
+    }
+    Reference = Current;
+  }
+
+  if (Reference != nullptr)
+    NumDimensions_ = Reference->size();
+  return llvm::Error::success();
+}
+
+// Runs DBSCAN over the points that are still unlabelled.
+//
+// A point's region is dense when the point plus its neighbours (the points
+// within sqrt(EpsilonSquared) of it, as reported by rangeQuery()) number at
+// least MinPts. Each unlabelled dense point starts a new cluster, which then
+// grows through the neighbours of every dense point it absorbs. Points that
+// end up in no cluster are collected in the noise cluster.
+void InstructionBenchmarkClustering::dbScan(
+    const std::vector<InstructionBenchmark> &Points, const size_t MinPts,
+    const double EpsilonSquared) {
+  const size_t NumPoints = Points.size();
+  for (size_t Seed = 0; Seed != NumPoints; ++Seed) {
+    // Skip anything that already carries a label (error, noise or cluster).
+    if (!ClusterIdForPoint_[Seed].isUndef())
+      continue;
+
+    const std::vector<size_t> SeedNeighbors =
+        rangeQuery(Points, Seed, EpsilonSquared);
+    const bool SeedIsDense = SeedNeighbors.size() + 1 >= MinPts;
+    if (!SeedIsDense) {
+      // Provisionally noise; a later cluster may still claim it as a border
+      // point.
+      ClusterIdForPoint_[Seed] = ClusterId::noise();
+      continue;
+    }
+
+    // Open a new cluster seeded with this point.
+    Clusters_.emplace_back(ClusterId::makeValid(Clusters_.size()));
+    Cluster &Current = Clusters_.back();
+    ClusterIdForPoint_[Seed] = Current.Id;
+    Current.PointIndices.push_back(Seed);
+
+    // Work list of points reachable from the cluster that are still to be
+    // examined.
+    std::unordered_set<size_t> Pending(SeedNeighbors.begin(),
+                                       SeedNeighbors.end());
+    while (!Pending.empty()) {
+      const auto Front = Pending.begin();
+      const size_t Candidate = *Front;
+      Pending.erase(Front);
+
+      ClusterId &Label = ClusterIdForPoint_[Candidate];
+      const bool WasNoise = Label.isNoise();
+      if (!WasNoise && !Label.isUndef())
+        continue; // Already an error point or a member of some cluster.
+
+      // Add the candidate to the current cluster.
+      Label = Current.Id;
+      Current.PointIndices.push_back(Candidate);
+      if (WasNoise)
+        continue; // Former noise becomes a border point: no expansion.
+
+      // Keep growing through the candidate only if its own region is dense.
+      const std::vector<size_t> Reachable =
+          rangeQuery(Points, Candidate, EpsilonSquared);
+      if (Reachable.size() + 1 >= MinPts)
+        Pending.insert(Reachable.begin(), Reachable.end());
+    }
+  }
+
+  // Whatever is still labelled as noise goes into the noise cluster.
+  for (size_t Index = 0; Index != NumPoints; ++Index)
+    if (ClusterIdForPoint_[Index].isNoise())
+      NoiseCluster_.PointIndices.push_back(Index);
+}
+
+// Clusters `Points` with DBSCAN.
+//
+// MinPts and Epsilon are the DBSCAN density parameters; Epsilon is a plain
+// distance and is squared before being handed to dbScan(). Returns the
+// validation error if the points do not share the same measurement keys.
+// Otherwise returns the clustering, which holds no valid cluster when every
+// point is an error point.
+llvm::Expected<InstructionBenchmarkClustering>
+InstructionBenchmarkClustering::create(
+    const std::vector<InstructionBenchmark> &Points, const size_t MinPts,
+    const double Epsilon) {
+  InstructionBenchmarkClustering Clustering;
+  if (auto Error = Clustering.validateAndSetup(Points)) {
+    // llvm::Error is move-only. Converting a named local into the Expected
+    // return type is not implicitly a move for pre-C++20 compilers, so the
+    // move has to be spelled out.
+    return std::move(Error);
+  }
+  if (Clustering.ErrorCluster_.PointIndices.size() == Points.size()) {
+    return Clustering; // Nothing to cluster.
+  }
+
+  Clustering.dbScan(Points, MinPts, Epsilon * Epsilon);
+  return Clustering;
+}
+
+} // namespace exegesis
--- /dev/null
+//===-- Clustering.h --------------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// Utilities to compute benchmark result clusters.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
+#define LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
+
+#include "BenchmarkResult.h"
+#include "llvm/Support/Error.h"
+#include <vector>
+
+namespace exegesis {
+
+// Result of clustering a set of benchmark points: a label for every point plus
+// the list of points belonging to each cluster.
+class InstructionBenchmarkClustering {
+public:
+  // Clusters `Points` using DBSCAN with the given parameters. See the cc file
+  // for more explanations on the algorithm.
+  static llvm::Expected<InstructionBenchmarkClustering>
+  create(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
+         double Epsilon);
+
+  // Label attached to a point. It is either a valid cluster index (>= 0) or
+  // one of three special states: undefined (the default), noise or error.
+  class ClusterId {
+  public:
+    static ClusterId noise() { return ClusterId(kNoise); }
+    static ClusterId error() { return ClusterId(kError); }
+    // Precondition: Id >= 0.
+    static ClusterId makeValid(int Id) {
+      assert(Id >= 0);
+      return ClusterId(Id);
+    }
+    // A default-constructed id is undefined.
+    ClusterId() : Id_(kUndef) {}
+    bool operator==(const ClusterId &O) const { return Id_ == O.Id_; }
+
+    bool isValid() const { return Id_ >= 0; }
+    bool isUndef() const { return Id_ == kUndef; }
+    bool isNoise() const { return Id_ == kNoise; }
+    bool isError() const { return Id_ == kError; }
+
+    // Index of the cluster. Precondition: isValid().
+    size_t getId() const {
+      assert(isValid());
+      return static_cast<size_t>(Id_);
+    }
+
+  private:
+    explicit ClusterId(int Id) : Id_(Id) {}
+    // The special states use negative values so that they can never collide
+    // with a valid (non-negative) cluster index.
+    static constexpr int kUndef = -1;
+    static constexpr int kNoise = -2;
+    static constexpr int kError = -3;
+    int Id_;
+  };
+
+  // A cluster: its id and the points it contains.
+  struct Cluster {
+    Cluster() = delete;
+    explicit Cluster(const ClusterId &Id) : Id(Id) {}
+
+    const ClusterId Id;
+    // Indices of benchmarks within the cluster.
+    std::vector<int> PointIndices;
+  };
+
+  // Returns the label of point `P`; `P` is not range-checked.
+  ClusterId getClusterIdForPoint(size_t P) const {
+    return ClusterIdForPoint_[P];
+  }
+
+  // Returns the cluster designated by `Id`: the noise cluster, the error
+  // cluster, or the valid cluster with that index. `Id` must not be undefined.
+  const Cluster &getCluster(ClusterId Id) const {
+    assert(!Id.isUndef() && "unlabeled cluster");
+    return Id.isNoise()   ? NoiseCluster_
+           : Id.isError() ? ErrorCluster_
+                          : Clusters_[Id.getId()];
+  }
+
+  // Returns the regular clusters only (neither noise nor error).
+  const std::vector<Cluster> &getValidClusters() const { return Clusters_; }
+
+private:
+  // Instances are only obtained through create().
+  InstructionBenchmarkClustering();
+  llvm::Error validateAndSetup(const std::vector<InstructionBenchmark> &Points);
+  void dbScan(const std::vector<InstructionBenchmark> &Points, size_t MinPts,
+              double EpsilonSquared);
+  int NumDimensions_ = 0;
+  // ClusterIdForPoint_[P] is the cluster id for Points[P].
+  std::vector<ClusterId> ClusterIdForPoint_;
+  std::vector<Cluster> Clusters_;
+  Cluster NoiseCluster_;
+  Cluster ErrorCluster_;
+};
+
+} // namespace exegesis
+
+#endif // LLVM_TOOLS_LLVM_EXEGESIS_CLUSTERING_H
add_llvm_unittest(LLVMExegesisTests
BenchmarkResultTest.cpp
+ ClusteringTest.cpp
OperandGraphTest.cpp
PerfHelperTest.cpp
)
--- /dev/null
+//===-- ClusteringTest.cpp --------------------------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Clustering.h"
+#include "BenchmarkResult.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/raw_ostream.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+namespace exegesis {
+
+namespace {
+
+using testing::Field;
+using testing::UnorderedElementsAre;
+using testing::UnorderedElementsAreArray;
+
+// Six 3-D points: two tight pairs that must form two clusters, one isolated
+// point that must end up as noise, and one point carrying an error.
+TEST(ClusteringTest, Clusters3D) {
+  using ClusterId = InstructionBenchmarkClustering::ClusterId;
+
+  std::vector<InstructionBenchmark> Points(6);
+  // Points 0 and 3 sit around (x=0, y=1, z=2).
+  Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, "A"}};
+  // Points 1 and 4 sit around (x=1, y=1, z=2).
+  Points[1].Measurements = {{"x", 1.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
+  // Point 2 failed and goes to the error cluster.
+  Points[2].Error = "oops";
+  Points[3].Measurements = {{"x", -0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
+  Points[4].Measurements = {{"x", 0.99, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
+  // Point 5 is alone around (x=0, y=0, z=0): too sparse, hence noise.
+  Points[5].Measurements = {{"x", 0.0, ""}, {"y", 0.01, ""}, {"z", -0.02, ""}};
+
+  // Matches a cluster made of exactly the given point indices, in any order.
+  const auto ClusterWithPoints = [](const std::vector<int> &Expected) {
+    return Field(&InstructionBenchmarkClustering::Cluster::PointIndices,
+                 UnorderedElementsAreArray(Expected));
+  };
+
+  auto Clustering = InstructionBenchmarkClustering::create(Points, 2, 0.25);
+  ASSERT_TRUE(static_cast<bool>(Clustering));
+  const InstructionBenchmarkClustering &Result = Clustering.get();
+
+  // Cluster contents.
+  EXPECT_THAT(Result.getValidClusters(),
+              UnorderedElementsAre(ClusterWithPoints({0, 3}),
+                                   ClusterWithPoints({1, 4})));
+  EXPECT_THAT(Result.getCluster(ClusterId::noise()), ClusterWithPoints({5}));
+  EXPECT_THAT(Result.getCluster(ClusterId::error()), ClusterWithPoints({2}));
+
+  // Per-point labels.
+  EXPECT_EQ(Result.getClusterIdForPoint(2), ClusterId::error());
+  EXPECT_EQ(Result.getClusterIdForPoint(5), ClusterId::noise());
+  EXPECT_EQ(Result.getClusterIdForPoint(0), Result.getClusterIdForPoint(3));
+  EXPECT_EQ(Result.getClusterIdForPoint(1), Result.getClusterIdForPoint(4));
+}
+
+// Points with a different number of measurements must be rejected.
+TEST(ClusteringTest, Clusters3D_InvalidSize) {
+  std::vector<InstructionBenchmark> Points(6);
+  // Three dimensions for point 0, but only two for point 1.
+  Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}, {"z", 1.98, ""}};
+  Points[1].Measurements = {{"y", 1.02, ""}, {"z", 1.98, ""}};
+
+  llvm::Error Failure =
+      InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
+  ASSERT_TRUE(static_cast<bool>(Failure));
+  // An llvm::Error has to be consumed before it is destroyed.
+  llvm::consumeError(std::move(Failure));
+}
+
+// Points with the same measurement keys in a different order must be rejected.
+TEST(ClusteringTest, Clusters3D_InvalidOrder) {
+  std::vector<InstructionBenchmark> Points(6);
+  // Same two keys, but listed as (x, y) for point 0 and (y, x) for point 1.
+  Points[0].Measurements = {{"x", 0.01, ""}, {"y", 1.02, ""}};
+  Points[1].Measurements = {{"y", 1.02, ""}, {"x", 1.98, ""}};
+
+  llvm::Error Failure =
+      InstructionBenchmarkClustering::create(Points, 2, 0.25).takeError();
+  ASSERT_TRUE(static_cast<bool>(Failure));
+  // An llvm::Error has to be consumed before it is destroyed.
+  llvm::consumeError(std::move(Failure));
+}
+
+} // namespace
+} // namespace exegesis