From 07f93a1e390cb89b41a9dc76f9f97a65e566e4e3 Mon Sep 17 00:00:00 2001
From: Guillaume Chatelet <gchatelet@google.com>
Date: Thu, 17 Feb 2022 15:29:43 +0000
Subject: [PATCH] [libc][automemcpy] Discard aggregated samples from JSON

The benchmark framework synthesizes fake "aggregate" Samples representing mean, median and cv.
We're only interested in "iteration" samples.

Differential Revision: https://reviews.llvm.org/D120062
---
 .../automemcpy/include/automemcpy/ResultAnalyzer.h |  6 +++++
 libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp  |  2 ++
 .../automemcpy/lib/ResultAnalyzerMain.cpp          |  6 ++++-
 .../automemcpy/unittests/ResultAnalyzerTest.cpp    | 30 ++++++++++++++--------
 4 files changed, 32 insertions(+), 12 deletions(-)
diff --git a/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h b/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
index 2991df0..d4bf272 100644
--- a/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
+++ b/libc/benchmarks/automemcpy/include/automemcpy/ResultAnalyzer.h
@@ -79,9 +79,15 @@ struct SampleId {
                           Distribution.Name)
 };
 
+// The type of a Sample as reported in Google Benchmark's JSON result file.
+// We are only interested in the "iteration" samples; the "aggregate" ones
+// represent derived metrics such as 'mean' or 'median'.
+enum class SampleType { UNKNOWN, ITERATION, AGGREGATE };
+
 // A SampleId with an associated measured throughput.
 struct Sample {
   SampleId Id;
+  SampleType Type = SampleType::UNKNOWN;
   double BytesPerSecond = 0;
 };
 
diff --git a/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp b/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
index 6bfde0d..b134f6c 100644
--- a/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
+++ b/libc/benchmarks/automemcpy/lib/ResultAnalyzer.cpp
@@ -107,6 +107,9 @@ static void processPerDistributionData(PerDistributionData &Data) {
 std::vector<FunctionData> getThroughputs(ArrayRef<Sample> Samples) {
   std::unordered_map<FunctionId, FunctionData, FunctionId::Hasher> Functions;
   for (const auto &S : Samples) {
+    // Skip non-iteration samples; later entries may still be iterations.
+    if (S.Type != SampleType::ITERATION)
+      continue;
     auto &Function = Functions[S.Id.Function];
     auto &Data = Function.PerDistributionData[S.Id.Distribution.Name];
     Data.BytesPerSecondSamples.push_back(S.BytesPerSecond);
diff --git a/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp b/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp
index 422bc57..f3fb825 100644
--- a/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp
+++ b/libc/benchmarks/automemcpy/lib/ResultAnalyzerMain.cpp
@@ -61,13 +61,19 @@ static StringRef getInternalizedString(StringRef VolatileStr) {
 // Helper function for the LLVM JSON API.
 bool fromJSON(const json::Value &V, Sample &Out, json::Path P) {
   std::string Label;
+  std::string RunType;
   json::ObjectMapper O(V, P);
   if (O && O.map("bytes_per_second", Out.BytesPerSecond) &&
-      O.map("label", Label)) {
+      O.map("run_type", RunType) && O.map("label", Label)) {
     const auto LabelPair = StringRef(Label).split(',');
     Out.Id.Function.Name = getInternalizedString(LabelPair.first);
     Out.Id.Function.Type = getFunctionDescriptor(LabelPair.first).Type;
     Out.Id.Distribution.Name = getInternalizedString(LabelPair.second);
+    // Fall back to UNKNOWN: StringSwitch asserts when no case matches.
+    Out.Type = StringSwitch<SampleType>(RunType)
+                   .Case("aggregate", SampleType::AGGREGATE)
+                   .Case("iteration", SampleType::ITERATION)
+                   .Default(SampleType::UNKNOWN);
     return true;
   }
   return false;
diff --git a/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp b/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
index 10d0f98..7b67f70 100644
--- a/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
+++ b/libc/benchmarks/automemcpy/unittests/ResultAnalyzerTest.cpp
@@ -24,7 +24,8 @@ TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsOneSample) {
   static constexpr DistributionId DistA = {{"A"}};
   static constexpr SampleId Id = {Foo1, DistA};
   static constexpr Sample kSamples[] = {
-      Sample{Id, 4},
+      Sample{Id, SampleType::ITERATION, 4},
+      Sample{Id, SampleType::AGGREGATE, -1}, // Aggregates get discarded
   };
 
   const std::vector<FunctionData> Data = getThroughputs(kSamples);
@@ -42,8 +43,9 @@ TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsManySamplesSameBucket) {
   static constexpr FunctionId Foo1 = {"memcpy1", FunctionType::MEMCPY};
   static constexpr DistributionId DistA = {{"A"}};
   static constexpr SampleId Id = {Foo1, DistA};
-  static constexpr Sample kSamples[] = {Sample{Id, 4}, Sample{Id, 5},
-                                        Sample{Id, 5}};
+  static constexpr Sample kSamples[] = {Sample{Id, SampleType::ITERATION, 4},
+                                        Sample{Id, SampleType::ITERATION, 5},
+                                        Sample{Id, SampleType::ITERATION, 5}};
 
   const std::vector<FunctionData> Data = getThroughputs(kSamples);
   EXPECT_THAT(Data, SizeIs(1));
@@ -63,8 +65,10 @@ TEST(AutomemcpyJsonResultsAnalyzer, getThroughputsServeralFunctionAndDist) {
   static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY};
   static constexpr DistributionId DistB = {{"B"}};
   static constexpr Sample kSamples[] = {
-      Sample{{Foo1, DistA}, 1}, Sample{{Foo1, DistB}, 2},
-      Sample{{Foo2, DistA}, 3}, Sample{{Foo2, DistB}, 4}};
+      Sample{{Foo1, DistA}, SampleType::ITERATION, 1},
+      Sample{{Foo1, DistB}, SampleType::ITERATION, 2},
+      Sample{{Foo2, DistA}, SampleType::ITERATION, 3},
+      Sample{{Foo2, DistB}, SampleType::ITERATION, 4}};
   // Data is aggregated per function.
   const std::vector<FunctionData> Data = getThroughputs(kSamples);
   EXPECT_THAT(Data, SizeIs(2)); // 2 functions Foo1 and Foo2.
@@ -78,9 +82,10 @@ TEST(AutomemcpyJsonResultsAnalyzer, getScore) {
   static constexpr FunctionId Foo2 = {"memcpy2", FunctionType::MEMCPY};
   static constexpr FunctionId Foo3 = {"memcpy3", FunctionType::MEMCPY};
   static constexpr DistributionId Dist = {{"A"}};
-  static constexpr Sample kSamples[] = {Sample{{Foo1, Dist}, 1},
-                                        Sample{{Foo2, Dist}, 2},
-                                        Sample{{Foo3, Dist}, 3}};
+  static constexpr Sample kSamples[] = {
+      Sample{{Foo1, Dist}, SampleType::ITERATION, 1},
+      Sample{{Foo2, Dist}, SampleType::ITERATION, 2},
+      Sample{{Foo3, Dist}, SampleType::ITERATION, 3}};
 
   // Data is aggregated per function.
   std::vector<FunctionData> Data = getThroughputs(kSamples);
@@ -113,9 +118,12 @@ TEST(AutomemcpyJsonResultsAnalyzer, castVotes) {
   static constexpr DistributionId DistA = {{"A"}};
   static constexpr DistributionId DistB = {{"B"}};
   static constexpr Sample kSamples[] = {
-      Sample{{Foo1, DistA}, 0}, Sample{{Foo1, DistB}, 30},
-      Sample{{Foo2, DistA}, 1}, Sample{{Foo2, DistB}, 100},
-      Sample{{Foo3, DistA}, 7}, Sample{{Foo3, DistB}, 100},
+      Sample{{Foo1, DistA}, SampleType::ITERATION, 0},
+      Sample{{Foo1, DistB}, SampleType::ITERATION, 30},
+      Sample{{Foo2, DistA}, SampleType::ITERATION, 1},
+      Sample{{Foo2, DistB}, SampleType::ITERATION, 100},
+      Sample{{Foo3, DistA}, SampleType::ITERATION, 7},
+      Sample{{Foo3, DistB}, SampleType::ITERATION, 100},
   };
 
   // DistA Thoughput ranges from 0 to 7.
-- 
2.7.4