[exegesis] Analysis: filtering for benchmark results
author Roman Lebedev <lebedev.ri@gmail.com>
Wed, 4 Jan 2023 18:13:48 +0000 (21:13 +0300)
committer Roman Lebedev <lebedev.ri@gmail.com>
Wed, 4 Jan 2023 18:16:11 +0000 (21:16 +0300)
By default, all benchmark results are analysed, but sometimes it may be useful
to only look at those that do not involve memory, or vice versa. This option
allows you to either keep all benchmarks, filter out (ignore) all the ones
that do involve memory (i.e. involve instructions that may read or write to
memory), or do the opposite and only keep such benchmarks.

Personally, so far I have found the benchmarks that do involve memory
to have dubious results, but the ones that do not involve memory
are generally actionable. So I would like to have a toggle to declutter results.

Reviewed By: courbet

Differential Revision: https://reviews.llvm.org/D140734

llvm/docs/CommandGuide/llvm-exegesis.rst
llvm/test/tools/llvm-exegesis/X86/analysis-filter.test [new file with mode: 0644]
llvm/tools/llvm-exegesis/lib/BenchmarkResult.h
llvm/tools/llvm-exegesis/llvm-exegesis.cpp

index 1d78f45..dd4fc78 100644 (file)
@@ -278,6 +278,14 @@ OPTIONS
  If non-empty, write inconsistencies found during analysis to this file. `-`
  prints to stdout. By default, this analysis is not run.
 
+.. option:: -analysis-filter=[all|reg-only|mem-only]
+
+ By default, all benchmark results are analysed, but sometimes it may be useful
+ to only look at those that do not involve memory, or vice versa. This option
+ allows you to either keep all benchmarks, filter out (ignore) all the ones
+ that do involve memory (i.e. involve instructions that may read or write to
+ memory), or do the opposite and only keep such benchmarks.
+
 .. option:: -analysis-clustering=[dbscan,naive]
 
  Specify the clustering algorithm to use. By default DBSCAN will be used.
diff --git a/llvm/test/tools/llvm-exegesis/X86/analysis-filter.test b/llvm/test/tools/llvm-exegesis/X86/analysis-filter.test
new file mode 100644 (file)
index 0000000..d6ad67f
--- /dev/null
@@ -0,0 +1,110 @@
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck %s --check-prefixes=CHECK-CLUSTERS-ALL
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-filter=all | FileCheck %s --check-prefixes=CHECK-CLUSTERS-ALL
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-filter=reg-only | FileCheck %s --check-prefixes=CHECK-CLUSTERS,CHECK-CLUSTERS-REG
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-filter=mem-only | FileCheck %s --check-prefixes=CHECK-CLUSTERS,CHECK-CLUSTERS-MEM
+
+# CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,NumMicroOps{{$}}
+
+# CHECK-CLUSTERS-ALL: {{^}}0,
+# CHECK-CLUSTERS-ALL-SAME: ,42.00{{$}}
+# CHECK-CLUSTERS-ALL: {{^}}1,
+# CHECK-CLUSTERS-ALL-SAME: ,840.00{{$}}
+
+# CHECK-CLUSTERS-REG: {{^}}0,
+# CHECK-CLUSTERS-REG-SAME: ,42.00{{$}}
+
+# CHECK-CLUSTERS-MEM: {{^}}0,
+# CHECK-CLUSTERS-MEM-SAME: ,840.00{{$}}
+
+# CHECK-CLUSTERS-NOT: {{^[0-9]+}},
+
+---
+mode:            uops
+key:
+  instructions:
+    - 'ADDPSrr XMM8 XMM8 XMM9'
+    - 'ADDPSrr XMM1 XMM1 XMM13'
+    - 'ADDPSrr XMM12 XMM12 XMM3'
+    - 'ADDPSrr XMM7 XMM7 XMM13'
+    - 'ADDPSrr XMM5 XMM5 XMM11'
+    - 'ADDPSrr XMM2 XMM2 XMM14'
+    - 'ADDPSrr XMM6 XMM6 XMM14'
+    - 'ADDPSrr XMM0 XMM0 XMM4'
+    - 'ADDPSrr XMM10 XMM10 XMM9'
+    - 'ADDPSrr XMM15 XMM15 XMM9'
+  config:          ''
+  register_initial_values:
+    - 'XMM8=0x0'
+    - 'XMM9=0x0'
+    - 'MXCSR=0x0'
+    - 'XMM1=0x0'
+    - 'XMM13=0x0'
+    - 'XMM12=0x0'
+    - 'XMM3=0x0'
+    - 'XMM7=0x0'
+    - 'XMM5=0x0'
+    - 'XMM11=0x0'
+    - 'XMM2=0x0'
+    - 'XMM14=0x0'
+    - 'XMM6=0x0'
+    - 'XMM0=0x0'
+    - 'XMM4=0x0'
+    - 'XMM10=0x0'
+    - 'XMM15=0x0'
+cpu_name:        znver3
+llvm_triple:     x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+  - { key: NumMicroOps, value: 42, per_snippet_value: 420 }
+error:           ''
+info:            instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, for uses, randomizing registers
+assembled_snippet: 
+...
+---
+mode:            uops
+key:
+  instructions:
+    - 'ADDPSrm XMM11 XMM11 RDI i_0x1 %noreg i_0x0 %noreg'
+    - 'ADDPSrm XMM6 XMM6 RDI i_0x1 %noreg i_0x40 %noreg'
+    - 'ADDPSrm XMM12 XMM12 RDI i_0x1 %noreg i_0x80 %noreg'
+    - 'ADDPSrm XMM8 XMM8 RDI i_0x1 %noreg i_0xc0 %noreg'
+    - 'ADDPSrm XMM15 XMM15 RDI i_0x1 %noreg i_0x100 %noreg'
+    - 'ADDPSrm XMM2 XMM2 RDI i_0x1 %noreg i_0x140 %noreg'
+    - 'ADDPSrm XMM13 XMM13 RDI i_0x1 %noreg i_0x180 %noreg'
+    - 'ADDPSrm XMM0 XMM0 RDI i_0x1 %noreg i_0x1c0 %noreg'
+    - 'ADDPSrm XMM14 XMM14 RDI i_0x1 %noreg i_0x200 %noreg'
+    - 'ADDPSrm XMM10 XMM10 RDI i_0x1 %noreg i_0x240 %noreg'
+    - 'ADDPSrm XMM7 XMM7 RDI i_0x1 %noreg i_0x280 %noreg'
+    - 'ADDPSrm XMM3 XMM3 RDI i_0x1 %noreg i_0x2c0 %noreg'
+    - 'ADDPSrm XMM1 XMM1 RDI i_0x1 %noreg i_0x300 %noreg'
+    - 'ADDPSrm XMM4 XMM4 RDI i_0x1 %noreg i_0x340 %noreg'
+    - 'ADDPSrm XMM5 XMM5 RDI i_0x1 %noreg i_0x380 %noreg'
+    - 'ADDPSrm XMM9 XMM9 RDI i_0x1 %noreg i_0x3c0 %noreg'
+  config:          ''
+  register_initial_values:
+    - 'XMM11=0x0'
+    - 'MXCSR=0x0'
+    - 'XMM6=0x0'
+    - 'XMM12=0x0'
+    - 'XMM8=0x0'
+    - 'XMM15=0x0'
+    - 'XMM2=0x0'
+    - 'XMM13=0x0'
+    - 'XMM0=0x0'
+    - 'XMM14=0x0'
+    - 'XMM10=0x0'
+    - 'XMM7=0x0'
+    - 'XMM3=0x0'
+    - 'XMM1=0x0'
+    - 'XMM4=0x0'
+    - 'XMM5=0x0'
+    - 'XMM9=0x0'
+cpu_name:        znver3
+llvm_triple:     x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+  - { key: NumMicroOps, value: 840, per_snippet_value: 8400 }
+error:           ''
+info:            instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, for uses, randomizing registers
+assembled_snippet: 
+...
index 11c2ade..f6db14f 100644 (file)
@@ -34,6 +34,8 @@ class Error;
 
 namespace exegesis {
 
+enum class InstructionBenchmarkFilter { All, RegOnly, WithMem };
+
 struct InstructionBenchmarkKey {
   // The LLVM opcode name.
   std::vector<MCInst> Instructions;
index 6588504..aa7261f 100644 (file)
@@ -144,6 +144,18 @@ static cl::opt<bool> IgnoreInvalidSchedClass(
     cl::desc("ignore instructions that do not define a sched class"),
     cl::cat(BenchmarkOptions), cl::init(false));
 
+static cl::opt<exegesis::InstructionBenchmarkFilter> AnalysisSnippetFilter(
+    "analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
+    cl::cat(BenchmarkOptions),
+    cl::values(
+        clEnumValN(exegesis::InstructionBenchmarkFilter::All, "all",
+                   "Keep all benchmarks (default)"),
+        clEnumValN(exegesis::InstructionBenchmarkFilter::RegOnly, "reg-only",
+                   "Keep only those benchmarks that do *NOT* involve memory"),
+        clEnumValN(exegesis::InstructionBenchmarkFilter::WithMem, "mem-only",
+                   "Keep only the benchmarks that *DO* involve memory")),
+    cl::init(exegesis::InstructionBenchmarkFilter::All));
+
 static cl::opt<exegesis::InstructionBenchmarkClustering::ModeE>
     AnalysisClusteringAlgorithm(
         "analysis-clustering", cl::desc("the clustering algorithm to use"),
@@ -495,6 +507,26 @@ static void maybeRunAnalysis(const Analysis &Analyzer, const std::string &Name,
     ExitOnFileError(OutputFilename, std::move(Err));
 }
 
+static void filterPoints(MutableArrayRef<InstructionBenchmark> Points,
+                         const MCInstrInfo &MCII) {
+  if (AnalysisSnippetFilter == exegesis::InstructionBenchmarkFilter::All)
+    return;
+
+  bool WantPointsWithMemOps =
+      AnalysisSnippetFilter == exegesis::InstructionBenchmarkFilter::WithMem;
+  for (InstructionBenchmark &Point : Points) {
+    if (!Point.Error.empty())
+      continue;
+    if (WantPointsWithMemOps ==
+        any_of(Point.Key.Instructions, [&MCII](const MCInst &Inst) {
+          const MCInstrDesc &MCDesc = MCII.get(Inst.getOpcode());
+          return MCDesc.mayLoad() || MCDesc.mayStore();
+        }))
+      continue;
+    Point.Error = "filtered out by user";
+  }
+}
+
 static void analysisMain() {
   ExitOnErr.setBanner("llvm-exegesis: ");
   if (BenchmarkFile.empty())
@@ -540,7 +572,7 @@ static void analysisMain() {
   // Read benchmarks.
   const LLVMState State = ExitOnErr(
       LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName));
-  const std::vector<InstructionBenchmark> Points = ExitOnFileError(
+  std::vector<InstructionBenchmark> Points = ExitOnFileError(
       BenchmarkFile, InstructionBenchmark::readYamls(State, *MemoryBuffer));
 
   outs() << "Parsed " << Points.size() << " benchmark points\n";
@@ -550,6 +582,8 @@ static void analysisMain() {
   }
   // FIXME: Merge points from several runs (latency and uops).
 
+  filterPoints(Points, State.getInstrInfo());
+
   const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
       Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
       AnalysisClusteringEpsilon, &State.getSubtargetInfo(),