If non-empty, write inconsistencies found during analysis to this file. `-`
prints to stdout. By default, this analysis is not run.
+.. option:: -analysis-filter=[all|reg-only|mem-only]
+
+ By default, all benchmark results are analysed, but sometimes it may be useful
+ to only look at those that do not involve memory, or vice versa. This option
+ lets you either keep all benchmarks, or filter out (ignore) all the ones that
+ involve memory (i.e. contain instructions that may read from or write to
+ memory), or, conversely, keep only such benchmarks.
+
.. option:: -analysis-clustering=[dbscan,naive]
Specify the clustering algorithm to use. By default DBSCAN will be used.
--- /dev/null
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 | FileCheck %s --check-prefixes=CHECK-CLUSTERS-ALL
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-filter=all | FileCheck %s --check-prefixes=CHECK-CLUSTERS-ALL
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-filter=reg-only | FileCheck %s --check-prefixes=CHECK-CLUSTERS,CHECK-CLUSTERS-REG
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-filter=mem-only | FileCheck %s --check-prefixes=CHECK-CLUSTERS,CHECK-CLUSTERS-MEM
+
+# CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,NumMicroOps{{$}}
+
+# CHECK-CLUSTERS-ALL: {{^}}0,
+# CHECK-CLUSTERS-ALL-SAME: ,42.00{{$}}
+# CHECK-CLUSTERS-ALL: {{^}}1,
+# CHECK-CLUSTERS-ALL-SAME: ,840.00{{$}}
+
+# CHECK-CLUSTERS-REG: {{^}}0,
+# CHECK-CLUSTERS-REG-SAME: ,42.00{{$}}
+
+# CHECK-CLUSTERS-MEM: {{^}}0,
+# CHECK-CLUSTERS-MEM-SAME: ,840.00{{$}}
+
+# CHECK-CLUSTERS-NOT: {{^[0-9]+}},
+
+---
+mode: uops
+key:
+ instructions:
+ - 'ADDPSrr XMM8 XMM8 XMM9'
+ - 'ADDPSrr XMM1 XMM1 XMM13'
+ - 'ADDPSrr XMM12 XMM12 XMM3'
+ - 'ADDPSrr XMM7 XMM7 XMM13'
+ - 'ADDPSrr XMM5 XMM5 XMM11'
+ - 'ADDPSrr XMM2 XMM2 XMM14'
+ - 'ADDPSrr XMM6 XMM6 XMM14'
+ - 'ADDPSrr XMM0 XMM0 XMM4'
+ - 'ADDPSrr XMM10 XMM10 XMM9'
+ - 'ADDPSrr XMM15 XMM15 XMM9'
+ config: ''
+ register_initial_values:
+ - 'XMM8=0x0'
+ - 'XMM9=0x0'
+ - 'MXCSR=0x0'
+ - 'XMM1=0x0'
+ - 'XMM13=0x0'
+ - 'XMM12=0x0'
+ - 'XMM3=0x0'
+ - 'XMM7=0x0'
+ - 'XMM5=0x0'
+ - 'XMM11=0x0'
+ - 'XMM2=0x0'
+ - 'XMM14=0x0'
+ - 'XMM6=0x0'
+ - 'XMM0=0x0'
+ - 'XMM4=0x0'
+ - 'XMM10=0x0'
+ - 'XMM15=0x0'
+cpu_name: znver3
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: NumMicroOps, value: 42, per_snippet_value: 420 }
+error: ''
+info: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, for uses, randomizing registers
+assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F0C244883C4104883EC04C70424801F0000C5F8AE14244883C4044883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F3C244883C410450F58C1410F58CD440F58E3410F58FD410F58EB410F58D6410F58F60F58C4450F58D1450F58F9450F58C1410F58CD440F58E3410F58FD410F58EB410F58D6410F58F60F58C4450F58D1450F58F9450F58C1410F58CD440F58E3410F58FD410F58EB410F58D6410F58F60F58C4450F58D1450F58F9450F58C1410F58CD440F58E3410F58FD410F58EB410F58D6410F58F60F58C4450F58D1450F58F9C3
+...
+---
+mode: uops
+key:
+ instructions:
+ - 'ADDPSrm XMM11 XMM11 RDI i_0x1 %noreg i_0x0 %noreg'
+ - 'ADDPSrm XMM6 XMM6 RDI i_0x1 %noreg i_0x40 %noreg'
+ - 'ADDPSrm XMM12 XMM12 RDI i_0x1 %noreg i_0x80 %noreg'
+ - 'ADDPSrm XMM8 XMM8 RDI i_0x1 %noreg i_0xc0 %noreg'
+ - 'ADDPSrm XMM15 XMM15 RDI i_0x1 %noreg i_0x100 %noreg'
+ - 'ADDPSrm XMM2 XMM2 RDI i_0x1 %noreg i_0x140 %noreg'
+ - 'ADDPSrm XMM13 XMM13 RDI i_0x1 %noreg i_0x180 %noreg'
+ - 'ADDPSrm XMM0 XMM0 RDI i_0x1 %noreg i_0x1c0 %noreg'
+ - 'ADDPSrm XMM14 XMM14 RDI i_0x1 %noreg i_0x200 %noreg'
+ - 'ADDPSrm XMM10 XMM10 RDI i_0x1 %noreg i_0x240 %noreg'
+ - 'ADDPSrm XMM7 XMM7 RDI i_0x1 %noreg i_0x280 %noreg'
+ - 'ADDPSrm XMM3 XMM3 RDI i_0x1 %noreg i_0x2c0 %noreg'
+ - 'ADDPSrm XMM1 XMM1 RDI i_0x1 %noreg i_0x300 %noreg'
+ - 'ADDPSrm XMM4 XMM4 RDI i_0x1 %noreg i_0x340 %noreg'
+ - 'ADDPSrm XMM5 XMM5 RDI i_0x1 %noreg i_0x380 %noreg'
+ - 'ADDPSrm XMM9 XMM9 RDI i_0x1 %noreg i_0x3c0 %noreg'
+ config: ''
+ register_initial_values:
+ - 'XMM11=0x0'
+ - 'MXCSR=0x0'
+ - 'XMM6=0x0'
+ - 'XMM12=0x0'
+ - 'XMM8=0x0'
+ - 'XMM15=0x0'
+ - 'XMM2=0x0'
+ - 'XMM13=0x0'
+ - 'XMM0=0x0'
+ - 'XMM14=0x0'
+ - 'XMM10=0x0'
+ - 'XMM7=0x0'
+ - 'XMM3=0x0'
+ - 'XMM1=0x0'
+ - 'XMM4=0x0'
+ - 'XMM5=0x0'
+ - 'XMM9=0x0'
+cpu_name: znver3
+llvm_triple: x86_64-unknown-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: NumMicroOps, value: 840, per_snippet_value: 8400 }
+error: ''
+info: instruction has tied variables, avoiding Read-After-Write issue, picking random def and use registers not aliasing each other, for uses, randomizing registers
+assembled_snippet: 4883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F1C244883C4104883EC04C70424801F0000C5F8AE14244883C4044883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F04244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F34244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F14244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F3C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F1C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F0C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F24244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C5FA6F2C244883C4104883EC10C7042400000000C744240400000000C744240800000000C744240C00000000C57A6F0C244883C410440F581F0F587740440F58A780000000440F5887C0000000440F58BF000100000F589740010000440F58AF800100000F5887C0010000440F58B700020000440F5897400200000F58BF800200000F589FC00200000F588F000300000F58A7400300000F58AF80030000440F588FC0030000440F581F0F587740440F58A780000000440F5887C0000000440F58BF000100000F589740010000440F58AF800100000F5887C0010000440F58B700020000440F5897400200000F58BF800200000F589FC00200000F588F000300000F58A7400300000F58AF80030000440F588FC0030000440F581F0F587740440F58A780000000440F5887C0000000440F58BF000100000F58974001000044
0F58AF800100000F5887C0010000440F58B700020000440F5897400200000F58BF800200000F589FC00200000F588F000300000F58A7400300000F58AF80030000440F588FC0030000440F581F0F587740440F58A780000000440F5887C0000000440F58BF000100000F589740010000440F58AF800100000F5887C0010000440F58B700020000440F5897400200000F58BF800200000F589FC00200000F588F000300000F58A7400300000F58AF80030000440F588FC0030000C3
+...
namespace exegesis {
+// Selects which benchmark points are kept for analysis (`-analysis-filter`):
+// `All` keeps every point, `RegOnly` keeps only points that do not involve
+// memory, and `WithMem` keeps only points that do involve memory.
+enum class InstructionBenchmarkFilter { All, RegOnly, WithMem };
+
struct InstructionBenchmarkKey {
// The LLVM opcode name.
std::vector<MCInst> Instructions;
cl::desc("ignore instructions that do not define a sched class"),
cl::cat(BenchmarkOptions), cl::init(false));
+// Command-line option `-analysis-filter`: chooses which benchmark points are
+// kept before they are analysed. Accepts `all`, `reg-only` or `mem-only`, and
+// defaults to keeping all benchmarks.
+static cl::opt<exegesis::InstructionBenchmarkFilter> AnalysisSnippetFilter(
+ "analysis-filter", cl::desc("Filter the benchmarks before analysing them"),
+ cl::cat(BenchmarkOptions),
+ cl::values(
+ clEnumValN(exegesis::InstructionBenchmarkFilter::All, "all",
+ "Keep all benchmarks (default)"),
+ clEnumValN(exegesis::InstructionBenchmarkFilter::RegOnly, "reg-only",
+ "Keep only those benchmarks that do *NOT* involve memory"),
+ clEnumValN(exegesis::InstructionBenchmarkFilter::WithMem, "mem-only",
+ "Keep only the benchmarks that *DO* involve memory")),
+ cl::init(exegesis::InstructionBenchmarkFilter::All));
+
static cl::opt<exegesis::InstructionBenchmarkClustering::ModeE>
AnalysisClusteringAlgorithm(
"analysis-clustering", cl::desc("the clustering algorithm to use"),
ExitOnFileError(OutputFilename, std::move(Err));
}
+/// Applies the `-analysis-filter` selection to \p Points in place.
+///
+/// A point "involves memory" when at least one of its snippet instructions is
+/// described by \p MCII as possibly loading or storing. Points rejected by the
+/// filter are not removed; instead their `Error` field is set, so the number
+/// and order of points is unchanged. Points that already carry an error are
+/// left untouched.
+///
+/// \param Points benchmark points to filter (modified in place).
+/// \param MCII   instruction info used to look up each opcode's description.
+static void filterPoints(MutableArrayRef<InstructionBenchmark> Points,
+                         const MCInstrInfo &MCII) {
+  // Nothing to do when every benchmark is to be kept.
+  if (AnalysisSnippetFilter == exegesis::InstructionBenchmarkFilter::All)
+    return;
+
+  // With only two non-`All` modes, a single flag tells which side to keep.
+  const bool KeepMemoryPoints =
+      AnalysisSnippetFilter == exegesis::InstructionBenchmarkFilter::WithMem;
+
+  const auto TouchesMemory = [&MCII](const MCInst &Inst) {
+    const MCInstrDesc &Desc = MCII.get(Inst.getOpcode());
+    return Desc.mayLoad() || Desc.mayStore();
+  };
+
+  for (InstructionBenchmark &Point : Points) {
+    // Preserve the original message of points that have already failed.
+    if (!Point.Error.empty())
+      continue;
+    const bool InvolvesMemory = any_of(Point.Key.Instructions, TouchesMemory);
+    // Reject the point when it falls on the side the user did not ask for.
+    if (InvolvesMemory != KeepMemoryPoints)
+      Point.Error = "filtered out by user";
+  }
+}
+
static void analysisMain() {
ExitOnErr.setBanner("llvm-exegesis: ");
if (BenchmarkFile.empty())
// Read benchmarks.
const LLVMState State = ExitOnErr(
LLVMState::Create(TripleAndCpu.LLVMTriple, TripleAndCpu.CpuName));
- const std::vector<InstructionBenchmark> Points = ExitOnFileError(
+ std::vector<InstructionBenchmark> Points = ExitOnFileError(
BenchmarkFile, InstructionBenchmark::readYamls(State, *MemoryBuffer));
outs() << "Parsed " << Points.size() << " benchmark points\n";
}
// FIXME: Merge points from several runs (latency and uops).
+ filterPoints(Points, State.getInstrInfo());
+
const auto Clustering = ExitOnErr(InstructionBenchmarkClustering::create(
Points, AnalysisClusteringAlgorithm, AnalysisDbscanNumPoints,
AnalysisClusteringEpsilon, &State.getSubtargetInfo(),