From 7a76140220bd2c4730a2c39b6fd645402040f011 Mon Sep 17 00:00:00 2001 From: Roman Lebedev Date: Wed, 7 Dec 2022 20:14:13 +0300 Subject: [PATCH] [llvm-exegesis] Dry run mode Sometimes we only want to ensure that we can produce snippets (all the way through `SnippetRepetitor`!), but don't care for the execution. E.g. all of our tests are this way. I've built LLVM without PFM and removed my CPU from `X86PfmCounters.td`, and this produces the expected results in that configuration. Reviewed By: courbet Differential Revision: https://reviews.llvm.org/D139448 --- llvm/docs/CommandGuide/llvm-exegesis.rst | 26 ++++++- .../test/tools/llvm-exegesis/AArch64/lit.local.cfg | 5 +- llvm/test/tools/llvm-exegesis/Mips/lit.local.cfg | 5 +- .../llvm-exegesis/PowerPC/latency-by-opcode-name.s | 2 +- .../test/tools/llvm-exegesis/PowerPC/lit.local.cfg | 26 ------- .../llvm-exegesis/PowerPC/unsupported-opcode.s | 2 +- .../analysis-CMOV16rm-noreg-deserialization.test | 58 ++++++++++++++++ ...rization-same-opcode-different-sched-class.test | 2 +- .../inverse_throughput-by-opcode-name.s | 5 +- .../X86/inverse_throughput/lit.local.cfg | 29 -------- .../llvm-exegesis/X86/latency/latency-CMOV32rr.s | 4 +- .../llvm-exegesis/X86/latency/latency-IN16rr.s | 2 +- .../llvm-exegesis/X86/latency/latency-LEA64_32r.s | 4 +- .../llvm-exegesis/X86/latency/latency-LEA64r.s | 4 +- .../llvm-exegesis/X86/latency/latency-SBB8rr.s | 4 +- .../X86/latency/latency-SETCCr-cond-codes-sweep.s | 4 +- .../llvm-exegesis/X86/latency/latency-SQRTSSr.s | 2 +- .../X86/latency/latency-by-opcode-name.s | 4 +- .../tools/llvm-exegesis/X86/latency/lit.local.cfg | 29 -------- .../llvm-exegesis/X86/latency/max-configs.test | 4 +- .../tools/llvm-exegesis/X86/lbr/Inputs/mov_add.att | 6 +- .../test/tools/llvm-exegesis/X86/lbr/lit.local.cfg | 9 ++- llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s | 2 +- llvm/test/tools/llvm-exegesis/X86/lit.local.cfg | 4 -- .../tools/llvm-exegesis/X86/uops/lit.local.cfg | 29 -------- 
.../tools/llvm-exegesis/X86/uops/uops-ADD32mi8.s | 4 +- .../tools/llvm-exegesis/X86/uops/uops-ADD32mr.s | 4 +- .../tools/llvm-exegesis/X86/uops/uops-ADD32rm.s | 4 +- .../tools/llvm-exegesis/X86/uops/uops-ADD_F32m.s | 4 +- .../tools/llvm-exegesis/X86/uops/uops-BEXTR32rm.s | 4 +- .../tools/llvm-exegesis/X86/uops/uops-BSF16rm.s | 4 +- .../tools/llvm-exegesis/X86/uops/uops-BTR64mr.s | 4 +- .../X86/uops/uops-CMOV16rm-noreg-serialization.s | 11 +++ .../llvm-exegesis/X86/uops/uops-CMOV16rm-noreg.s | 17 ----- .../tools/llvm-exegesis/X86/uops/uops-FLDENVm.s | 2 +- .../tools/llvm-exegesis/X86/uops/uops-LEA64r.s | 4 +- .../tools/llvm-exegesis/X86/uops/uops-POPCNT32rr.s | 2 +- llvm/test/tools/llvm-exegesis/X86/uops/uops-STD.s | 4 +- .../llvm-exegesis/X86/uops/uops-VFMADDSS4rm.s | 4 +- .../tools/llvm-exegesis/X86/uops/uops-XCHG64rr.s | 4 +- .../llvm-exegesis/X86/uops/uops-by-opcode-name.s | 4 +- .../llvm-exegesis/X86/uops/uops-misspelled-div.s | 6 +- llvm/test/tools/llvm-exegesis/lit.local.cfg | 2 - llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp | 13 +++- llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h | 4 +- .../llvm-exegesis/lib/LatencyBenchmarkRunner.cpp | 3 +- .../llvm-exegesis/lib/LatencyBenchmarkRunner.h | 1 + llvm/tools/llvm-exegesis/lib/LlvmState.cpp | 46 ++++++++---- llvm/tools/llvm-exegesis/lib/LlvmState.h | 2 +- llvm/tools/llvm-exegesis/lib/Target.cpp | 32 ++++++--- llvm/tools/llvm-exegesis/lib/Target.h | 4 +- llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h | 5 +- llvm/tools/llvm-exegesis/llvm-exegesis.cpp | 81 ++++++++++++++++------ 53 files changed, 301 insertions(+), 249 deletions(-) create mode 100644 llvm/test/tools/llvm-exegesis/X86/analysis-CMOV16rm-noreg-deserialization.test delete mode 100644 llvm/test/tools/llvm-exegesis/X86/inverse_throughput/lit.local.cfg delete mode 100644 llvm/test/tools/llvm-exegesis/X86/latency/lit.local.cfg delete mode 100644 llvm/test/tools/llvm-exegesis/X86/uops/lit.local.cfg create mode 100644 
llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg-serialization.s delete mode 100644 llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg.s delete mode 100644 llvm/test/tools/llvm-exegesis/lit.local.cfg diff --git a/llvm/docs/CommandGuide/llvm-exegesis.rst b/llvm/docs/CommandGuide/llvm-exegesis.rst index bf2222e..d8f98aa 100644 --- a/llvm/docs/CommandGuide/llvm-exegesis.rst +++ b/llvm/docs/CommandGuide/llvm-exegesis.rst @@ -19,9 +19,10 @@ Given an LLVM opcode name and a benchmarking mode, :program:`llvm-exegesis` generates a code snippet that makes execution as serial (resp. as parallel) as possible so that we can measure the latency (resp. inverse throughput/uop decomposition) of the instruction. -The code snippet is jitted and executed on the host subtarget. The time taken -(resp. resource usage) is measured using hardware performance counters. The -result is printed out as YAML to the standard output. +The code snippet is jitted and, unless requested not to, executed on the +host subtarget. The time taken (resp. resource usage) is measured using +hardware performance counters. The result is printed out as YAML +to the standard output. The main goal of this tool is to automatically (in)validate the LLVM's TableDef scheduling models. To that end, we also provide analysis of the results. @@ -195,6 +196,14 @@ OPTIONS In `analysis` mode, you also need to specify at least one of the `-analysis-clusters-output-file=` and `-analysis-inconsistencies-output-file=`. +.. option:: --skip-measurements + + By default, when `-mode=` is specified, the generated snippet will be executed + and measured, and that requires that we are running on the hardware for which + the snippet was generated, and that supports performance measurements. + But sometimes, you just want to generate snippets, and this is exactly what + this option allows one to do. + .. option:: -x86-lbr-sample-period= Specify the LBR sampling period - how many branches before we take a sample. 
@@ -293,10 +302,21 @@ OPTIONS If set, ignore instructions that do not have a sched class (class idx = 0). +.. option:: -mtriple= + + Target triple. See `-version` for available targets. + .. option:: -mcpu= If set, measure the cpu characteristics using the counters for this CPU. This is useful when creating new sched models (the host CPU is unknown to LLVM). + (`-mcpu=help` for details) + +.. option:: --analysis-override-benchmark-triple-and-cpu + + By default, llvm-exegesis will analyze the benchmarks for the triple/CPU they + were measured for, but if you want to analyze them for some other combination + (specified via `-mtriple`/`-mcpu`), you can pass this flag. .. option:: --dump-object-to-disk=true diff --git a/llvm/test/tools/llvm-exegesis/AArch64/lit.local.cfg b/llvm/test/tools/llvm-exegesis/AArch64/lit.local.cfg index 392b282..a768323 100644 --- a/llvm/test/tools/llvm-exegesis/AArch64/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/AArch64/lit.local.cfg @@ -1,7 +1,10 @@ import subprocess import lit.util -if not ('AArch64' in config.root.targets): +if 'native' not in config.available_features: + config.unsupported = True + +elif not ('AArch64' in config.root.targets): # We need support for AArch64. config.unsupported = True diff --git a/llvm/test/tools/llvm-exegesis/Mips/lit.local.cfg b/llvm/test/tools/llvm-exegesis/Mips/lit.local.cfg index 6cd33af..766ae9c 100644 --- a/llvm/test/tools/llvm-exegesis/Mips/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/Mips/lit.local.cfg @@ -1,7 +1,10 @@ import subprocess import lit.util -if not ('Mips' in config.root.targets): +if 'native' not in config.available_features: + config.unsupported = True + +elif not ('Mips' in config.root.targets): # We need support for Mips. 
config.unsupported = True diff --git a/llvm/test/tools/llvm-exegesis/PowerPC/latency-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/PowerPC/latency-by-opcode-name.s index a70a22e3..20e473d 100644 --- a/llvm/test/tools/llvm-exegesis/PowerPC/latency-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/PowerPC/latency-by-opcode-name.s @@ -1,4 +1,4 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=ADD8 | FileCheck %s +# RUN: llvm-exegesis -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 --skip-measurements -mode=latency -opcode-name=ADD8 | FileCheck %s CHECK: --- CHECK-NEXT: mode: latency diff --git a/llvm/test/tools/llvm-exegesis/PowerPC/lit.local.cfg b/llvm/test/tools/llvm-exegesis/PowerPC/lit.local.cfg index 076f218..135f579 100644 --- a/llvm/test/tools/llvm-exegesis/PowerPC/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/PowerPC/lit.local.cfg @@ -1,29 +1,3 @@ -import subprocess -import lit.util - if not ('PowerPC' in config.root.targets): # We need support for PowerPC. config.unsupported = True - -elif not ('powerpc' in config.root.host_triple): - # We need to be running on an PPC host. - config.unsupported = True - -else: - # We need libpfm to be installed and allow reading perf counters. We can - # only know that at runtime, so we try to measure the latency of an empty - # code snippet and bail out on error. 
- llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir) - if not llvm_exegesis_exe: - print('llvm-exegesis not found') - config.unsupported = True - else: - try: - with open(os.devnull, 'w') as quiet: - check_llvm_exegesis_result = subprocess.call( - [llvm_exegesis_exe, '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) - except OSError: - print('could not exec llvm-exegesis') - config.unsupported = True - if not check_llvm_exegesis_result == 0: - config.unsupported = True diff --git a/llvm/test/tools/llvm-exegesis/PowerPC/unsupported-opcode.s b/llvm/test/tools/llvm-exegesis/PowerPC/unsupported-opcode.s index 67348ef..49b79b8 100644 --- a/llvm/test/tools/llvm-exegesis/PowerPC/unsupported-opcode.s +++ b/llvm/test/tools/llvm-exegesis/PowerPC/unsupported-opcode.s @@ -1,3 +1,3 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=SELECT_I8 2>&1 | FileCheck %s +# RUN: llvm-exegesis -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 --skip-measurements -mode=latency -opcode-name=SELECT_I8 2>&1 | FileCheck %s CHECK: Unsupported opcode: isPseudo/usesCustomInserter diff --git a/llvm/test/tools/llvm-exegesis/X86/analysis-CMOV16rm-noreg-deserialization.test b/llvm/test/tools/llvm-exegesis/X86/analysis-CMOV16rm-noreg-deserialization.test new file mode 100644 index 0000000..e86ede8 --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/analysis-CMOV16rm-noreg-deserialization.test @@ -0,0 +1,58 @@ +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s + +# https://bugs.llvm.org/show_bug.cgi?id=41448 +# Verify that deserialization works. 
Since CMOV16rm has a variant sched class, just printing clusters is sufficient + +--- +mode: uops +key: + instructions: + - 'CMOV16rm AX AX RDI i_0x1 %noreg i_0x0 %noreg i_0x0' + - 'CMOV16rm BP BP RDI i_0x1 %noreg i_0x40 %noreg i_0x0' + - 'CMOV16rm BX BX RDI i_0x1 %noreg i_0x80 %noreg i_0x0' + - 'CMOV16rm CX CX RDI i_0x1 %noreg i_0xc0 %noreg i_0x0' + - 'CMOV16rm DX DX RDI i_0x1 %noreg i_0x100 %noreg i_0x0' + - 'CMOV16rm SI SI RDI i_0x1 %noreg i_0x140 %noreg i_0x0' + - 'CMOV16rm R8W R8W RDI i_0x1 %noreg i_0x180 %noreg i_0x0' + - 'CMOV16rm R9W R9W RDI i_0x1 %noreg i_0x1c0 %noreg i_0x0' + - 'CMOV16rm R10W R10W RDI i_0x1 %noreg i_0x200 %noreg i_0x0' + - 'CMOV16rm R11W R11W RDI i_0x1 %noreg i_0x240 %noreg i_0x0' + - 'CMOV16rm R12W R12W RDI i_0x1 %noreg i_0x280 %noreg i_0x0' + - 'CMOV16rm R13W R13W RDI i_0x1 %noreg i_0x2c0 %noreg i_0x0' + - 'CMOV16rm R14W R14W RDI i_0x1 %noreg i_0x300 %noreg i_0x0' + - 'CMOV16rm R15W R15W RDI i_0x1 %noreg i_0x340 %noreg i_0x0' + config: '' + register_initial_values: + - 'AX=0x0' + - 'EFLAGS=0x0' + - 'BP=0x0' + - 'BX=0x0' + - 'CX=0x0' + - 'DX=0x0' + - 'SI=0x0' + - 'R8W=0x0' + - 'R9W=0x0' + - 'R10W=0x0' + - 'R11W=0x0' + - 'R12W=0x0' + - 'R13W=0x0' + - 'R14W=0x0' + - 'R15W=0x0' +cpu_name: znver3 +llvm_triple: x86_64-pc-linux-gnu +num_repetitions: 10000 +measurements: + - { key: Zn3Int, value: 1.0161, per_snippet_value: 14.2254 } + - { key: Zn3FPU, value: 0, per_snippet_value: 0 } + - { key: Zn3Load, value: 1.003, per_snippet_value: 14.042 } + - { key: Zn3Store, value: 0.0023, per_snippet_value: 0.0322 } + - { key: Zn3Divider, value: 0, per_snippet_value: 0 } + - { key: NumMicroOps, value: 1.0146, per_snippet_value: 14.2044 } +error: '' +info: instruction has tied variables, using static renaming. 
+assembled_snippet: 5541574156415541545366B800004883EC08C7042400000000C7442404000000009D66BD000066BB000066B9000066BA000066BE00006641B800006641B900006641BA00006641BB00006641BC00006641BD00006641BE00006641BF0000660F4007660F406F40660F409F80000000660F408FC0000000660F409700010000660F40B74001000066440F40878001000066440F408FC001000066440F40970002000066440F409F4002000066440F40A78002000066440F40AFC002000066440F40B70003000066440F40BF40030000660F4007660F406F40660F409F80000000660F408FC0000000660F409700010000660F40B74001000066440F40878001000066440F408FC001000066440F40970002000066440F409F4002000066440F40A78002000066440F40AFC002000066440F40B70003000066440F40BF40030000660F4007660F406F40660F409F80000000660F408FC0000000660F409700010000660F40B74001000066440F40878001000066440F408FC001000066440F40970002000066440F409F4002000066440F40A78002000066440F40AFC002000066440F40B70003000066440F40BF40030000660F4007660F406F40660F409F80000000660F408FC0000000660F409700010000660F40B74001000066440F40878001000066440F408FC001000066440F40970002000066440F409F4002000066440F40A78002000066440F40AFC002000066440F40B70003000066440F40BF400300005B415C415D415E415F5DC3 +... 
+ + +# CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class, +# CHECK-CLUSTERS-NEXT: {{^}}0, diff --git a/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test b/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test index 9c8eec0..3c3af2e 100644 --- a/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test +++ b/llvm/test/tools/llvm-exegesis/X86/analysis-naive-clusterization-same-opcode-different-sched-class.test @@ -1,4 +1,4 @@ -# RUN: llvm-exegesis -mcpu=znver3 -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s +# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s # Naive clusterization mainly groups by instruction opcode, # but it should also partition the benchmarks of the same opcode diff --git a/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/inverse_throughput-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/inverse_throughput-by-opcode-name.s index 61c4280..49c020b 100644 --- a/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/inverse_throughput-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/inverse_throughput-by-opcode-name.s @@ -1,9 +1,8 @@ -# RUN: llvm-exegesis -mode=inverse_throughput -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=inverse_throughput -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=inverse_throughput --skip-measurements 
-opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=inverse_throughput --skip-measurements -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s CHECK: --- CHECK-NEXT: mode: inverse_throughput CHECK-NEXT: key: CHECK-NEXT: instructions: CHECK-NEXT: ADD32rr -CHECK: key: inverse_throughput diff --git a/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/lit.local.cfg deleted file mode 100644 index c11fe88..0000000 --- a/llvm/test/tools/llvm-exegesis/X86/inverse_throughput/lit.local.cfg +++ /dev/null @@ -1,29 +0,0 @@ -import subprocess -import lit.util - -if not ('X86' in config.root.targets): - # We need support for X86. - config.unsupported = True - -elif not ('x86_64' in config.root.host_triple): - # We need to be running on an X86 host. - config.unsupported = True - -else: - # We need libpfm to be installed and allow reading perf counters. We can - # only know that at runtime, so we try to measure the latency of an empty - # code snippet and bail out on error. 
- llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir) - if not llvm_exegesis_exe: - print('llvm-exegesis not found') - config.unsupported = True - else: - try: - with open(os.devnull, 'w') as quiet: - check_llvm_exegesis_inverse_throughput_result = subprocess.call( - [llvm_exegesis_exe, '-mode', 'inverse_throughput', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) - except OSError: - print('could not exec llvm-exegesis') - config.unsupported = True - if not check_llvm_exegesis_inverse_throughput_result == 0: - config.unsupported = True diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-CMOV32rr.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-CMOV32rr.s index 5def574..9f6a548 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-CMOV32rr.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-CMOV32rr.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=CMOV32rr -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=latency -opcode-name=CMOV32rr -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=CMOV32rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=CMOV32rr -repetition-mode=loop | FileCheck %s CHECK: --- CHECK-NEXT: mode: latency diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-IN16rr.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-IN16rr.s index c57b61a..4a1e66f 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-IN16rr.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-IN16rr.s @@ -1,4 +1,4 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=IN16rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=IN16rr 
-repetition-mode=duplicate | FileCheck %s # FIXME: Sometimes fails with: 'unimplemented operand type' # ALLOW_RETRIES: 2 diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-LEA64_32r.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-LEA64_32r.s index 40b6db9..82c2c315 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-LEA64_32r.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-LEA64_32r.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64_32r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s -# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64_32r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=LEA64_32r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=LEA64_32r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s CHECK: --- CHECK-NEXT: mode: latency diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-LEA64r.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-LEA64r.s index e5d6db2..1115118 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-LEA64r.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-LEA64r.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s -# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=LEA64r -repetition-mode=loop 
-max-configs-per-opcode=2 | FileCheck %s CHECK: --- CHECK-NEXT: mode: latency diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SBB8rr.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SBB8rr.s index 4ac8330..8be38e5 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SBB8rr.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SBB8rr.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SBB8rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SBB8rr -repetition-mode=loop | FileCheck %s CHECK: --- CHECK-NEXT: mode: latency diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SETCCr-cond-codes-sweep.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SETCCr-cond-codes-sweep.s index 3b17ffd..d60df84 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SETCCr-cond-codes-sweep.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SETCCr-cond-codes-sweep.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=SETCCr --max-configs-per-opcode=1 | FileCheck %s --check-prefix=CHECK -# RUN: llvm-exegesis -mode=latency -opcode-name=SETCCr --max-configs-per-opcode=256 | FileCheck %s --check-prefix=SWEEP +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SETCCr --max-configs-per-opcode=1 | FileCheck %s --check-prefix=CHECK +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SETCCr --max-configs-per-opcode=256 | FileCheck %s --check-prefix=SWEEP CHECK: --- CHECK-NEXT: mode: latency diff --git 
a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SQRTSSr.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SQRTSSr.s index 1908b9a..5c607c1 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-SQRTSSr.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-SQRTSSr.s @@ -1,4 +1,4 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=SQRTSSr -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SQRTSSr -repetition-mode=loop | FileCheck %s # Check that the setup code for MXCSR does not crash the snippet. diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/latency-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/X86/latency/latency-by-opcode-name.s index f69f263..3121f64 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/latency-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/X86/latency/latency-by-opcode-name.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=latency -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s CHECK: --- CHECK-NEXT: mode: latency diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/latency/lit.local.cfg deleted file mode 100644 index 994810e..0000000 --- a/llvm/test/tools/llvm-exegesis/X86/latency/lit.local.cfg +++ /dev/null @@ -1,29 +0,0 @@ -import subprocess -import lit.util - -if not ('X86' in config.root.targets): - # We need support for X86. 
- config.unsupported = True - -elif not ('x86_64' in config.root.host_triple): - # We need to be running on an X86 host. - config.unsupported = True - -else: - # We need libpfm to be installed and allow reading perf counters. We can - # only know that at runtime, so we try to measure the latency of an empty - # code snippet and bail out on error. - llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir) - if not llvm_exegesis_exe: - print('llvm-exegesis not found') - config.unsupported = True - else: - try: - with open(os.devnull, 'w') as quiet: - check_llvm_exegesis_latency_result = subprocess.call( - [llvm_exegesis_exe, '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) - except OSError: - print('could not exec llvm-exegesis') - config.unsupported = True - if not check_llvm_exegesis_latency_result == 0: - config.unsupported = True diff --git a/llvm/test/tools/llvm-exegesis/X86/latency/max-configs.test b/llvm/test/tools/llvm-exegesis/X86/latency/max-configs.test index a980e85..98e95f0 100644 --- a/llvm/test/tools/llvm-exegesis/X86/latency/max-configs.test +++ b/llvm/test/tools/llvm-exegesis/X86/latency/max-configs.test @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=1 | FileCheck -check-prefixes=CHECK,CHECK1 %s -# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=2 | FileCheck -check-prefixes=CHECK,CHECK2 %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SBB8rr -max-configs-per-opcode=1 | FileCheck -check-prefixes=CHECK,CHECK1 %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SBB8rr -max-configs-per-opcode=2 | FileCheck -check-prefixes=CHECK,CHECK2 %s CHECK: --- CHECK-NEXT: mode: latency diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/Inputs/mov_add.att 
b/llvm/test/tools/llvm-exegesis/X86/lbr/Inputs/mov_add.att index 8f85b39..9b2314f 100644 --- a/llvm/test/tools/llvm-exegesis/X86/lbr/Inputs/mov_add.att +++ b/llvm/test/tools/llvm-exegesis/X86/lbr/Inputs/mov_add.att @@ -1,4 +1,4 @@ -# LLVM-EXEGESIS-LIVEIN RDI -# LLVM-EXEGESIS-DEFREG XMM1 42 +# LLVM-EXEGESIS-LIVEIN RDI +# LLVM-EXEGESIS-DEFREG XMM1 42 movq $2, %rdi -addq $0x10, %rdi \ No newline at end of file +addq $0x10, %rdi diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg index 14d4476..a188cb2 100644 --- a/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/X86/lbr/lit.local.cfg @@ -1,15 +1,18 @@ import subprocess import lit.util -if not ('X86' in config.root.targets): +if 'native' not in config.available_features: + config.unsupported = True + +elif not ('X86' in config.root.targets): # We need support for X86. config.unsupported = True elif not ('x86_64' in config.root.host_triple): # We need to be running on an X86 host. config.unsupported = True - -else: + +else: # We need libpfm to be installed and the host to be support LBR format with cycles. 
llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir) if not llvm_exegesis_exe: diff --git a/llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s b/llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s index 5f72e8f..6f0762b 100644 --- a/llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s +++ b/llvm/test/tools/llvm-exegesis/X86/lbr/mov-add.s @@ -1,4 +1,4 @@ -# RUN: llvm-exegesis -mode=latency --repetition-mode=loop --x86-lbr-sample-period=521 --snippets-file=%p/Inputs/mov_add.att +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --repetition-mode=loop --x86-lbr-sample-period=521 --snippets-file=%p/Inputs/mov_add.att CHECK: --- diff --git a/llvm/test/tools/llvm-exegesis/X86/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/lit.local.cfg index 6be98bd..7230f35 100644 --- a/llvm/test/tools/llvm-exegesis/X86/lit.local.cfg +++ b/llvm/test/tools/llvm-exegesis/X86/lit.local.cfg @@ -1,7 +1,3 @@ if not ('X86' in config.root.targets): # We need support for X86. config.unsupported = True - -elif not ('x86_64' in config.root.host_triple): - # We need to be running on an X86 host. - config.unsupported = True \ No newline at end of file diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/lit.local.cfg b/llvm/test/tools/llvm-exegesis/X86/uops/lit.local.cfg deleted file mode 100644 index d3a8f3f..0000000 --- a/llvm/test/tools/llvm-exegesis/X86/uops/lit.local.cfg +++ /dev/null @@ -1,29 +0,0 @@ -import subprocess -import lit.util - -if not ('X86' in config.root.targets): - # We need support for X86. - config.unsupported = True - -elif not ('x86_64' in config.root.host_triple): - # We need to be running on an X86 host. - config.unsupported = True - -else: - # We need libpfm to be installed and allow reading perf counters. We can - # only know that at runtime, so we try to measure the latency of an empty - # code snippet and bail out on error. 
- llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir) - if not llvm_exegesis_exe: - print('llvm-exegesis not found') - config.unsupported = True - else: - try: - with open(os.devnull, 'w') as quiet: - check_llvm_exegesis_uops_result = subprocess.call( - [llvm_exegesis_exe, '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet) - except OSError: - print('could not exec llvm-exegesis') - config.unsupported = True - if not check_llvm_exegesis_uops_result == 0: - config.unsupported = True \ No newline at end of file diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32mi8.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32mi8.s index 6aef789..c32777c 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32mi8.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32mi8.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mi8 -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mi8 -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32mi8 -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32mi8 -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32mr.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32mr.s index 0c3628f..e0d9c239 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32mr.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32mr.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mr -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mr -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32mr 
-repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32mr -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32rm.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32rm.s index b63c6e7..5d8a43d 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32rm.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD32rm.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rm -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rm -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32rm -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32rm -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD_F32m.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD_F32m.s index 023fa78..3b9caa5 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD_F32m.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-ADD_F32m.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD_F32m -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD_F32m -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD_F32m -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD_F32m -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-BEXTR32rm.s 
b/llvm/test/tools/llvm-exegesis/X86/uops/uops-BEXTR32rm.s index a33023c..f7c899f 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-BEXTR32rm.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-BEXTR32rm.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=BEXTR32rm -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=BEXTR32rm -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BEXTR32rm -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BEXTR32rm -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-BSF16rm.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-BSF16rm.s index fdf99ff..bd79a70 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-BSF16rm.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-BSF16rm.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=BSF16rm -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=BSF16rm -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BSF16rm -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BSF16rm -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-BTR64mr.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-BTR64mr.s index da622e4..8c89a8e 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-BTR64mr.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-BTR64mr.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=BTR64mr -repetition-mode=duplicate | FileCheck %s -# RUN: 
llvm-exegesis -mode=uops -opcode-name=BTR64mr -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BTR64mr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BTR64mr -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg-serialization.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg-serialization.s new file mode 100644 index 0000000..1b0fe8c --- /dev/null +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg-serialization.s @@ -0,0 +1,11 @@ +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=CMOV16rm -benchmarks-file=- | FileCheck %s -check-prefixes=CHECK-YAML + +# https://bugs.llvm.org/show_bug.cgi?id=41448 +# Verify that we correctly serialize RegNo 0 as %noreg, not as an empty string! + +CHECK-YAML: --- +CHECK-YAML-NEXT: mode: uops +CHECK-YAML-NEXT: key: +CHECK-YAML-NEXT: instructions: +CHECK-YAML-NEXT: - 'CMOV16rm {{[A-Z0-9]+}} {{[A-Z0-9]+}} {{[A-Z0-9]+}} i_0x1 %noreg i_0x0 %noreg i_0x{{[0-9a-f]}}' +CHECK-YAML-LAST: ... 
diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg.s deleted file mode 100644 index 3fc1f31..0000000 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-CMOV16rm-noreg.s +++ /dev/null @@ -1,17 +0,0 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=CMOV16rm -benchmarks-file=%t.CMOV16rm-uops.yaml -# RUN: FileCheck -check-prefixes=CHECK-YAML -input-file=%t.CMOV16rm-uops.yaml %s -# RUN: llvm-exegesis -mcpu=bdver2 -mode=analysis -benchmarks-file=%t.CMOV16rm-uops.yaml -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s - -# https://bugs.llvm.org/show_bug.cgi?id=41448 -# 1. Verify that we correctly serialize RegNo 0 as %noreg, not as an empty string! -# 2. Verify that deserialization works. Since CMOV16rm has a variant sched class, just printing clusters is sufficient - -CHECK-YAML: --- -CHECK-YAML-NEXT: mode: uops -CHECK-YAML-NEXT: key: -CHECK-YAML-NEXT: instructions: -CHECK-YAML-NEXT: - 'CMOV16rm {{[A-Z0-9]+}} {{[A-Z0-9]+}} {{[A-Z0-9]+}} i_0x1 %noreg i_0x0 %noreg i_0x{{[0-9a-f]}}' -CHECK-YAML-LAST: ... 
- -# CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class, -# CHECK-CLUSTERS-NEXT: {{^}}0, diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-FLDENVm.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-FLDENVm.s index be182d8..07aeed1 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-FLDENVm.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-FLDENVm.s @@ -1,4 +1,4 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=FLDENVm,FLDL2E -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=FLDENVm,FLDL2E -repetition-mode=duplicate | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-LEA64r.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-LEA64r.s index 68dde0a..057b156 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-LEA64r.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-LEA64r.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s CHECK: --- CHECK-NEXT: mode: uops diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-POPCNT32rr.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-POPCNT32rr.s index 67d6524..f59d5c2 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-POPCNT32rr.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-POPCNT32rr.s @@ -1,4 +1,4 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=POPCNT32rr 2>&1 | FileCheck %s +# 
RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=POPCNT32rr 2>&1 | FileCheck %s CHECK: --- CHECK-NEXT: mode: uops diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-STD.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-STD.s index c67beed..0757db9 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-STD.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-STD.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=STD -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=STD -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=STD -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=STD -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-VFMADDSS4rm.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-VFMADDSS4rm.s index 9044fbb..b4b5172 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-VFMADDSS4rm.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-VFMADDSS4rm.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=VFMADDSS4rm -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=VFMADDSS4rm -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=VFMADDSS4rm -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=VFMADDSS4rm -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-XCHG64rr.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-XCHG64rr.s index c73b2bc..ad4868c 100644 --- 
a/llvm/test/tools/llvm-exegesis/X86/uops/uops-XCHG64rr.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-XCHG64rr.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=XCHG64rr -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=XCHG64rr -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=XCHG64rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=XCHG64rr -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-by-opcode-name.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-by-opcode-name.s index 5d13f20..c751ef5 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-by-opcode-name.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-by-opcode-name.s @@ -1,5 +1,5 @@ -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s -# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s +# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s CHECK: mode: uops CHECK-NEXT: key: diff --git a/llvm/test/tools/llvm-exegesis/X86/uops/uops-misspelled-div.s b/llvm/test/tools/llvm-exegesis/X86/uops/uops-misspelled-div.s index 05e8f4d..5e4a304 100644 --- a/llvm/test/tools/llvm-exegesis/X86/uops/uops-misspelled-div.s +++ b/llvm/test/tools/llvm-exegesis/X86/uops/uops-misspelled-div.s @@ -1,7 +1,7 @@ -# RUN: not llvm-exegesis -mode=uops -snippets-file=%s 2>&1 | FileCheck %s +# RUN: not llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 
-mode=uops --skip-measurements -snippets-file=%s 2>&1 | FileCheck %s -# LLVM-EXEGESIS-DEFREG CL 1 -# LLVM-EXEGESIS-DEFREG AX 1 +# LLVM-EXEGESIS-DEFREG CL 1 +# LLVM-EXEGESIS-DEFREG AX 1 div8r cl CHECK: error: invalid instruction mnemonic 'div8r' diff --git a/llvm/test/tools/llvm-exegesis/lit.local.cfg b/llvm/test/tools/llvm-exegesis/lit.local.cfg deleted file mode 100644 index b2fc141..0000000 --- a/llvm/test/tools/llvm-exegesis/lit.local.cfg +++ /dev/null @@ -1,2 +0,0 @@ -if 'native' not in config.available_features: - config.unsupported = True diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp index 03e7ccc..70e3911 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.cpp @@ -30,8 +30,11 @@ namespace llvm { namespace exegesis { BenchmarkRunner::BenchmarkRunner(const LLVMState &State, - InstructionBenchmark::ModeE Mode) - : State(State), Mode(Mode), Scratch(std::make_unique()) {} + InstructionBenchmark::ModeE Mode, + bool BenchmarkSkipMeasurements) + : State(State), Mode(Mode), + BenchmarkSkipMeasurements(BenchmarkSkipMeasurements), + Scratch(std::make_unique()) {} BenchmarkRunner::~BenchmarkRunner() = default; @@ -213,6 +216,12 @@ Expected BenchmarkRunner::runConfiguration( ObjectFile = getObjectFromBuffer(OS.str()); } + if (BenchmarkSkipMeasurements) { + InstrBenchmark.Error = + "in --skip-measurements mode, actual measurements skipped."; + continue; + } + const FunctionExecutorImpl Executor(State, std::move(ObjectFile), Scratch.get()); auto NewMeasurements = runMeasurements(Executor); diff --git a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h index b66902e..870105d 100644 --- a/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h +++ b/llvm/tools/llvm-exegesis/lib/BenchmarkRunner.h @@ -35,7 +35,8 @@ namespace
exegesis { class BenchmarkRunner { public: explicit BenchmarkRunner(const LLVMState &State, - InstructionBenchmark::ModeE Mode); + InstructionBenchmark::ModeE Mode, + bool BenchmarkSkipMeasurements); virtual ~BenchmarkRunner(); @@ -77,6 +78,7 @@ public: protected: const LLVMState &State; const InstructionBenchmark::ModeE Mode; + const bool BenchmarkSkipMeasurements; private: virtual Expected> diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp index 6cdefb8..e837bb1 100644 --- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp +++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.cpp @@ -20,8 +20,9 @@ namespace exegesis { LatencyBenchmarkRunner::LatencyBenchmarkRunner( const LLVMState &State, InstructionBenchmark::ModeE Mode, + bool BenchmarkSkipMeasurements, InstructionBenchmark::ResultAggregationModeE ResultAgg) - : BenchmarkRunner(State, Mode) { + : BenchmarkRunner(State, Mode, BenchmarkSkipMeasurements) { assert((Mode == InstructionBenchmark::Latency || Mode == InstructionBenchmark::InverseThroughput) && "invalid mode"); diff --git a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h index b9b9efc..942cc80 100644 --- a/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h +++ b/llvm/tools/llvm-exegesis/lib/LatencyBenchmarkRunner.h @@ -23,6 +23,7 @@ class LatencyBenchmarkRunner : public BenchmarkRunner { public: LatencyBenchmarkRunner( const LLVMState &State, InstructionBenchmark::ModeE Mode, + bool BenchmarkSkipMeasurements, InstructionBenchmark::ResultAggregationModeE ResultAggMode); ~LatencyBenchmarkRunner() override; diff --git a/llvm/tools/llvm-exegesis/lib/LlvmState.cpp b/llvm/tools/llvm-exegesis/lib/LlvmState.cpp index 7a4770d..5239e82 100644 --- a/llvm/tools/llvm-exegesis/lib/LlvmState.cpp +++ b/llvm/tools/llvm-exegesis/lib/LlvmState.cpp @@ -22,33 +22,55 @@ namespace llvm { namespace exegesis { -Expected 
LLVMState::Create(std::string Triple, std::string CpuName, +Expected LLVMState::Create(std::string TripleName, + std::string CpuName, const StringRef Features) { - if (Triple.empty()) - Triple = sys::getProcessTriple(); - if (CpuName.empty()) - CpuName = sys::getHostCPUName().str(); + if (TripleName.empty()) + TripleName = Triple::normalize(sys::getDefaultTargetTriple()); + + Triple TheTriple(TripleName); + + // Get the target specific parser. std::string Error; - const Target *const TheTarget = TargetRegistry::lookupTarget(Triple, Error); + const Target *TheTarget = + TargetRegistry::lookupTarget(/*MArch=*/"", TheTriple, Error); if (!TheTarget) { - return llvm::make_error( - "no LLVM target for triple " + Triple, llvm::inconvertibleErrorCode()); + return llvm::make_error("no LLVM target for triple " + + TripleName, + llvm::inconvertibleErrorCode()); + } + + // Update Triple with the updated triple from the target lookup. + TripleName = TheTriple.str(); + + if (CpuName == "native") + CpuName = std::string(llvm::sys::getHostCPUName()); + + std::unique_ptr STI( + TheTarget->createMCSubtargetInfo(TripleName, CpuName, "")); + assert(STI && "Unable to create subtarget info!"); + if (!STI->isCPUStringValid(CpuName)) { + return llvm::make_error(Twine("invalid CPU name (") + .concat(CpuName) + .concat(") for triple ") + .concat(TripleName), + llvm::inconvertibleErrorCode()); } const TargetOptions Options; std::unique_ptr TM( static_cast(TheTarget->createTargetMachine( - Triple, CpuName, Features, Options, Reloc::Model::Static))); + TripleName, CpuName, Features, Options, Reloc::Model::Static))); if (!TM) { return llvm::make_error( "unable to create target machine", llvm::inconvertibleErrorCode()); } const ExegesisTarget *ET = - Triple.empty() ? &ExegesisTarget::getDefault() - : ExegesisTarget::lookup(TM->getTargetTriple()); + TripleName.empty() ? 
&ExegesisTarget::getDefault() + : ExegesisTarget::lookup(TM->getTargetTriple()); if (!ET) { return llvm::make_error( - "no Exegesis target for triple " + Triple, + "no Exegesis target for triple " + TripleName, llvm::inconvertibleErrorCode()); } return LLVMState(std::move(TM), ET, CpuName); diff --git a/llvm/tools/llvm-exegesis/lib/LlvmState.h b/llvm/tools/llvm-exegesis/lib/LlvmState.h index 55247fe..2651a85 100644 --- a/llvm/tools/llvm-exegesis/lib/LlvmState.h +++ b/llvm/tools/llvm-exegesis/lib/LlvmState.h @@ -39,7 +39,7 @@ public: // If `Triple` is empty, uses the host triple. // If `CpuName` is empty, uses the host CPU. // `Features` is intended for tests. - static Expected Create(std::string Triple, std::string CpuName, + static Expected Create(std::string TripleName, std::string CpuName, StringRef Features = ""); const TargetMachine &getTargetMachine() const { return *TheTargetMachine; } diff --git a/llvm/tools/llvm-exegesis/lib/Target.cpp b/llvm/tools/llvm-exegesis/lib/Target.cpp index 9ff19d5..1ee5b31 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.cpp +++ b/llvm/tools/llvm-exegesis/lib/Target.cpp @@ -70,6 +70,7 @@ std::unique_ptr ExegesisTarget::createSnippetGenerator( Expected> ExegesisTarget::createBenchmarkRunner( InstructionBenchmark::ModeE Mode, const LLVMState &State, + bool BenchmarkSkipMeasurements, InstructionBenchmark::ResultAggregationModeE ResultAggMode) const { PfmCountersInfo PfmCounters = State.getPfmCounters(); switch (Mode) { @@ -77,21 +78,29 @@ ExegesisTarget::createBenchmarkRunner( return nullptr; case InstructionBenchmark::Latency: case InstructionBenchmark::InverseThroughput: - if (!PfmCounters.CycleCounter) { + if (!BenchmarkSkipMeasurements && !PfmCounters.CycleCounter) { const char *ModeName = Mode == InstructionBenchmark::Latency ? 
"latency" : "inverse_throughput"; return make_error( Twine("can't run '") .concat(ModeName) - .concat("' mode, sched model does not define a cycle counter.")); + .concat( + "' mode, sched model does not define a cycle counter. You " + "can pass --skip-measurements to skip the actual " + "benchmarking.")); } - return createLatencyBenchmarkRunner(State, Mode, ResultAggMode); + return createLatencyBenchmarkRunner(State, Mode, BenchmarkSkipMeasurements, + ResultAggMode); case InstructionBenchmark::Uops: - if (!PfmCounters.UopsCounter && !PfmCounters.IssueCounters) - return make_error("can't run 'uops' mode, sched model does not " - "define uops or issue counters."); - return createUopsBenchmarkRunner(State, ResultAggMode); + if (!BenchmarkSkipMeasurements && !PfmCounters.UopsCounter && + !PfmCounters.IssueCounters) + return make_error( + "can't run 'uops' mode, sched model does not define uops or issue " + "counters. You can pass --skip-measurements to skip the actual " + "benchmarking."); + return createUopsBenchmarkRunner(State, BenchmarkSkipMeasurements, + ResultAggMode); } return nullptr; } @@ -108,14 +117,17 @@ std::unique_ptr ExegesisTarget::createParallelSnippetGenerator std::unique_ptr ExegesisTarget::createLatencyBenchmarkRunner( const LLVMState &State, InstructionBenchmark::ModeE Mode, + bool BenchmarkSkipMeasurements, InstructionBenchmark::ResultAggregationModeE ResultAggMode) const { - return std::make_unique(State, Mode, ResultAggMode); + return std::make_unique( + State, Mode, BenchmarkSkipMeasurements, ResultAggMode); } std::unique_ptr ExegesisTarget::createUopsBenchmarkRunner( - const LLVMState &State, + const LLVMState &State, bool BenchmarkSkipMeasurements, InstructionBenchmark::ResultAggregationModeE /*unused*/) const { - return std::make_unique(State); + return std::make_unique(State, + BenchmarkSkipMeasurements); } static_assert(std::is_pod::value, diff --git a/llvm/tools/llvm-exegesis/lib/Target.h b/llvm/tools/llvm-exegesis/lib/Target.h index 
28c103aa..9487869 100644 --- a/llvm/tools/llvm-exegesis/lib/Target.h +++ b/llvm/tools/llvm-exegesis/lib/Target.h @@ -155,6 +155,7 @@ public: // Creates a benchmark runner for the given mode. Expected> createBenchmarkRunner( InstructionBenchmark::ModeE Mode, const LLVMState &State, + bool BenchmarkSkipMeasurements, InstructionBenchmark::ResultAggregationModeE ResultAggMode = InstructionBenchmark::Min) const; @@ -193,9 +194,10 @@ private: const LLVMState &State, const SnippetGenerator::Options &Opts) const; std::unique_ptr virtual createLatencyBenchmarkRunner( const LLVMState &State, InstructionBenchmark::ModeE Mode, + bool BenchmarkSkipMeasurements, InstructionBenchmark::ResultAggregationModeE ResultAggMode) const; std::unique_ptr virtual createUopsBenchmarkRunner( - const LLVMState &State, + const LLVMState &State, bool BenchmarkSkipMeasurements, InstructionBenchmark::ResultAggregationModeE ResultAggMode) const; const ExegesisTarget *Next = nullptr; diff --git a/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h index cda74eb..f9e5926 100644 --- a/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h +++ b/llvm/tools/llvm-exegesis/lib/UopsBenchmarkRunner.h @@ -21,8 +21,9 @@ namespace exegesis { class UopsBenchmarkRunner : public BenchmarkRunner { public: - UopsBenchmarkRunner(const LLVMState &State) - : BenchmarkRunner(State, InstructionBenchmark::Uops) {} + UopsBenchmarkRunner(const LLVMState &State, bool BenchmarkSkipMeasurements) + : BenchmarkRunner(State, InstructionBenchmark::Uops, + BenchmarkSkipMeasurements) {} ~UopsBenchmarkRunner() override; static constexpr const size_t kMinNumDifferentAddresses = 6; diff --git a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp index 9f1317d..8da8d6d 100644 --- a/llvm/tools/llvm-exegesis/llvm-exegesis.cpp +++ b/llvm/tools/llvm-exegesis/llvm-exegesis.cpp @@ -35,6 +35,8 @@ #include "llvm/Support/CommandLine.h" #include 
"llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/InitLLVM.h" #include "llvm/Support/Path.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetSelect.h" @@ -87,7 +89,8 @@ static cl::opt BenchmarkMode( static cl::opt ResultAggMode( "result-aggregation-mode", - cl::desc("How to aggregate multi-values result"), cl::cat(Options), + cl::desc("How to aggregate multi-values result"), + cl::cat(BenchmarkOptions), cl::values(clEnumValN(exegesis::InstructionBenchmark::Min, "min", "Keep min reading"), clEnumValN(exegesis::InstructionBenchmark::Max, "max", @@ -111,6 +114,11 @@ static cl::opt RepetitionMode( "All of the above and take the minimum of measurements")), cl::init(exegesis::InstructionBenchmark::Duplicate)); +static cl::opt BenchmarkSkipMeasurements( + "skip-measurements", + cl::desc("do everything except actually performing the measurements"), + cl::cat(BenchmarkOptions), cl::init(false)); + static cl::opt NumRepetitions("num-repetitions", cl::desc("number of time to repeat the asm snippet"), @@ -177,10 +185,23 @@ static cl::opt AnalysisDisplayUnstableOpcodes( "instead show only such unstable opcodes"), cl::cat(AnalysisOptions), cl::init(false)); -static cl::opt CpuName( - "mcpu", - cl::desc("cpu name to use for pfm counters, leave empty to autodetect"), - cl::cat(Options), cl::init("")); +static cl::opt AnalysisOverrideBenchmarksTripleAndCpu( + "analysis-override-benchmark-triple-and-cpu", + cl::desc("By default, we analyze the benchmarks for the triple/CPU they " + "were measured for, but if you want to analyze them for some " + "other combination (specified via -mtriple/-mcpu), you can " + "pass this flag."), + cl::cat(AnalysisOptions), cl::init(false)); + +static cl::opt + TripleName("mtriple", + cl::desc("Target triple. 
See -version for available targets"), + cl::cat(Options)); + +static cl::opt + MCPU("mcpu", + cl::desc("Target a specific cpu type (-mcpu=help for details)"), + cl::value_desc("cpu-name"), cl::cat(Options)); static cl::opt DumpObjectToDisk("dump-object-to-disk", @@ -291,28 +312,31 @@ generateSnippets(const LLVMState &State, unsigned Opcode, } void benchmarkMain() { + if (!BenchmarkSkipMeasurements) { #ifndef HAVE_LIBPFM - ExitWithError("benchmarking unavailable, LLVM was built without libpfm."); + ExitWithError( + "benchmarking unavailable, LLVM was built without libpfm. You can pass " + "--skip-measurements to skip the actual benchmarking."); +#else + if (exegesis::pfm::pfmInitialize()) + ExitWithError("cannot initialize libpfm"); #endif + } - if (exegesis::pfm::pfmInitialize()) - ExitWithError("cannot initialize libpfm"); - - InitializeAllTargets(); - InitializeAllTargetMCs(); InitializeAllAsmPrinters(); InitializeAllAsmParsers(); InitializeAllExegesisTargets(); - const LLVMState State = ExitOnErr(LLVMState::Create("", CpuName)); + const LLVMState State = ExitOnErr(LLVMState::Create(TripleName, MCPU)); // Preliminary check to ensure features needed for requested // benchmark mode are present on target CPU and/or OS. 
- ExitOnErr(State.getExegesisTarget().checkFeatureSupport()); + if (!BenchmarkSkipMeasurements) + ExitOnErr(State.getExegesisTarget().checkFeatureSupport()); const std::unique_ptr Runner = ExitOnErr(State.getExegesisTarget().createBenchmarkRunner( - BenchmarkMode, State, ResultAggMode)); + BenchmarkMode, State, BenchmarkSkipMeasurements, ResultAggMode)); if (!Runner) { ExitWithError("cannot create benchmark runner"); } @@ -411,8 +435,6 @@ static void analysisMain() { "and --analysis-inconsistencies-output-file must be specified"); } - InitializeAllTargets(); - InitializeAllTargetMCs(); InitializeAllAsmPrinters(); InitializeAllDisassemblers(); InitializeAllExegesisTargets(); @@ -433,10 +455,12 @@ static void analysisMain() { "is unsupported."); } auto TripleAndCpu = *TriplesAndCpus.begin(); - if (!CpuName.empty()) { + if (AnalysisOverrideBenchmarksTripleAndCpu) { llvm::errs() << "overridding file CPU name (" << TripleAndCpu.CpuName - << ") with provided CPU name (" << CpuName << ")\n"; - TripleAndCpu.CpuName = CpuName; + << ") with provided triple (" << TripleName + << ") and CPU name (" << MCPU << ")\n"; + TripleAndCpu.LLVMTriple = TripleName; + TripleAndCpu.CpuName = MCPU; } llvm::errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '" << TripleAndCpu.CpuName << "'\n"; @@ -474,7 +498,24 @@ static void analysisMain() { int main(int Argc, char **Argv) { using namespace llvm; - cl::ParseCommandLineOptions(Argc, Argv, ""); + + InitLLVM X(Argc, Argv); + + // Initialize targets so we can print them when flag --version is specified. + InitializeAllTargetInfos(); + InitializeAllTargets(); + InitializeAllTargetMCs(); + + // Enable printing of available targets when flag --version is specified.
+ cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion); + + cl::HideUnrelatedOptions({&llvm::exegesis::Options, + &llvm::exegesis::BenchmarkOptions, + &llvm::exegesis::AnalysisOptions}); + + cl::ParseCommandLineOptions(Argc, Argv, + "llvm host machine instruction characteristics " + "measurement and analysis.\n"); exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) { if (Err.isA()) -- 2.7.4