generates a code snippet that makes execution as serial (resp. as parallel) as
possible so that we can measure the latency (resp. inverse throughput/uop decomposition)
of the instruction.
-The code snippet is jitted and executed on the host subtarget. The time taken
-(resp. resource usage) is measured using hardware performance counters. The
-result is printed out as YAML to the standard output.
+The code snippet is jitted and, unless requested not to, executed on the
+host subtarget. The time taken (resp. resource usage) is measured using
+hardware performance counters. The result is printed out as YAML
+to the standard output.
The main goal of this tool is to automatically (in)validate LLVM's TableGen
scheduling models. To that end, we also provide analysis of the results.
In `analysis` mode, you also need to specify at least one of the
`-analysis-clusters-output-file=` and `-analysis-inconsistencies-output-file=`.
+.. option:: --skip-measurements
+
+ By default, when `-mode=` is specified, the generated snippet will be executed
+ and measured, and that requires that we are running on the hardware for which
+ the snippet was generated, and that supports performance measurements.
+ But sometimes, you just want to generate snippets, and this is exactly what
+ this option allows one to do.
+
.. option:: -x86-lbr-sample-period=<nBranches/sample>
Specify the LBR sampling period - how many branches before we take a sample.
If set, ignore instructions that do not have a sched class (class idx = 0).
+.. option:: -mtriple=<triple name>
+
+ Target triple. See `-version` for available targets.
+
.. option:: -mcpu=<cpu name>
If set, measure the cpu characteristics using the counters for this CPU. This
is useful when creating new sched models (the host CPU is unknown to LLVM).
+ (`-mcpu=help` for details)
+
+.. option:: --analysis-override-benchmark-triple-and-cpu
+
+ By default, llvm-exegesis will analyze the benchmarks for the triple/CPU they
+ were measured for, but if you want to analyze them for some other combination
+ (specified via `-mtriple`/`-mcpu`), you can pass this flag.
.. option:: --dump-object-to-disk=true
import subprocess
import lit.util
-if not ('AArch64' in config.root.targets):
+if 'native' not in config.available_features:
+ config.unsupported = True
+
+elif not ('AArch64' in config.root.targets):
# We need support for AArch64.
config.unsupported = True
import subprocess
import lit.util
-if not ('Mips' in config.root.targets):
+if 'native' not in config.available_features:
+ config.unsupported = True
+
+elif not ('Mips' in config.root.targets):
# We need support for Mips.
config.unsupported = True
-# RUN: llvm-exegesis -mode=latency -opcode-name=ADD8 | FileCheck %s
+# RUN: llvm-exegesis -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 --skip-measurements -mode=latency -opcode-name=ADD8 | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: latency
-import subprocess
-import lit.util
-
if not ('PowerPC' in config.root.targets):
# We need support for PowerPC.
config.unsupported = True
-
-elif not ('powerpc' in config.root.host_triple):
- # We need to be running on an PPC host.
- config.unsupported = True
-
-else:
- # We need libpfm to be installed and allow reading perf counters. We can
- # only know that at runtime, so we try to measure the latency of an empty
- # code snippet and bail out on error.
- llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir)
- if not llvm_exegesis_exe:
- print('llvm-exegesis not found')
- config.unsupported = True
- else:
- try:
- with open(os.devnull, 'w') as quiet:
- check_llvm_exegesis_result = subprocess.call(
- [llvm_exegesis_exe, '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
- except OSError:
- print('could not exec llvm-exegesis')
- config.unsupported = True
- if not check_llvm_exegesis_result == 0:
- config.unsupported = True
-# RUN: llvm-exegesis -mode=latency -opcode-name=SELECT_I8 2>&1 | FileCheck %s
+# RUN: llvm-exegesis -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 --skip-measurements -mode=latency -opcode-name=SELECT_I8 2>&1 | FileCheck %s
CHECK: Unsupported opcode: isPseudo/usesCustomInserter
--- /dev/null
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s
+
+# https://bugs.llvm.org/show_bug.cgi?id=41448
+# Verify that deserialization works. Since CMOV16rm has a variant sched class, just printing clusters is sufficient
+
+---
+mode: uops
+key:
+ instructions:
+ - 'CMOV16rm AX AX RDI i_0x1 %noreg i_0x0 %noreg i_0x0'
+ - 'CMOV16rm BP BP RDI i_0x1 %noreg i_0x40 %noreg i_0x0'
+ - 'CMOV16rm BX BX RDI i_0x1 %noreg i_0x80 %noreg i_0x0'
+ - 'CMOV16rm CX CX RDI i_0x1 %noreg i_0xc0 %noreg i_0x0'
+ - 'CMOV16rm DX DX RDI i_0x1 %noreg i_0x100 %noreg i_0x0'
+ - 'CMOV16rm SI SI RDI i_0x1 %noreg i_0x140 %noreg i_0x0'
+ - 'CMOV16rm R8W R8W RDI i_0x1 %noreg i_0x180 %noreg i_0x0'
+ - 'CMOV16rm R9W R9W RDI i_0x1 %noreg i_0x1c0 %noreg i_0x0'
+ - 'CMOV16rm R10W R10W RDI i_0x1 %noreg i_0x200 %noreg i_0x0'
+ - 'CMOV16rm R11W R11W RDI i_0x1 %noreg i_0x240 %noreg i_0x0'
+ - 'CMOV16rm R12W R12W RDI i_0x1 %noreg i_0x280 %noreg i_0x0'
+ - 'CMOV16rm R13W R13W RDI i_0x1 %noreg i_0x2c0 %noreg i_0x0'
+ - 'CMOV16rm R14W R14W RDI i_0x1 %noreg i_0x300 %noreg i_0x0'
+ - 'CMOV16rm R15W R15W RDI i_0x1 %noreg i_0x340 %noreg i_0x0'
+ config: ''
+ register_initial_values:
+ - 'AX=0x0'
+ - 'EFLAGS=0x0'
+ - 'BP=0x0'
+ - 'BX=0x0'
+ - 'CX=0x0'
+ - 'DX=0x0'
+ - 'SI=0x0'
+ - 'R8W=0x0'
+ - 'R9W=0x0'
+ - 'R10W=0x0'
+ - 'R11W=0x0'
+ - 'R12W=0x0'
+ - 'R13W=0x0'
+ - 'R14W=0x0'
+ - 'R15W=0x0'
+cpu_name: znver3
+llvm_triple: x86_64-pc-linux-gnu
+num_repetitions: 10000
+measurements:
+ - { key: Zn3Int, value: 1.0161, per_snippet_value: 14.2254 }
+ - { key: Zn3FPU, value: 0, per_snippet_value: 0 }
+ - { key: Zn3Load, value: 1.003, per_snippet_value: 14.042 }
+ - { key: Zn3Store, value: 0.0023, per_snippet_value: 0.0322 }
+ - { key: Zn3Divider, value: 0, per_snippet_value: 0 }
+ - { key: NumMicroOps, value: 1.0146, per_snippet_value: 14.2044 }
+error: ''
+info: instruction has tied variables, using static renaming.
+assembled_snippet: 5541574156415541545366B800004883EC08C7042400000000C7442404000000009D66BD000066BB000066B9000066BA000066BE00006641B800006641B900006641BA00006641BB00006641BC00006641BD00006641BE00006641BF0000660F4007660F406F40660F409F80000000660F408FC0000000660F409700010000660F40B74001000066440F40878001000066440F408FC001000066440F40970002000066440F409F4002000066440F40A78002000066440F40AFC002000066440F40B70003000066440F40BF40030000660F4007660F406F40660F409F80000000660F408FC0000000660F409700010000660F40B74001000066440F40878001000066440F408FC001000066440F40970002000066440F409F4002000066440F40A78002000066440F40AFC002000066440F40B70003000066440F40BF40030000660F4007660F406F40660F409F80000000660F408FC0000000660F409700010000660F40B74001000066440F40878001000066440F408FC001000066440F40970002000066440F409F4002000066440F40A78002000066440F40AFC002000066440F40B70003000066440F40BF40030000660F4007660F406F40660F409F80000000660F408FC0000000660F409700010000660F40B74001000066440F40878001000066440F408FC001000066440F40970002000066440F409F4002000066440F40A78002000066440F40AFC002000066440F40B70003000066440F40BF400300005B415C415D415E415F5DC3
+...
+
+
+# CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,
+# CHECK-CLUSTERS-NEXT: {{^}}0,
-# RUN: llvm-exegesis -mcpu=znver3 -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s
+# RUN: llvm-exegesis -mode=analysis -benchmarks-file=%s -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s
# Naive clusterization mainly groups by instruction opcode,
# but it should also partition the benchmarks of the same opcode
-# RUN: llvm-exegesis -mode=inverse_throughput -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=inverse_throughput -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=inverse_throughput --skip-measurements -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=inverse_throughput --skip-measurements -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: inverse_throughput
CHECK-NEXT: key:
CHECK-NEXT: instructions:
CHECK-NEXT: ADD32rr
-CHECK: key: inverse_throughput
+++ /dev/null
-import subprocess
-import lit.util
-
-if not ('X86' in config.root.targets):
- # We need support for X86.
- config.unsupported = True
-
-elif not ('x86_64' in config.root.host_triple):
- # We need to be running on an X86 host.
- config.unsupported = True
-
-else:
- # We need libpfm to be installed and allow reading perf counters. We can
- # only know that at runtime, so we try to measure the latency of an empty
- # code snippet and bail out on error.
- llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir)
- if not llvm_exegesis_exe:
- print('llvm-exegesis not found')
- config.unsupported = True
- else:
- try:
- with open(os.devnull, 'w') as quiet:
- check_llvm_exegesis_inverse_throughput_result = subprocess.call(
- [llvm_exegesis_exe, '-mode', 'inverse_throughput', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
- except OSError:
- print('could not exec llvm-exegesis')
- config.unsupported = True
- if not check_llvm_exegesis_inverse_throughput_result == 0:
- config.unsupported = True
-# RUN: llvm-exegesis -mode=latency -opcode-name=CMOV32rr -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=latency -opcode-name=CMOV32rr -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=CMOV32rr -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=CMOV32rr -repetition-mode=loop | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: latency
-# RUN: llvm-exegesis -mode=latency -opcode-name=IN16rr -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=IN16rr -repetition-mode=duplicate | FileCheck %s
# FIXME: Sometimes fails with: 'unimplemented operand type'
# ALLOW_RETRIES: 2
-# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64_32r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
-# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64_32r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=LEA64_32r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=LEA64_32r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: latency
-# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
-# RUN: llvm-exegesis -mode=latency -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: latency
-# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SBB8rr -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SBB8rr -repetition-mode=loop | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: latency
-# RUN: llvm-exegesis -mode=latency -opcode-name=SETCCr --max-configs-per-opcode=1 | FileCheck %s --check-prefix=CHECK
-# RUN: llvm-exegesis -mode=latency -opcode-name=SETCCr --max-configs-per-opcode=256 | FileCheck %s --check-prefix=SWEEP
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SETCCr --max-configs-per-opcode=1 | FileCheck %s --check-prefix=CHECK
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SETCCr --max-configs-per-opcode=256 | FileCheck %s --check-prefix=SWEEP
CHECK: ---
CHECK-NEXT: mode: latency
-# RUN: llvm-exegesis -mode=latency -opcode-name=SQRTSSr -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SQRTSSr -repetition-mode=loop | FileCheck %s
# Check that the setup code for MXCSR does not crash the snippet.
-# RUN: llvm-exegesis -mode=latency -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=latency -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: latency
+++ /dev/null
-import subprocess
-import lit.util
-
-if not ('X86' in config.root.targets):
- # We need support for X86.
- config.unsupported = True
-
-elif not ('x86_64' in config.root.host_triple):
- # We need to be running on an X86 host.
- config.unsupported = True
-
-else:
- # We need libpfm to be installed and allow reading perf counters. We can
- # only know that at runtime, so we try to measure the latency of an empty
- # code snippet and bail out on error.
- llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir)
- if not llvm_exegesis_exe:
- print('llvm-exegesis not found')
- config.unsupported = True
- else:
- try:
- with open(os.devnull, 'w') as quiet:
- check_llvm_exegesis_latency_result = subprocess.call(
- [llvm_exegesis_exe, '-mode', 'latency', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
- except OSError:
- print('could not exec llvm-exegesis')
- config.unsupported = True
- if not check_llvm_exegesis_latency_result == 0:
- config.unsupported = True
-# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=1 | FileCheck -check-prefixes=CHECK,CHECK1 %s
-# RUN: llvm-exegesis -mode=latency -opcode-name=SBB8rr -max-configs-per-opcode=2 | FileCheck -check-prefixes=CHECK,CHECK2 %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SBB8rr -max-configs-per-opcode=1 | FileCheck -check-prefixes=CHECK,CHECK1 %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --skip-measurements -opcode-name=SBB8rr -max-configs-per-opcode=2 | FileCheck -check-prefixes=CHECK,CHECK2 %s
CHECK: ---
CHECK-NEXT: mode: latency
-# LLVM-EXEGESIS-LIVEIN RDI
-# LLVM-EXEGESIS-DEFREG XMM1 42
+# LLVM-EXEGESIS-LIVEIN RDI
+# LLVM-EXEGESIS-DEFREG XMM1 42
movq $2, %rdi
-addq $0x10, %rdi
\ No newline at end of file
+addq $0x10, %rdi
import subprocess
import lit.util
-if not ('X86' in config.root.targets):
+if 'native' not in config.available_features:
+ config.unsupported = True
+
+elif not ('X86' in config.root.targets):
# We need support for X86.
config.unsupported = True
elif not ('x86_64' in config.root.host_triple):
# We need to be running on an X86 host.
config.unsupported = True
-
-else:
+
+else:
# We need libpfm to be installed and the host to support the LBR format with cycles.
llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir)
if not llvm_exegesis_exe:
-# RUN: llvm-exegesis -mode=latency --repetition-mode=loop --x86-lbr-sample-period=521 --snippets-file=%p/Inputs/mov_add.att
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=latency --repetition-mode=loop --x86-lbr-sample-period=521 --snippets-file=%p/Inputs/mov_add.att
CHECK: ---
if not ('X86' in config.root.targets):
# We need support for X86.
config.unsupported = True
-
-elif not ('x86_64' in config.root.host_triple):
- # We need to be running on an X86 host.
- config.unsupported = True
\ No newline at end of file
+++ /dev/null
-import subprocess
-import lit.util
-
-if not ('X86' in config.root.targets):
- # We need support for X86.
- config.unsupported = True
-
-elif not ('x86_64' in config.root.host_triple):
- # We need to be running on an X86 host.
- config.unsupported = True
-
-else:
- # We need libpfm to be installed and allow reading perf counters. We can
- # only know that at runtime, so we try to measure the latency of an empty
- # code snippet and bail out on error.
- llvm_exegesis_exe = lit.util.which('llvm-exegesis', config.llvm_tools_dir)
- if not llvm_exegesis_exe:
- print('llvm-exegesis not found')
- config.unsupported = True
- else:
- try:
- with open(os.devnull, 'w') as quiet:
- check_llvm_exegesis_uops_result = subprocess.call(
- [llvm_exegesis_exe, '-mode', 'uops', '-snippets-file', '/dev/null'], stdout=quiet, stderr=quiet)
- except OSError:
- print('could not exec llvm-exegesis')
- config.unsupported = True
- if not check_llvm_exegesis_uops_result == 0:
- config.unsupported = True
\ No newline at end of file
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mi8 -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mi8 -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32mi8 -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32mi8 -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mr -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32mr -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32mr -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32mr -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rm -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rm -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32rm -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32rm -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD_F32m -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD_F32m -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD_F32m -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD_F32m -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=BEXTR32rm -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=BEXTR32rm -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BEXTR32rm -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BEXTR32rm -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=BSF16rm -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=BSF16rm -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BSF16rm -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BSF16rm -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=BTR64mr -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=BTR64mr -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BTR64mr -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=BTR64mr -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
--- /dev/null
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=CMOV16rm -benchmarks-file=- | FileCheck %s -check-prefixes=CHECK-YAML
+
+# https://bugs.llvm.org/show_bug.cgi?id=41448
+# Verify that we correctly serialize RegNo 0 as %noreg, not as an empty string!
+
+CHECK-YAML: ---
+CHECK-YAML-NEXT: mode: uops
+CHECK-YAML-NEXT: key:
+CHECK-YAML-NEXT: instructions:
+CHECK-YAML-NEXT: - 'CMOV16rm {{[A-Z0-9]+}} {{[A-Z0-9]+}} {{[A-Z0-9]+}} i_0x1 %noreg i_0x0 %noreg i_0x{{[0-9a-f]}}'
+CHECK-YAML-LAST: ...
+++ /dev/null
-# RUN: llvm-exegesis -mode=uops -opcode-name=CMOV16rm -benchmarks-file=%t.CMOV16rm-uops.yaml
-# RUN: FileCheck -check-prefixes=CHECK-YAML -input-file=%t.CMOV16rm-uops.yaml %s
-# RUN: llvm-exegesis -mcpu=bdver2 -mode=analysis -benchmarks-file=%t.CMOV16rm-uops.yaml -analysis-clusters-output-file=- -analysis-clustering-epsilon=0.1 -analysis-inconsistency-epsilon=0.1 -analysis-numpoints=1 -analysis-clustering=naive | FileCheck -check-prefixes=CHECK-CLUSTERS %s
-
-# https://bugs.llvm.org/show_bug.cgi?id=41448
-# 1. Verify that we correctly serialize RegNo 0 as %noreg, not as an empty string!
-# 2. Verify that deserialization works. Since CMOV16rm has a variant sched class, just printing clusters is sufficient
-
-CHECK-YAML: ---
-CHECK-YAML-NEXT: mode: uops
-CHECK-YAML-NEXT: key:
-CHECK-YAML-NEXT: instructions:
-CHECK-YAML-NEXT: - 'CMOV16rm {{[A-Z0-9]+}} {{[A-Z0-9]+}} {{[A-Z0-9]+}} i_0x1 %noreg i_0x0 %noreg i_0x{{[0-9a-f]}}'
-CHECK-YAML-LAST: ...
-
-# CHECK-CLUSTERS: {{^}}cluster_id,opcode_name,config,sched_class,
-# CHECK-CLUSTERS-NEXT: {{^}}0,
-# RUN: llvm-exegesis -mode=uops -opcode-name=FLDENVm,FLDL2E -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=FLDENVm,FLDL2E -repetition-mode=duplicate | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=LEA64r -repetition-mode=duplicate -max-configs-per-opcode=2 | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=LEA64r -repetition-mode=loop -max-configs-per-opcode=2 | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: uops
-# RUN: llvm-exegesis -mode=uops -opcode-name=POPCNT32rr 2>&1 | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=POPCNT32rr 2>&1 | FileCheck %s
CHECK: ---
CHECK-NEXT: mode: uops
-# RUN: llvm-exegesis -mode=uops -opcode-name=STD -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=STD -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=STD -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=STD -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=VFMADDSS4rm -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=VFMADDSS4rm -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=VFMADDSS4rm -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=VFMADDSS4rm -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=XCHG64rr -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=XCHG64rr -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=XCHG64rr -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=XCHG64rr -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s
-# RUN: llvm-exegesis -mode=uops -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32rr -repetition-mode=duplicate | FileCheck %s
+# RUN: llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -opcode-name=ADD32rr -repetition-mode=loop | FileCheck %s
CHECK: mode: uops
CHECK-NEXT: key:
-# RUN: not llvm-exegesis -mode=uops -snippets-file=%s 2>&1 | FileCheck %s
+# RUN: not llvm-exegesis -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mode=uops --skip-measurements -snippets-file=%s 2>&1 | FileCheck %s
-# LLVM-EXEGESIS-DEFREG CL 1
-# LLVM-EXEGESIS-DEFREG AX 1
+# LLVM-EXEGESIS-DEFREG CL 1
+# LLVM-EXEGESIS-DEFREG AX 1
div8r cl
CHECK: error: invalid instruction mnemonic 'div8r'
+++ /dev/null
-if 'native' not in config.available_features:
- config.unsupported = True
namespace exegesis {
BenchmarkRunner::BenchmarkRunner(const LLVMState &State,
- InstructionBenchmark::ModeE Mode)
- : State(State), Mode(Mode), Scratch(std::make_unique<ScratchSpace>()) {}
+ InstructionBenchmark::ModeE Mode,
+ bool BenchmarkSkipMeasurements)
+ : State(State), Mode(Mode),
+ BenchmarkSkipMeasurements(BenchmarkSkipMeasurements),
+ Scratch(std::make_unique<ScratchSpace>()) {}
BenchmarkRunner::~BenchmarkRunner() = default;
ObjectFile = getObjectFromBuffer(OS.str());
}
+ if (BenchmarkSkipMeasurements) {
+ InstrBenchmark.Error =
+ "in --skip-measurements mode, actual measurements skipped.";
+ continue;
+ }
+
const FunctionExecutorImpl Executor(State, std::move(ObjectFile),
Scratch.get());
auto NewMeasurements = runMeasurements(Executor);
class BenchmarkRunner {
public:
explicit BenchmarkRunner(const LLVMState &State,
- InstructionBenchmark::ModeE Mode);
+ InstructionBenchmark::ModeE Mode,
+ bool BenchmarkSkipMeasurements);
virtual ~BenchmarkRunner();
protected:
const LLVMState &State;
const InstructionBenchmark::ModeE Mode;
+ const bool BenchmarkSkipMeasurements;
private:
virtual Expected<std::vector<BenchmarkMeasure>>
LatencyBenchmarkRunner::LatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode,
+ bool BenchmarkSkipMeasurements,
InstructionBenchmark::ResultAggregationModeE ResultAgg)
- : BenchmarkRunner(State, Mode) {
+ : BenchmarkRunner(State, Mode, BenchmarkSkipMeasurements) {
assert((Mode == InstructionBenchmark::Latency ||
Mode == InstructionBenchmark::InverseThroughput) &&
"invalid mode");
public:
LatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode,
+ bool BenchmarkSkipMeasurements,
InstructionBenchmark::ResultAggregationModeE ResultAggMode);
~LatencyBenchmarkRunner() override;
namespace llvm {
namespace exegesis {
-Expected<LLVMState> LLVMState::Create(std::string Triple, std::string CpuName,
+Expected<LLVMState> LLVMState::Create(std::string TripleName,
+ std::string CpuName,
const StringRef Features) {
- if (Triple.empty())
- Triple = sys::getProcessTriple();
- if (CpuName.empty())
- CpuName = sys::getHostCPUName().str();
+ if (TripleName.empty())
+ TripleName = Triple::normalize(sys::getDefaultTargetTriple());
+
+ Triple TheTriple(TripleName);
+
+ // Look up the LLVM target for the requested triple.
std::string Error;
- const Target *const TheTarget = TargetRegistry::lookupTarget(Triple, Error);
+ const Target *TheTarget =
+ TargetRegistry::lookupTarget(/*MArch=*/"", TheTriple, Error);
if (!TheTarget) {
- return llvm::make_error<llvm::StringError>(
- "no LLVM target for triple " + Triple, llvm::inconvertibleErrorCode());
+ return llvm::make_error<llvm::StringError>("no LLVM target for triple " +
+ TripleName,
+ llvm::inconvertibleErrorCode());
+ }
+
+ // Update TripleName with the (possibly adjusted) triple from the target lookup.
+ TripleName = TheTriple.str();
+
+ if (CpuName == "native")
+ CpuName = std::string(llvm::sys::getHostCPUName());
+
+ std::unique_ptr<MCSubtargetInfo> STI(
+ TheTarget->createMCSubtargetInfo(TripleName, CpuName, ""));
+ assert(STI && "Unable to create subtarget info!");
+ if (!STI->isCPUStringValid(CpuName)) {
+ return llvm::make_error<llvm::StringError>(Twine("invalid CPU name (")
+ .concat(CpuName)
+ .concat(") for triple ")
+ .concat(TripleName),
+ llvm::inconvertibleErrorCode());
}
const TargetOptions Options;
std::unique_ptr<const TargetMachine> TM(
static_cast<LLVMTargetMachine *>(TheTarget->createTargetMachine(
- Triple, CpuName, Features, Options, Reloc::Model::Static)));
+ TripleName, CpuName, Features, Options, Reloc::Model::Static)));
if (!TM) {
return llvm::make_error<llvm::StringError>(
"unable to create target machine", llvm::inconvertibleErrorCode());
}
const ExegesisTarget *ET =
- Triple.empty() ? &ExegesisTarget::getDefault()
- : ExegesisTarget::lookup(TM->getTargetTriple());
+ TripleName.empty() ? &ExegesisTarget::getDefault()
+ : ExegesisTarget::lookup(TM->getTargetTriple());
if (!ET) {
return llvm::make_error<llvm::StringError>(
- "no Exegesis target for triple " + Triple,
+ "no Exegesis target for triple " + TripleName,
llvm::inconvertibleErrorCode());
}
return LLVMState(std::move(TM), ET, CpuName);
// If `TripleName` is empty, uses the default target triple.
// If `CpuName` is "native", uses the host CPU.
// `Features` is intended for tests.
- static Expected<LLVMState> Create(std::string Triple, std::string CpuName,
+ static Expected<LLVMState> Create(std::string TripleName, std::string CpuName,
StringRef Features = "");
const TargetMachine &getTargetMachine() const { return *TheTargetMachine; }
Expected<std::unique_ptr<BenchmarkRunner>>
ExegesisTarget::createBenchmarkRunner(
InstructionBenchmark::ModeE Mode, const LLVMState &State,
+ bool BenchmarkSkipMeasurements,
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
PfmCountersInfo PfmCounters = State.getPfmCounters();
switch (Mode) {
return nullptr;
case InstructionBenchmark::Latency:
case InstructionBenchmark::InverseThroughput:
- if (!PfmCounters.CycleCounter) {
+ if (!BenchmarkSkipMeasurements && !PfmCounters.CycleCounter) {
const char *ModeName = Mode == InstructionBenchmark::Latency
? "latency"
: "inverse_throughput";
return make_error<Failure>(
Twine("can't run '")
.concat(ModeName)
- .concat("' mode, sched model does not define a cycle counter."));
+ .concat(
+ "' mode, sched model does not define a cycle counter. You "
+ "can pass --skip-measurements to skip the actual "
+ "benchmarking."));
}
- return createLatencyBenchmarkRunner(State, Mode, ResultAggMode);
+ return createLatencyBenchmarkRunner(State, Mode, BenchmarkSkipMeasurements,
+ ResultAggMode);
case InstructionBenchmark::Uops:
- if (!PfmCounters.UopsCounter && !PfmCounters.IssueCounters)
- return make_error<Failure>("can't run 'uops' mode, sched model does not "
- "define uops or issue counters.");
- return createUopsBenchmarkRunner(State, ResultAggMode);
+ if (!BenchmarkSkipMeasurements && !PfmCounters.UopsCounter &&
+ !PfmCounters.IssueCounters)
+ return make_error<Failure>(
+ "can't run 'uops' mode, sched model does not define uops or issue "
+ "counters. You can pass --skip-measurements to skip the actual "
+ "benchmarking.");
+ return createUopsBenchmarkRunner(State, BenchmarkSkipMeasurements,
+ ResultAggMode);
}
return nullptr;
}
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createLatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode,
+ bool BenchmarkSkipMeasurements,
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const {
- return std::make_unique<LatencyBenchmarkRunner>(State, Mode, ResultAggMode);
+ return std::make_unique<LatencyBenchmarkRunner>(
+ State, Mode, BenchmarkSkipMeasurements, ResultAggMode);
}
std::unique_ptr<BenchmarkRunner> ExegesisTarget::createUopsBenchmarkRunner(
- const LLVMState &State,
+ const LLVMState &State, bool BenchmarkSkipMeasurements,
InstructionBenchmark::ResultAggregationModeE /*unused*/) const {
- return std::make_unique<UopsBenchmarkRunner>(State);
+ return std::make_unique<UopsBenchmarkRunner>(State,
+ BenchmarkSkipMeasurements);
}
static_assert(std::is_pod<PfmCountersInfo>::value,
// Creates a benchmark runner for the given mode.
Expected<std::unique_ptr<BenchmarkRunner>> createBenchmarkRunner(
InstructionBenchmark::ModeE Mode, const LLVMState &State,
+ bool BenchmarkSkipMeasurements,
InstructionBenchmark::ResultAggregationModeE ResultAggMode =
InstructionBenchmark::Min) const;
const LLVMState &State, const SnippetGenerator::Options &Opts) const;
std::unique_ptr<BenchmarkRunner> virtual createLatencyBenchmarkRunner(
const LLVMState &State, InstructionBenchmark::ModeE Mode,
+ bool BenchmarkSkipMeasurements,
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
std::unique_ptr<BenchmarkRunner> virtual createUopsBenchmarkRunner(
- const LLVMState &State,
+ const LLVMState &State, bool BenchmarkSkipMeasurements,
InstructionBenchmark::ResultAggregationModeE ResultAggMode) const;
const ExegesisTarget *Next = nullptr;
class UopsBenchmarkRunner : public BenchmarkRunner {
public:
- UopsBenchmarkRunner(const LLVMState &State)
- : BenchmarkRunner(State, InstructionBenchmark::Uops) {}
+ UopsBenchmarkRunner(const LLVMState &State, bool BenchmarkSkipMeasurements)
+ : BenchmarkRunner(State, InstructionBenchmark::Uops,
+ BenchmarkSkipMeasurements) {}
~UopsBenchmarkRunner() override;
static constexpr const size_t kMinNumDifferentAddresses = 6;
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Format.h"
+#include "llvm/Support/Host.h"
+#include "llvm/Support/InitLLVM.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetSelect.h"
static cl::opt<exegesis::InstructionBenchmark::ResultAggregationModeE>
ResultAggMode(
"result-aggregation-mode",
- cl::desc("How to aggregate multi-values result"), cl::cat(Options),
+ cl::desc("How to aggregate multi-values result"),
+ cl::cat(BenchmarkOptions),
cl::values(clEnumValN(exegesis::InstructionBenchmark::Min, "min",
"Keep min reading"),
clEnumValN(exegesis::InstructionBenchmark::Max, "max",
"All of the above and take the minimum of measurements")),
cl::init(exegesis::InstructionBenchmark::Duplicate));
+static cl::opt<bool> BenchmarkSkipMeasurements(
+ "skip-measurements",
+ cl::desc("do everything except actually performing the measurements"),
+ cl::cat(BenchmarkOptions), cl::init(false));
+
static cl::opt<unsigned>
NumRepetitions("num-repetitions",
cl::desc("number of time to repeat the asm snippet"),
"instead show only such unstable opcodes"),
cl::cat(AnalysisOptions), cl::init(false));
-static cl::opt<std::string> CpuName(
- "mcpu",
- cl::desc("cpu name to use for pfm counters, leave empty to autodetect"),
- cl::cat(Options), cl::init(""));
+static cl::opt<bool> AnalysisOverrideBenchmarksTripleAndCpu(
+ "analysis-override-benchmark-triple-and-cpu",
+ cl::desc("By default, we analyze the benchmarks for the triple/CPU they "
+ "were measured for, but if you want to analyze them for some "
+ "other combination (specified via -mtriple/-mcpu), you can "
+ "pass this flag."),
+ cl::cat(AnalysisOptions), cl::init(false));
+
+static cl::opt<std::string>
+ TripleName("mtriple",
+ cl::desc("Target triple. See -version for available targets"),
+ cl::cat(Options));
+
+static cl::opt<std::string>
+ MCPU("mcpu",
+ cl::desc("Target a specific cpu type (-mcpu=help for details)"),
+ cl::value_desc("cpu-name"), cl::cat(Options));
static cl::opt<bool>
DumpObjectToDisk("dump-object-to-disk",
}
void benchmarkMain() {
+ if (!BenchmarkSkipMeasurements) {
#ifndef HAVE_LIBPFM
- ExitWithError("benchmarking unavailable, LLVM was built without libpfm.");
+ ExitWithError(
+ "benchmarking unavailable, LLVM was built without libpfm. You can pass "
+ "--skip-measurements to skip the actual benchmarking.");
+#else
+ if (exegesis::pfm::pfmInitialize())
+ ExitWithError("cannot initialize libpfm");
#endif
+ }
- if (exegesis::pfm::pfmInitialize())
- ExitWithError("cannot initialize libpfm");
-
- InitializeAllTargets();
- InitializeAllTargetMCs();
InitializeAllAsmPrinters();
InitializeAllAsmParsers();
InitializeAllExegesisTargets();
- const LLVMState State = ExitOnErr(LLVMState::Create("", CpuName));
+ const LLVMState State = ExitOnErr(LLVMState::Create(TripleName, MCPU));
// Preliminary check to ensure features needed for requested
// benchmark mode are present on target CPU and/or OS.
- ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
+ if (!BenchmarkSkipMeasurements)
+ ExitOnErr(State.getExegesisTarget().checkFeatureSupport());
const std::unique_ptr<BenchmarkRunner> Runner =
ExitOnErr(State.getExegesisTarget().createBenchmarkRunner(
- BenchmarkMode, State, ResultAggMode));
+ BenchmarkMode, State, BenchmarkSkipMeasurements, ResultAggMode));
if (!Runner) {
ExitWithError("cannot create benchmark runner");
}
"and --analysis-inconsistencies-output-file must be specified");
}
- InitializeAllTargets();
- InitializeAllTargetMCs();
InitializeAllAsmPrinters();
InitializeAllDisassemblers();
InitializeAllExegesisTargets();
"is unsupported.");
}
auto TripleAndCpu = *TriplesAndCpus.begin();
- if (!CpuName.empty()) {
+ if (AnalysisOverrideBenchmarksTripleAndCpu) {
llvm::errs() << "overridding file CPU name (" << TripleAndCpu.CpuName
- << ") with provided CPU name (" << CpuName << ")\n";
- TripleAndCpu.CpuName = CpuName;
+ << ") with provided triple (" << TripleName
+ << ") and CPU name (" << MCPU << ")\n";
+ TripleAndCpu.LLVMTriple = TripleName;
+ TripleAndCpu.CpuName = MCPU;
}
llvm::errs() << "using Triple '" << TripleAndCpu.LLVMTriple << "' and CPU '"
<< TripleAndCpu.CpuName << "'\n";
int main(int Argc, char **Argv) {
using namespace llvm;
- cl::ParseCommandLineOptions(Argc, Argv, "");
+
+ InitLLVM X(Argc, Argv);
+
+ // Initialize targets so we can print them when flag --version is specified.
+ InitializeAllTargetInfos();
+ InitializeAllTargets();
+ InitializeAllTargetMCs();
+
+ // Enable printing of available targets when flag --version is specified.
+ cl::AddExtraVersionPrinter(TargetRegistry::printRegisteredTargetsForVersion);
+
+ cl::HideUnrelatedOptions({&llvm::exegesis::Options,
+ &llvm::exegesis::BenchmarkOptions,
+ &llvm::exegesis::AnalysisOptions});
+
+ cl::ParseCommandLineOptions(Argc, Argv,
+ "llvm host machine instruction characteristics "
+ "measurement and analysis.\n");
exegesis::ExitOnErr.setExitCodeMapper([](const Error &Err) {
if (Err.isA<exegesis::ClusteringError>())