The main goal of this tool is to automatically (in)validate the LLVM's TableDef
scheduling models. To that end, we also provide analysis of the results.
-EXAMPLES: benchmarking
-----------------------
+:program:`llvm-exegesis` can also benchmark arbitrary user-provided code
+snippets.
+
+EXAMPLE 1: benchmarking instructions
+------------------------------------
Assume you have an X86-64 machine. To measure the latency of a single
instruction, run:
FIXME: Provide an :program:`llvm-exegesis` option to test all instructions.
-EXAMPLES: analysis
-----------------------
+
+EXAMPLE 2: benchmarking a custom code snippet
+---------------------------------------------
+
+To measure the latency/uops of a custom piece of code, you can specify the
+`snippets-file` option (`-` reads from standard input).
+
+.. code-block:: bash
+
+ $ echo "vzeroupper" | llvm-exegesis -mode=uops -snippets-file=-
+
+Real-life code snippets typically depend on registers or memory.
+:program:`llvm-exegesis` checks the liveness of registers (i.e. any register
+use has a corresponding def or is a "live in"). If your code depends on the
+value of some registers, you have two options:
+ - Mark the register as requiring a definition. :program:`llvm-exegesis` will
+ automatically assign a value to the register. This can be done using the
+ directive `LLVM-EXEGESIS-DEFREG <reg name> <hex_value>`, where `<hex_value>`
+ is a bit pattern used to fill `<reg name>`. If `<hex_value>` is smaller than
+ the register width, it will be sign-extended.
+ - Mark the register as a "live in". :program:`llvm-exegesis` will benchmark
+ using whatever value was in this register on entry. This can be done using
+ the directive `LLVM-EXEGESIS-LIVEIN <reg name>`.
+
+For example, the following code snippet depends on the values of XMM1 (which
+will be set by the tool) and the memory buffer passed in RDI (live in).
+
+.. code-block:: none
+
+ # LLVM-EXEGESIS-LIVEIN RDI
+ # LLVM-EXEGESIS-DEFREG XMM1 42
+ vmulps (%rdi), %xmm1, %xmm2
+ vhaddps %xmm2, %xmm2, %xmm3
+ addq $0x10, %rdi
+
+
+EXAMPLE 3: analysis
+-------------------
Assuming you have a set of benchmarked instructions (either latency or uops) as
YAML in file `/tmp/benchmarks.yaml`, you can analyze the results using the
.. option:: -opcode-index=<LLVM opcode index>
- Specify the opcode to measure, by index.
- Either `opcode-index` or `opcode-name` must be set.
+ Specify the opcode to measure, by index. See example 1 for details.
+ Either `opcode-index`, `opcode-name` or `snippets-file` must be set.
.. option:: -opcode-name=<LLVM opcode name>
- Specify the opcode to measure, by name.
- Either `opcode-index` or `opcode-name` must be set.
+ Specify the opcode to measure, by name. See example 1 for details.
+ Either `opcode-index`, `opcode-name` or `snippets-file` must be set.
+
+ .. option:: -snippets-file=<filename>
+
+ Specify the custom code snippet to measure. See example 2 for details.
+ Either `opcode-index`, `opcode-name` or `snippets-file` must be set.
.. option:: -mode=[latency|uops|analysis]
// Reserves some space on the stack, fills it with the content of the provided
// constant and provide methods to load the stack value into a register.
struct ConstantInliner {
- explicit ConstantInliner(const llvm::APInt &Constant)
- : StackSize(Constant.getBitWidth() / 8) {
- assert(Constant.getBitWidth() % 8 == 0 && "Must be a multiple of 8");
- add(allocateStackSpace(StackSize));
- size_t ByteOffset = 0;
- for (; StackSize - ByteOffset >= 4; ByteOffset += 4)
- add(fillStackSpace(
- llvm::X86::MOV32mi, ByteOffset,
- Constant.extractBits(32, ByteOffset * 8).getZExtValue()));
- if (StackSize - ByteOffset >= 2) {
- add(fillStackSpace(
- llvm::X86::MOV16mi, ByteOffset,
- Constant.extractBits(16, ByteOffset * 8).getZExtValue()));
- ByteOffset += 2;
- }
- if (StackSize - ByteOffset >= 1)
- add(fillStackSpace(
- llvm::X86::MOV8mi, ByteOffset,
- Constant.extractBits(8, ByteOffset * 8).getZExtValue()));
- }
+ explicit ConstantInliner(const llvm::APInt &Constant) : Constant_(Constant) {}
std::vector<llvm::MCInst> loadAndFinalize(unsigned Reg, unsigned RegBitWidth,
unsigned Opcode) {
- assert(StackSize * 8 == RegBitWidth &&
- "Value does not have the correct size");
+ assert((RegBitWidth & 7) == 0 &&
+ "RegBitWidth must be a multiple of 8 bits");
+ initStack(RegBitWidth / 8);
add(loadToReg(Reg, Opcode));
- add(releaseStackSpace(StackSize));
+ add(releaseStackSpace(RegBitWidth / 8));
return std::move(Instructions);
}
std::vector<llvm::MCInst>
loadX87AndFinalize(unsigned Reg, unsigned RegBitWidth, unsigned Opcode) {
- assert(StackSize * 8 == RegBitWidth &&
- "Value does not have the correct size");
+ assert((RegBitWidth & 7) == 0 &&
+ "RegBitWidth must be a multiple of 8 bits");
+ initStack(RegBitWidth / 8);
add(llvm::MCInstBuilder(Opcode)
.addReg(llvm::X86::RSP) // BaseReg
.addImm(1) // ScaleAmt
.addReg(0)); // Segment
if (Reg != llvm::X86::ST0)
add(llvm::MCInstBuilder(llvm::X86::ST_Frr).addReg(Reg));
- add(releaseStackSpace(StackSize));
+ add(releaseStackSpace(RegBitWidth / 8));
return std::move(Instructions);
}
std::vector<llvm::MCInst> popFlagAndFinalize() {
- assert(StackSize * 8 == 64 && "Value does not have the correct size");
+ initStack(8);
add(llvm::MCInstBuilder(llvm::X86::POPF64));
return std::move(Instructions);
}
return *this;
}
- const size_t StackSize;
+  // Reserves `Bytes` bytes of stack and emits the stores that fill them with
+  // the constant. A constant narrower than `Bytes * 8` bits is sign-extended
+  // to that width first; a wider one trips the assertion.
+  void initStack(unsigned Bytes) {
+    const unsigned NumBits = Bytes * 8;
+    assert(Constant_.getBitWidth() <= NumBits &&
+           "Value does not have the correct size");
+    // Only widen when strictly narrower; otherwise use the constant as is.
+    llvm::APInt Widened = Constant_;
+    if (Widened.getBitWidth() < NumBits)
+      Widened = Widened.sext(NumBits);
+    add(allocateStackSpace(Bytes));
+    // Store 32-bit chunks while at least four bytes remain...
+    size_t Offset = 0;
+    while (Bytes - Offset >= 4) {
+      add(fillStackSpace(llvm::X86::MOV32mi, Offset,
+                         Widened.extractBits(32, Offset * 8).getZExtValue()));
+      Offset += 4;
+    }
+    // ...then at most one 16-bit chunk...
+    if (Bytes - Offset >= 2) {
+      add(fillStackSpace(llvm::X86::MOV16mi, Offset,
+                         Widened.extractBits(16, Offset * 8).getZExtValue()));
+      Offset += 2;
+    }
+    // ...and at most one trailing byte.
+    if (Bytes - Offset >= 1) {
+      add(fillStackSpace(llvm::X86::MOV8mi, Offset,
+                         Widened.extractBits(8, Offset * 8).getZExtValue()));
+    }
+  }
+
+ llvm::APInt Constant_;
std::vector<llvm::MCInst> Instructions;
};
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCInstBuilder.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCParser/MCAsmParser.h"
+#include "llvm/MC/MCParser/MCTargetAsmParser.h"
#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/Path.h"
+#include "llvm/Support/SourceMgr.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/TargetSelect.h"
#include <algorithm>
llvm::cl::init(""));
static llvm::cl::opt<std::string>
+ SnippetsFile("snippets-file", llvm::cl::desc("code snippets to measure"),
+ llvm::cl::init(""));
+
+static llvm::cl::opt<std::string>
BenchmarkFile("benchmarks-file", llvm::cl::desc(""), llvm::cl::init(""));
static llvm::cl::opt<exegesis::InstructionBenchmark::ModeE> BenchmarkMode(
void LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET();
#endif
-static unsigned GetOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) {
- if (OpcodeName.empty() && (OpcodeIndex == 0))
+// Checks that only one of OpcodeName, OpcodeIndex or SnippetsFile is provided,
+// and returns the opcode index or 0 if snippets should be read from
+// `SnippetsFile`.
+static unsigned getOpcodeOrDie(const llvm::MCInstrInfo &MCInstrInfo) {
+ const size_t NumSetFlags = (OpcodeName.empty() ? 0 : 1) +
+ (OpcodeIndex == 0 ? 0 : 1) +
+ (SnippetsFile.empty() ? 0 : 1);
+ if (NumSetFlags != 1)
llvm::report_fatal_error(
- "please provide one and only one of 'opcode-index' or 'opcode-name'");
+ "please provide one and only one of 'opcode-index', 'opcode-name' or "
+ "'snippets-file'");
+ if (!SnippetsFile.empty())
+ return 0;
if (OpcodeIndex > 0)
return OpcodeIndex;
// Resolve opcode name -> opcode.
}
// Generates code snippets for opcode `Opcode`.
-llvm::Expected<std::vector<BenchmarkCode>>
+static llvm::Expected<std::vector<BenchmarkCode>>
generateSnippets(const LLVMState &State, unsigned Opcode) {
const std::unique_ptr<SnippetGenerator> Generator =
State.getExegesisTarget().createSnippetGenerator(BenchmarkMode, State);
- if (!Generator) {
+ if (!Generator)
llvm::report_fatal_error("cannot create snippet generator");
- }
const llvm::MCInstrDesc &InstrDesc = State.getInstrInfo().get(Opcode);
// Ignore instructions that we cannot run.
return Generator->generateConfigurations(Opcode);
}
+namespace {
+
+// An MCStreamer that reads a BenchmarkCode definition from a file.
+// The BenchmarkCode definition is just an asm file, with additional comments
+// to specify which registers should be defined or are live on entry:
+//   LLVM-EXEGESIS-DEFREG <reg name> <hex_value>
+//   LLVM-EXEGESIS-LIVEIN <reg name>
+// Instructions, register initial values and live-ins are appended to the
+// BenchmarkCode passed to the constructor. Malformed LLVM-EXEGESIS comments
+// are reported on stderr and counted (see numInvalidComments()).
+class BenchmarkCodeStreamer : public llvm::MCStreamer,
+                              public llvm::AsmCommentConsumer {
+public:
+  // `TheRegInfo` and `Result` are stored as raw pointers and dereferenced
+  // while parsing.
+  explicit BenchmarkCodeStreamer(llvm::MCContext *Context,
+                                 const llvm::MCRegisterInfo *TheRegInfo,
+                                 BenchmarkCode *Result)
+      : llvm::MCStreamer(*Context), RegInfo(TheRegInfo), Result(Result) {}
+
+  // Implementation of the llvm::MCStreamer interface. We only care about
+  // instructions.
+  void EmitInstruction(const llvm::MCInst &instruction,
+                       const llvm::MCSubtargetInfo &mc_subtarget_info,
+                       bool PrintSchedInfo) override {
+    Result->Instructions.push_back(instruction);
+  }
+
+  // Implementation of the llvm::AsmCommentConsumer.
+  // Comments that do not start with "LLVM-EXEGESIS-" are ignored.
+  void HandleComment(llvm::SMLoc Loc, llvm::StringRef CommentText) override {
+    CommentText = CommentText.trim();
+    if (!CommentText.consume_front("LLVM-EXEGESIS-"))
+      return;
+    if (CommentText.consume_front("DEFREG")) {
+      // LLVM-EXEGESIS-DEFREG <reg name> <hex_value>
+      llvm::SmallVector<llvm::StringRef, 2> Parts;
+      CommentText.split(Parts, ' ', /*unlimited splits*/ -1,
+                        /*do not keep empty strings*/ false);
+      if (Parts.size() != 2) {
+        llvm::errs() << "invalid comment 'LLVM-EXEGESIS-DEFREG " << CommentText
+                     << "', expected two parameters <reg name> <hex_value>\n";
+        ++InvalidComments;
+        // Bail out: Parts[0]/Parts[1] below would be out of bounds.
+        return;
+      }
+      RegisterValue RegVal;
+      if (!(RegVal.Register = findRegisterByName(Parts[0].trim()))) {
+        llvm::errs() << "unknown register in 'LLVM-EXEGESIS-DEFREG "
+                     << CommentText << "'\n";
+        ++InvalidComments;
+        return;
+      }
+      const llvm::StringRef HexValue = Parts[1].trim();
+      // The APInt string constructor asserts on invalid digits, so reject
+      // anything that is not a plain sequence of hex digits up front.
+      if (HexValue.empty() ||
+          HexValue.find_first_not_of("0123456789abcdefABCDEF") !=
+              llvm::StringRef::npos) {
+        llvm::errs() << "invalid hex value in 'LLVM-EXEGESIS-DEFREG "
+                     << CommentText << "'\n";
+        ++InvalidComments;
+        return;
+      }
+      RegVal.Value = llvm::APInt(
+          /* each hex digit is 4 bits */ HexValue.size() * 4, HexValue, 16);
+      Result->RegisterInitialValues.push_back(std::move(RegVal));
+      return;
+    }
+    if (CommentText.consume_front("LIVEIN")) {
+      // LLVM-EXEGESIS-LIVEIN <reg name>
+      if (unsigned Reg = findRegisterByName(CommentText.ltrim()))
+        Result->LiveIns.push_back(Reg);
+      else {
+        llvm::errs() << "unknown register in 'LLVM-EXEGESIS-LIVEIN "
+                     << CommentText << "'\n";
+        ++InvalidComments;
+      }
+      return;
+    }
+  }
+
+  // Number of malformed LLVM-EXEGESIS comments seen so far.
+  unsigned numInvalidComments() const { return InvalidComments; }
+
+private:
+  // We only care about instructions, we don't implement this part of the API.
+  void EmitCommonSymbol(llvm::MCSymbol *symbol, uint64_t size,
+                        unsigned byte_alignment) override {}
+  bool EmitSymbolAttribute(llvm::MCSymbol *symbol,
+                           llvm::MCSymbolAttr attribute) override {
+    return false;
+  }
+  void EmitValueToAlignment(unsigned byte_alignment, int64_t value,
+                            unsigned value_size,
+                            unsigned max_bytes_to_emit) override {}
+  void EmitZerofill(llvm::MCSection *section, llvm::MCSymbol *symbol,
+                    uint64_t size, unsigned byte_alignment,
+                    llvm::SMLoc Loc) override {}
+
+  // Returns the index of the register named `RegName`, or 0 (after printing
+  // a diagnostic) when no register has that exact name.
+  unsigned findRegisterByName(const llvm::StringRef RegName) const {
+    // FIXME: Can we do better than this ?
+    for (unsigned I = 0, E = RegInfo->getNumRegs(); I < E; ++I) {
+      if (RegName == RegInfo->getName(I))
+        return I;
+    }
+    llvm::errs() << "'" << RegName
+                 << "' is not a valid register name for the target\n";
+    return 0;
+  }
+
+  const llvm::MCRegisterInfo *const RegInfo;
+  BenchmarkCode *const Result;
+  unsigned InvalidComments = 0;
+};
+
+} // namespace
+
+// Reads code snippets from file `Filename` (opened with getFileOrSTDIN).
+// The file is run through the target's asm parser; on success the result is a
+// one-element vector holding the parsed BenchmarkCode. A BenchmarkFailure is
+// returned when the file cannot be read, a parser cannot be created, parsing
+// fails, or any LLVM-EXEGESIS comment was rejected.
+static llvm::Expected<std::vector<BenchmarkCode>>
+readSnippets(const LLVMState &State, llvm::StringRef Filename) {
+  auto FileOrErr = llvm::MemoryBuffer::getFileOrSTDIN(Filename);
+  if (!FileOrErr) {
+    const std::error_code EC = FileOrErr.getError();
+    return llvm::make_error<BenchmarkFailure>(
+        "cannot read snippet: " + Filename + ": " + EC.message());
+  }
+  llvm::SourceMgr SM;
+  SM.AddNewSourceBuffer(std::move(*FileOrErr), llvm::SMLoc());
+
+  BenchmarkCode Result;
+
+  // The context keeps a pointer to ObjectFileInfo, which in turn is
+  // initialized with the context: declare first, initialize afterwards.
+  const llvm::TargetMachine &TM = State.getTargetMachine();
+  llvm::MCObjectFileInfo ObjectFileInfo;
+  llvm::MCContext Context(TM.getMCAsmInfo(), TM.getMCRegisterInfo(),
+                          &ObjectFileInfo);
+  ObjectFileInfo.InitMCObjectFileInfo(TM.getTargetTriple(), /*PIC*/ false,
+                                      Context);
+
+  // The streamer is both the instruction sink and the comment consumer.
+  BenchmarkCodeStreamer Streamer(&Context, TM.getMCRegisterInfo(), &Result);
+  const std::unique_ptr<llvm::MCAsmParser> AsmParser(
+      llvm::createMCAsmParser(SM, Context, Streamer, *TM.getMCAsmInfo()));
+  if (!AsmParser)
+    return llvm::make_error<BenchmarkFailure>("cannot create asm parser");
+  AsmParser->getLexer().setCommentConsumer(&Streamer);
+
+  const std::unique_ptr<llvm::MCTargetAsmParser> TargetAsmParser(
+      TM.getTarget().createMCAsmParser(*TM.getMCSubtargetInfo(), *AsmParser,
+                                       *TM.getMCInstrInfo(),
+                                       llvm::MCTargetOptions()));
+  if (!TargetAsmParser)
+    return llvm::make_error<BenchmarkFailure>(
+        "cannot create target asm parser");
+  AsmParser->setTargetParser(*TargetAsmParser);
+
+  const bool ParseFailed = AsmParser->Run(false);
+  if (ParseFailed)
+    return llvm::make_error<BenchmarkFailure>("cannot parse asm file");
+
+  if (const unsigned NumInvalid = Streamer.numInvalidComments())
+    return llvm::make_error<BenchmarkFailure>(
+        llvm::Twine("found ")
+            .concat(llvm::Twine(NumInvalid))
+            .concat(" invalid LLVM-EXEGESIS comments"));
+
+  return std::vector<BenchmarkCode>{std::move(Result)};
+}
+
void benchmarkMain() {
if (exegesis::pfm::pfmInitialize())
llvm::report_fatal_error("cannot initialize libpfm");
llvm::InitializeNativeTarget();
llvm::InitializeNativeTargetAsmPrinter();
+ llvm::InitializeNativeTargetAsmParser();
#ifdef LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET
LLVM_EXEGESIS_INITIALIZE_NATIVE_TARGET();
#endif
const LLVMState State;
- const auto Opcode = GetOpcodeOrDie(State.getInstrInfo());
-
- // Ignore instructions without a sched class if -ignore-invalid-sched-class is
- // passed.
- if (IgnoreInvalidSchedClass &&
- State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
- llvm::errs() << "ignoring instruction without sched class\n";
- return;
+ const auto Opcode = getOpcodeOrDie(State.getInstrInfo());
+
+ std::vector<BenchmarkCode> Configurations;
+ if (Opcode > 0) {
+ // Ignore instructions without a sched class if -ignore-invalid-sched-class
+ // is passed.
+ if (IgnoreInvalidSchedClass &&
+ State.getInstrInfo().get(Opcode).getSchedClass() == 0) {
+ llvm::errs() << "ignoring instruction without sched class\n";
+ return;
+ }
+ Configurations = ExitOnErr(generateSnippets(State, Opcode));
+ } else {
+ Configurations = ExitOnErr(readSnippets(State, SnippetsFile));
}
- // FIXME: Allow arbitrary code.
- const std::vector<BenchmarkCode> Configurations =
- ExitOnErr(generateSnippets(State, Opcode));
-
const std::unique_ptr<BenchmarkRunner> Runner =
State.getExegesisTarget().createBenchmarkRunner(BenchmarkMode, State);
if (!Runner) {