return Size;
}
- /// Verify that assembling instruction \p Inst results in the same sequence of
- /// bytes as \p Encoding.
- bool validateEncoding(const MCInst &Instruction,
- ArrayRef<uint8_t> Encoding) const;
+ /// Validate that disassembling the \p Sequence of bytes into an instruction
+ /// and assembling the instruction again, results in a byte sequence identical
+ /// to the original one.
+ bool validateInstructionEncoding(ArrayRef<uint8_t> Sequence) const;
/// Return a function execution count threshold for determining whether
/// the function is 'hot'. Consider it hot if count is above the average exec
return std::make_pair(HotSize, ColdSize);
}
-bool BinaryContext::validateEncoding(const MCInst &Inst,
- ArrayRef<uint8_t> InputEncoding) const {
+bool BinaryContext::validateInstructionEncoding(
+ ArrayRef<uint8_t> InputSequence) const {
+ MCInst Inst;
+ uint64_t InstSize;
+ DisAsm->getInstruction(Inst, InstSize, InputSequence, 0, nulls());
+ assert(InstSize == InputSequence.size() &&
+ "Disassembled instruction size does not match the sequence.");
+
SmallString<256> Code;
SmallVector<MCFixup, 4> Fixups;
raw_svector_ostream VecOS(Code);
MCE->encodeInstruction(Inst, VecOS, Fixups, *STI);
- auto EncodedData = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
- if (InputEncoding != EncodedData) {
+ auto OutputSequence = ArrayRef<uint8_t>((uint8_t *)Code.data(), Code.size());
+ if (InputSequence != OutputSequence) {
if (opts::Verbosity > 1) {
errs() << "BOLT-WARNING: mismatched encoding detected\n"
- << " input: " << InputEncoding << '\n'
- << " output: " << EncodedData << '\n';
+ << " input: " << InputSequence << '\n'
+ << " output: " << OutputSequence << '\n';
}
return false;
}
// Check integrity of LLVM assembler/disassembler.
if (opts::CheckEncoding && !BC.MIB->isBranch(Instruction) &&
!BC.MIB->isCall(Instruction) && !BC.MIB->isNoop(Instruction)) {
- if (!BC.validateEncoding(Instruction, FunctionData.slice(Offset, Size))) {
+ if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
errs() << "BOLT-WARNING: mismatching LLVM encoding detected in "
<< "function " << *this << " for instruction :\n";
BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
break;
}
- // Disassemble again without the symbolizer and check that the disassembly
- // matches the assembler output.
- MCInst TempInst;
- BC.DisAsm->getInstruction(TempInst, Size, FunctionData.slice(Offset),
- AbsoluteInstrAddr, nulls());
- if (!BC.validateEncoding(TempInst, FunctionData.slice(Offset, Size))) {
+ if (!BC.validateInstructionEncoding(FunctionData.slice(Offset, Size))) {
errs() << "BOLT-WARNING: internal assembler/disassembler error "
"detected for AVX512 instruction:\n";
- BC.printInstruction(errs(), TempInst, AbsoluteInstrAddr);
+ BC.printInstruction(errs(), Instruction, AbsoluteInstrAddr);
errs() << " in function " << *this << '\n';
setIgnored();
break;
--- /dev/null
+# REQUIRES: system-linux
+
+# RUN: llvm-mc -filetype=obj -triple x86_64-unknown-linux %s -o %t.o
+# RUN: ld.lld %t.o -o %t.exe -q
+# RUN: llvm-bolt %t.exe --relocs -o %t.out --check-encoding |& FileCheck %s
+
+ .text
+ .globl _start
+ .type _start, %function
+_start:
+ .cfi_startproc
+
+## Check that llvm-bolt uses non-symbolizing disassembler while validating
+## instruction encodings. If symbol "foo" below is symbolized, the encoded
+## instruction would have a different sequence of bytes from the input
+## sequence, as "foo" will not have any address assigned at that point.
+
+ movq foo(%rip), %rax
+# CHECK-NOT: mismatching LLVM encoding detected
+
+ ret
+ .cfi_endproc
+ .size _start, .-_start
+
+ .globl foo
+ .type foo, %function
+foo:
+ .cfi_startproc
+ ret
+ .cfi_endproc
+ .size foo, .-foo