* one operand with symbolic information. To determine the symbolic operand
* information for each operand, the bytes for the specific operand in the
* instruction are specified by the Offset parameter and its byte widith is the
- * size parameter. For instructions sets with fixed widths and one symbolic
- * operand per instruction, the Offset parameter will be zero and Size parameter
- * will be the instruction width. The information is returned in TagBuf and is
- * Triple specific with its specific information defined by the value of
- * TagType for that Triple. If symbolic information is returned the function
- * returns 1, otherwise it returns 0.
+ * OpSize parameter. For instructions sets with fixed widths and one symbolic
+ * operand per instruction, the Offset parameter will be zero and InstSize
+ * parameter will be the instruction width. The information is returned in
+ * TagBuf and is Triple specific with its specific information defined by the
+ * value of TagType for that Triple. If symbolic information is returned the
+ * function * returns 1, otherwise it returns 0.
*/
-typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC,
- uint64_t Offset, uint64_t Size,
+typedef int (*LLVMOpInfoCallback)(void *DisInfo, uint64_t PC, uint64_t Offset,
+ uint64_t OpSize, uint64_t InstSize,
int TagType, void *TagBuf);
/**
public:
// Helpers around MCSymbolizer
- bool tryAddingSymbolicOperand(MCInst &Inst,
- int64_t Value,
- uint64_t Address, bool IsBranch,
- uint64_t Offset, uint64_t InstSize) const;
+ bool tryAddingSymbolicOperand(MCInst &Inst, int64_t Value, uint64_t Address,
+ bool IsBranch, uint64_t Offset, uint64_t OpSize,
+ uint64_t InstSize) const;
void tryAddingPcLoadReferenceComment(int64_t Value, uint64_t Address) const;
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
int64_t Value, uint64_t Address, bool IsBranch,
- uint64_t Offset, uint64_t InstSize) override;
+ uint64_t Offset, uint64_t OpSize,
+ uint64_t InstSize) override;
void tryAddingPcLoadReferenceComment(raw_ostream &CommentStream,
int64_t Value,
uint64_t Address) override;
/// \param Address - Load address of the instruction.
/// \param IsBranch - Is the instruction a branch?
/// \param Offset - Byte offset of the operand inside the inst.
+ /// \param OpSize - Size of the operand in bytes.
/// \param InstSize - Size of the instruction in bytes.
/// \return Whether a symbolic operand was added.
virtual bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
int64_t Value, uint64_t Address,
bool IsBranch, uint64_t Offset,
- uint64_t InstSize) = 0;
+ uint64_t OpSize, uint64_t InstSize) = 0;
/// Try to add a comment on the PC-relative load.
/// For instance, in Mach-O, this is used to add annotations to instructions
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
uint64_t Address, bool IsBranch,
- uint64_t Offset,
+ uint64_t Offset, uint64_t OpSize,
uint64_t InstSize) const {
if (Symbolizer)
- return Symbolizer->tryAddingSymbolicOperand(
- Inst, *CommentStream, Value, Address, IsBranch, Offset, InstSize);
+ return Symbolizer->tryAddingSymbolicOperand(Inst, *CommentStream, Value,
+ Address, IsBranch, Offset,
+ OpSize, InstSize);
return false;
}
// is found an MCExpr is created with that, else an MCExpr with Value is
// created. This function returns true if it adds an operand to the MCInst and
// false otherwise.
-bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI,
- raw_ostream &cStream,
- int64_t Value,
- uint64_t Address,
- bool IsBranch,
- uint64_t Offset,
- uint64_t InstSize) {
+bool MCExternalSymbolizer::tryAddingSymbolicOperand(
+ MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address,
+ bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
struct LLVMOpInfo1 SymbolicOp;
std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
SymbolicOp.Value = Value;
if (!GetOpInfo ||
- !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) {
+ !GetOpInfo(DisInfo, Address, Offset, OpSize, InstSize, 1, &SymbolicOp)) {
// Clear SymbolicOp.Value from above and also all other fields.
std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1));
// that always makes sense to guess. But in the case of an immediate it is
// a bit more questionable if it is an address of a symbol or some other
// reference. So if the immediate Value comes from a width of 1 byte,
- // InstSize, we will not guess it is an address of a symbol. Because in
+ // OpSize, we will not guess it is an address of a symbol. Because in
// object files assembled starting at address 0 this usually leads to
// incorrect symbolication.
- if (!SymbolLookUp || (InstSize == 1 && !IsBranch))
+ if (!SymbolLookUp || (OpSize == 1 && !IsBranch))
return false;
uint64_t ReferenceType;
ImmVal |= ~((1LL << 19) - 1);
if (!Decoder->tryAddingSymbolicOperand(
- Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 4))
+ Inst, ImmVal * 4, Addr, Inst.getOpcode() != AArch64::LDRXl, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(ImmVal));
return Success;
}
}
DecodeGPR64spRegisterClass(Inst, Rn, Addr, Decoder);
- if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, offset, Addr, Fail, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(offset));
return Success;
}
imm |= ~((1LL << 21) - 1);
DecodeGPR64RegisterClass(Inst, Rd, Addr, Decoder);
- if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, imm, Addr, Fail, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(imm));
return Success;
DecodeGPR32spRegisterClass(Inst, Rn, Addr, Decoder);
}
- if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, Imm, Addr, Fail, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(ImmVal));
Inst.addOperand(MCOperand::createImm(12 * ShifterVal));
return Success;
if (imm & (1 << (26 - 1)))
imm |= ~((1LL << 26) - 1);
- if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, imm * 4, Addr, true, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(imm));
return Success;
else
DecodeGPR64RegisterClass(Inst, Rt, Addr, Decoder);
Inst.addOperand(MCOperand::createImm(bit));
- if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 4))
+ if (!Decoder->tryAddingSymbolicOperand(Inst, dst * 4, Addr, true, 0, 0, 4))
Inst.addOperand(MCOperand::createImm(dst));
return Success;
/// an operand to the MCInst and Fail otherwise.
bool AArch64ExternalSymbolizer::tryAddingSymbolicOperand(
MCInst &MI, raw_ostream &CommentStream, int64_t Value, uint64_t Address,
- bool IsBranch, uint64_t Offset, uint64_t InstSize) {
+ bool IsBranch, uint64_t Offset, uint64_t OpSize, uint64_t InstSize) {
if (!SymbolLookUp)
return false;
// FIXME: This method shares a lot of code with
SymbolicOp.Value = Value;
uint64_t ReferenceType;
const char *ReferenceName;
- if (!GetOpInfo ||
- !GetOpInfo(DisInfo, Address, 0 /* Offset */, InstSize, 1, &SymbolicOp)) {
+ if (!GetOpInfo || !GetOpInfo(DisInfo, Address, /*Offset=*/0, OpSize, InstSize,
+ 1, &SymbolicOp)) {
if (IsBranch) {
ReferenceType = LLVMDisassembler_ReferenceType_In_Branch;
const char *Name = SymbolLookUp(DisInfo, Address + Value, &ReferenceType,
bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &CommentStream,
int64_t Value, uint64_t Address, bool IsBranch,
- uint64_t Offset, uint64_t InstSize) override;
+ uint64_t Offset, uint64_t OpSize,
+ uint64_t InstSize) override;
};
} // namespace llvm
APInt SignedOffset(18, Imm * 4, true);
int64_t Offset = (SignedOffset.sext(64) + 4 + Addr).getSExtValue();
- if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2))
+ if (DAsm->tryAddingSymbolicOperand(Inst, Offset, Addr, true, 2, 2, 0))
return MCDisassembler::Success;
return addOperand(Inst, MCOperand::createImm(Imm));
}
//===----------------------------------------------------------------------===//
// Try to find symbol name for specified label
-bool AMDGPUSymbolizer::tryAddingSymbolicOperand(MCInst &Inst,
- raw_ostream &/*cStream*/, int64_t Value,
- uint64_t /*Address*/, bool IsBranch,
- uint64_t /*Offset*/, uint64_t /*InstSize*/) {
+bool AMDGPUSymbolizer::tryAddingSymbolicOperand(
+ MCInst &Inst, raw_ostream & /*cStream*/, int64_t Value,
+ uint64_t /*Address*/, bool IsBranch, uint64_t /*Offset*/,
+ uint64_t /*OpSize*/, uint64_t /*InstSize*/) {
if (!IsBranch) {
return false;
: MCSymbolizer(Ctx, std::move(RelInfo)), DisInfo(disInfo) {}
bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &cStream,
- int64_t Value, uint64_t Address,
- bool IsBranch, uint64_t Offset,
+ int64_t Value, uint64_t Address, bool IsBranch,
+ uint64_t Offset, uint64_t OpSize,
uint64_t InstSize) override;
void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
const MCDisassembler *Decoder) {
static const uint64_t AtLeast = 2;
return (nullptr != Decoder && Decoder->tryAddingSymbolicOperand(
- Inst, Value, Address, true, 0, AtLeast));
+ Inst, Value, Address, true, 0, AtLeast, 0));
}
static void DecodeSymbolicOperandOff(MCInst &Inst, uint64_t Address,
const MCDisassembler *Decoder) {
// FIXME: Does it make sense for value to be negative?
return Decoder->tryAddingSymbolicOperand(MI, (uint32_t)Value, Address,
- isBranch, /* Offset */ 0, InstSize);
+ isBranch, /*Offset=*/0, /*OpSize=*/0,
+ InstSize);
}
/// tryAddingPcLoadReferenceComment - trys to add a comment as to what is being
Bits = 15;
uint64_t FullValue = fullValue(Disassembler, MI, SignExtend64(tmp, Bits));
uint32_t Extended = FullValue + Address;
- if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 4))
+ if (!Disassembler.tryAddingSymbolicOperand(MI, Extended, Address, true, 0, 0,
+ 4))
HexagonMCInstrInfo::addConstant(MI, Extended, Disassembler.getContext());
return MCDisassembler::Success;
}
uint64_t Width, MCInst &MI,
const MCDisassembler *Decoder) {
return Decoder->tryAddingSymbolicOperand(MI, Value, Address, IsBranch, Offset,
- Width);
+ Width, /*InstSize=*/0);
}
static DecodeStatus decodeBranch(MCInst &MI, unsigned Insn, uint64_t Address,
uint64_t Width, MCInst &MI,
const MCDisassembler *Decoder) {
return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
- Width);
+ Width, /*InstSize=*/4);
}
static DecodeStatus DecodeCall(MCInst &MI, unsigned insn, uint64_t Address,
uint64_t Width, MCInst &MI,
const MCDisassembler *Decoder) {
return Decoder->tryAddingSymbolicOperand(MI, Value, Address, isBranch, Offset,
- Width);
+ Width, /*InstSize=*/0);
}
static DecodeStatus decodeRegisterClass(MCInst &Inst, uint64_t RegNo,
uint64_t pcrel = 0;
if (type == TYPE_REL) {
isBranch = true;
- pcrel = insn.startLocation +
- insn.immediateOffset + insn.immediateSize;
+ pcrel = insn.startLocation + insn.length;
switch (operand.encoding) {
default:
break;
break;
}
- if (!Dis->tryAddingSymbolicOperand(mcInst, immediate + pcrel,
- insn.startLocation, isBranch,
- insn.immediateOffset, insn.immediateSize))
+ if (!Dis->tryAddingSymbolicOperand(
+ mcInst, immediate + pcrel, insn.startLocation, isBranch,
+ insn.immediateOffset, insn.immediateSize, insn.length))
mcInst.addOperand(MCOperand::createImm(immediate));
if (type == TYPE_MOFFS) {
return true;
}
if (insn.mode == MODE_64BIT){
- pcrel = insn.startLocation +
- insn.displacementOffset + insn.displacementSize;
+ pcrel = insn.startLocation + insn.length;
Dis->tryAddingPcLoadReferenceComment(insn.displacement + pcrel,
insn.startLocation +
insn.displacementOffset);
mcInst.addOperand(baseReg);
mcInst.addOperand(scaleAmount);
mcInst.addOperand(indexReg);
+
+ const uint8_t dispSize =
+ (insn.eaDisplacement == EA_DISP_NONE) ? 0 : insn.displacementSize;
+
if (!Dis->tryAddingSymbolicOperand(
mcInst, insn.displacement + pcrel, insn.startLocation, false,
- insn.displacementOffset, insn.displacementSize))
+ insn.displacementOffset, dispSize, insn.length))
mcInst.addOperand(displacement);
mcInst.addOperand(segmentReg);
return false;
// value of TagType is currently 1 (for the LLVMOpInfo1 struct). If symbolic
// information is returned then this function returns 1 else it returns 0.
static int SymbolizerGetOpInfo(void *DisInfo, uint64_t Pc, uint64_t Offset,
- uint64_t Size, int TagType, void *TagBuf) {
+ uint64_t OpSize, uint64_t InstSize, int TagType,
+ void *TagBuf) {
struct DisassembleInfo *info = (struct DisassembleInfo *)DisInfo;
struct LLVMOpInfo1 *op_info = (struct LLVMOpInfo1 *)TagBuf;
uint64_t value = op_info->Value;
unsigned int Arch = info->O->getArch();
if (Arch == Triple::x86) {
- if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
+ if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
return 0;
}
if (Arch == Triple::x86_64) {
- if (Size != 1 && Size != 2 && Size != 4 && Size != 0)
+ if (OpSize != 1 && OpSize != 2 && OpSize != 4 && OpSize != 0)
return 0;
// For non MH_OBJECT types, like MH_KEXT_BUNDLE, Search the external
// relocation entries of a linked image (if any) for an entry that matches
// adds the Pc. But for x86_64 external relocation entries the Value
// is the offset from the external symbol.
if (info->O->getAnyRelocationPCRel(RE))
- op_info->Value -= Pc + Offset + Size;
+ op_info->Value -= Pc + InstSize;
const char *name =
unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
op_info->AddSymbol.Present = 1;
// adds the Pc. But for x86_64 external relocation entries the Value
// is the offset from the external symbol.
if (info->O->getAnyRelocationPCRel(RE))
- op_info->Value -= Pc + Offset + Size;
+ op_info->Value -= Pc + InstSize;
const char *name =
unwrapOrError(Symbol.getName(), info->O->getFileName()).data();
unsigned Type = info->O->getAnyRelocationType(RE);
return 0;
}
if (Arch == Triple::arm) {
- if (Offset != 0 || (Size != 4 && Size != 2))
+ if (Offset != 0 || (InstSize != 4 && InstSize != 2))
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
return 1;
}
if (Arch == Triple::aarch64) {
- if (Offset != 0 || Size != 4)
+ if (Offset != 0 || InstSize != 4)
return 0;
if (info->O->getHeader().filetype != MachO::MH_OBJECT) {
// TODO:
--- /dev/null
+set(LLVM_LINK_COMPONENTS
+ MC
+ MCDisassembler
+ Target
+ X86Desc
+ X86Disassembler
+ X86Info
+ )
+
+add_llvm_unittest(X86MCTests
+ X86MCDisassemblerTest.cpp
+ )
--- /dev/null
+//===- X86MCDisassemblerTest.cpp - Tests for X86 MCDisassembler -----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCDisassembler/MCSymbolizer.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/MC/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+namespace {
+
+struct Context {
+ const char *TripleName = "x86_64-unknown-elf";
+ std::unique_ptr<MCRegisterInfo> MRI;
+ std::unique_ptr<MCAsmInfo> MAI;
+ std::unique_ptr<MCContext> Ctx;
+ std::unique_ptr<MCSubtargetInfo> STI;
+ std::unique_ptr<MCDisassembler> DisAsm;
+
+ Context() {
+ LLVMInitializeX86TargetInfo();
+ LLVMInitializeX86TargetMC();
+ LLVMInitializeX86Disassembler();
+
+ // If we didn't build x86, do not run the test.
+ std::string Error;
+ const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
+ if (!TheTarget)
+ return;
+
+ MRI.reset(TheTarget->createMCRegInfo(TripleName));
+ MAI.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCTargetOptions()));
+ STI.reset(TheTarget->createMCSubtargetInfo(TripleName, "", ""));
+ Ctx = std::make_unique<MCContext>(Triple(TripleName), MAI.get(), MRI.get(),
+ STI.get());
+
+ DisAsm.reset(TheTarget->createMCDisassembler(*STI, *Ctx));
+ }
+
+ operator MCContext &() { return *Ctx; };
+};
+
+Context &getContext() {
+ static Context Ctxt;
+ return Ctxt;
+}
+
+class X86MCSymbolizerTest : public MCSymbolizer {
+public:
+ X86MCSymbolizerTest(MCContext &MC) : MCSymbolizer(MC, nullptr) {}
+ ~X86MCSymbolizerTest() {}
+
+ struct OpInfo {
+ int64_t Value = 0;
+ uint64_t Offset = 0;
+ uint64_t Size;
+ };
+ std::vector<OpInfo> Operands;
+ uint64_t InstructionSize = 0;
+
+ void reset() {
+ Operands.clear();
+ InstructionSize = 0;
+ }
+
+ bool tryAddingSymbolicOperand(MCInst &Inst, raw_ostream &CStream,
+ int64_t Value, uint64_t Address, bool IsBranch,
+ uint64_t Offset, uint64_t OpSize,
+ uint64_t InstSize) override {
+ Operands.push_back({Value, Offset, OpSize});
+ InstructionSize = InstSize;
+ return false;
+ }
+
+ void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+ uint64_t Address) override {}
+};
+
+} // namespace
+
+TEST(X86Disassembler, X86MCSymbolizerTest) {
+ X86MCSymbolizerTest *TestSymbolizer = new X86MCSymbolizerTest(getContext());
+ getContext().DisAsm->setSymbolizer(
+ std::unique_ptr<MCSymbolizer>(TestSymbolizer));
+
+ MCDisassembler::DecodeStatus Status;
+ MCInst Inst;
+ uint64_t InstSize;
+
+ auto checkBytes = [&](ArrayRef<uint8_t> Bytes) {
+ TestSymbolizer->reset();
+ Status =
+ getContext().DisAsm->getInstruction(Inst, InstSize, Bytes, 0, nulls());
+ ASSERT_TRUE(Status == MCDisassembler::Success);
+ EXPECT_EQ(TestSymbolizer->InstructionSize, InstSize);
+ };
+
+ auto checkOperand = [&](size_t OpNo, int64_t Value, uint64_t Offset,
+ uint64_t Size) {
+ ASSERT_TRUE(TestSymbolizer->Operands.size() > OpNo);
+ EXPECT_EQ(TestSymbolizer->Operands[OpNo].Value, Value);
+ EXPECT_EQ(TestSymbolizer->Operands[OpNo].Offset, Offset);
+ EXPECT_EQ(TestSymbolizer->Operands[OpNo].Size, Size);
+ };
+
+ // movq $0x80000, 0x80000
+ checkBytes(
+ {0x48, 0xc7, 0x04, 0x25, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x08, 0x00});
+ checkOperand(0, 0x80000, 4, 4);
+ checkOperand(1, 0x80000, 8, 4);
+
+ // movq $0x2a, 0x123(%rax,%r14,8)
+ checkBytes(
+ {0x4a, 0xc7, 0x84, 0xf0, 0x23, 0x01, 0x00, 0x00, 0x2a, 0x00, 0x00, 0x00});
+ checkOperand(0, 291, 4, 4);
+ checkOperand(1, 42, 8, 4);
+
+ // movq $0xffffffffffffefe8, -0x1(%rip)
+ // Test that the value of the rip-relative operand is set correctly.
+ // The instruction address is 0 and the size is 12 bytes.
+ checkBytes(
+ {0x48, 0xc7, 0x05, 0xff, 0xff, 0xff, 0xff, 0xe8, 0xef, 0xff, 0xff});
+ checkOperand(0, /*next instr address*/ 11 - /*disp*/ 1, 3, 4);
+ checkOperand(1, 0xffffffffffffefe8, 7, 4);
+
+ // movq $0xfffffffffffffef5, (%r12)
+ // Test that the displacement operand has a size of 0, since it is not
+ // explicitly specified in the instruction.
+ checkBytes({0x49, 0xc7, 0x04, 0x24, 0xf5, 0xfe, 0xff, 0xff});
+ checkOperand(0, 0, 4, 0);
+ checkOperand(1, 0xfffffffffffffef5, 4, 4);
+}