ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const = 0;
- /// May parse any prelude that precedes instructions after the start of a
- /// symbol. Needed for some targets, e.g. WebAssembly.
+ /// Used to perform separate target specific disassembly for a particular
+ /// symbol. May parse any prelude that precedes instructions after the
+ /// start of a symbol, or the entire symbol.
+ /// This is used for example by WebAssembly to decode preludes.
+ ///
+ /// The base implementation returns None, so by default no target handles
+ /// symbols separately.
///
/// \param Name - The name of the symbol.
/// \param Size - The number of bytes consumed.
/// \param Address - The address of the first byte of the symbol.
/// \param Bytes - A reference to the actual bytes at the symbol location.
/// \param CStream - The stream to print comments and annotations on.
- /// \return - MCDisassembler::Success if the bytes are valid,
- /// MCDisassembler::Fail if the bytes were invalid.
- virtual DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &CStream) const;
+ /// \return - MCDisassembler::Success if bytes are decoded
+ /// successfully. Size must hold the number of bytes that
+ /// were decoded.
+ /// - MCDisassembler::Fail if the bytes are invalid. Size
+ /// must hold the number of bytes that were decoded before
+ /// failing. The target must print nothing. This can be
+ /// done by buffering the output if needed.
+ /// - None if the target doesn't want to handle the symbol
+ /// separately. Value of Size is ignored in this case.
+ virtual Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CStream) const;
+ // TODO:
+ // Implement similar hooks that can be used at other points during
+ // disassembly. Something along the following lines:
+ // - onBeforeInstructionDecode()
+ // - onAfterInstructionDecode()
+ // - onSymbolEnd()
+ // This should help move much of the target-specific code from llvm-objdump
+ // into the respective target disassemblers.
private:
MCContext &Ctx;
MCDisassembler::~MCDisassembler() = default;
-MCDisassembler::DecodeStatus
+Optional<MCDisassembler::DecodeStatus>
MCDisassembler::onSymbolStart(StringRef Name, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const {
- Size = 0;
- return MCDisassembler::Success;
+ return None; // By default, no symbol gets target-specific handling.
}
bool MCDisassembler::tryAddingSymbolicOperand(MCInst &Inst, int64_t Value,
DecodeStatus getInstruction(MCInst &Instr, uint64_t &Size,
ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const override;
- DecodeStatus onSymbolStart(StringRef Name, uint64_t &Size,
- ArrayRef<uint8_t> Bytes, uint64_t Address,
- raw_ostream &CStream) const override;
+ Optional<DecodeStatus> onSymbolStart(StringRef Name, uint64_t &Size,
+ ArrayRef<uint8_t> Bytes,
+ uint64_t Address,
+ raw_ostream &CStream) const override;
public:
WebAssemblyDisassembler(const MCSubtargetInfo &STI, MCContext &Ctx,
return true;
}
-MCDisassembler::DecodeStatus WebAssemblyDisassembler::onSymbolStart(
+Optional<MCDisassembler::DecodeStatus> WebAssemblyDisassembler::onSymbolStart(
StringRef Name, uint64_t &Size, ArrayRef<uint8_t> Bytes, uint64_t Address,
raw_ostream &CStream) const {
Size = 0;
// Start of a code section: we're parsing only the function count.
int64_t FunctionCount;
if (!nextLEB(FunctionCount, Bytes, Size, false))
- return MCDisassembler::Fail;
+ return None;
outs() << " # " << FunctionCount << " functions in section.";
} else {
// Parse the start of a single function.
int64_t BodySize, LocalEntryCount;
if (!nextLEB(BodySize, Bytes, Size, false) ||
!nextLEB(LocalEntryCount, Bytes, Size, false))
- return MCDisassembler::Fail;
+ return None;
if (LocalEntryCount) {
outs() << " .local ";
for (int64_t I = 0; I < LocalEntryCount; I++) {
int64_t Count, Type;
if (!nextLEB(Count, Bytes, Size, false) ||
!nextLEB(Type, Bytes, Size, false))
- return MCDisassembler::Fail;
+ return None;
for (int64_t J = 0; J < Count; J++) {
if (I || J)
outs() << ", ";
continue;
}
- // Some targets (like WebAssembly) have a special prelude at the start
- // of each symbol.
- DisAsm->onSymbolStart(SymbolName, Size, Bytes.slice(Start, End - Start),
- SectionAddr + Start, CommentStream);
+ auto Status = DisAsm->onSymbolStart(SymbolName, Size,
+ Bytes.slice(Start, End - Start),
+ SectionAddr + Start, CommentStream);
+ // To have round trippable disassembly, we fall back to decoding the
+ // remaining bytes as instructions.
+ //
+ // If there is a failure, we disassemble the failed region as bytes before
+ // falling back. The target is expected to print nothing in this case.
+ //
+ // If the status is Success or SoftFail, i.e. there is no 'real' failure,
+ // we skip ahead by Size bytes before falling back.
+ // So if the entire symbol is 'eaten' by the target:
+ // Start += Size // Now Start = End and we will never decode as
+ // // instructions
+ //
+ // Right now, most targets return None, i.e. they do not treat any symbol
+ // separately. But WebAssembly decodes preludes for some symbols.
+ //
+ if (Status.hasValue()) {
+   if (Status.getValue() == MCDisassembler::Fail) {
+     outs() << "// Error in decoding " << SymbolName
+            << " : Decoding failed region as bytes.\n";
+     // The target was handed Bytes.slice(Start, End - Start), so the failed
+     // region begins at offset Start within Bytes, not at offset 0.
+     for (uint64_t I = 0; I < Size; ++I) {
+       outs() << "\t.byte\t "
+              << format_hex(Bytes[Start + I], 1, /*Upper=*/true) << "\n";
+     }
+   }
+ } else {
+   // None: the target does not handle this symbol separately, and the value
+   // of Size is to be ignored, so skip nothing.
+   Size = 0;
+ }
+
Start += Size;
Index = Start;