--- /dev/null
+# REQUIRES: x86-registered-target
+# RUN: llvm-mc -filetype=obj -triple=x86_64 %s -o %t
+# RUN: llvm-profgen --binary=%t --perfscript=%s --output=%t1 -show-disassembly -x86-asm-syntax=intel | FileCheck %s --match-full-lines
+
+# CHECK: Disassembly of section .text [0x0, 0x66]:
+# CHECK: <foo1>:
+# CHECK: 0: push rbp
+# CHECK: 1: mov rbp, rsp
+# CHECK: 4: sub rsp, 16
+# CHECK: 8: mov dword ptr [rbp - 4], 0
+# CHECK: f: mov edi, 1
+# CHECK: 14: call 0x19
+# CHECK: 19: mov edi, 2
+# CHECK: 1e: mov dword ptr [rbp - 8], eax
+# CHECK: 21: call 0x26
+# CHECK: 26: mov ecx, dword ptr [rbp - 8]
+# CHECK: 29: add ecx, eax
+# CHECK: 2b: mov eax, ecx
+# CHECK: 2d: add rsp, 16
+# CHECK: 31: pop rbp
+# CHECK: 32: ret
+
+# CHECK: <foo2>:
+# CHECK: 33: push rbp
+# CHECK: 34: mov rbp, rsp
+# CHECK: 37: sub rsp, 16
+# CHECK: 3b: mov dword ptr [rbp - 4], 0
+# CHECK: 42: mov edi, 1
+# CHECK: 47: call 0x4c
+# CHECK: 4c: mov edi, 2
+# CHECK: 51: mov dword ptr [rbp - 8], eax
+# CHECK: 54: call 0x59
+# CHECK: 59: mov ecx, dword ptr [rbp - 8]
+# CHECK: 5c: add ecx, eax
+# CHECK: 5e: mov eax, ecx
+# CHECK: 60: add rsp, 16
+# CHECK: 64: pop rbp
+# CHECK: 65: ret
+
+
+
+.section .text
+foo1:
+ pushq %rbp # frame setup
+ movq %rsp, %rbp
+ subq $16, %rsp # room for the two 4-byte stack slots used below
+ movl $0, -4(%rbp)
+ movl $1, %edi
+ callq _Z5funcAi # callee is not defined in this file; expected output shows the target as 0x19, the next instruction
+ movl $2, %edi
+ movl %eax, -8(%rbp) # keep the first call's result across the second call
+ callq _Z5funcBi # callee is not defined in this file; expected target is 0x26
+ movl -8(%rbp), %ecx
+ addl %eax, %ecx # sum of both call results
+ movl %ecx, %eax
+ addq $16, %rsp
+ popq %rbp
+ retq
+
+.section .text
+foo2:
+ pushq %rbp # same body as foo1, with the two callees swapped
+ movq %rsp, %rbp
+ subq $16, %rsp
+ movl $0, -4(%rbp)
+ movl $1, %edi
+ callq _Z5funcBi # callee is not defined in this file; expected target is 0x4c
+ movl $2, %edi
+ movl %eax, -8(%rbp) # keep the first call's result across the second call
+ callq _Z5funcAi # callee is not defined in this file; expected target is 0x59
+ movl -8(%rbp), %ecx
+ addl %eax, %ecx # sum of both call results
+ movl %ecx, %eax
+ addq $16, %rsp
+ popq %rbp
+ retq
+
+# CHECK: Disassembly of section .text.hot [0x0, 0x12]:
+# CHECK: <bar>:
+# CHECK: 0: push rbp
+# CHECK: 1: mov rbp, rsp
+# CHECK: 4: mov dword ptr [rbp - 4], edi
+# CHECK: 7: mov dword ptr [rbp - 8], esi
+# CHECK: a: mov eax, dword ptr [rbp - 4]
+# CHECK: d: add eax, dword ptr [rbp - 8]
+# CHECK: 10: pop rbp
+# CHECK: 11: ret
+
+.section .text.hot
+bar:
+ pushq %rbp # call-free function placed in its own text section
+ movq %rsp, %rbp
+ movl %edi, -4(%rbp) # spill %edi and %esi to the stack
+ movl %esi, -8(%rbp)
+ movl -4(%rbp), %eax
+ addl -8(%rbp), %eax # result: spilled %edi value plus spilled %esi value
+ popq %rbp
+ retq
+
+
+# CHECK: Disassembly of section .text.unlikely [0x0, 0x12]:
+# CHECK: <baz>:
+# CHECK: 0: push rbp
+# CHECK: 1: mov rbp, rsp
+# CHECK: 4: mov dword ptr [rbp - 4], edi
+# CHECK: 7: mov dword ptr [rbp - 8], esi
+# CHECK: a: mov eax, dword ptr [rbp - 4]
+# CHECK: d: sub eax, dword ptr [rbp - 8]
+# CHECK: 10: pop rbp
+# CHECK: 11: ret
+
+.section .text.unlikely
+baz:
+ pushq %rbp # same shape as bar, but subtracts instead of adding
+ movq %rsp, %rbp
+ movl %edi, -4(%rbp) # spill %edi and %esi to the stack
+ movl %esi, -8(%rbp)
+ movl -4(%rbp), %eax
+ subl -8(%rbp), %eax # result: spilled %edi value minus spilled %esi value
+ popq %rbp
+ retq
--- /dev/null
+//===-- ProfiledBinary.cpp - Binary decoder ---------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "ProfiledBinary.h"
+#include "ErrorHandling.h"
+#include "MCTargetDesc/X86MCTargetDesc.h"
+#include "llvm/ADT/Triple.h"
+#include "llvm/Demangle/Demangle.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/TargetRegistry.h"
+#include "llvm/Support/TargetSelect.h"
+#include "llvm/Support/X86TargetParser.h"
+
+#define DEBUG_TYPE "load-binary"
+
+using namespace llvm;
+
+static cl::opt<bool> ShowDisassembly("show-disassembly", cl::ReallyHidden,
+ cl::init(false), cl::ZeroOrMore,
+ cl::desc("Print disassembled code.")); // Off by default; gates the disassembly printing below.
+
+namespace llvm {
+namespace sampleprof {
+
+// Find the registered LLVM target that matches the triple of \p Obj. When the
+// registry has no such target, its error text is reported through
+// exitWithError together with the object's file name.
+static const Target *getTarget(const ObjectFile *Obj) {
+  // The architecture name is deliberately left empty, so the lookup is driven
+  // by the triple alone.
+  std::string ArchName;
+  std::string LookupError;
+  Triple ObjTriple = Obj->makeTriple();
+
+  const Target *Found =
+      TargetRegistry::lookupTarget(ArchName, ObjTriple, LookupError);
+  if (!Found)
+    exitWithError(LookupError, Obj->getFileName());
+  return Found;
+}
+
+// Search the program headers of \p Obj for the PT_LOAD segment whose virtual
+// address range [p_vaddr, p_vaddr + p_memsz) contains the start address of
+// \p Sec, and return that segment's p_paddr rounded down to the segment
+// alignment as the image load address for the section.
+//
+// \param Obj      ELF file whose program headers are scanned.
+// \param Sec      Section to locate.
+// \param FileName Passed to unwrapOrError when the program headers cannot be
+//                 read.
+// \returns the aligned p_paddr of the first matching segment, or 0 when no
+//          PT_LOAD segment contains the section.
+template <class ELFT>
+static uint64_t getELFImageLMAForSec(const ELFFile<ELFT> *Obj,
+                                     const object::ELFSectionRef &Sec,
+                                     StringRef FileName) {
+  const uint64_t SecAddr = Sec.getAddress();
+  const auto &PhdrRange = unwrapOrError(Obj->program_headers(), FileName);
+  for (const typename ELFT::Phdr &Phdr : PhdrRange) {
+    if (Phdr.p_type != ELF::PT_LOAD)
+      continue;
+    if (Phdr.p_vaddr > SecAddr || Phdr.p_vaddr + Phdr.p_memsz <= SecAddr)
+      continue;
+
+    // Segments will always be loaded at a page boundary. ELF defines a p_align
+    // of 0 or 1 as "no alignment constraint"; masking with ~(0 - 1) would
+    // wrongly collapse the address to 0, so return p_paddr unchanged then.
+    const uint64_t Align = Phdr.p_align;
+    if (Align <= 1)
+      return Phdr.p_paddr;
+    return Phdr.p_paddr & ~(Align - 1);
+  }
+  return 0;
+}
+
+// Return the image load address for one section. An image is loaded segment
+// by segment (a segment groups several sections), and the segments need not
+// be adjacent in memory, so the answer is computed per section. This overload
+// only dispatches on the concrete ELF flavour (32/64-bit, little/big endian)
+// of the object that owns the section.
+static uint64_t getELFImageLMAForSec(const object::ELFSectionRef &Sec) {
+  const auto *Owner = Sec.getObject();
+
+  if (const auto *Obj32LE = dyn_cast<ELF32LEObjectFile>(Owner))
+    return getELFImageLMAForSec(Obj32LE->getELFFile(), Sec,
+                                Obj32LE->getFileName());
+
+  if (const auto *Obj32BE = dyn_cast<ELF32BEObjectFile>(Owner))
+    return getELFImageLMAForSec(Obj32BE->getELFFile(), Sec,
+                                Obj32BE->getFileName());
+
+  if (const auto *Obj64LE = dyn_cast<ELF64LEObjectFile>(Owner))
+    return getELFImageLMAForSec(Obj64LE->getELFFile(), Sec,
+                                Obj64LE->getFileName());
+
+  // Only one flavour is left, hence the unconditional cast.
+  const auto *Obj64BE = cast<ELF64BEObjectFile>(Owner);
+  return getELFImageLMAForSec(Obj64BE->getELFFile(), Sec,
+                              Obj64BE->getFileName());
+}
+
+// Open the binary at Path, validate that it is an X86 ELF image, then record
+// the preferred base address and disassemble its text sections.
+void ProfiledBinary::load() {
+  // Open the file; a failure is routed through unwrapOrError.
+  OwningBinary<Binary> Owner = unwrapOrError(createBinary(Path), Path);
+
+  // Anything that is not an ELF object file is rejected.
+  auto *Obj = dyn_cast<ELFObjectFileBase>(Owner.getBinary());
+  if (!Obj)
+    exitWithError("not a valid Elf image", Path);
+
+  // Currently only X86 targets are supported.
+  TheTriple = Obj->makeTriple();
+  if (!TheTriple.isX86())
+    exitWithError("unsupported target", TheTriple.getTriple());
+  LLVM_DEBUG(dbgs() << "Loading " << Path << "\n");
+
+  // The base address has to be known first: disassembly records offsets
+  // relative to it.
+  setPreferredBaseAddress(Obj);
+
+  // Decode the text sections and build the address maps.
+  disassemble(Obj);
+
+  // TODO: decode other sections.
+}
+
+// Set PreferredBaseAddress to the image load address of the first text section
+// found in \p Obj. Reports "no text section found" through exitWithError when
+// the object has none.
+void ProfiledBinary::setPreferredBaseAddress(const ELFObjectFileBase *Obj) {
+  for (const SectionRef &Section : Obj->sections()) {
+    if (!Section.isText())
+      continue;
+    // The first text section decides; later ones are not examined.
+    PreferredBaseAddress = getELFImageLMAForSec(Section);
+    return;
+  }
+  exitWithError("no text section found", Obj->getFileName());
+}
+
+// Disassemble the code of the symbol at index \p SI of \p Symbols and record
+// what was decoded in the address maps.
+//
+// The symbol's code spans from its own address up to the next symbol's
+// address, or up to the end of \p Section for the last symbol. All recorded
+// offsets are relative to PreferredBaseAddress.
+//
+// \param SI      Index of the symbol inside \p Symbols.
+// \param Bytes   Contents of \p Section.
+// \param Symbols Symbols of \p Section; the caller sorts them beforehand.
+// \param Section Section that contains the symbol.
+// \returns false as soon as an instruction fails to decode, true otherwise
+//          (including when the symbol's range is empty).
+bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
+                                        SectionSymbolsTy &Symbols,
+                                        const SectionRef &Section) {
+  const uint64_t SectionOffset = Section.getAddress() - PreferredBaseAddress;
+  const uint64_t StartOffset = Symbols[SI].Addr - PreferredBaseAddress;
+  const bool HasNextSymbol = SI + 1 < Symbols.size();
+  const uint64_t EndOffset =
+      HasNextSymbol ? Symbols[SI + 1].Addr - PreferredBaseAddress
+                    : SectionOffset + Section.getSize();
+
+  // An empty range has nothing to decode and is not registered as a function.
+  if (EndOffset <= StartOffset)
+    return true;
+
+  const std::string SymbolName = Symbols[SI].Name.str();
+  if (ShowDisassembly)
+    outs() << '<' << SymbolName << ">:\n";
+
+  for (uint64_t Offset = StartOffset; Offset < EndOffset;) {
+    MCInst Inst;
+    uint64_t Size;
+    // Decode one instruction; the byte slice is section-relative while the
+    // address handed to the disassembler includes the preferred base.
+    const bool Decoded =
+        DisAsm->getInstruction(Inst, Size, Bytes.slice(Offset - SectionOffset),
+                               Offset + PreferredBaseAddress, nulls());
+    if (!Decoded)
+      return false;
+
+    if (ShowDisassembly) {
+      outs() << format("%8" PRIx64 ":", Offset);
+      IP->printInst(&Inst, Offset + Size, "", *STI, outs());
+      outs() << "\n";
+    }
+
+    // Record the instruction offset and classify calls and returns.
+    const MCInstrDesc &Desc = MII->get(Inst.getOpcode());
+    CodeAddrs.push_back(Offset);
+    if (Desc.isCall())
+      CallAddrs.insert(Offset);
+    else if (Desc.isReturn())
+      RetAddrs.insert(Offset);
+
+    Offset += Size;
+  }
+
+  if (ShowDisassembly)
+    outs() << "\n";
+
+  // Only a fully decoded symbol is registered as a function start.
+  FuncStartAddrMap[StartOffset] = SymbolName;
+  return true;
+}
+
+// Create the MC components needed to decode and print instructions of
+// \p Obj's target (register info, asm info, subtarget info, instruction info,
+// disassembler, instruction analysis and instruction printer) and store them
+// in the corresponding members. A component that comes back null is reported
+// through exitWithError, except for the instruction analysis (see below).
+void ProfiledBinary::setUpDisassembler(const ELFObjectFileBase *Obj) {
+  const Target *TheTarget = getTarget(Obj);
+  std::string TripleName = TheTriple.getTriple();
+  StringRef FileName = Obj->getFileName();
+
+  MRI.reset(TheTarget->createMCRegInfo(TripleName));
+  if (!MRI)
+    exitWithError("no register info for target " + TripleName, FileName);
+
+  MCTargetOptions MCOptions;
+  AsmInfo.reset(TheTarget->createMCAsmInfo(*MRI, TripleName, MCOptions));
+  if (!AsmInfo)
+    exitWithError("no assembly info for target " + TripleName, FileName);
+
+  // The CPU name is left empty; only the object's feature string is supplied.
+  SubtargetFeatures Features = Obj->getFeatures();
+  STI.reset(
+      TheTarget->createMCSubtargetInfo(TripleName, "", Features.getString()));
+  if (!STI)
+    exitWithError("no subtarget info for target " + TripleName, FileName);
+
+  MII.reset(TheTarget->createMCInstrInfo());
+  if (!MII)
+    exitWithError("no instruction info for target " + TripleName, FileName);
+
+  // NOTE(review): MOFI and Ctx are locals, yet DisAsm is created from Ctx and
+  // kept as a member. Confirm the disassembler never touches its context once
+  // this function has returned, or give the context a matching lifetime.
+  MCObjectFileInfo MOFI;
+  MCContext Ctx(AsmInfo.get(), MRI.get(), &MOFI);
+  MOFI.InitMCObjectFileInfo(Triple(TripleName), false, Ctx);
+  DisAsm.reset(TheTarget->createMCDisassembler(*STI, Ctx));
+  if (!DisAsm)
+    exitWithError("no disassembler for target " + TripleName, FileName);
+
+  // Not validated here; presumably a target may provide no instruction
+  // analysis. TODO confirm before dereferencing MIA anywhere.
+  MIA.reset(TheTarget->createMCInstrAnalysis(MII.get()));
+
+  int AsmPrinterVariant = AsmInfo->getAssemblerDialect();
+  IP.reset(TheTarget->createMCInstPrinter(Triple(TripleName), AsmPrinterVariant,
+                                          *AsmInfo, *MII, *MRI));
+  // The printer is dereferenced right below, so a missing printer must be
+  // reported like every other missing component instead of crashing.
+  if (!IP)
+    exitWithError("no instruction printer for target " + TripleName, FileName);
+  IP->setPrintBranchImmAsAddress(true);
+}
+
+// Disassemble every non-empty text section of \p Obj symbol by symbol. Along
+// the way the section is registered in TextSections and the per-instruction
+// maps are filled by dissassembleSymbol. With -show-disassembly the decoded
+// code is also printed to outs().
+void ProfiledBinary::disassemble(const ELFObjectFileBase *Obj) {
+  // The MC components must exist before any instruction can be decoded.
+  setUpDisassembler(Obj);
+
+  StringRef FileName = Obj->getFileName();
+
+  // Group the symbols by the section they belong to. The groups of the text
+  // sections are the candidates to disassemble.
+  std::map<SectionRef, SectionSymbolsTy> SymbolsBySection;
+  for (const SymbolRef &Sym : Obj->symbols()) {
+    const uint64_t SymAddr = unwrapOrError(Sym.getAddress(), FileName);
+    const StringRef SymName = unwrapOrError(Sym.getName(), FileName);
+    section_iterator Owner = unwrapOrError(Sym.getSection(), FileName);
+    // Symbols without an owning section are skipped.
+    if (Owner == Obj->section_end())
+      continue;
+    SymbolsBySection[*Owner].push_back(
+        SymbolInfoTy(SymAddr, SymName, ELF::STT_NOTYPE));
+  }
+
+  // Order each group; the stable sort keeps the output reproducible.
+  for (auto &Entry : SymbolsBySection)
+    stable_sort(Entry.second);
+
+  if (ShowDisassembly)
+    outs() << "\nDisassembly of " << FileName << ":\n";
+
+  // Walk the text sections one by one.
+  for (const SectionRef &Section : Obj->sections()) {
+    if (!Section.isText())
+      continue;
+
+    const uint64_t SectSize = Section.getSize();
+    if (SectSize == 0)
+      continue;
+    const uint64_t SectionOffset = Section.getAddress() - PreferredBaseAddress;
+
+    // Register the text section by its offset from the preferred base.
+    TextSections.insert({SectionOffset, SectSize});
+
+    if (ShowDisassembly) {
+      StringRef SectionName = unwrapOrError(Section.getName(), FileName);
+      outs() << "\nDisassembly of section " << SectionName;
+      outs() << " [" << format("0x%" PRIx64, SectionOffset) << ", "
+             << format("0x%" PRIx64, SectionOffset + SectSize) << "]:\n\n";
+    }
+
+    // Raw bytes of the section.
+    ArrayRef<uint8_t> Bytes =
+        arrayRefFromStringRef(unwrapOrError(Section.getContents(), FileName));
+
+    // Symbols of this section; an empty list is created when it has none.
+    SectionSymbolsTy &Symbols = SymbolsBySection[Section];
+
+    // Decode the section one symbol at a time.
+    for (std::size_t Idx = 0, Count = Symbols.size(); Idx != Count; ++Idx) {
+      if (!dissassembleSymbol(Idx, Bytes, Symbols, Section))
+        exitWithError("disassembling error", FileName);
+    }
+  }
+}
+} // end namespace sampleprof
+} // end namespace llvm
#ifndef LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
#define LLVM_TOOLS_LLVM_PROFGEN_PROFILEDBINARY_H
#include "llvm/ADT/StringRef.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCContext.h"
+#include "llvm/MC/MCDisassembler/MCDisassembler.h"
+#include "llvm/MC/MCInst.h"
+#include "llvm/MC/MCInstPrinter.h"
+#include "llvm/MC/MCInstrAnalysis.h"
+#include "llvm/MC/MCInstrInfo.h"
+#include "llvm/MC/MCObjectFileInfo.h"
+#include "llvm/MC/MCRegisterInfo.h"
+#include "llvm/MC/MCSubtargetInfo.h"
+#include "llvm/MC/MCTargetOptions.h"
+#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Support/Path.h"
+#include <set>
+#include <string>
+#include <unordered_map>
+#include <unordered_set>
+#include <vector>
+
+using namespace llvm::object;
namespace llvm {
namespace sampleprof {
class ProfiledBinary {
+ // Absolute path of the binary.
std::string Path;
+ // The target triple.
+ Triple TheTriple;
+ // The runtime base address that the executable sections are loaded at.
mutable uint64_t BaseAddress = 0;
+ // The preferred base address that the executable sections are loaded at.
+ uint64_t PreferredBaseAddress = 0;
+ // MC components used to disassemble and print instructions.
+ std::unique_ptr<const MCRegisterInfo> MRI;
+ std::unique_ptr<const MCAsmInfo> AsmInfo;
+ std::unique_ptr<const MCSubtargetInfo> STI;
+ std::unique_ptr<const MCInstrInfo> MII;
+ std::unique_ptr<MCDisassembler> DisAsm;
+ std::unique_ptr<const MCInstrAnalysis> MIA;
+ std::unique_ptr<MCInstPrinter> IP;
+ // A list of text sections sorted by start RVA and size. Used to check
+ // if a given RVA is a valid code address.
+ std::set<std::pair<uint64_t, uint64_t>> TextSections;
+ // Function offset to name mapping.
+ std::unordered_map<uint64_t, std::string> FuncStartAddrMap;
+ // An array of offsets of all instructions sorted in increasing order. The
+ // sorting is needed to fast advance to the next forward/backward instruction.
+ std::vector<uint64_t> CodeAddrs;
+ // A set of call instruction offsets. Used by virtual unwinding.
+ std::unordered_set<uint64_t> CallAddrs;
+ // A set of return instruction offsets. Used by virtual unwinding.
+ std::unordered_set<uint64_t> RetAddrs;
+
+ void setPreferredBaseAddress(const ELFObjectFileBase *O);
+
+ // Set up disassembler and related components.
+ void setUpDisassembler(const ELFObjectFileBase *Obj);
+
+ /// Disassemble the text sections and build various address maps.
+ void disassemble(const ELFObjectFileBase *O);
+
+ /// Helper function to disassemble the symbol and extract info for unwinding.
+ bool dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
+ SectionSymbolsTy &Symbols, const SectionRef &Section);
+
+ /// Decode the interesting parts of the binary and build internal data
+ /// structures. On high level, the parts of interest are:
+ /// 1. Text sections, including the main code section and the PLT
+ /// entries that will be used to handle cross-module call transitions.
+ /// 2. The .debug_line section, used by Dwarf-based profile generation.
+ /// 3. Pseudo probe related sections, used by probe-based profile
+ /// generation.
+ void load();
public:
ProfiledBinary(StringRef Path) : Path(Path) { load(); }
const StringRef getName() const { return llvm::sys::path::filename(Path); }
uint64_t getBaseAddress() const { return BaseAddress; }
void setBaseAddress(uint64_t Address) { BaseAddress = Address; }
-
-private:
- void load() {
- // TODO:
- }
};
} // end namespace sampleprof