From 382a6d756249795a8cd114624131b42fa4727a1e Mon Sep 17 00:00:00 2001 From: Ahmed Bougacha Date: Wed, 21 Aug 2013 07:28:07 +0000 Subject: [PATCH] MC: Refactor ObjectSymbolizer to make relocation/section info generation lazy. llvm-svn: 188878 --- llvm/include/llvm/MC/MCObjectSymbolizer.h | 27 ++-- llvm/lib/MC/MCObjectSymbolizer.cpp | 222 ++++++++++++++++-------------- 2 files changed, 136 insertions(+), 113 deletions(-) diff --git a/llvm/include/llvm/MC/MCObjectSymbolizer.h b/llvm/include/llvm/MC/MCObjectSymbolizer.h index 555cf51..7d70dfd 100644 --- a/llvm/include/llvm/MC/MCObjectSymbolizer.h +++ b/llvm/include/llvm/MC/MCObjectSymbolizer.h @@ -32,22 +32,14 @@ class MCObjectSymbolizer : public MCSymbolizer { protected: const object::ObjectFile *Obj; - typedef DenseMap AddrToRelocMap; - typedef std::vector SortedSectionList; - SortedSectionList SortedSections; - // Map a load address to the first relocation that applies there. As far as I // know, if there are several relocations at the exact same address, they are // related and the others can be determined from the first that was found in // the relocation table. For instance, on x86-64 mach-o, a SUBTRACTOR // relocation (referencing the minuend symbol) is followed by an UNSIGNED // relocation (referencing the subtrahend symbol). - AddrToRelocMap AddrToReloc; - - // Helpers around SortedSections. - SortedSectionList::const_iterator findSectionContaining(uint64_t Addr) const; - void insertSection(object::SectionRef Section); - + const object::RelocationRef *findRelocationAt(uint64_t Addr); + const object::SectionRef *findSectionContaining(uint64_t Addr); MCObjectSymbolizer(MCContext &Ctx, OwningPtr &RelInfo, const object::ObjectFile *Obj); @@ -56,9 +48,9 @@ public: /// \name Overridden MCSymbolizer methods: /// @{ bool tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream, - int64_t Value, - uint64_t Address, bool IsBranch, - uint64_t Offset, uint64_t InstSize); + int64_t Value, uint64_t Address, + bool IsBranch, uint64_t Offset, + uint64_t InstSize); void tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address); @@ -68,6 +60,15 @@ public: static MCObjectSymbolizer * createObjectSymbolizer(MCContext &Ctx, OwningPtr &RelInfo, const object::ObjectFile *Obj); + +private: + typedef DenseMap AddrToRelocMap; + typedef std::vector SortedSectionList; + SortedSectionList SortedSections; + AddrToRelocMap AddrToReloc; + + void buildSectionList(); + void buildRelocationByAddrMap(); }; } diff --git a/llvm/lib/MC/MCObjectSymbolizer.cpp b/llvm/lib/MC/MCObjectSymbolizer.cpp index 740e8b3..193342b 100644 --- a/llvm/lib/MC/MCObjectSymbolizer.cpp +++ b/llvm/lib/MC/MCObjectSymbolizer.cpp @@ -28,98 +28,51 @@ namespace { class MCMachObjectSymbolizer : public MCObjectSymbolizer { public: MCMachObjectSymbolizer(MCContext &Ctx, OwningPtr &RelInfo, - const object::MachOObjectFile *MachOOF) - : MCObjectSymbolizer(Ctx, RelInfo, MachOOF) - {} + const MachOObjectFile *MOOF) {} void tryAddingPcLoadReferenceComment(raw_ostream &cStream, - int64_t Value, uint64_t Address) { - AddrToRelocMap::iterator RI = AddrToReloc.find(Address); - if (RI != AddrToReloc.end()) { - const MCExpr *RelExpr = RelInfo->createExprForRelocation(RI->second); - if (!RelExpr || RelExpr->EvaluateAsAbsolute(Value) == false) - return; - } - uint64_t Addr = Value; - SortedSectionList::const_iterator SI = findSectionContaining(Addr); - if (SI != SortedSections.end()) { - const SectionRef &S = *SI; - StringRef Name; S.getName(Name); - uint64_t SAddr; S.getAddress(SAddr); - if (Name == "__cstring") { - StringRef Contents; - S.getContents(Contents); - Contents = Contents.substr(Addr - SAddr); - cStream << " ## literal pool for: " - << Contents.substr(0, Contents.find_first_of(0)); - } - } - } + int64_t Value, + uint64_t Address) LLVM_OVERRIDE; }; } // End unnamed namespace + +void MCMachObjectSymbolizer:: +tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, + uint64_t Address) { + if (const RelocationRef *R = findRelocationAt(Address)) { + const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R); + if (!RelExpr || RelExpr->EvaluateAsAbsolute(Value) == false) + return; + } + uint64_t Addr = Value; + if (const SectionRef *S = findSectionContaining(Addr)) { + StringRef Name; S->getName(Name); + uint64_t SAddr; S->getAddress(SAddr); + if (Name == "__cstring") { + StringRef Contents; + S->getContents(Contents); + Contents = Contents.substr(Addr - SAddr); + cStream << " ## literal pool for: " + << Contents.substr(0, Contents.find_first_of(0)); + } + } +} + //===- MCObjectSymbolizer -------------------------------------------------===// MCObjectSymbolizer::MCObjectSymbolizer(MCContext &Ctx, OwningPtr &RelInfo, const ObjectFile *Obj) : MCSymbolizer(Ctx, RelInfo), Obj(Obj), SortedSections(), AddrToReloc() { - error_code ec; - for (section_iterator SI = Obj->begin_sections(), - SE = Obj->end_sections(); - SI != SE; - SI.increment(ec)) { - if (ec) break; - - section_iterator RelSecI = SI->getRelocatedSection(); - if (RelSecI == Obj->end_sections()) - continue; - - uint64_t StartAddr; RelSecI->getAddress(StartAddr); - uint64_t Size; RelSecI->getSize(Size); - bool RequiredForExec; RelSecI->isRequiredForExecution(RequiredForExec); - if (RequiredForExec == false || Size == 0) - continue; - insertSection(*SI); - for (relocation_iterator RI = SI->begin_relocations(), - RE = SI->end_relocations(); - RI != RE; - RI.increment(ec)) { - if (ec) break; - // FIXME: libObject is inconsistent regarding error handling. The - // overwhelming majority of methods always return object_error::success, - // and assert for simple errors.. Here, ELFObjectFile::getRelocationOffset - // asserts when the file type isn't ET_REL. - // This workaround handles x86-64 elf, the only one that has a relocinfo. - uint64_t Offset; - if (Obj->isELF()) { - const ELF64LEObjectFile *ELFObj = dyn_cast(Obj); - if (ELFObj == 0) - break; - if (ELFObj->getELFFile()->getHeader()->e_type == ELF::ET_REL) { - RI->getOffset(Offset); - Offset += StartAddr; - } else { - RI->getAddress(Offset); - } - } else { - RI->getOffset(Offset); - Offset += StartAddr; - } - // At a specific address, only keep the first relocation. - if (AddrToReloc.find(Offset) == AddrToReloc.end()) - AddrToReloc[Offset] = *RI; - } - } } bool MCObjectSymbolizer:: tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream, int64_t Value, uint64_t Address, bool IsBranch, uint64_t Offset, uint64_t InstSize) { - AddrToRelocMap::iterator RI = AddrToReloc.find(Address + Offset); - if (RI != AddrToReloc.end()) { - if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(RI->second)) { + if (const RelocationRef *R = findRelocationAt(Address + Offset)) { + if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R)) { MI.addOperand(MCOperand::CreateExpr(RelExpr)); return true; } @@ -133,10 +86,8 @@ tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream, uint64_t UValue = Value; // FIXME: map instead of looping each time? error_code ec; - for (symbol_iterator SI = Obj->begin_symbols(), - SE = Obj->end_symbols(); - SI != SE; - SI.increment(ec)) { + for (symbol_iterator SI = Obj->begin_symbols(), SE = Obj->end_symbols(); + SI != SE; SI.increment(ec)) { if (ec) break; uint64_t SymAddr; SI->getAddress(SymAddr); uint64_t SymSize; SI->getSize(SymSize); @@ -166,13 +117,16 @@ tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value, uint64_t Address) { } +StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) { + return StringRef(); +} + MCObjectSymbolizer * MCObjectSymbolizer::createObjectSymbolizer(MCContext &Ctx, OwningPtr &RelInfo, const ObjectFile *Obj) { - if (const MachOObjectFile *MachOOF = dyn_cast(Obj)) { - return new MCMachObjectSymbolizer(Ctx, RelInfo, MachOOF); - } + if (const MachOObjectFile *MOOF = dyn_cast(Obj)) + return new MCMachObjectSymbolizer(Ctx, RelInfo, MOOF); return new MCObjectSymbolizer(Ctx, RelInfo, Obj); } @@ -183,32 +137,100 @@ static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) { return SAddr < Addr; } -MCObjectSymbolizer::SortedSectionList::const_iterator -MCObjectSymbolizer::findSectionContaining(uint64_t Addr) const { - SortedSectionList::const_iterator +const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) { + if (SortedSections.empty()) + buildSectionList(); + + SortedSectionList::iterator EndIt = SortedSections.end(), It = std::lower_bound(SortedSections.begin(), EndIt, Addr, SectionStartsBefore); if (It == EndIt) - return It; + return 0; uint64_t SAddr; It->getAddress(SAddr); uint64_t SSize; It->getSize(SSize); if (Addr >= SAddr + SSize) - return EndIt; - return It; + return 0; + return &*It; +} + +const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) { + if (AddrToReloc.empty()) + buildRelocationByAddrMap(); + + AddrToRelocMap::const_iterator RI = AddrToReloc.find(Addr); + if (RI == AddrToReloc.end()) + return 0; + return &RI->second; +} + +void MCObjectSymbolizer::buildSectionList() { + error_code ec; + for (section_iterator SI = Obj->begin_sections(), SE = Obj->end_sections(); + SI != SE; SI.increment(ec)) { + if (ec) break; + + bool RequiredForExec; SI->isRequiredForExecution(RequiredForExec); + if (RequiredForExec == false) + continue; + uint64_t SAddr; SI->getAddress(SAddr); + uint64_t SSize; SI->getSize(SSize); + SortedSectionList::iterator It = std::lower_bound(SortedSections.begin(), + SortedSections.end(), + SAddr, + SectionStartsBefore); + if (It != SortedSections.end()) { + uint64_t FoundSAddr; It->getAddress(FoundSAddr); + if (FoundSAddr < SAddr + SSize) + llvm_unreachable("Inserting overlapping sections"); + } + SortedSections.insert(It, *SI); + } } -void MCObjectSymbolizer::insertSection(SectionRef Sec) { - uint64_t SAddr; Sec.getAddress(SAddr); - uint64_t SSize; Sec.getSize(SSize); - SortedSectionList::iterator It = std::lower_bound(SortedSections.begin(), - SortedSections.end(), - SAddr, - SectionStartsBefore); - if (It != SortedSections.end()) { - uint64_t FoundSAddr; It->getAddress(FoundSAddr); - if (FoundSAddr < SAddr + SSize) - llvm_unreachable("Inserting overlapping sections"); +void MCObjectSymbolizer::buildRelocationByAddrMap() { + error_code ec; + for (section_iterator SI = Obj->begin_sections(), SE = Obj->end_sections(); + SI != SE; SI.increment(ec)) { + if (ec) break; + + section_iterator RelSecI = SI->getRelocatedSection(); + if (RelSecI == Obj->end_sections()) + continue; + + uint64_t StartAddr; RelSecI->getAddress(StartAddr); + uint64_t Size; RelSecI->getSize(Size); + bool RequiredForExec; RelSecI->isRequiredForExecution(RequiredForExec); + if (RequiredForExec == false || Size == 0) + continue; + for (relocation_iterator RI = SI->begin_relocations(), + RE = SI->end_relocations(); + RI != RE; + RI.increment(ec)) { + if (ec) break; + // FIXME: libObject is inconsistent regarding error handling. The + // overwhelming majority of methods always return object_error::success, + // and assert for simple errors.. Here, ELFObjectFile::getRelocationOffset + // asserts when the file type isn't ET_REL. + // This workaround handles x86-64 elf, the only one that has a relocinfo. + uint64_t Offset; + if (Obj->isELF()) { + const ELF64LEObjectFile *ELFObj = dyn_cast(Obj); + if (ELFObj == 0) + break; + if (ELFObj->getELFFile()->getHeader()->e_type == ELF::ET_REL) { + RI->getOffset(Offset); + Offset += StartAddr; + } else { + RI->getAddress(Offset); + } + } else { + RI->getOffset(Offset); + Offset += StartAddr; + } + // At a specific address, only keep the first relocation. + if (AddrToReloc.find(Offset) == AddrToReloc.end()) + AddrToReloc[Offset] = *RI; + } } - SortedSections.insert(It, Sec); } -- 2.7.4