From 5afbc1cda7a63bf3084e4fc9d74d5530ca8acfd9 Mon Sep 17 00:00:00 2001 From: Kevin Enderby Date: Wed, 23 Mar 2016 20:27:00 +0000 Subject: [PATCH] =?utf8?q?Fix=20a=20crash=20in=20running=20llvm-objdump=20?= =?utf8?q?-t=20with=20an=20invalid=20Mach-O=20file=20already=20in=20the=20?= =?utf8?q?test=20suite.=20While=20this=20is=20not=20really=20an=20interest?= =?utf8?q?ing=20tool=20and=20option=20to=20run=20on=20a=20Mach-O=20file=20?= =?utf8?q?to=20show=20the=20symbol=20table=20in=20a=20generic=20libObject?= =?utf8?q?=20format=20it=20shouldn=E2=80=99t=20crash.?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit The reason for the crash was in MachOObjectFile::getSymbolType() when it was calling MachOObjectFile::getSymbolSection() without checking its return value for the error case. What makes this fix require a fair bit of diffs is that the method getSymbolType() is in the class ObjectFile defined without an ErrorOr<> so I needed to add that all the sub classes.  And all of the uses needed to be updated and the return value needed to be checked for the error case. The MachOObjectFile version of getSymbolType() “can” get an error in trying to come up with the libObject’s internal SymbolRef::Type when the Mach-O symbol symbol type is an N_SECT type because the code is trying to select from the SymbolRef::ST_Data or SymbolRef::ST_Function values for the SymbolRef::Type. And it needs the Mach-O section to use isData() and isBSS to determine if it will return SymbolRef::ST_Data. One other possible fix I considered is to simply return SymbolRef::ST_Other when MachOObjectFile::getSymbolSection() returned an error. But since in the past when I did such changes that “ate an error in the libObject code” I was asked instead to push the error out of the libObject code I chose not to implement the fix this way. As currently written both the COFF and ELF versions of getSymbolType() can’t get an error. But if isReservedSectionNumber() wanted to check for the two known negative values rather than allowing all negative values or the code wanted to add the same check as in getSymbolAddress() to use getSection() and check for the error then these versions of getSymbolType() could return errors. At the end of the day the error printed now is the generic “Invalid data was encountered while parsing the file” for object_error::parse_failed. In the future when we thread Lang’s new TypedError for recoverable error handling though libObject this will improve. And where the added // Diagnostic(… comment is, it would be changed to produce and error message like “bad section index (42) for symbol at index 8” for this case. llvm-svn: 264187 --- llvm/include/llvm/Object/COFF.h | 2 +- llvm/include/llvm/Object/ELFObjectFile.h | 5 ++-- llvm/include/llvm/Object/MachO.h | 2 +- llvm/include/llvm/Object/ObjectFile.h | 6 ++-- .../DebugInfo/Symbolize/SymbolizableObjectFile.cpp | 5 +++- .../ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 4 ++- .../ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 5 +++- llvm/lib/Object/COFFObjectFile.cpp | 2 +- llvm/lib/Object/MachOObjectFile.cpp | 13 +++++++-- llvm/test/Object/macho-invalid.test | 3 ++ llvm/tools/dsymutil/MachODebugMapParser.cpp | 5 +++- llvm/tools/llvm-objdump/MachODump.cpp | 32 ++++++++++++++++++---- llvm/tools/llvm-objdump/llvm-objdump.cpp | 4 ++- llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp | 5 +++- llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp | 6 +++- 15 files changed, 75 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/Object/COFF.h b/llvm/include/llvm/Object/COFF.h index 4aad87c..5ae5805 100644 --- a/llvm/include/llvm/Object/COFF.h +++ b/llvm/include/llvm/Object/COFF.h @@ -684,7 +684,7 @@ protected: uint64_t getSymbolValueImpl(DataRefImpl Symb) const override; uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; uint32_t getSymbolFlags(DataRefImpl Symb) const override; - SymbolRef::Type getSymbolType(DataRefImpl Symb) const override; + ErrorOr getSymbolType(DataRefImpl Symb) const override; ErrorOr getSymbolSection(DataRefImpl Symb) const override; void moveSectionNext(DataRefImpl &Sec) const override; std::error_code getSectionName(DataRefImpl Sec, diff --git a/llvm/include/llvm/Object/ELFObjectFile.h b/llvm/include/llvm/Object/ELFObjectFile.h index b01fa1d..efff38f 100644 --- a/llvm/include/llvm/Object/ELFObjectFile.h +++ b/llvm/include/llvm/Object/ELFObjectFile.h @@ -205,7 +205,7 @@ protected: uint32_t getSymbolFlags(DataRefImpl Symb) const override; uint8_t getSymbolOther(DataRefImpl Symb) const override; uint8_t getSymbolELFType(DataRefImpl Symb) const override; - SymbolRef::Type getSymbolType(DataRefImpl Symb) const override; + ErrorOr getSymbolType(DataRefImpl Symb) const override; ErrorOr getSymbolSection(const Elf_Sym *Symb, const Elf_Shdr *SymTab) const; ErrorOr getSymbolSection(DataRefImpl Symb) const override; @@ -445,7 +445,8 @@ uint8_t ELFObjectFile::getSymbolELFType(DataRefImpl Symb) const { } template -SymbolRef::Type ELFObjectFile::getSymbolType(DataRefImpl Symb) const { +ErrorOr +ELFObjectFile::getSymbolType(DataRefImpl Symb) const { const Elf_Sym *ESym = getSymbol(Symb); switch (ESym->getType()) { diff --git a/llvm/include/llvm/Object/MachO.h b/llvm/include/llvm/Object/MachO.h index 252d47d..fc88b18 100644 --- a/llvm/include/llvm/Object/MachO.h +++ b/llvm/include/llvm/Object/MachO.h @@ -208,7 +208,7 @@ public: ErrorOr getSymbolAddress(DataRefImpl Symb) const override; uint32_t getSymbolAlignment(DataRefImpl Symb) const override; uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const override; - SymbolRef::Type getSymbolType(DataRefImpl Symb) const override; + ErrorOr getSymbolType(DataRefImpl Symb) const override; uint32_t getSymbolFlags(DataRefImpl Symb) const override; ErrorOr getSymbolSection(DataRefImpl Symb) const override; unsigned getSymbolSectionID(SymbolRef Symb) const; diff --git a/llvm/include/llvm/Object/ObjectFile.h b/llvm/include/llvm/Object/ObjectFile.h index 6fe7385..6aeeb87 100644 --- a/llvm/include/llvm/Object/ObjectFile.h +++ b/llvm/include/llvm/Object/ObjectFile.h @@ -143,7 +143,7 @@ public: /// @brief Get the alignment of this symbol as the actual value (not log 2). uint32_t getAlignment() const; uint64_t getCommonSize() const; - SymbolRef::Type getType() const; + ErrorOr getType() const; /// @brief Get section this symbol is defined in reference to. Result is /// end_sections() if it is undefined or is an absolute symbol. @@ -201,7 +201,7 @@ protected: virtual uint64_t getSymbolValueImpl(DataRefImpl Symb) const = 0; virtual uint32_t getSymbolAlignment(DataRefImpl Symb) const; virtual uint64_t getCommonSymbolSizeImpl(DataRefImpl Symb) const = 0; - virtual SymbolRef::Type getSymbolType(DataRefImpl Symb) const = 0; + virtual ErrorOr getSymbolType(DataRefImpl Symb) const = 0; virtual ErrorOr getSymbolSection(DataRefImpl Symb) const = 0; @@ -329,7 +329,7 @@ inline ErrorOr SymbolRef::getSection() const { return getObject()->getSymbolSection(getRawDataRefImpl()); } -inline SymbolRef::Type SymbolRef::getType() const { +inline ErrorOr SymbolRef::getType() const { return getObject()->getSymbolType(getRawDataRefImpl()); } diff --git a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp index e314624..59efdef 100644 --- a/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp +++ b/llvm/lib/DebugInfo/Symbolize/SymbolizableObjectFile.cpp @@ -119,7 +119,10 @@ std::error_code SymbolizableObjectFile::addSymbol(const SymbolRef &Symbol, uint64_t SymbolSize, DataExtractor *OpdExtractor, uint64_t OpdAddress) { - SymbolRef::Type SymbolType = Symbol.getType(); + ErrorOr SymbolTypeOrErr = Symbol.getType(); + if (auto EC = SymbolTypeOrErr.getError()) + return EC; + SymbolRef::Type SymbolType = *SymbolTypeOrErr; if (SymbolType != SymbolRef::ST_Function && SymbolType != SymbolRef::ST_Data) return std::error_code(); ErrorOr SymbolAddressOrErr = Symbol.getAddress(); diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index d16b2db..f9c2b3f 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -169,7 +169,9 @@ RuntimeDyldImpl::loadObjectImpl(const object::ObjectFile &Obj) { if (Flags & SymbolRef::SF_Common) CommonSymbols.push_back(*I); else { - object::SymbolRef::Type SymType = I->getType(); + ErrorOr SymTypeOrErr = I->getType(); + Check(SymTypeOrErr.getError()); + object::SymbolRef::Type SymType = *SymTypeOrErr; // Get symbol name. ErrorOr NameOrErr = I->getName(); diff --git a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index 539e48b..1668867 100644 --- a/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -1190,7 +1190,10 @@ relocation_iterator RuntimeDyldELF::processRelocationRef( RTDyldSymbolTable::const_iterator gsi = GlobalSymbolTable.end(); if (Symbol != Obj.symbol_end()) { gsi = GlobalSymbolTable.find(TargetName.data()); - SymType = Symbol->getType(); + ErrorOr SymTypeOrErr = Symbol->getType(); + if (std::error_code EC = SymTypeOrErr.getError()) + report_fatal_error(EC.message()); + SymType = *SymTypeOrErr; } if (gsi != GlobalSymbolTable.end()) { const auto &SymInfo = gsi->second; diff --git a/llvm/lib/Object/COFFObjectFile.cpp b/llvm/lib/Object/COFFObjectFile.cpp index 2514547..3d14408 100644 --- a/llvm/lib/Object/COFFObjectFile.cpp +++ b/llvm/lib/Object/COFFObjectFile.cpp @@ -179,7 +179,7 @@ ErrorOr COFFObjectFile::getSymbolAddress(DataRefImpl Ref) const { return Result; } -SymbolRef::Type COFFObjectFile::getSymbolType(DataRefImpl Ref) const { +ErrorOr COFFObjectFile::getSymbolType(DataRefImpl Ref) const { COFFSymbolRef Symb = getCOFFSymbol(Ref); int32_t SectionNumber = Symb.getSectionNumber(); diff --git a/llvm/lib/Object/MachOObjectFile.cpp b/llvm/lib/Object/MachOObjectFile.cpp index 3a892d2..340faa4 100644 --- a/llvm/lib/Object/MachOObjectFile.cpp +++ b/llvm/lib/Object/MachOObjectFile.cpp @@ -443,7 +443,8 @@ uint64_t MachOObjectFile::getCommonSymbolSizeImpl(DataRefImpl DRI) const { return getNValue(DRI); } -SymbolRef::Type MachOObjectFile::getSymbolType(DataRefImpl Symb) const { +ErrorOr +MachOObjectFile::getSymbolType(DataRefImpl Symb) const { MachO::nlist_base Entry = getSymbolTableEntryBase(this, Symb); uint8_t n_type = Entry.n_type; @@ -455,7 +456,10 @@ SymbolRef::Type MachOObjectFile::getSymbolType(DataRefImpl Symb) const { case MachO::N_UNDF : return SymbolRef::ST_Unknown; case MachO::N_SECT : - section_iterator Sec = *getSymbolSection(Symb); + ErrorOr SecOrError = getSymbolSection(Symb); + if (!SecOrError) + return SecOrError.getError(); + section_iterator Sec = *SecOrError; if (Sec->isData() || Sec->isBSS()) return SymbolRef::ST_Data; return SymbolRef::ST_Function; @@ -511,8 +515,11 @@ MachOObjectFile::getSymbolSection(DataRefImpl Symb) const { return section_end(); DataRefImpl DRI; DRI.d.a = index - 1; - if (DRI.d.a >= Sections.size()) + if (DRI.d.a >= Sections.size()){ + // Diagnostic("bad section index (" + index + ") for symbol at index " + + // SymbolIndex); return object_error::parse_failed; + } return section_iterator(SectionRef(DRI, this)); } diff --git a/llvm/test/Object/macho-invalid.test b/llvm/test/Object/macho-invalid.test index cbd378a..9117ebe 100644 --- a/llvm/test/Object/macho-invalid.test +++ b/llvm/test/Object/macho-invalid.test @@ -54,6 +54,9 @@ INVALID-SECTION-IDX-SYMBOL-SEC-m: 0000000100000000 (?,?) [referenced dynamically RUN: llvm-nm -pax %p/Inputs/macho-invalid-section-index-getSectionRawName 2>&1 \ RUN: | FileCheck -check-prefix INVALID-SECTION-IDX-SYMBOL-SEC-pax %s INVALID-SECTION-IDX-SYMBOL-SEC-pax: 0000000100000000 0f 42 0010 00000065 __mh_execute_header +RUN: not llvm-objdump -t %p/Inputs/macho-invalid-section-index-getSectionRawName 2>&1 \ +RUN: | FileCheck -check-prefix INVALID-SECTION-IDX-SYMBOL-SEC-objdump %s +INVALID-SECTION-IDX-SYMBOL-SEC-objdump: Invalid data was encountered while parsing the file. RUN: not llvm-objdump -private-headers %p/Inputs/macho-invalid-header 2>&1 | FileCheck -check-prefix INVALID-HEADER %s INVALID-HEADER: The file was not recognized as a valid object file. diff --git a/llvm/tools/dsymutil/MachODebugMapParser.cpp b/llvm/tools/dsymutil/MachODebugMapParser.cpp index 02c3ab0..2952994 100644 --- a/llvm/tools/dsymutil/MachODebugMapParser.cpp +++ b/llvm/tools/dsymutil/MachODebugMapParser.cpp @@ -437,7 +437,10 @@ void MachODebugMapParser::loadMainBinarySymbols( section_iterator Section = MainBinary.section_end(); MainBinarySymbolAddresses.clear(); for (const auto &Sym : MainBinary.symbols()) { - SymbolRef::Type Type = Sym.getType(); + ErrorOr TypeOrErr = Sym.getType(); + if (!TypeOrErr) + continue; + SymbolRef::Type Type = *TypeOrErr; // Skip undefined and STAB entries. if ((Type & SymbolRef::ST_Debug) || (Type & SymbolRef::ST_Unknown)) continue; diff --git a/llvm/tools/llvm-objdump/MachODump.cpp b/llvm/tools/llvm-objdump/MachODump.cpp index 318d837..428b737 100644 --- a/llvm/tools/llvm-objdump/MachODump.cpp +++ b/llvm/tools/llvm-objdump/MachODump.cpp @@ -172,8 +172,16 @@ static const Target *GetTarget(const MachOObjectFile *MachOObj, struct SymbolSorter { bool operator()(const SymbolRef &A, const SymbolRef &B) { - uint64_t AAddr = (A.getType() != SymbolRef::ST_Function) ? 0 : A.getValue(); - uint64_t BAddr = (B.getType() != SymbolRef::ST_Function) ? 0 : B.getValue(); + ErrorOr ATypeOrErr = A.getType(); + if (std::error_code EC = ATypeOrErr.getError()) + report_fatal_error(EC.message()); + SymbolRef::Type AType = *ATypeOrErr; + ErrorOr BTypeOrErr = B.getType(); + if (std::error_code EC = BTypeOrErr.getError()) + report_fatal_error(EC.message()); + SymbolRef::Type BType = *ATypeOrErr; + uint64_t AAddr = (AType != SymbolRef::ST_Function) ? 0 : A.getValue(); + uint64_t BAddr = (BType != SymbolRef::ST_Function) ? 0 : B.getValue(); return AAddr < BAddr; } }; @@ -573,7 +581,10 @@ static void CreateSymbolAddressMap(MachOObjectFile *O, SymbolAddressMap *AddrMap) { // Create a map of symbol addresses to symbol names. for (const SymbolRef &Symbol : O->symbols()) { - SymbolRef::Type ST = Symbol.getType(); + ErrorOr STOrErr = Symbol.getType(); + if (std::error_code EC = STOrErr.getError()) + report_fatal_error(EC.message()); + SymbolRef::Type ST = *STOrErr; if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data || ST == SymbolRef::ST_Other) { uint64_t Address = Symbol.getValue(); @@ -6083,7 +6094,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, SymbolAddressMap AddrMap; bool DisSymNameFound = false; for (const SymbolRef &Symbol : MachOOF->symbols()) { - SymbolRef::Type ST = Symbol.getType(); + ErrorOr STOrErr = Symbol.getType(); + if (std::error_code EC = STOrErr.getError()) + report_fatal_error(EC.message()); + SymbolRef::Type ST = *STOrErr; if (ST == SymbolRef::ST_Function || ST == SymbolRef::ST_Data || ST == SymbolRef::ST_Other) { uint64_t Address = Symbol.getValue(); @@ -6134,7 +6148,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, report_fatal_error(EC.message()); StringRef SymName = *SymNameOrErr; - SymbolRef::Type ST = Symbols[SymIdx].getType(); + ErrorOr STOrErr = Symbols[SymIdx].getType(); + if (std::error_code EC = STOrErr.getError()) + report_fatal_error(EC.message()); + SymbolRef::Type ST = *STOrErr; if (ST != SymbolRef::ST_Function && ST != SymbolRef::ST_Data) continue; @@ -6158,7 +6175,10 @@ static void DisassembleMachO(StringRef Filename, MachOObjectFile *MachOOF, uint64_t NextSym = 0; uint64_t NextSymIdx = SymIdx + 1; while (Symbols.size() > NextSymIdx) { - SymbolRef::Type NextSymType = Symbols[NextSymIdx].getType(); + ErrorOr STOrErr = Symbols[NextSymIdx].getType(); + if (std::error_code EC = STOrErr.getError()) + report_fatal_error(EC.message()); + SymbolRef::Type NextSymType = *STOrErr; if (NextSymType == SymbolRef::ST_Function) { containsNextSym = Sections[SectIdx].containsSymbol(Symbols[NextSymIdx]); diff --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp index f436183..28d667a 100644 --- a/llvm/tools/llvm-objdump/llvm-objdump.cpp +++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp @@ -1293,7 +1293,9 @@ void llvm::PrintSymbolTable(const ObjectFile *o) { ErrorOr AddressOrError = Symbol.getAddress(); error(AddressOrError.getError()); uint64_t Address = *AddressOrError; - SymbolRef::Type Type = Symbol.getType(); + ErrorOr TypeOrError = Symbol.getType(); + error(TypeOrError.getError()); + SymbolRef::Type Type = *TypeOrError; uint32_t Flags = Symbol.getFlags(); ErrorOr SectionOrErr = Symbol.getSection(); error(SectionOrErr.getError()); diff --git a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp index 76387c7..0f74a99 100644 --- a/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp +++ b/llvm/tools/llvm-readobj/ARMWinEHPrinter.cpp @@ -198,7 +198,10 @@ Decoder::getSectionContaining(const COFFObjectFile &COFF, uint64_t VA) { ErrorOr Decoder::getSymbol(const COFFObjectFile &COFF, uint64_t VA, bool FunctionOnly) { for (const auto &Symbol : COFF.symbols()) { - if (FunctionOnly && Symbol.getType() != SymbolRef::ST_Function) + ErrorOr Type = Symbol.getType(); + if (std::error_code EC = Type.getError()) + return EC; + if (FunctionOnly && *Type != SymbolRef::ST_Function) continue; ErrorOr Address = Symbol.getAddress(); diff --git a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp index 81b9c7d..3b5c182 100644 --- a/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp +++ b/llvm/tools/llvm-rtdyld/llvm-rtdyld.cpp @@ -330,7 +330,11 @@ static int printLineInfoForInput(bool LoadObjects, bool UseDebugObj) { // Use symbol info to iterate functions in the object. for (const auto &P : SymAddr) { object::SymbolRef Sym = P.first; - if (Sym.getType() == object::SymbolRef::ST_Function) { + ErrorOr TypeOrErr = Sym.getType(); + if (!TypeOrErr) + continue; + SymbolRef::Type Type = *TypeOrErr; + if (Type == object::SymbolRef::ST_Function) { ErrorOr Name = Sym.getName(); if (!Name) continue; -- 2.7.4