From 001ba65375f79a76491677cc2de05637f15a7a57 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Mon, 3 May 2021 18:31:23 -0400 Subject: [PATCH] [lld-macho] De-templatize mach_header operations @thakis pointed out that `mach_header` and `mach_header_64` actually have the same set of (used) fields, with the 64-bit version having extra padding. So we can access the fields we need using the single `mach_header` type instead of using templates to switch between the two. I also spotted a potential issue where hasObjCSection tries to parse a file w/o checking if it does indeed match the target arch... As such, I've added a quick magic number check to ensure we don't access invalid memory during `findCommand()`. Addresses PR50180. Reviewed By: #lld-macho, thakis Differential Revision: https://reviews.llvm.org/D101724 --- lld/MachO/Driver.cpp | 6 +----- lld/MachO/InputFiles.cpp | 26 ++++++++------------------ lld/MachO/InputFiles.h | 15 +++++++-------- lld/MachO/ObjC.cpp | 3 +++ lld/MachO/SyntheticSections.cpp | 29 ++++++----------------------- lld/MachO/SyntheticSections.h | 9 +++++---- lld/MachO/Target.h | 4 ++++ lld/MachO/Writer.cpp | 6 ++---- lld/MachO/Writer.h | 2 +- lld/test/MachO/objc.s | 13 ++++++++----- 10 files changed, 45 insertions(+), 68 deletions(-) diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp index dafd914..e3d266b 100644 --- a/lld/MachO/Driver.cpp +++ b/lld/MachO/Driver.cpp @@ -1142,11 +1142,7 @@ bool macho::link(ArrayRef argsArr, bool canExitEarly, "\n>>> referenced from option -exported_symbol(s_list)"); } - if (target->wordSize == 8) - createSyntheticSections(); - else - createSyntheticSections(); - + createSyntheticSections(); createSyntheticSymbols(); for (const Arg *arg : args.filtered(OPT_sectcreate)) { diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp index a553754..5f1f772 100644 --- a/lld/MachO/InputFiles.cpp +++ b/lld/MachO/InputFiles.cpp @@ -102,13 +102,11 @@ static VersionTuple decodeVersion(uint32_t version) { return VersionTuple(major, minor, subMinor); } -template static Optional getPlatformInfo(const InputFile *input) { if (!isa(input) && !isa(input)) return None; - using Header = typename LP::mach_header; - auto *hdr = reinterpret_cast(input->mb.getBufferStart()); + const char *hdr = input->mb.getBufferStart(); PlatformInfo platformInfo; if (const auto *cmd = @@ -141,8 +139,8 @@ static Optional getPlatformInfo(const InputFile *input) { return None; } -template static bool checkCompatibility(const InputFile *input) { - Optional platformInfo = getPlatformInfo(input); +static bool checkCompatibility(const InputFile *input) { + Optional platformInfo = getPlatformInfo(input); if (!platformInfo) return true; // TODO: Correctly detect simulator platforms or relax this check. @@ -639,7 +637,7 @@ template void ObjFile::parse() { return; } - if (!checkCompatibility(this)) + if (!checkCompatibility(this)) return; if (const load_command *cmd = findCommand(hdr, LC_LINKER_OPTION)) { @@ -782,16 +780,8 @@ DylibFile::DylibFile(MemoryBufferRef mb, DylibFile *umbrella, if (umbrella == nullptr) umbrella = this; - if (target->wordSize == 8) - parse(umbrella); - else - parse(umbrella); -} - -template void DylibFile::parse(DylibFile *umbrella) { - using Header = typename LP::mach_header; auto *buf = reinterpret_cast(mb.getBufferStart()); - auto *hdr = reinterpret_cast(mb.getBufferStart()); + auto *hdr = reinterpret_cast(mb.getBufferStart()); // Initialize dylibName. if (const load_command *cmd = findCommand(hdr, LC_ID_DYLIB)) { @@ -806,7 +796,7 @@ template void DylibFile::parse(DylibFile *umbrella) { return; } - if (!checkCompatibility(this)) + if (!checkCompatibility(this)) return; // Initialize symbols. @@ -825,7 +815,8 @@ template void DylibFile::parse(DylibFile *umbrella) { return; } - const uint8_t *p = reinterpret_cast(hdr) + sizeof(Header); + const uint8_t *p = + reinterpret_cast(hdr) + target->headerSize; for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { auto *cmd = reinterpret_cast(p); p += cmd->cmdsize; @@ -1008,4 +999,3 @@ BitcodeFile::BitcodeFile(MemoryBufferRef mbref) } template void ObjFile::parse(); -template void DylibFile::parse(DylibFile *umbrella); diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h index 066d450..d57e174 100644 --- a/lld/MachO/InputFiles.h +++ b/lld/MachO/InputFiles.h @@ -10,6 +10,7 @@ #define LLD_MACHO_INPUT_FILES_H #include "MachOStructs.h" +#include "Target.h" #include "lld/Common/LLVM.h" #include "lld/Common/Memory.h" @@ -158,9 +159,6 @@ public: // implemented in the bundle. When used like this, it is very similar // to a Dylib, so we re-used the same class to represent it. bool isBundleLoader; - -private: - template void parse(DylibFile *umbrella); }; // .a file @@ -189,12 +187,13 @@ extern llvm::SetVector inputFiles; llvm::Optional readFile(StringRef path); -template -const CommandType *findCommand(const Header *hdr, Types... types) { +// anyHdr should be a pointer to either mach_header or mach_header_64 +template +const CommandType *findCommand(const void *anyHdr, Types... types) { std::initializer_list typesList{types...}; - const uint8_t *p = reinterpret_cast(hdr) + sizeof(Header); - + const auto *hdr = reinterpret_cast(anyHdr); + const uint8_t *p = + reinterpret_cast(hdr) + target->headerSize; for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { auto *cmd = reinterpret_cast(p); if (llvm::is_contained(typesList, cmd->cmd)) diff --git a/lld/MachO/ObjC.cpp b/lld/MachO/ObjC.cpp index 8a748ce..7ed8008 100644 --- a/lld/MachO/ObjC.cpp +++ b/lld/MachO/ObjC.cpp @@ -24,6 +24,9 @@ template static bool hasObjCSection(MemoryBufferRef mb) { auto *hdr = reinterpret_cast(mb.getBufferStart()); + if (hdr->magic != LP::magic) + return false; + if (const auto *c = findCommand(hdr, LP::segmentLCType)) { auto sectionHeaders = diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp index cc595b3..0805f78 100644 --- a/lld/MachO/SyntheticSections.cpp +++ b/lld/MachO/SyntheticSections.cpp @@ -71,22 +71,8 @@ void MachHeaderSection::addLoadCommand(LoadCommand *lc) { sizeOfCmds += lc->getSize(); } -// This serves to hide (type-erase) the template parameter from -// MachHeaderSection. -template class MachHeaderSectionImpl : public MachHeaderSection { -public: - MachHeaderSectionImpl() = default; - uint64_t getSize() const override; - void writeTo(uint8_t *buf) const override; -}; - -template MachHeaderSection *macho::makeMachHeaderSection() { - return make>(); -} - -template uint64_t MachHeaderSectionImpl::getSize() const { - uint64_t size = - sizeof(typename LP::mach_header) + sizeOfCmds + config->headerPad; +uint64_t MachHeaderSection::getSize() const { + uint64_t size = target->headerSize + sizeOfCmds + config->headerPad; // If we are emitting an encryptable binary, our load commands must have a // separate (non-encrypted) page to themselves. if (config->emitEncryptionInfo) @@ -106,10 +92,9 @@ static uint32_t cpuSubtype() { return subtype; } -template -void MachHeaderSectionImpl::writeTo(uint8_t *buf) const { - auto *hdr = reinterpret_cast(buf); - hdr->magic = LP::magic; +void MachHeaderSection::writeTo(uint8_t *buf) const { + auto *hdr = reinterpret_cast(buf); + hdr->magic = target->magic; hdr->cputype = target->cpuType; hdr->cpusubtype = cpuSubtype(); hdr->filetype = config->outputType; @@ -144,7 +129,7 @@ void MachHeaderSectionImpl::writeTo(uint8_t *buf) const { } } - uint8_t *p = reinterpret_cast(hdr + 1); + uint8_t *p = reinterpret_cast(hdr) + target->headerSize; for (const LoadCommand *lc : loadCommands) { lc->writeTo(p); p += lc->getSize(); @@ -1154,7 +1139,5 @@ void macho::createSyntheticSymbols() { addHeaderSymbol("___dso_handle"); } -template MachHeaderSection *macho::makeMachHeaderSection(); -template MachHeaderSection *macho::makeMachHeaderSection(); template SymtabSection *macho::makeSymtabSection(StringTableSection &); template SymtabSection *macho::makeSymtabSection(StringTableSection &); diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h index 38fee86..1da7dc2 100644 --- a/lld/MachO/SyntheticSections.h +++ b/lld/MachO/SyntheticSections.h @@ -80,17 +80,18 @@ public: // The header of the Mach-O file, which must have a file offset of zero. class MachHeaderSection : public SyntheticSection { public: - void addLoadCommand(LoadCommand *); + MachHeaderSection(); bool isHidden() const override { return true; } + uint64_t getSize() const override; + void writeTo(uint8_t *buf) const override; + + void addLoadCommand(LoadCommand *); protected: - MachHeaderSection(); std::vector loadCommands; uint32_t sizeOfCmds = 0; }; -template MachHeaderSection *makeMachHeaderSection(); - // A hidden section that exists solely for the purpose of creating the // __PAGEZERO segment, which is used to catch null pointer dereferences. class PageZeroSection : public SyntheticSection { diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h index 4084db6..eb34c45 100644 --- a/lld/MachO/Target.h +++ b/lld/MachO/Target.h @@ -32,7 +32,9 @@ public: template TargetInfo(LP) { // Having these values available in TargetInfo allows us to access them // without having to resort to templates. + magic = LP::magic; pageZeroSize = LP::pageZeroSize; + headerSize = sizeof(typename LP::mach_header); wordSize = LP::wordSize; } @@ -67,10 +69,12 @@ public: return getRelocAttrs(type).hasAttr(bit); } + uint32_t magic; uint32_t cpuType; uint32_t cpuSubtype; uint64_t pageZeroSize; + size_t headerSize; size_t stubSize; size_t stubHelperHeaderSize; size_t stubHelperEntrySize; diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp index ca46eec..5c4465e 100644 --- a/lld/MachO/Writer.cpp +++ b/lld/MachO/Writer.cpp @@ -1063,8 +1063,8 @@ template void Writer::run() { template void macho::writeResult() { Writer().run(); } -template void macho::createSyntheticSections() { - in.header = makeMachHeaderSection(); +void macho::createSyntheticSections() { + in.header = make(); in.rebase = make(); in.binding = make(); in.weakBinding = make(); @@ -1083,5 +1083,3 @@ OutputSection *macho::firstTLVDataSection = nullptr; template void macho::writeResult(); template void macho::writeResult(); -template void macho::createSyntheticSections(); -template void macho::createSyntheticSections(); diff --git a/lld/MachO/Writer.h b/lld/MachO/Writer.h index 7369a6d..56f6f7a 100644 --- a/lld/MachO/Writer.h +++ b/lld/MachO/Writer.h @@ -27,7 +27,7 @@ public: template void writeResult(); -template void createSyntheticSections(); +void createSyntheticSections(); // Add bindings for symbols that need weak or non-lazy bindings. void addNonLazyBindingEntries(const Symbol *, const InputSection *, diff --git a/lld/test/MachO/objc.s b/lld/test/MachO/objc.s index d727497..39edf04 100644 --- a/lld/test/MachO/objc.s +++ b/lld/test/MachO/objc.s @@ -5,11 +5,12 @@ # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/has-objc-category.s -o %t/has-objc-category.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/has-swift.s -o %t/has-swift.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/no-objc.s -o %t/no-objc.o -# RUN: llvm-ar rcs %t/libHasSomeObjC.a %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/no-objc.o +## Make sure we don't mis-parse a 32-bit file as 64-bit +# RUN: llvm-mc -filetype=obj -triple=armv7-apple-watchos %t/no-objc.s -o %t/wrong-arch.o +# RUN: llvm-ar rcs %t/libHasSomeObjC.a %t/has-objc-symbol.o %t/has-objc-category.o %t/has-swift.o %t/no-objc.o %t/wrong-arch.o # RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o -# RUN: %lld -lSystem %t/test.o -o %t/test \ -# RUN: -L%t -lHasSomeObjC -ObjC +# RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC -ObjC # RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=OBJC # OBJC: Sections: @@ -22,8 +23,7 @@ # OBJC-NEXT: g F __TEXT,__text _main # OBJC-NEXT: g F __TEXT,__text _OBJC_CLASS_$_MyObject -# RUN: %lld -lSystem %t/test.o -o %t/test \ -# RUN: -L%t -lHasSomeObjC +# RUN: %lld -lSystem %t/test.o -o %t/test -L%t -lHasSomeObjC # RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=NO-OBJC # NO-OBJC: Sections: @@ -56,6 +56,9 @@ _OBJC_CLASS_$_MyObject: foo: .quad 0x1234 +.section __DATA,bar +.section __DATA,baz + #--- test.s .globl _main _main: -- 2.7.4