From 03f43b3aca363e16c45d8733400fd0083b1af4d8 Mon Sep 17 00:00:00 2001 From: Jez Ng Date: Tue, 31 Mar 2020 11:45:47 -0700 Subject: [PATCH] [lld] Initial commit for new Mach-O backend Summary: This is the first commit for the new Mach-O backend, designed to roughly follow the architecture of the existing ELF and COFF backends, and building off work that @ruiu and @pcc did in a branch a while back. Note that this is a very stripped-down commit with the bare minimum of functionality for ease of review. We'll be following up with more diffs soon. Currently, we're able to generate a simple "Hello World!" executable that runs on macOS Catalina (and possibly on earlier macOS versions; I haven't tested them). (This executable can be obtained by compiling `test/MachO/relocations.s`.) We're mocking out a few load commands to achieve this -- for example, we can't load dynamic libraries, but Catalina requires binaries to be linked against `dyld`, so we hardcode the emission of an `LC_LOAD_DYLIB` command. Other mocked-out load commands include `LC_SYMTAB` and `LC_DYSYMTAB`. 
Differential Revision: https://reviews.llvm.org/D75382 --- lld/CMakeLists.txt | 1 + lld/MachO/Arch/X86_64.cpp | 60 ++++++ lld/MachO/CMakeLists.txt | 34 +++ lld/MachO/Config.h | 29 +++ lld/MachO/Driver.cpp | 150 +++++++++++++ lld/MachO/Driver.h | 35 ++++ lld/MachO/InputFiles.cpp | 204 ++++++++++++++++++ lld/MachO/InputFiles.h | 70 +++++++ lld/MachO/InputSection.cpp | 39 ++++ lld/MachO/InputSection.h | 52 +++++ lld/MachO/Options.td | 18 ++ lld/MachO/OutputSegment.cpp | 30 +++ lld/MachO/OutputSegment.h | 34 +++ lld/MachO/SymbolTable.cpp | 62 ++++++ lld/MachO/SymbolTable.h | 44 ++++ lld/MachO/Symbols.cpp | 23 ++ lld/MachO/Symbols.h | 100 +++++++++ lld/MachO/Target.cpp | 14 ++ lld/MachO/Target.h | 41 ++++ lld/MachO/Writer.cpp | 359 ++++++++++++++++++++++++++++++++ lld/MachO/Writer.h | 20 ++ lld/include/lld/Common/Driver.h | 5 + lld/test/MachO/alignment-too-large.yaml | 58 ++++++ lld/test/MachO/arch.s | 11 + lld/test/MachO/duplicate-symbol.s | 12 ++ lld/test/MachO/entry-symbol.s | 13 ++ lld/test/MachO/invalid-executable.s | 11 + lld/test/MachO/load-commands.s | 17 ++ lld/test/MachO/no-such-file.s | 4 + lld/test/MachO/relocations.s | 21 ++ lld/test/MachO/section-headers.s | 46 ++++ lld/test/MachO/segments.s | 20 ++ lld/test/MachO/silent-ignore.test | 8 + lld/test/MachO/text-segment.s | 15 ++ lld/tools/lld/CMakeLists.txt | 1 + lld/tools/lld/lld.cpp | 12 +- 36 files changed, 1669 insertions(+), 4 deletions(-) create mode 100644 lld/MachO/Arch/X86_64.cpp create mode 100644 lld/MachO/CMakeLists.txt create mode 100644 lld/MachO/Config.h create mode 100644 lld/MachO/Driver.cpp create mode 100644 lld/MachO/Driver.h create mode 100644 lld/MachO/InputFiles.cpp create mode 100644 lld/MachO/InputFiles.h create mode 100644 lld/MachO/InputSection.cpp create mode 100644 lld/MachO/InputSection.h create mode 100644 lld/MachO/Options.td create mode 100644 lld/MachO/OutputSegment.cpp create mode 100644 lld/MachO/OutputSegment.h create mode 100644 lld/MachO/SymbolTable.cpp create mode 
100644 lld/MachO/SymbolTable.h create mode 100644 lld/MachO/Symbols.cpp create mode 100644 lld/MachO/Symbols.h create mode 100644 lld/MachO/Target.cpp create mode 100644 lld/MachO/Target.h create mode 100644 lld/MachO/Writer.cpp create mode 100644 lld/MachO/Writer.h create mode 100644 lld/test/MachO/alignment-too-large.yaml create mode 100644 lld/test/MachO/arch.s create mode 100644 lld/test/MachO/duplicate-symbol.s create mode 100644 lld/test/MachO/entry-symbol.s create mode 100644 lld/test/MachO/invalid-executable.s create mode 100644 lld/test/MachO/load-commands.s create mode 100644 lld/test/MachO/no-such-file.s create mode 100644 lld/test/MachO/relocations.s create mode 100644 lld/test/MachO/section-headers.s create mode 100644 lld/test/MachO/segments.s create mode 100644 lld/test/MachO/silent-ignore.test create mode 100644 lld/test/MachO/text-segment.s diff --git a/lld/CMakeLists.txt b/lld/CMakeLists.txt index a506afa..3bd9e95 100644 --- a/lld/CMakeLists.txt +++ b/lld/CMakeLists.txt @@ -194,5 +194,6 @@ endif() add_subdirectory(docs) add_subdirectory(COFF) add_subdirectory(ELF) +add_subdirectory(MachO) add_subdirectory(MinGW) add_subdirectory(wasm) diff --git a/lld/MachO/Arch/X86_64.cpp b/lld/MachO/Arch/X86_64.cpp new file mode 100644 index 0000000..6c0989e --- /dev/null +++ b/lld/MachO/Arch/X86_64.cpp @@ -0,0 +1,60 @@ +//===- X86_64.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Target.h" +#include "lld/Common/ErrorHandler.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Endian.h" + +using namespace llvm::MachO; +using namespace llvm::support::endian; +using namespace lld; +using namespace lld::macho; + +namespace { + +struct X86_64 : TargetInfo { + X86_64(); + uint64_t getImplicitAddend(const uint8_t *loc, uint8_t type) const override; + void relocateOne(uint8_t *loc, uint8_t type, uint64_t val) const override; +}; + +X86_64::X86_64() { + cpuType = CPU_TYPE_X86_64; + cpuSubtype = CPU_SUBTYPE_X86_64_ALL; +} + +uint64_t X86_64::getImplicitAddend(const uint8_t *loc, uint8_t type) const { + switch (type) { + case X86_64_RELOC_SIGNED: + return read32le(loc); + default: + error("TODO: Unhandled relocation type " + std::to_string(type)); + return 0; + } +} + +void X86_64::relocateOne(uint8_t *loc, uint8_t type, uint64_t val) const { + switch (type) { + case X86_64_RELOC_SIGNED: + // This type is only used for pc-relative relocations, so offset by 4 since + // the RIP has advanced by 4 at this point. 
+ write32le(loc, val - 4); + break; + default: + llvm_unreachable( + "getImplicitAddend should have flagged all unhandled relocation types"); + } +} + +} // namespace + +TargetInfo *macho::createX86_64TargetInfo() { + static X86_64 t; + return &t; +} diff --git a/lld/MachO/CMakeLists.txt b/lld/MachO/CMakeLists.txt new file mode 100644 index 0000000..2c63281 --- /dev/null +++ b/lld/MachO/CMakeLists.txt @@ -0,0 +1,34 @@ +set(LLVM_TARGET_DEFINITIONS Options.td) +tablegen(LLVM Options.inc -gen-opt-parser-defs) +add_public_tablegen_target(MachOOptionsTableGen) + +add_lld_library(lldMachO2 + Arch/X86_64.cpp + Driver.cpp + InputFiles.cpp + InputSection.cpp + OutputSegment.cpp + SymbolTable.cpp + Symbols.cpp + Target.cpp + Writer.cpp + + LINK_COMPONENTS + ${LLVM_TARGETS_TO_BUILD} + BinaryFormat + Core + DebugInfoDWARF + LTO + MC + Object + Option + Support + + LINK_LIBS + lldCommon + ${LLVM_PTHREAD_LIB} + + DEPENDS + MachOOptionsTableGen + ${tablegen_deps} + ) diff --git a/lld/MachO/Config.h b/lld/MachO/Config.h new file mode 100644 index 0000000..ea8df75 --- /dev/null +++ b/lld/MachO/Config.h @@ -0,0 +1,29 @@ +//===- Config.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_CONFIG_H +#define LLD_MACHO_CONFIG_H + +#include "llvm/ADT/StringRef.h" + +namespace lld { +namespace macho { + +class Symbol; + +struct Configuration { + llvm::StringRef outputFile; + Symbol *entry; +}; + +extern Configuration *config; + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp new file mode 100644 index 0000000..475f81a --- /dev/null +++ b/lld/MachO/Driver.cpp @@ -0,0 +1,150 @@ +//===- Driver.cpp ---------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Driver.h" +#include "Config.h" +#include "InputFiles.h" +#include "OutputSegment.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "Target.h" +#include "Writer.h" + +#include "lld/Common/Args.h" +#include "lld/Common/Driver.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/LLVM.h" +#include "lld/Common/Memory.h" +#include "lld/Common/Version.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/BinaryFormat/Magic.h" +#include "llvm/Option/ArgList.h" +#include "llvm/Option/Option.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace llvm::MachO; +using namespace llvm::sys; +using namespace lld; +using namespace lld::macho; + +Configuration *lld::macho::config; + +// Create prefix string literals used in Options.td +#define PREFIX(NAME, VALUE) const char *NAME[] = VALUE; +#include "Options.inc" +#undef PREFIX + +// Create table mapping all options defined in Options.td +static const 
opt::OptTable::Info optInfo[] = { +#define OPTION(X1, X2, ID, KIND, GROUP, ALIAS, X7, X8, X9, X10, X11, X12) \ + {X1, X2, X10, X11, OPT_##ID, opt::Option::KIND##Class, \ + X9, X8, OPT_##GROUP, OPT_##ALIAS, X7, X12}, +#include "Options.inc" +#undef OPTION +}; + +MachOOptTable::MachOOptTable() : OptTable(optInfo) {} + +opt::InputArgList MachOOptTable::parse(ArrayRef argv) { + // Make InputArgList from string vectors. + unsigned missingIndex; + unsigned missingCount; + SmallVector vec(argv.data(), argv.data() + argv.size()); + + opt::InputArgList args = ParseArgs(vec, missingIndex, missingCount); + + if (missingCount) + error(Twine(args.getArgString(missingIndex)) + ": missing argument"); + + for (opt::Arg *arg : args.filtered(OPT_UNKNOWN)) + error("unknown argument: " + arg->getSpelling()); + return args; +} + +static TargetInfo *createTargetInfo(opt::InputArgList &args) { + StringRef s = args.getLastArgValue(OPT_arch, "x86_64"); + if (s != "x86_64") + error("missing or unsupported -arch " + s); + return createX86_64TargetInfo(); +} + +static void addFile(StringRef path) { + Optional buffer = readFile(path); + if (!buffer) + return; + MemoryBufferRef mbref = *buffer; + + switch (identify_magic(mbref.getBuffer())) { + case file_magic::macho_object: + inputFiles.push_back(make(mbref)); + break; + default: + error(path + ": unhandled file type"); + } +} + +bool macho::link(llvm::ArrayRef argsArr, bool canExitEarly, + raw_ostream &stdoutOS, raw_ostream &stderrOS) { + lld::stdoutOS = &stdoutOS; + lld::stderrOS = &stderrOS; + + MachOOptTable parser; + opt::InputArgList args = parser.parse(argsArr.slice(1)); + + if (args.hasArg(OPT_v)) { + message(getLLDVersion()); + freeArena(); + return !errorCount(); + } + + config = make(); + symtab = make(); + target = createTargetInfo(args); + + config->entry = symtab->addUndefined(args.getLastArgValue(OPT_e, "_main")); + config->outputFile = args.getLastArgValue(OPT_o, "a.out"); + + getOrCreateOutputSegment("__TEXT", VM_PROT_READ | 
VM_PROT_EXECUTE); + getOrCreateOutputSegment("__DATA", VM_PROT_READ | VM_PROT_WRITE); + + for (opt::Arg *arg : args) { + switch (arg->getOption().getID()) { + case OPT_INPUT: + addFile(arg->getValue()); + break; + } + } + + if (!isa(config->entry)) { + error("undefined symbol: " + config->entry->getName()); + return false; + } + + // Initialize InputSections. + for (InputFile *file : inputFiles) + for (InputSection *sec : file->sections) + inputSections.push_back(sec); + + // Add input sections to output segments. + for (InputSection *isec : inputSections) { + OutputSegment *os = + getOrCreateOutputSegment(isec->segname, VM_PROT_READ | VM_PROT_WRITE); + os->sections[isec->name].push_back(isec); + } + + // Write to an output file. + writeResult(); + + if (canExitEarly) + exitLld(errorCount() ? 1 : 0); + + freeArena(); + return !errorCount(); +} diff --git a/lld/MachO/Driver.h b/lld/MachO/Driver.h new file mode 100644 index 0000000..142f00d --- /dev/null +++ b/lld/MachO/Driver.h @@ -0,0 +1,35 @@ +//===- Driver.h -------------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_DRIVER_H +#define LLD_MACHO_DRIVER_H + +#include "lld/Common/LLVM.h" +#include "llvm/Option/OptTable.h" + +namespace lld { +namespace macho { + +class MachOOptTable : public llvm::opt::OptTable { +public: + MachOOptTable(); + llvm::opt::InputArgList parse(ArrayRef argv); +}; + +// Create enum with OPT_xxx values for each option in Options.td +enum { + OPT_INVALID = 0, +#define OPTION(_1, _2, ID, _4, _5, _6, _7, _8, _9, _10, _11, _12) OPT_##ID, +#include "Options.inc" +#undef OPTION +}; + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/InputFiles.cpp b/lld/MachO/InputFiles.cpp new file mode 100644 index 0000000..1204fc4 --- /dev/null +++ b/lld/MachO/InputFiles.cpp @@ -0,0 +1,204 @@ +//===- InputFiles.cpp -----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file contains functions to parse Mach-O object files. In this comment, +// we describe the Mach-O file structure and how we parse it. +// +// Mach-O is not very different from ELF or COFF. The notion of symbols, +// sections and relocations exists in Mach-O as it does in ELF and COFF. +// +// Perhaps the notion that is new to those who know ELF/COFF is "subsections". +// In ELF/COFF, sections are an atomic unit of data copied from input files to +// output files. When we merge or garbage-collect sections, we treat each +// section as an atomic unit. In Mach-O, that's not the case. Sections can +// consist of multiple subsections, and subsections are a unit of merging and +// garbage-collecting. 
Therefore, Mach-O's subsections are more similar to +// ELF/COFF's sections than Mach-O's sections are. +// +// A section can have multiple symbols. A symbol that does not have the +// N_ALT_ENTRY attribute indicates the beginning of a subsection. Therefore, by +// definition, a symbol is always present at the beginning of each subsection. A +// symbol with the N_ALT_ENTRY attribute does not start a new subsection and can +// point to the middle of a subsection. +// +// The notion of subsections also affects how relocations are represented in +// Mach-O. All references within a section need to be explicitly represented as +// relocations if they refer to different subsections, because we obviously need +// to fix up addresses if subsections are laid out in an output file differently +// than they were in object files. To represent that, Mach-O relocations can +// refer to an unnamed location via its address. Scattered relocations (those +// with the R_SCATTERED bit set) always refer to unnamed locations. +// Non-scattered relocations refer to an unnamed location if r_extern is not set +// and r_symbolnum is zero. +// +// Apart from the above differences, I think you can use your knowledge of ELF +// and COFF for Mach-O. +// +//===----------------------------------------------------------------------===// + +#include "InputFiles.h" +#include "InputSection.h" +#include "OutputSegment.h" +#include "SymbolTable.h" +#include "Symbols.h" +#include "Target.h" + +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Support/Endian.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace llvm::MachO; +using namespace llvm::support::endian; +using namespace lld; +using namespace lld::macho; + +std::vector macho::inputFiles; + +// Open a given file path and return it as a memory-mapped file. +Optional macho::readFile(StringRef path) { + // Open a file. 
+ auto mbOrErr = MemoryBuffer::getFile(path); + if (auto ec = mbOrErr.getError()) { + error("cannot open " + path + ": " + ec.message()); + return None; + } + + std::unique_ptr &mb = *mbOrErr; + MemoryBufferRef mbref = mb->getMemBufferRef(); + make>(std::move(mb)); // take mb ownership + return mbref; +} + +static const load_command *findCommand(const mach_header_64 *hdr, + uint32_t type) { + const uint8_t *p = + reinterpret_cast(hdr) + sizeof(mach_header_64); + + for (uint32_t i = 0, n = hdr->ncmds; i < n; ++i) { + auto *cmd = reinterpret_cast(p); + if (cmd->cmd == type) + return cmd; + p += cmd->cmdsize; + } + return nullptr; +} + +std::vector +InputFile::parseSections(ArrayRef sections) { + std::vector ret; + ret.reserve(sections.size()); + + auto *buf = reinterpret_cast(mb.getBufferStart()); + + for (const section_64 &sec : sections) { + InputSection *isec = make(); + isec->file = this; + isec->name = StringRef(sec.sectname, strnlen(sec.sectname, 16)); + isec->segname = StringRef(sec.segname, strnlen(sec.segname, 16)); + isec->data = {buf + sec.offset, sec.size}; + if (sec.align >= 32) + error("alignment " + std::to_string(sec.align) + " of section " + + isec->name + " is too large"); + else + isec->align = 1 << sec.align; + isec->flags = sec.flags; + ret.push_back(isec); + } + + return ret; +} + +void InputFile::parseRelocations(const section_64 &sec, + std::vector &relocs) { + auto *buf = reinterpret_cast(mb.getBufferStart()); + ArrayRef relInfos( + reinterpret_cast(buf + sec.reloff), + sec.nreloc); + + for (const any_relocation_info &anyRel : relInfos) { + Reloc r; + if (anyRel.r_word0 & R_SCATTERED) { + error("TODO: Scattered relocations not supported"); + } else { + auto rel = reinterpret_cast(anyRel); + r.type = rel.r_type; + r.offset = rel.r_address; + r.addend = target->getImplicitAddend(buf + sec.offset + r.offset, r.type); + if (rel.r_extern) + r.target = symbols[rel.r_symbolnum]; + else { + error("TODO: Non-extern relocations are not supported"); + 
continue; + } + } + relocs.push_back(r); + } +} + +ObjFile::ObjFile(MemoryBufferRef mb) : InputFile(ObjKind, mb) { + auto *buf = reinterpret_cast(mb.getBufferStart()); + auto *hdr = reinterpret_cast(mb.getBufferStart()); + ArrayRef objSections; + + if (const load_command *cmd = findCommand(hdr, LC_SEGMENT_64)) { + auto *c = reinterpret_cast(cmd); + objSections = ArrayRef{ + reinterpret_cast(c + 1), c->nsects}; + sections = parseSections(objSections); + } + + if (const load_command *cmd = findCommand(hdr, LC_SYMTAB)) { + auto *c = reinterpret_cast(cmd); + const char *strtab = reinterpret_cast(buf) + c->stroff; + ArrayRef nList( + reinterpret_cast(buf + c->symoff), c->nsyms); + + symbols.reserve(c->nsyms); + + for (const nlist_64 &sym : nList) { + StringRef name = strtab + sym.n_strx; + + // Undefined symbol + if (!sym.n_sect) { + error("TODO: Support undefined symbols"); + continue; + } + + InputSection *isec = sections[sym.n_sect - 1]; + const section_64 &objSec = objSections[sym.n_sect - 1]; + uint64_t value = sym.n_value - objSec.addr; + + // Global defined symbol + if (sym.n_type & N_EXT) { + symbols.push_back(symtab->addDefined(name, isec, value)); + continue; + } + + // Local defined symbol + symbols.push_back(make(name, isec, value)); + } + } + + // The relocations may refer to the symbols, so we parse them after we have + // the symbols loaded. + if (!sections.empty()) { + auto it = sections.begin(); + for (const section_64 &sec : objSections) { + parseRelocations(sec, (*it)->relocs); + ++it; + } + } +} + +// Returns "" or "baz.o". +std::string lld::toString(const InputFile *file) { + return file ? 
std::string(file->getName()) : ""; +} diff --git a/lld/MachO/InputFiles.h b/lld/MachO/InputFiles.h new file mode 100644 index 0000000..7b13b1f --- /dev/null +++ b/lld/MachO/InputFiles.h @@ -0,0 +1,70 @@ +//===- InputFiles.h ---------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_INPUT_FILES_H +#define LLD_MACHO_INPUT_FILES_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/BinaryFormat/MachO.h" +#include "llvm/Object/Archive.h" +#include "llvm/Support/MemoryBuffer.h" +#include + +namespace lld { +namespace macho { + +class InputSection; +class Symbol; +struct Reloc; + +class InputFile { +public: + enum Kind { + ObjKind, + }; + + virtual ~InputFile() = default; + + Kind kind() const { return fileKind; } + StringRef getName() const { return mb.getBufferIdentifier(); } + + MemoryBufferRef mb; + std::vector symbols; + std::vector sections; + StringRef dylibName; + +protected: + InputFile(Kind kind, MemoryBufferRef mb) : mb(mb), fileKind(kind) {} + + std::vector parseSections(ArrayRef); + + void parseRelocations(const llvm::MachO::section_64 &, + std::vector &relocs); + +private: + const Kind fileKind; +}; + +// .o file +class ObjFile : public InputFile { +public: + explicit ObjFile(MemoryBufferRef mb); + static bool classof(const InputFile *f) { return f->kind() == ObjKind; } +}; + +extern std::vector inputFiles; + +llvm::Optional readFile(StringRef path); + +} // namespace macho + +std::string toString(const macho::InputFile *file); +} // namespace lld + +#endif diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp new file mode 100644 index 0000000..49dccf6 --- /dev/null +++ b/lld/MachO/InputSection.cpp @@ 
-0,0 +1,39 @@ +//===- InputSection.cpp ---------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "InputSection.h" +#include "Symbols.h" +#include "Target.h" +#include "lld/Common/Memory.h" +#include "llvm/Support/Endian.h" + +using namespace llvm::MachO; +using namespace llvm::support; +using namespace lld; +using namespace lld::macho; + +std::vector macho::inputSections; + +void InputSection::writeTo(uint8_t *buf) { + memcpy(buf, data.data(), data.size()); + + for (Reloc &r : relocs) { + uint64_t va = 0; + if (auto *s = r.target.dyn_cast()) + va = s->getVA(); + else if (auto *isec = r.target.dyn_cast()) + va = isec->addr; + else + llvm_unreachable("Unknown relocation target"); + + uint64_t val = va + r.addend; + if (1) // TODO: handle non-pcrel relocations + val -= addr - ImageBase + r.offset; + target->relocateOne(buf + r.offset, r.type, val); + } +} diff --git a/lld/MachO/InputSection.h b/lld/MachO/InputSection.h new file mode 100644 index 0000000..05da497 --- /dev/null +++ b/lld/MachO/InputSection.h @@ -0,0 +1,52 @@ +//===- InputSection.h -------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_INPUT_SECTION_H +#define LLD_MACHO_INPUT_SECTION_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/PointerUnion.h" +#include "llvm/BinaryFormat/MachO.h" + +namespace lld { +namespace macho { + +class InputFile; +class InputSection; +class Symbol; + +struct Reloc { + uint8_t type; + uint32_t addend; + uint32_t offset; + llvm::PointerUnion target; +}; + +class InputSection { +public: + void writeTo(uint8_t *buf); + + InputFile *file = nullptr; + StringRef name; + StringRef segname; + + ArrayRef data; + uint64_t addr = 0; + uint32_t align = 1; + uint32_t flags = 0; + + std::vector relocs; +}; + +extern std::vector inputSections; + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/Options.td b/lld/MachO/Options.td new file mode 100644 index 0000000..2d9713c --- /dev/null +++ b/lld/MachO/Options.td @@ -0,0 +1,18 @@ +include "llvm/Option/OptParser.td" + +def arch: Separate<["-"], "arch">, MetaVarName<"">, + HelpText<"Architecture to link">; + +def e: Separate<["-"], "e">, HelpText<"Name of entry point symbol">; + +def o: Separate<["-"], "o">, MetaVarName<"">, + HelpText<"Path to file to write output">; + +def v: Flag<["-"], "v">, HelpText<"Display the version number and exit">; + +// Ignored options +def: Flag<["-"], "demangle">; +def: Flag<["-"], "dynamic">; +def: Flag<["-"], "no_deduplicate">; +def: Separate<["-"], "lto_library">; +def: Separate<["-"], "macosx_version_min">; diff --git a/lld/MachO/OutputSegment.cpp b/lld/MachO/OutputSegment.cpp new file mode 100644 index 0000000..75f5c20 --- /dev/null +++ b/lld/MachO/OutputSegment.cpp @@ -0,0 +1,30 @@ +//===- OutputSegment.cpp --------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 
+// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "OutputSegment.h" +#include "lld/Common/Memory.h" + +using namespace llvm; +using namespace lld; +using namespace lld::macho; + +std::vector macho::outputSegments; + +OutputSegment *macho::getOrCreateOutputSegment(StringRef name, uint32_t perms) { + for (OutputSegment *os : outputSegments) + if (os->name == name) + // TODO: assert that os->perms == perms, once we figure out what to do + // about default-created segments. + return os; + + auto *os = make(); + os->name = name; + os->perms = perms; + outputSegments.push_back(os); + return os; +} diff --git a/lld/MachO/OutputSegment.h b/lld/MachO/OutputSegment.h new file mode 100644 index 0000000..108c180 --- /dev/null +++ b/lld/MachO/OutputSegment.h @@ -0,0 +1,34 @@ +//===- OutputSegment.h ------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_OUTPUT_SEGMENT_H +#define LLD_MACHO_OUTPUT_SEGMENT_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/MapVector.h" + +namespace lld { +namespace macho { + +class InputSection; + +class OutputSegment { +public: + StringRef name; + uint32_t perms; + llvm::MapVector> sections; +}; + +extern std::vector outputSegments; + +OutputSegment *getOrCreateOutputSegment(StringRef name, uint32_t perms); + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp new file mode 100644 index 0000000..ce54416 --- /dev/null +++ b/lld/MachO/SymbolTable.cpp @@ -0,0 +1,62 @@ +//===- SymbolTable.cpp ----------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "SymbolTable.h" +#include "InputFiles.h" +#include "Symbols.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Memory.h" + +using namespace llvm; +using namespace lld; +using namespace lld::macho; + +Symbol *SymbolTable::find(StringRef name) { + auto it = symMap.find(llvm::CachedHashStringRef(name)); + if (it == symMap.end()) + return nullptr; + return symVector[it->second]; +} + +std::pair SymbolTable::insert(StringRef name) { + auto p = symMap.insert({CachedHashStringRef(name), (int)symVector.size()}); + + // Name already present in the symbol table. + if (!p.second) + return {symVector[p.first->second], false}; + + // Name is a new symbol. 
+ Symbol *sym = reinterpret_cast(make()); + symVector.push_back(sym); + return {sym, true}; +} + +Symbol *SymbolTable::addDefined(StringRef name, InputSection *isec, + uint32_t value) { + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(name); + + if (!wasInserted && isa(s)) + error("duplicate symbol: " + name); + + replaceSymbol(s, name, isec, value); + return s; +} + +Symbol *SymbolTable::addUndefined(StringRef name) { + Symbol *s; + bool wasInserted; + std::tie(s, wasInserted) = insert(name); + + if (wasInserted) + replaceSymbol(s, name); + return s; +} + +SymbolTable *macho::symtab; diff --git a/lld/MachO/SymbolTable.h b/lld/MachO/SymbolTable.h new file mode 100644 index 0000000..2027d36 --- /dev/null +++ b/lld/MachO/SymbolTable.h @@ -0,0 +1,44 @@ +//===- SymbolTable.h --------------------------------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLD_MACHO_SYMBOL_TABLE_H +#define LLD_MACHO_SYMBOL_TABLE_H + +#include "lld/Common/LLVM.h" +#include "llvm/ADT/CachedHashString.h" +#include "llvm/Object/Archive.h" + +namespace lld { +namespace macho { + +class InputFile; +class InputSection; +class ArchiveFile; +class Symbol; + +class SymbolTable { +public: + Symbol *addDefined(StringRef name, InputSection *isec, uint32_t value); + + Symbol *addUndefined(StringRef name); + + ArrayRef getSymbols() const { return symVector; } + Symbol *find(StringRef name); + +private: + std::pair insert(StringRef name); + llvm::DenseMap symMap; + std::vector symVector; +}; + +extern SymbolTable *symtab; + +} // namespace macho +} // namespace lld + +#endif diff --git a/lld/MachO/Symbols.cpp b/lld/MachO/Symbols.cpp new file mode 100644 index 0000000..ec383ec --- /dev/null +++ b/lld/MachO/Symbols.cpp @@ -0,0 +1,23 @@ +//===- Symbols.cpp --------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "Symbols.h" +#include "InputFiles.h" +#include "lld/Common/ErrorHandler.h" +#include "lld/Common/Strings.h" + +using namespace llvm; +using namespace lld; +using namespace lld::macho; + +// Returns a symbol for an error message. 
+std::string lld::toString(const Symbol &sym) {
+  if (Optional<std::string> s = demangleItanium(sym.getName()))
+    return *s;
+  return std::string(sym.getName());
+}
diff --git a/lld/MachO/Symbols.h b/lld/MachO/Symbols.h
new file mode 100644
index 0000000..f754ec8
--- /dev/null
+++ b/lld/MachO/Symbols.h
@@ -0,0 +1,114 @@
+//===- Symbols.h ------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_SYMBOLS_H
+#define LLD_MACHO_SYMBOLS_H
+
+#include "InputSection.h"
+#include "Target.h"
+#include "lld/Common/Strings.h"
+#include "llvm/Object/Archive.h"
+#include <cstring>
+
+namespace lld {
+namespace macho {
+
+class InputSection;
+class InputFile;
+class ArchiveFile;
+
+// A pointer-plus-length view of a symbol name that can be built from either a
+// StringRef or a NUL-terminated C string.
+struct StringRefZ {
+  // The length is computed up front: Symbol::getName() uses `size` as-is, so
+  // it must never hold a placeholder value.
+  StringRefZ(const char *s) : data(s), size(strlen(s)) {}
+  StringRefZ(StringRef s) : data(s.data()), size(s.size()) {}
+
+  const char *data;
+  const uint32_t size;
+};
+
+// Base class of all symbols. kind() identifies the concrete subclass and is
+// what the subclasses' classof() hooks test.
+class Symbol {
+public:
+  enum Kind {
+    DefinedKind,
+    UndefinedKind,
+  };
+
+  Kind kind() const { return static_cast<Kind>(symbolKind); }
+
+  StringRef getName() const { return {name.data, name.size}; }
+
+  uint64_t getVA() const;
+
+  InputFile *file;
+
+protected:
+  Symbol(Kind k, InputFile *file, StringRefZ name)
+      : file(file), symbolKind(k), name(name) {}
+
+  Kind symbolKind;
+  StringRefZ name;
+};
+
+// A symbol located `value` bytes past the address of `isec`.
+class Defined : public Symbol {
+public:
+  Defined(StringRefZ name, InputSection *isec, uint32_t value)
+      : Symbol(DefinedKind, nullptr, name), isec(isec), value(value) {}
+
+  InputSection *isec;
+  uint32_t value;
+
+  static bool classof(const Symbol *s) { return s->kind() == DefinedKind; }
+};
+
+// A symbol that carries only a name; getVA() reports 0 for it.
+class Undefined : public Symbol {
+public:
+  Undefined(StringRefZ name) : Symbol(UndefinedKind, nullptr, name) {}
+
+  static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
+};
+
+// Returns isec->addr + value - ImageBase for a Defined symbol, and 0 for any
+// other kind of symbol.
+inline uint64_t Symbol::getVA() const {
+  if (auto *d = dyn_cast<Defined>(this))
+    return d->isec->addr + d->value - ImageBase;
+  return 0;
+}
+
+// Raw storage big enough, and aligned enough, for a Defined or an Undefined.
+union SymbolUnion {
+  alignas(Defined) char a[sizeof(Defined)];
+  alignas(Undefined) char b[sizeof(Undefined)];
+};
+
+// Constructs a T in place at `s`, overwriting whatever object was there. The
+// static_asserts guarantee that T fits in a SymbolUnion.
+template <typename T, typename... ArgT>
+void replaceSymbol(Symbol *s, ArgT &&... arg) {
+  static_assert(sizeof(T) <= sizeof(SymbolUnion), "SymbolUnion too small");
+  static_assert(alignof(T) <= alignof(SymbolUnion),
+                "SymbolUnion not aligned enough");
+  assert(static_cast<Symbol *>(static_cast<T *>(nullptr)) == nullptr &&
+         "Not a Symbol");
+
+  new (s) T(std::forward<ArgT>(arg)...);
+}
+
+} // namespace macho
+
+std::string toString(const macho::Symbol &);
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/Target.cpp b/lld/MachO/Target.cpp
new file mode 100644
index 0000000..0f70776
--- /dev/null
+++ b/lld/MachO/Target.cpp
@@ -0,0 +1,14 @@
+//===- Target.cpp ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Target.h"
+
+using namespace lld;
+using namespace lld::macho;
+
+TargetInfo *macho::target = nullptr;
diff --git a/lld/MachO/Target.h b/lld/MachO/Target.h
new file mode 100644
index 0000000..9ebc56c
--- /dev/null
+++ b/lld/MachO/Target.h
@@ -0,0 +1,41 @@
+//===- Target.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_TARGET_H
+#define LLD_MACHO_TARGET_H
+
+#include <cstdint>
+
+namespace lld {
+namespace macho {
+
+enum {
+  PageSize = 4096,
+  ImageBase = 4096,
+  MaxAlignmentPowerOf2 = 32,
+};
+
+class TargetInfo {
+public:
+  virtual ~TargetInfo() = default;
+  virtual uint64_t getImplicitAddend(const uint8_t *loc,
+                                     uint8_t type) const = 0;
+  virtual void relocateOne(uint8_t *loc, uint8_t type, uint64_t val) const = 0;
+
+  uint32_t cpuType;
+  uint32_t cpuSubtype;
+};
+
+TargetInfo *createX86_64TargetInfo();
+
+extern TargetInfo *target;
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp
new file mode 100644
index 0000000..ce0c30d
--- /dev/null
+++ b/lld/MachO/Writer.cpp
@@ -0,0 +1,381 @@
+//===- Writer.cpp ---------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "Writer.h"
+#include "Config.h"
+#include "InputFiles.h"
+#include "InputSection.h"
+#include "OutputSegment.h"
+#include "SymbolTable.h"
+#include "Symbols.h"
+#include "Target.h"
+
+#include "lld/Common/ErrorHandler.h"
+#include "lld/Common/Memory.h"
+#include "llvm/BinaryFormat/MachO.h"
+#include "llvm/Support/LEB128.h"
+#include "llvm/Support/MathExtras.h"
+
+using namespace llvm;
+using namespace llvm::MachO;
+using namespace llvm::support;
+using namespace lld;
+using namespace lld::macho;
+
+namespace {
+class LCLinkEdit;
+class LCDyldInfo;
+class LCSymtab;
+
+// Interface implemented by every load command emitted into the output file.
+// writeHeader() hands writeTo() the address at which the command begins and
+// then advances by getSize() bytes to reach the next command.
+class LoadCommand {
+public:
+  virtual ~LoadCommand() = default;
+  virtual uint32_t getSize() const = 0;
+  virtual void writeTo(uint8_t *buf) const = 0;
+};
+
+// Produces the output file; run() invokes the steps below from top to bottom.
+class Writer {
+public:
+  Writer() : buffer(errorHandler().outputBuffer) {}
+
+  void createLoadCommands();
+  void assignAddresses();
+
+  void openFile();
+  void writeHeader();
+  void writeSections();
+
+  void run();
+
+  std::vector<LoadCommand *> loadCommands;
+  std::unique_ptr<FileOutputBuffer> &buffer;
+  uint64_t fileSize = 0;
+  uint64_t sizeofCmds = 0;
+  LCLinkEdit *linkEditSeg = nullptr;
+  LCDyldInfo *dyldInfoSeg = nullptr;
+  LCSymtab *symtabSeg = nullptr;
+};
+
+// The __PAGEZERO segment: an LC_SEGMENT_64 whose vmsize is one page.
+class LCPagezero : public LoadCommand {
+public:
+  uint32_t getSize() const override { return sizeof(segment_command_64); }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<segment_command_64 *>(buf);
+    c->cmd = LC_SEGMENT_64;
+    c->cmdsize = getSize();
+    strcpy(c->segname, "__PAGEZERO");
+    c->vmsize = PageSize;
+  }
+};
+
+// The __LINKEDIT segment. Its file contents are the bytes in `contents`.
+class LCLinkEdit : public LoadCommand {
+public:
+  uint32_t getSize() const override { return sizeof(segment_command_64); }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<segment_command_64 *>(buf);
+    c->cmd = LC_SEGMENT_64;
+    c->cmdsize = getSize();
+    strcpy(c->segname, "__LINKEDIT");
+    c->fileoff = fileOff;
+    c->filesize = contents.size();
+    c->maxprot = VM_PROT_READ | VM_PROT_WRITE;
+    c->initprot = VM_PROT_READ;
+  }
+
+  uint64_t getOffset() const { return fileOff + contents.size(); }
+
+  uint64_t fileOff = 0;
+  SmallVector<uint8_t, 128> contents;
+};
+
+// LC_DYLD_INFO_ONLY. Only the export offset and size are filled in.
+class LCDyldInfo : public LoadCommand {
+public:
+  uint32_t getSize() const override { return sizeof(dyld_info_command); }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<dyld_info_command *>(buf);
+    c->cmd = LC_DYLD_INFO_ONLY;
+    c->cmdsize = getSize();
+    c->export_off = exportOff;
+    c->export_size = exportSize;
+  }
+
+  uint64_t exportOff = 0;
+  uint64_t exportSize = 0;
+};
+
+// LC_DYSYMTAB. Only the command type and size are filled in.
+class LCDysymtab : public LoadCommand {
+public:
+  uint32_t getSize() const override { return sizeof(dysymtab_command); }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<dysymtab_command *>(buf);
+    c->cmd = LC_DYSYMTAB;
+    c->cmdsize = getSize();
+  }
+};
+
+// An LC_SEGMENT_64 followed by one section_64 per entry in seg->sections.
+class LCSegment : public LoadCommand {
+public:
+  LCSegment(StringRef name, OutputSegment *seg) : name(name), seg(seg) {}
+
+  uint32_t getSize() const override {
+    return sizeof(segment_command_64) +
+           seg->sections.size() * sizeof(section_64);
+  }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<segment_command_64 *>(buf);
+    buf += sizeof(segment_command_64);
+
+    c->cmd = LC_SEGMENT_64;
+    c->cmdsize = getSize();
+    memcpy(c->segname, name.data(), name.size());
+
+    InputSection *firstSec = seg->sections.front().second[0];
+    InputSection *lastSec = seg->sections.back().second.back();
+
+    // dyld3's MachOLoaded::getSlide() assumes that the __TEXT segment starts
+    // from the beginning of the file (i.e. the header).
+    // TODO: replace this logic by creating a synthetic __TEXT,__mach_header
+    // section instead.
+    c->fileoff = name == "__TEXT" ? 0 : firstSec->addr - ImageBase;
+    c->vmaddr = c->fileoff + ImageBase;
+    c->vmsize = c->filesize = lastSec->addr + lastSec->data.size() - c->vmaddr;
+    c->maxprot = VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE;
+    c->initprot = seg->perms;
+    c->nsects = seg->sections.size();
+
+    for (auto &p : seg->sections) {
+      StringRef s = p.first;
+      std::vector<InputSection *> &sections = p.second;
+
+      auto *sectHdr = reinterpret_cast<section_64 *>(buf);
+      buf += sizeof(section_64);
+
+      memcpy(sectHdr->sectname, s.data(), s.size());
+      memcpy(sectHdr->segname, name.data(), name.size());
+
+      sectHdr->addr = sections[0]->addr;
+      sectHdr->offset = sections[0]->addr - ImageBase;
+      // Record the largest alignment among these input sections, stored as its
+      // base-2 logarithm.
+      uint32_t maxAlign = 0;
+      for (const InputSection *section : sections)
+        maxAlign = std::max(maxAlign, section->align);
+      sectHdr->align = Log2_32(maxAlign);
+      sectHdr->flags = sections[0]->flags;
+      sectHdr->size = sections.back()->addr + sections.back()->data.size() -
+                      sections[0]->addr;
+    }
+  }
+
+private:
+  StringRef name;
+  OutputSegment *seg;
+};
+
+// LC_MAIN. The entry offset comes from config->entry.
+class LCMain : public LoadCommand {
+public:
+  uint32_t getSize() const override { return sizeof(entry_point_command); }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<entry_point_command *>(buf);
+    c->cmd = LC_MAIN;
+    c->cmdsize = getSize();
+    c->entryoff = config->entry->getVA();
+    c->stacksize = 0;
+  }
+};
+
+// LC_SYMTAB. Only the command type and size are filled in.
+class LCSymtab : public LoadCommand {
+public:
+  uint32_t getSize() const override { return sizeof(symtab_command); }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<symtab_command *>(buf);
+    c->cmd = LC_SYMTAB;
+    c->cmdsize = getSize();
+  }
+};
+
+// LC_LOAD_DYLIB. The NUL-terminated path follows the fixed-size struct.
+class LCLoadDylib : public LoadCommand {
+public:
+  LCLoadDylib(StringRef path) : path(path) {}
+
+  uint32_t getSize() const override {
+    return alignTo(sizeof(dylib_command) + path.size() + 1, 8);
+  }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<dylib_command *>(buf);
+    buf += sizeof(dylib_command);
+
+    c->cmd = LC_LOAD_DYLIB;
+    c->cmdsize = getSize();
+    c->dylib.name = sizeof(dylib_command);
+
+    memcpy(buf, path.data(), path.size());
+    buf[path.size()] = '\0';
+  }
+
+private:
+  StringRef path;
+};
+
+// LC_LOAD_DYLINKER. The NUL-terminated path follows the fixed-size struct.
+class LCLoadDylinker : public LoadCommand {
+public:
+  uint32_t getSize() const override {
+    return alignTo(sizeof(dylinker_command) + path.size() + 1, 8);
+  }
+
+  void writeTo(uint8_t *buf) const override {
+    auto *c = reinterpret_cast<dylinker_command *>(buf);
+    buf += sizeof(dylinker_command);
+
+    c->cmd = LC_LOAD_DYLINKER;
+    c->cmdsize = getSize();
+    c->name = sizeof(dylinker_command);
+
+    memcpy(buf, path.data(), path.size());
+    buf[path.size()] = '\0';
+  }
+
+private:
+  // Recent versions of Darwin won't run any binary that has dyld at a
+  // different location.
+  const StringRef path = "/usr/lib/dyld";
+};
+} // namespace
+
+// Creates every load command, in the order writeHeader() will emit them.
+void Writer::createLoadCommands() {
+  linkEditSeg = make<LCLinkEdit>();
+  dyldInfoSeg = make<LCDyldInfo>();
+  symtabSeg = make<LCSymtab>();
+
+  loadCommands.push_back(linkEditSeg);
+  loadCommands.push_back(dyldInfoSeg);
+  loadCommands.push_back(symtabSeg);
+  loadCommands.push_back(make<LCPagezero>());
+  loadCommands.push_back(make<LCLoadDylinker>());
+  loadCommands.push_back(make<LCDysymtab>());
+  loadCommands.push_back(make<LCMain>());
+  // TODO: dyld requires libSystem to be loaded. libSystem is a universal
+  // binary and we don't have support for that yet, so mock it out here.
+  loadCommands.push_back(make<LCLoadDylib>("/usr/lib/libSystem.B.dylib"));
+
+  for (OutputSegment *seg : outputSegments)
+    if (!seg->sections.empty())
+      loadCommands.push_back(make<LCSegment>(seg->name, seg));
+}
+
+// Assigns an address to every input section, starting after the header and
+// load commands and page-aligning each segment; __LINKEDIT follows the last.
+void Writer::assignAddresses() {
+  uint64_t addr = ImageBase + sizeof(mach_header_64);
+
+  uint64_t size = 0;
+  for (LoadCommand *lc : loadCommands)
+    size += lc->getSize();
+  sizeofCmds = size;
+  addr += size;
+
+  for (OutputSegment *seg : outputSegments) {
+    addr = alignTo(addr, PageSize);
+
+    for (auto &p : seg->sections) {
+      ArrayRef<InputSection *> sections = p.second;
+      for (InputSection *isec : sections) {
+        addr = alignTo(addr, isec->align);
+        isec->addr = addr;
+        addr += isec->data.size();
+      }
+    }
+  }
+
+  linkEditSeg->fileOff = addr - ImageBase;
+}
+
+// Creates the fileSize-byte output buffer, or reports an error on failure.
+void Writer::openFile() {
+  Expected<std::unique_ptr<FileOutputBuffer>> bufferOrErr =
+      FileOutputBuffer::create(config->outputFile, fileSize,
+                               FileOutputBuffer::F_executable);
+
+  if (!bufferOrErr)
+    error("failed to open " + config->outputFile + ": " +
+          llvm::toString(bufferOrErr.takeError()));
+  else
+    buffer = std::move(*bufferOrErr);
+}
+
+// Writes the mach_header_64, then each load command directly after it.
+void Writer::writeHeader() {
+  auto *hdr = reinterpret_cast<mach_header_64 *>(buffer->getBufferStart());
+  hdr->magic = MH_MAGIC_64;
+  hdr->cputype = CPU_TYPE_X86_64;
+  hdr->cpusubtype = CPU_SUBTYPE_X86_64_ALL | CPU_SUBTYPE_LIB64;
+  hdr->filetype = MH_EXECUTE;
+  hdr->ncmds = loadCommands.size();
+  hdr->sizeofcmds = sizeofCmds;
+  hdr->flags = MH_NOUNDEFS | MH_DYLDLINK | MH_TWOLEVEL;
+
+  uint8_t *p = reinterpret_cast<uint8_t *>(hdr + 1);
+  for (LoadCommand *lc : loadCommands) {
+    lc->writeTo(p);
+    p += lc->getSize();
+  }
+}
+
+// Copies every input section, then the __LINKEDIT contents, into the buffer.
+void Writer::writeSections() {
+  uint8_t *buf = buffer->getBufferStart();
+
+  for (OutputSegment *seg : outputSegments)
+    for (auto &sect : seg->sections)
+      for (InputSection *isec : sect.second)
+        isec->writeTo(buf + isec->addr - ImageBase);
+
+  memcpy(buf + linkEditSeg->fileOff, linkEditSeg->contents.data(),
+         linkEditSeg->contents.size());
+}
+
+// Runs the steps above in order; skips writing if any error has been counted.
+void Writer::run() {
+  createLoadCommands();
+  assignAddresses();
+  fileSize = linkEditSeg->fileOff + linkEditSeg->contents.size();
+
+  openFile();
+  if (errorCount())
+    return;
+
+  writeHeader();
+  writeSections();
+
+  if (auto e = buffer->commit())
+    error("failed to write to the output file: " + toString(std::move(e)));
+}
+
+void macho::writeResult() { Writer().run(); }
diff --git a/lld/MachO/Writer.h b/lld/MachO/Writer.h
new file mode 100644
index 0000000..a0b90631
--- /dev/null
+++ b/lld/MachO/Writer.h
@@ -0,0 +1,20 @@
+//===- Writer.h -------------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLD_MACHO_WRITER_H
+#define LLD_MACHO_WRITER_H
+
+namespace lld {
+namespace macho {
+
+void writeResult();
+
+} // namespace macho
+} // namespace lld
+
+#endif
diff --git a/lld/include/lld/Common/Driver.h b/lld/include/lld/Common/Driver.h
index 0a358d8..6db3d23 100644
--- a/lld/include/lld/Common/Driver.h
+++ b/lld/include/lld/Common/Driver.h
@@ -33,6 +33,11 @@ bool link(llvm::ArrayRef<const char *> args, bool canExitEarly,
           llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
 }
 
+namespace macho {
+bool link(llvm::ArrayRef<const char *> args, bool canExitEarly,
+          llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
+}
+
 namespace wasm {
 bool link(llvm::ArrayRef<const char *> args, bool canExitEarly,
           llvm::raw_ostream &stdoutOS, llvm::raw_ostream &stderrOS);
diff --git a/lld/test/MachO/alignment-too-large.yaml b/lld/test/MachO/alignment-too-large.yaml
new file mode 100644
index 0000000..18d133f
--- /dev/null
+++ b/lld/test/MachO/alignment-too-large.yaml
@@ -0,0 +1,58 @@
+# RUN: yaml2obj %s -o %t.o
+# RUN: not lld -flavor darwinnew -o %t %t.o 2>&1 | FileCheck %s
+#
+# CHECK: alignment 32 of section __text is too large
+--- !mach-o
+FileHeader:
+  magic:           0xFEEDFACF
+
cputype: 0x01000007 + cpusubtype: 0x00000003 + filetype: 0x00000001 + ncmds: 4 + sizeofcmds: 280 + flags: 0x00000000 + reserved: 0x00000000 +LoadCommands: + - cmd: LC_SEGMENT_64 + cmdsize: 152 + segname: '' + vmaddr: 0 + vmsize: 8 + fileoff: 312 + filesize: 8 + maxprot: 7 + initprot: 7 + nsects: 1 + flags: 0 + Sections: + - sectname: __text + segname: __TEXT + addr: 0x0000000000000000 + size: 8 + offset: 0x00000138 + align: 32 + reloff: 0x00000000 + nreloc: 0 + flags: 0x80000400 + reserved1: 0x00000000 + reserved2: 0x00000000 + reserved3: 0x00000000 + content: 48C7C000000000C3 + - cmd: LC_SYMTAB + cmdsize: 24 + symoff: 320 + nsyms: 1 + stroff: 336 + strsize: 8 +LinkEditData: + NameList: + - n_strx: 1 + n_type: 0x0F + n_sect: 1 + n_desc: 0 + n_value: 0 + StringTable: + - '' + - _main + - '' +... diff --git a/lld/test/MachO/arch.s b/lld/test/MachO/arch.s new file mode 100644 index 0000000..3136549 --- /dev/null +++ b/lld/test/MachO/arch.s @@ -0,0 +1,11 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -arch x86_64 -o /dev/null %t.o +# RUN: not lld -flavor darwinnew -arch i386 -o /dev/null %t.o 2>&1 | FileCheck %s +# CHECK: missing or unsupported -arch i386 + +.text +.global _main +_main: + mov $0, %rax + ret diff --git a/lld/test/MachO/duplicate-symbol.s b/lld/test/MachO/duplicate-symbol.s new file mode 100644 index 0000000..47f597f --- /dev/null +++ b/lld/test/MachO/duplicate-symbol.s @@ -0,0 +1,12 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t-dup.o +# RUN: not lld -flavor darwinnew -o /dev/null %t-dup.o %t.o 2>&1 | FileCheck %s + +# CHECK: duplicate symbol: _main + +.text +.global _main +_main: + mov $0, %rax + ret diff --git a/lld/test/MachO/entry-symbol.s b/lld/test/MachO/entry-symbol.s new file mode 100644 index 0000000..1890f40 --- /dev/null +++ b/lld/test/MachO/entry-symbol.s @@ 
-0,0 +1,13 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o /dev/null %t.o -e _not_main +# RUN: not lld -flavor darwinnew -o /dev/null %t.o -e _missing 2>&1 | FileCheck %s +# RUN: not lld -flavor darwinnew -o /dev/null %t.o 2>&1 | FileCheck %s --check-prefix=DEFAULT_ENTRY + +# CHECK: undefined symbol: _missing +# DEFAULT_ENTRY: undefined symbol: _main + +.text +.global _not_main +_not_main: + ret diff --git a/lld/test/MachO/invalid-executable.s b/lld/test/MachO/invalid-executable.s new file mode 100644 index 0000000..9ad9f51 --- /dev/null +++ b/lld/test/MachO/invalid-executable.s @@ -0,0 +1,11 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-unknown-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: not lld -flavor darwinnew -o /dev/null %t 2>&1 | FileCheck %s +# CHECK: unhandled file type + +.text +.global _main +_main: + mov $0, %rax + ret diff --git a/lld/test/MachO/load-commands.s b/lld/test/MachO/load-commands.s new file mode 100644 index 0000000..298cb76 --- /dev/null +++ b/lld/test/MachO/load-commands.s @@ -0,0 +1,17 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: obj2yaml %t | FileCheck %s + +# Check for the presence of a couple of load commands that are essential for +# a working binary. 
+ +# CHECK-DAG: cmd: LC_DYLD_INFO_ONLY +# CHECK-DAG: cmd: LC_SYMTAB +# CHECK-DAG: cmd: LC_DYSYMTAB + +.text +.global _main +_main: + mov $0, %rax + ret diff --git a/lld/test/MachO/no-such-file.s b/lld/test/MachO/no-such-file.s new file mode 100644 index 0000000..840b8f9 --- /dev/null +++ b/lld/test/MachO/no-such-file.s @@ -0,0 +1,4 @@ +# REQUIRES: x86 +# RUN: not lld -flavor darwinnew -o /dev/null %t-no-such-file.o 2>&1 | FileCheck %s + +# CHECK: cannot open {{.*}}no-such-file.o diff --git a/lld/test/MachO/relocations.s b/lld/test/MachO/relocations.s new file mode 100644 index 0000000..cf38f96 --- /dev/null +++ b/lld/test/MachO/relocations.s @@ -0,0 +1,21 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: llvm-objdump -d %t | FileCheck %s + +# CHECK: leaq 17(%rip), %rsi + +.section __TEXT,__text +.globl _main +_main: + movl $0x2000004, %eax # write() syscall + mov $1, %rdi # stdout + leaq str(%rip), %rsi + mov $13, %rdx # length of str + syscall + mov $0, %rax + ret + +.section __TEXT,__cstring +str: + .asciz "Hello world!\n" diff --git a/lld/test/MachO/section-headers.s b/lld/test/MachO/section-headers.s new file mode 100644 index 0000000..9fafc5a --- /dev/null +++ b/lld/test/MachO/section-headers.s @@ -0,0 +1,46 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: llvm-readobj --section-headers %t | FileCheck %s + +# CHECK: Name: __text +# CHECK-NEXT: Segment: __TEXT +# CHECK-NOT: } +# CHECK: Alignment: 1 +# CHECK-NOT: } +# CHECK: Type: Regular (0x0) +# CHECK-NEXT: Attributes [ (0x800004) +# CHECK-NEXT: PureInstructions (0x800000) +# CHECK-NEXT: SomeInstructions (0x4) +# CHECK-NEXT: ] + +# CHECK: Name: __cstring +# CHECK-NEXT: Segment: __TEXT +# CHECK-NOT: } +# CHECK: Alignment: 2 +# CHECK-NOT: } +# CHECK: Type: CStringLiterals (0x2) +# CHECK-NEXT: Attributes [ (0x0) +# CHECK-NEXT: ] + +# 
CHECK: Name: maxlen_16ch_name +# CHECK-NEXT: Segment: __TEXT +# CHECK-NOT: } +# CHECK: Alignment: 3 +# CHECK-NOT: } +# CHECK: Type: Regular (0x0) + +.text +.align 1 +.global _main +_main: + mov $0, %rax + ret + +.section __TEXT,__cstring +.align 2 +str: + .asciz "Hello world!\n" + +.section __TEXT,maxlen_16ch_name +.align 3 diff --git a/lld/test/MachO/segments.s b/lld/test/MachO/segments.s new file mode 100644 index 0000000..69e3d9f --- /dev/null +++ b/lld/test/MachO/segments.s @@ -0,0 +1,20 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: llvm-readobj --macho-segment %t | FileCheck %s + +# These segments must always be present. +# CHECK-DAG: Name: __PAGEZERO +# CHECK-DAG: Name: __LINKEDIT +# CHECK-DAG: Name: __TEXT + +# Check that we handle max-length names correctly. +# CHECK-DAG: Name: maxlen_16ch_name + +.text +.global _main +_main: + mov $0, %rax + ret + +.section maxlen_16ch_name,foo diff --git a/lld/test/MachO/silent-ignore.test b/lld/test/MachO/silent-ignore.test new file mode 100644 index 0000000..e863aac --- /dev/null +++ b/lld/test/MachO/silent-ignore.test @@ -0,0 +1,8 @@ +RUN: lld -flavor darwinnew -v \ +RUN: -demangle \ +RUN: -dynamic \ +RUN: -no_deduplicate \ +RUN: -lto_library /lib/foo \ +RUN: -macosx_version_min 0 +RUN: not lld -flavor darwinnew -v --not-an-ignored-argument 2>&1 | FileCheck %s +CHECK: unknown argument: --not-an-ignored-argument diff --git a/lld/test/MachO/text-segment.s b/lld/test/MachO/text-segment.s new file mode 100644 index 0000000..a3c7edb --- /dev/null +++ b/lld/test/MachO/text-segment.s @@ -0,0 +1,15 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t.o +# RUN: lld -flavor darwinnew -o %t %t.o +# RUN: llvm-readobj --macho-segment %t | FileCheck %s + +# CHECK: Name: __TEXT +# CHECK-NOT: } +# dyld3 assumes that the __TEXT segment starts from the file header +# CHECK: fileoff: 0 + +.text +.global _main 
+_main: + mov $0, %rax + ret diff --git a/lld/tools/lld/CMakeLists.txt b/lld/tools/lld/CMakeLists.txt index a37c2c7..2cdd9b8 100644 --- a/lld/tools/lld/CMakeLists.txt +++ b/lld/tools/lld/CMakeLists.txt @@ -16,6 +16,7 @@ target_link_libraries(lld lldCOFF lldDriver lldELF + lldMachO2 lldMinGW lldWasm ) diff --git a/lld/tools/lld/lld.cpp b/lld/tools/lld/lld.cpp index 646fc3d..58a291d 100644 --- a/lld/tools/lld/lld.cpp +++ b/lld/tools/lld/lld.cpp @@ -45,10 +45,11 @@ using namespace llvm::sys; enum Flavor { Invalid, - Gnu, // -flavor gnu - WinLink, // -flavor link - Darwin, // -flavor darwin - Wasm, // -flavor wasm + Gnu, // -flavor gnu + WinLink, // -flavor link + Darwin, // -flavor darwin + DarwinNew, // -flavor darwinnew + Wasm, // -flavor wasm }; LLVM_ATTRIBUTE_NORETURN static void die(const Twine &s) { @@ -62,6 +63,7 @@ static Flavor getFlavor(StringRef s) { .CasesLower("wasm", "ld-wasm", Wasm) .CaseLower("link", WinLink) .CasesLower("ld64", "ld64.lld", "darwin", Darwin) + .CaseLower("darwinnew", DarwinNew) .Default(Invalid); } @@ -157,6 +159,8 @@ int main(int argc, const char **argv) { return !coff::link(args, canExitEarly(), llvm::outs(), llvm::errs()); case Darwin: return !mach_o::link(args, canExitEarly(), llvm::outs(), llvm::errs()); + case DarwinNew: + return !macho::link(args, canExitEarly(), llvm::outs(), llvm::errs()); case Wasm: return !wasm::link(args, canExitEarly(), llvm::outs(), llvm::errs()); default: -- 2.7.4