#ifndef LLD_MACHO_CONFIG_H
#define LLD_MACHO_CONFIG_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
namespace macho {
class Symbol;
+struct SymbolPriorityEntry;
struct Configuration {
Symbol *entry;
llvm::StringRef outputFile;
llvm::MachO::HeaderFileType outputType;
std::vector<llvm::StringRef> searchPaths;
+ llvm::DenseMap<llvm::StringRef, SymbolPriorityEntry> priorities; // keyed by symbol name
+};
+
+// The symbol with the highest priority should be ordered first in the output
+// section (modulo input section contiguity constraints). Using priority
+// (highest first) instead of order (lowest first) has the convenient property
+// that the default-constructed zero priority -- for symbols/sections without a
+// user-defined order -- naturally ends up putting them at the end of the
+// output.
+struct SymbolPriorityEntry {
+ // The priority given to a matching symbol, regardless of which object file
+ // it originated from. Stays 0 when no file-independent entry was recorded.
+ size_t anyObjectFile = 0;
+ // Priorities for a matching symbol from one specific object file, by name.
+ llvm::DenseMap<llvm::StringRef, size_t> objectFiles;
};
extern Configuration *config;
#include "lld/Common/LLVM.h"
#include "lld/Common/Memory.h"
#include "lld/Common/Version.h"
+#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/BinaryFormat/MachO.h"
}
}
+// Architecture names that may appear in the <arch> field of an order file
+// entry.
+static std::array<StringRef, 6> archNames{"arm",    "arm64", "i386",
+                                          "x86_64", "ppc",   "ppc64"};
+
+// Returns true if `s` is one of the names listed in archNames.
+static bool isArchString(StringRef s) {
+  // The lookup set is built from archNames the first time this is called.
+  static DenseSet<StringRef> knownArchs(archNames.begin(), archNames.end());
+  return knownArchs.count(s) != 0;
+}
+
+// Reads the order file at `path` and records a priority for every symbol it
+// names in config->priorities.
+//
+// Each line of the file holds one entry:
+//
+//   <arch>:<object file>:<symbol name>
+//
+// Fields are separated by ':' or whitespace. <arch> and <object file> may be
+// left out; an entry without them matches any symbol of that name.
+//
+// A token beginning with '#' starts a comment that runs to the end of the
+// line.
+//
+// When several entries match the same symbol, the entry closest to the top of
+// the file wins.
+void parseOrderFile(StringRef path) {
+  Optional<MemoryBufferRef> buffer = readFile(path);
+  if (!buffer) {
+    error("Could not read order file at " + path);
+    return;
+  }
+
+  // Priorities count down from the maximum, so earlier lines rank higher.
+  size_t priority = std::numeric_limits<size_t>::max();
+  for (StringRef line : args::getLines(*buffer)) {
+    // Collect up to three tokens, stopping early at a comment or end of line.
+    std::array<StringRef, 3> fields;
+    uint8_t fieldCount = 0;
+    while (!line.empty() && fieldCount < 3) {
+      std::pair<StringRef, StringRef> split = getToken(line, ": \t\n\v\f\r");
+      line = split.second;
+      if (split.first.empty() || split.first[0] == '#')
+        break;
+      fields[fieldCount++] = split.first;
+    }
+
+    // Map the tokens onto the entry's fields. With two tokens, the first one
+    // is an arch if it names a known architecture and an object file
+    // otherwise.
+    StringRef arch, objectFile, symbol;
+    if (fieldCount == 3) {
+      arch = fields[0];
+      objectFile = fields[1];
+      symbol = fields[2];
+    } else if (fieldCount == 2) {
+      if (isArchString(fields[0]))
+        arch = fields[0];
+      else
+        objectFile = fields[0];
+      symbol = fields[1];
+    } else if (fieldCount == 1) {
+      symbol = fields[0];
+    }
+
+    if (!arch.empty()) {
+      if (!isArchString(arch)) {
+        error("invalid arch \"" + arch + "\" in order file: expected one of " +
+              llvm::join(archNames, ", "));
+        continue;
+      }
+
+      // TODO: Update when we extend support for other archs
+      if (arch != "x86_64")
+        continue;
+    }
+
+    if (!objectFile.empty() && !objectFile.endswith(".o")) {
+      error("invalid object file name \"" + objectFile +
+            "\" in order file: should end with .o");
+      continue;
+    }
+
+    if (!symbol.empty()) {
+      SymbolPriorityEntry &entry = config->priorities[symbol];
+      if (objectFile.empty())
+        entry.anyObjectFile = std::max(entry.anyObjectFile, priority);
+      else
+        // insert() keeps an existing value, so the earliest entry for a given
+        // object file is the one that sticks.
+        entry.objectFiles.insert(std::make_pair(objectFile, priority));
+    }
+
+    --priority;
+  }
+}
+
// We expect sub-library names of the form "libfoo", which will match a dylib
// with a path of .*/libfoo.dylib.
static bool markSubLibrary(StringRef searchName) {
error("-sub_library " + searchName + " does not match a supplied dylib");
}
+ StringRef orderFile = args.getLastArgValue(OPT_order_file);
+ if (!orderFile.empty())
+ parseOrderFile(orderFile);
+
// dyld requires us to load libSystem. Since we may run tests on non-OSX
// systems which do not have libSystem, we mock it out here.
// TODO: Replace this with a stub tbd file once we have TAPI support.
": could not get the buffer for the member defining symbol " +
sym.getName());
auto file = make<ObjFile>(mb);
+ symbols.insert(symbols.end(), file->symbols.begin(), file->symbols.end());
sections.insert(sections.end(), file->sections.begin(), file->sections.end());
}
// in the final binary.
class MergedOutputSection : public OutputSection {
public:
- MergedOutputSection(StringRef name) : OutputSection(name) {}
+ MergedOutputSection(StringRef name) : OutputSection(MergedKind, name) {}
const InputSection *firstSection() const { return inputs.front(); }
const InputSection *lastSection() const { return inputs.back(); }
std::vector<InputSection *> inputs;
+ static bool classof(const OutputSection *sec) {
+ return sec->kind() == MergedKind;
+ }
+
private:
void mergeFlags(uint32_t inputFlags);
def o: Separate<["-"], "o">, MetaVarName<"<path>">,
HelpText<"Path to file to write output">;
+// -order_file <file>: the driver passes <file> to parseOrderFile().
+def order_file: Separate<["-"], "order_file">, MetaVarName<"<file>">,
+  HelpText<"Lay out symbols within each section in the order specified by the "
+           "order file">;
+
def sub_library: Separate<["-"], "sub_library">, MetaVarName<"<libname>">,
HelpText<"Re-export the specified dylib">;
// linker with the same segment / section name.
class OutputSection {
public:
- OutputSection(StringRef name) : name(name) {}
+ enum Kind {
+ MergedKind,
+ SyntheticKind,
+ };
+
+ OutputSection(Kind kind, StringRef name) : name(name), sectionKind(kind) {}
virtual ~OutputSection() = default;
+ Kind kind() const { return sectionKind; }
// These accessors will only be valid after finalizing the section.
uint64_t getSegmentOffset() const;
uint64_t fileOff = 0;
uint32_t align = 1;
uint32_t flags = 0;
+
+private:
+ Kind sectionKind;
};
class OutputSectionComparator {
outputSegments.push_back(segRef);
return segRef;
}
-
-void macho::sortOutputSegmentsAndSections() {
- // Sorting only can happen once all outputs have been collected.
- // Since output sections are grouped by segment, sorting happens
- // first over all segments, then over sections per segment.
- auto comparator = OutputSegmentComparator();
- llvm::stable_sort(outputSegments, comparator);
-
- // Now that the output sections are sorted, assign the final
- // output section indices.
- uint32_t sectionIndex = 0;
- for (OutputSegment *seg : outputSegments) {
- seg->sortOutputSections(&comparator);
- for (auto &p : seg->getSections()) {
- OutputSection *section = p.second;
- if (!section->isHidden())
- section->index = ++sectionIndex;
- }
- }
-}
OutputSegment *getOutputSegment(StringRef name);
OutputSegment *getOrCreateOutputSegment(StringRef name);
-void sortOutputSegmentsAndSections();
} // namespace macho
} // namespace lld
namespace macho {
SyntheticSection::SyntheticSection(const char *segname, const char *name)
- : OutputSection(name) {
+ : OutputSection(SyntheticKind, name) {
// Synthetic sections always know which segment they belong to so hook
// them up when they're made
getOrCreateOutputSegment(segname)->addOutputSection(this);
public:
SyntheticSection(const char *segname, const char *name);
virtual ~SyntheticSection() = default;
+
+ static bool classof(const OutputSection *sec) {
+ return sec->kind() == SyntheticKind;
+ }
};
// The header of the Mach-O file, which must have a file offset of zero.
#include "Config.h"
#include "InputFiles.h"
#include "InputSection.h"
+#include "MergedOutputSection.h"
+#include "OutputSection.h"
#include "OutputSegment.h"
#include "SymbolTable.h"
#include "Symbols.h"
#include "llvm/BinaryFormat/MachO.h"
#include "llvm/Support/LEB128.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/Path.h"
using namespace llvm;
using namespace llvm::MachO;
}
}
+// Returns the priority `entry` assigns to a symbol coming from `file`: the
+// larger of the entry's file-independent priority and the priority recorded
+// for the file's base name (0 when nothing is recorded for that name).
+static size_t getSymbolPriority(const SymbolPriorityEntry &entry,
+                                const InputFile &file) {
+  StringRef baseName = sys::path::filename(file.getName());
+  size_t perFilePriority = entry.objectFiles.lookup(baseName);
+  return std::max(perFilePriority, entry.anyObjectFile);
+}
+
+// Computes a priority for every input section that holds at least one symbol
+// named in config->priorities. A section's priority is the highest priority
+// among the symbols it contains. The result is empty when config->priorities
+// is empty.
+static DenseMap<const InputSection *, size_t> buildInputSectionPriorities() {
+  DenseMap<const InputSection *, size_t> sectionPriorities;
+  if (config->priorities.empty())
+    return sectionPriorities;
+
+  // TODO: Make sure this handles weak symbols correctly.
+  for (InputFile *file : inputFiles) {
+    if (!isa<ObjFile>(file) && !isa<ArchiveFile>(file))
+      continue;
+
+    for (Symbol *sym : file->symbols) {
+      auto *defined = dyn_cast<Defined>(sym);
+      if (!defined)
+        continue;
+
+      auto it = config->priorities.find(defined->getName());
+      if (it == config->priorities.end())
+        continue;
+
+      // Raise the section's priority to this symbol's if it is higher.
+      size_t &slot = sectionPriorities[defined->isec];
+      slot = std::max(slot, getSymbolPriority(it->second, *defined->isec->file));
+    }
+  }
+
+  return sectionPriorities;
+}
+
+// Sorting only can happen once all outputs have been collected. Here we sort
+// segments, output sections within each segment, and input sections within each
+// output section.
+static void sortSegmentsAndSections() {
+  auto comparator = OutputSegmentComparator();
+  llvm::stable_sort(outputSegments, comparator);
+
+  // Empty when there are no order-file priorities or no symbol matched one; in
+  // that case input sections are left in their existing order.
+  const DenseMap<const InputSection *, size_t> isecPriorities =
+      buildInputSectionPriorities();
+
+  uint32_t sectionIndex = 0;
+  for (OutputSegment *seg : outputSegments) {
+    seg->sortOutputSections(&comparator);
+    for (auto &p : seg->getSections()) {
+      OutputSection *section = p.second;
+      // Now that the output sections are sorted, assign the final
+      // output section indices.
+      if (!section->isHidden())
+        section->index = ++sectionIndex;
+
+      if (isecPriorities.empty())
+        continue;
+
+      if (auto *merged = dyn_cast<MergedOutputSection>(section)) {
+        // Use lookup() rather than operator[]: operator[] inserts an entry for
+        // a missing key, and the table growth that may follow invalidates the
+        // reference already obtained for the other operand. lookup() returns
+        // 0 by value for sections without a priority, so the stable sort
+        // leaves them, in their original relative order, after all
+        // prioritized sections.
+        llvm::stable_sort(merged->inputs,
+                          [&](const InputSection *a, const InputSection *b) {
+                            return isecPriorities.lookup(a) >
+                                   isecPriorities.lookup(b);
+                          });
+      }
+    }
+  }
+}
+
void Writer::createOutputSections() {
// First, create hidden sections
headerSection = make<MachHeaderSection>();
in.stubHelper->setup();
// Sort and assign sections to their respective segments. No more sections nor
- // segments may be created after this method runs.
+ // segments may be created after these methods run.
createOutputSections();
- sortOutputSegmentsAndSections();
+ sortSegmentsAndSections();
createLoadCommands();
--- /dev/null
+# REQUIRES: x86
+# RUN: echo ".globl _main; .text; _main: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t.o
+# RUN: not lld -flavor darwinnew -o %t %t.o -order_file %s 2>&1 | FileCheck %s
+# CHECK: error: invalid arch "sparc" in order file: expected one of arm, arm64, i386, x86_64, ppc, ppc64
+# CHECK-EMPTY:
+
+_barsymbol
+sparc:hello.o:_foosymbol
+i386:hello.o:_foosymbol
--- /dev/null
+# REQUIRES: x86
+# RUN: echo ".globl _main; .text; _main: ret" | llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t.o
+# RUN: not lld -flavor darwinnew -o %t %t.o -order_file %s 2>&1 | FileCheck %s
+# CHECK: invalid object file name "helloo" in order file: should end with .o
+# CHECK: invalid object file name "z80" in order file: should end with .o
+# CHECK-EMPTY:
+
+_barsymbol
+x86_64:helloo:_foosymbol
+z80:_foosymbol
--- /dev/null
+# REQUIRES: x86
+# RUN: mkdir -p %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %s -o %t/test.o
+# RUN: echo ".globl _foo; .text; _foo: _bar: ret" | \
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin -o %t/foo.o
+# RUN: rm -f %t/foo.a
+# RUN: llvm-ar rcs %t/foo.a %t/foo.o
+
+# FOO-FIRST: <_foo>:
+# FOO-FIRST: <_main>:
+
+# FOO-SECOND: <_main>:
+# FOO-SECOND: <_foo>:
+
+# RUN: echo "_foo # just a comment" > %t/ord-1
+# RUN: echo "_main # another comment" >> %t/ord-1
+# RUN: lld -flavor darwinnew -o %t/test-1 %t/test.o %t/foo.o -order_file %t/ord-1
+# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST
+## Output should be the same regardless of the command-line order of object files
+# RUN: lld -flavor darwinnew -o %t/test-1 %t/foo.o %t/test.o -order_file %t/ord-1
+# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "_main # just a comment" > %t/ord-2
+# RUN: echo "_foo # another comment" >> %t/ord-2
+# RUN: lld -flavor darwinnew -o %t/test-2 %t/test.o %t/foo.o -order_file %t/ord-2
+# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-SECOND
+# RUN: lld -flavor darwinnew -o %t/test-2 %t/foo.o %t/test.o -order_file %t/ord-2
+# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-SECOND
+
+# RUN: echo "foo.o:_foo" > %t/ord-file-match
+# RUN: echo "_main" >> %t/ord-file-match
+# RUN: lld -flavor darwinnew -o %t/test-file-match %t/test.o %t/foo.o -order_file %t/ord-file-match
+# RUN: llvm-objdump -d %t/test-file-match | FileCheck %s --check-prefix=FOO-FIRST
+## Output should be the same regardless of the command-line order of object files
+# RUN: lld -flavor darwinnew -o %t/test-file-match %t/foo.o %t/test.o -order_file %t/ord-file-match
+# RUN: llvm-objdump -d %t/test-file-match | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "bar.o:_foo" > %t/ord-file-nomatch
+# RUN: echo "_main" >> %t/ord-file-nomatch
+# RUN: echo "_foo" >> %t/ord-file-nomatch
+# RUN: lld -flavor darwinnew -o %t/test-file-nomatch %t/test.o %t/foo.o -order_file %t/ord-file-nomatch
+# RUN: llvm-objdump -d %t/test-file-nomatch | FileCheck %s --check-prefix=FOO-SECOND
+# RUN: lld -flavor darwinnew -o %t/test-file-nomatch %t/foo.o %t/test.o -order_file %t/ord-file-nomatch
+# RUN: llvm-objdump -d %t/test-file-nomatch | FileCheck %s --check-prefix=FOO-SECOND
+
+# RUN: echo "x86_64:_foo" > %t/ord-arch-match
+# RUN: echo "_main" >> %t/ord-arch-match
+# RUN: lld -flavor darwinnew -o %t/test-arch-match %t/test.o %t/foo.o -order_file %t/ord-arch-match
+# RUN: llvm-objdump -d %t/test-arch-match | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-arch-match %t/foo.o %t/test.o -order_file %t/ord-arch-match
+# RUN: llvm-objdump -d %t/test-arch-match | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "ppc:_foo" > %t/ord-arch-nomatch
+# RUN: echo "_main" >> %t/ord-arch-nomatch
+# RUN: echo "_foo" >> %t/ord-arch-nomatch
+# RUN: lld -flavor darwinnew -o %t/test-arch-nomatch %t/test.o %t/foo.o -order_file %t/ord-arch-nomatch
+# RUN: llvm-objdump -d %t/test-arch-nomatch | FileCheck %s --check-prefix=FOO-SECOND
+# RUN: lld -flavor darwinnew -o %t/test-arch-nomatch %t/foo.o %t/test.o -order_file %t/ord-arch-nomatch
+# RUN: llvm-objdump -d %t/test-arch-nomatch | FileCheck %s --check-prefix=FOO-SECOND
+
+## An entry that gives both a matching arch and a matching object file.
+# RUN: echo "x86_64:foo.o:_foo" > %t/ord-arch-file-match
+# RUN: echo "_main" >> %t/ord-arch-file-match
+# RUN: lld -flavor darwinnew -o %t/test-arch-file-match %t/test.o %t/foo.o -order_file %t/ord-arch-file-match
+# RUN: llvm-objdump -d %t/test-arch-file-match | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-arch-file-match %t/foo.o %t/test.o -order_file %t/ord-arch-file-match
+# RUN: llvm-objdump -d %t/test-arch-file-match | FileCheck %s --check-prefix=FOO-FIRST
+
+## Test archives
+
+# RUN: lld -flavor darwinnew -o %t/test-archive-1 %t/test.o %t/foo.a -order_file %t/ord-1
+# RUN: llvm-objdump -d %t/test-archive-1 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-archive-1 %t/foo.a %t/test.o -order_file %t/ord-1
+# RUN: llvm-objdump -d %t/test-archive-1 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: lld -flavor darwinnew -o %t/test-archive-file-no-match %t/test.o %t/foo.a -order_file %t/ord-file-nomatch
+# RUN: llvm-objdump -d %t/test-archive-file-no-match | FileCheck %s --check-prefix=FOO-SECOND
+# RUN: lld -flavor darwinnew -o %t/test-archive-file-no-match %t/foo.a %t/test.o -order_file %t/ord-file-nomatch
+# RUN: llvm-objdump -d %t/test-archive-file-no-match | FileCheck %s --check-prefix=FOO-SECOND
+
+## The following tests check that if an address is matched by multiple order
+## file entries, it should always use the lowest-ordered match.
+
+# RUN: echo "_foo" > %t/ord-multiple-1
+# RUN: echo "_main" >> %t/ord-multiple-1
+# RUN: echo "foo.o:_foo" >> %t/ord-multiple-1
+# RUN: lld -flavor darwinnew -o %t/test-1 %t/test.o %t/foo.o -order_file %t/ord-multiple-1
+# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-1 %t/foo.o %t/test.o -order_file %t/ord-multiple-1
+# RUN: llvm-objdump -d %t/test-1 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "foo.o:_foo" > %t/ord-multiple-2
+# RUN: echo "_main" >> %t/ord-multiple-2
+# RUN: echo "_foo" >> %t/ord-multiple-2
+# RUN: lld -flavor darwinnew -o %t/test-2 %t/test.o %t/foo.o -order_file %t/ord-multiple-2
+# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-2 %t/foo.o %t/test.o -order_file %t/ord-multiple-2
+# RUN: llvm-objdump -d %t/test-2 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "_foo" > %t/ord-multiple-3
+# RUN: echo "_main" >> %t/ord-multiple-3
+# RUN: echo "_foo" >> %t/ord-multiple-3
+# RUN: lld -flavor darwinnew -o %t/test-3 %t/test.o %t/foo.o -order_file %t/ord-multiple-3
+# RUN: llvm-objdump -d %t/test-3 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-3 %t/foo.o %t/test.o -order_file %t/ord-multiple-3
+# RUN: llvm-objdump -d %t/test-3 | FileCheck %s --check-prefix=FOO-FIRST
+
+# RUN: echo "foo.o:_foo" > %t/ord-multiple-4
+# RUN: echo "_main" >> %t/ord-multiple-4
+# RUN: echo "foo.o:_foo" >> %t/ord-multiple-4
+# RUN: lld -flavor darwinnew -o %t/test-4 %t/test.o %t/foo.o -order_file %t/ord-multiple-4
+# RUN: llvm-objdump -d %t/test-4 | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-4 %t/foo.o %t/test.o -order_file %t/ord-multiple-4
+# RUN: llvm-objdump -d %t/test-4 | FileCheck %s --check-prefix=FOO-FIRST
+
+## _foo and _bar both point to the same location. When both symbols appear in
+## an order file, the location in question should be ordered according to the
+## lowest-ordered symbol that references it.
+# RUN: echo "_bar" > %t/ord-alias
+# RUN: echo "_main" >> %t/ord-alias
+# RUN: echo "_foo" >> %t/ord-alias
+# RUN: lld -flavor darwinnew -o %t/test-alias %t/test.o %t/foo.o -order_file %t/ord-alias
+# RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST
+# RUN: lld -flavor darwinnew -o %t/test-alias %t/foo.o %t/test.o -order_file %t/ord-alias
+# RUN: llvm-objdump -d %t/test-alias | FileCheck %s --check-prefix=FOO-FIRST
+
+.text
+.globl _main
+
+_main:
+ callq _foo
+ ret