return false;
}
+// Turns every CommonSymbol still present in the symbol table into a Defined
+// symbol backed by its own zero-fill input section named __common. Call this
+// only once symbol resolution has finished (that is, after every InputFile
+// has been loaded); code that runs afterwards will not encounter any
+// CommonSymbols.
+static void replaceCommonSymbols() {
+  for (macho::Symbol *sym : symtab->getSymbols()) {
+    if (auto *tentative = dyn_cast<CommonSymbol>(sym)) {
+      // One dedicated section per common symbol, inheriting the symbol's
+      // originating file and alignment.
+      auto *section = make<InputSection>();
+      section->file = tentative->file;
+      section->name = section_names::common;
+      section->segname = segment_names::data;
+      section->align = tentative->align;
+      section->flags = S_ZEROFILL;
+      // There are no backing bytes, only a length. The size_t cast truncates
+      // huge sizes on 32-bit hosts, but linking 64-bit programs on a 32-bit
+      // architecture is not really worth supporting.
+      section->data = {nullptr, static_cast<size_t>(tentative->size)};
+      inputSections.push_back(section);
+
+      // Rebind the name to an external, non-weak definition located at the
+      // very start of the new section.
+      replaceSymbol<Defined>(sym, sym->getName(), section, /*value=*/0,
+                             /*isWeakDef=*/false,
+                             /*isExternal=*/true);
+    }
+  }
+}
+
static inline char toLowerDash(char x) {
if (x >= 'A' && x <= 'Z')
return x - 'A' + 'a';
error("-sub_library " + searchName + " does not match a supplied dylib");
}
+ replaceCommonSymbols();
+
StringRef orderFile = args.getLastArgValue(OPT_order_file);
if (!orderFile.empty())
parseOrderFile(orderFile);
for (size_t i = 0, n = nList.size(); i < n; ++i) {
const structs::nlist_64 &sym = nList[i];
- // Undefined symbol
- if (!sym.n_sect) {
+ if ((sym.n_type & N_TYPE) == N_UNDF) {
StringRef name = strtab + sym.n_strx;
- symbols[i] = symtab->addUndefined(name);
+ symbols[i] = sym.n_value == 0
+ ? symtab->addUndefined(name)
+ : symtab->addCommon(name, this, sym.n_value,
+ 1 << GET_COMM_ALIGN(sym.n_desc));
continue;
}
return s;
}
+// Records a common (tentative) definition of `name` coming from `file`.
+//
+// Resolution against a symbol already present under the same name:
+//  - existing common symbol: the strictly larger one is kept; when the sizes
+//    are equal the new definition replaces the old one;
+//  - existing undefined symbol: replaced by the common symbol;
+//  - any other kind: not implemented yet, an error is reported and the
+//    existing symbol is left untouched.
+//
+// Returns the symbol table entry for `name`.
+Symbol *SymbolTable::addCommon(StringRef name, InputFile *file, uint64_t size,
+                               uint32_t align) {
+  Symbol *sym;
+  bool isNew;
+  std::tie(sym, isNew) = insert(name);
+
+  if (!isNew) {
+    auto *existing = dyn_cast<CommonSymbol>(sym);
+    // An existing common symbol survives only if it is strictly larger.
+    if (existing && existing->size > size)
+      return sym;
+    if (!existing && !isa<Undefined>(sym)) {
+      error("TODO: implement common symbol resolution with other symbol kinds");
+      return sym;
+    }
+  }
+
+  replaceSymbol<CommonSymbol>(sym, name, file, size, align);
+  return sym;
+}
+
Symbol *SymbolTable::addDylib(StringRef name, DylibFile *file, bool isWeakDef,
bool isTlv) {
Symbol *s;
class ArchiveFile;
class DylibFile;
+class InputFile;
class InputSection;
class MachHeaderSection;
class Symbol;
Symbol *addUndefined(StringRef name);
+ Symbol *addCommon(StringRef name, InputFile *, uint64_t size, uint32_t align);
+
Symbol *addDylib(StringRef name, DylibFile *file, bool isWeakDef, bool isTlv);
Symbol *addLazy(StringRef name, ArchiveFile *file,
#include "lld/Common/ErrorHandler.h"
#include "lld/Common/Strings.h"
#include "llvm/Object/Archive.h"
+#include "llvm/Support/MathExtras.h"
namespace lld {
namespace macho {
enum Kind {
DefinedKind,
UndefinedKind,
+ CommonKind,
DylibKind,
LazyKind,
DSOHandleKind,
static bool classof(const Symbol *s) { return s->kind() == UndefinedKind; }
};
+// On Unix, it is traditionally allowed to write variable definitions without
+// initialization expressions (such as "int foo;") to header files. These are
+// called tentative definitions.
+//
+// Using tentative definitions is usually considered a bad practice; you should
+// write only declarations (such as "extern int foo;") to header files.
+// Nevertheless, the linker and the compiler have to do something to support
+// bad code by allowing duplicate definitions for this particular case.
+//
+// The compiler creates common symbols when it sees tentative definitions.
+// (You can suppress this behavior and let the compiler create a regular
+// defined symbol by passing -fno-common.) When linking the final binary, if
+// there are remaining common symbols after name resolution is complete, the
+// linker converts them to regular defined symbols in a __common section.
+class CommonSymbol : public Symbol {
+public:
+  // `size` is the number of bytes to reserve for the symbol and `align` is
+  // its alignment in bytes. An `align` of 1 is treated as "no alignment
+  // requested": the alignment is then derived from `size`, rounded up to a
+  // power of two.
+  CommonSymbol(StringRefZ name, InputFile *file, uint64_t size, uint32_t align)
+      : Symbol(CommonKind, name), file(file), size(size),
+        align(align != 1 ? align : implicitAlign(size)) {
+    // TODO: cap maximum alignment
+  }
+
+  static bool classof(const Symbol *s) { return s->kind() == CommonKind; }
+
+  InputFile *const file;
+  const uint64_t size;
+  const uint32_t align;
+
+private:
+  // Returns the alignment implied by `size`: the smallest power of two that
+  // is >= size. llvm::PowerOf2Ceil yields a 64-bit value, so for sizes above
+  // 2^31 the result would not fit in the 32-bit `align` member and would
+  // silently truncate to 0. Clamp to 2^31, the largest power of two that is
+  // representable, instead.
+  static uint32_t implicitAlign(uint64_t size) {
+    constexpr uint64_t maxAlign = uint64_t(1) << 31;
+    if (size >= maxAlign)
+      return static_cast<uint32_t>(maxAlign);
+    return static_cast<uint32_t>(llvm::PowerOf2Ceil(size));
+  }
+};
+
class DylibSymbol : public Symbol {
public:
DylibSymbol(DylibFile *file, StringRefZ name, bool isWeakDef, bool isTlv)
union SymbolUnion {
alignas(Defined) char a[sizeof(Defined)];
alignas(Undefined) char b[sizeof(Undefined)];
- alignas(DylibSymbol) char c[sizeof(DylibSymbol)];
- alignas(LazySymbol) char d[sizeof(LazySymbol)];
+ alignas(CommonSymbol) char c[sizeof(CommonSymbol)]; // buffer sized and aligned for a CommonSymbol
+ alignas(DylibSymbol) char d[sizeof(DylibSymbol)]; // same buffer as before, member renamed c -> d
+ alignas(LazySymbol) char e[sizeof(LazySymbol)]; // same buffer as before, member renamed d -> e
+ alignas(DSOHandle) char f[sizeof(DSOHandle)]; // buffer for DSOHandle; NOTE(review): presumably so any symbol kind can be built in place - confirm
};
template <typename T, typename... ArgT>
namespace section_names {
constexpr const char pageZero[] = "__pagezero";
+constexpr const char common[] = "__common";
constexpr const char header[] = "__mach_header";
constexpr const char binding[] = "__binding";
constexpr const char weakBinding[] = "__weak_binding";
--- /dev/null
+# REQUIRES: x86
+# RUN: split-file %s %t
+
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/test.s -o %t/test.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/same-size.s -o %t/same-size.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/smaller-size.s -o %t/smaller-size.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align.s -o %t/zero-align.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/zero-align-round-up.s -o %t/zero-align-round-up.o
+
+## Check that we pick the definition with the larger size, regardless of
+## its alignment.
+# RUN: lld -flavor darwinnew %t/test.o %t/smaller-size.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/smaller-size.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
+
+## When the sizes are equal, we pick the symbol whose file occurs later in the
+## command-line argument list.
+# RUN: lld -flavor darwinnew %t/test.o %t/same-size.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/same-size.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=SMALLER-ALIGNMENT
+
+# RUN: lld -flavor darwinnew %t/test.o %t/zero-align.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/zero-align.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+
+# RUN: lld -flavor darwinnew %t/test.o %t/zero-align-round-up.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+# RUN: lld -flavor darwinnew %t/zero-align-round-up.o %t/test.o -order_file %t/order -o %t/test
+# RUN: llvm-objdump --section-headers --syms %t/test | FileCheck %s --check-prefix=LARGER-ALIGNMENT
+
+# SMALLER-ALIGNMENT-LABEL: Sections:
+# SMALLER-ALIGNMENT: __common {{[0-9a-f]+}} [[#%x, COMMON_START:]] BSS
+
+# SMALLER-ALIGNMENT-LABEL: SYMBOL TABLE:
+# SMALLER-ALIGNMENT-DAG: [[#COMMON_START]] g O __DATA,__common _check_size
+# SMALLER-ALIGNMENT-DAG: [[#COMMON_START + 2]] g O __DATA,__common _end_marker
+# SMALLER-ALIGNMENT-DAG: [[#COMMON_START + 8]] g O __DATA,__common _check_alignment
+
+# LARGER-ALIGNMENT-LABEL: Sections:
+# LARGER-ALIGNMENT: __common {{[0-9a-f]+}} [[#%x, COMMON_START:]] BSS
+
+# LARGER-ALIGNMENT-LABEL: SYMBOL TABLE:
+# LARGER-ALIGNMENT-DAG: [[#COMMON_START]] g O __DATA,__common _check_size
+# LARGER-ALIGNMENT-DAG: [[#COMMON_START + 2]] g O __DATA,__common _end_marker
+# LARGER-ALIGNMENT-DAG: [[#COMMON_START + 16]] g O __DATA,__common _check_alignment
+
+#--- order
+## Order is important as we determine the size of a given symbol via the
+## address of the next symbol.
+_check_size
+_end_marker
+_check_alignment
+
+#--- smaller-size.s
+.comm _check_size, 1, 1
+.comm _check_alignment, 1, 4
+
+#--- same-size.s
+.comm _check_size, 2, 1
+.comm _check_alignment, 2, 4
+
+#--- zero-align.s
+.comm _check_size, 2, 1
+## If alignment is set to zero, use the size to determine the alignment.
+.comm _check_alignment, 16, 0
+
+#--- zero-align-round-up.s
+.comm _check_size, 2, 1
+## If alignment is set to zero, use the size to determine the alignment. If the
+## size is not a power of two, round it up. (In this case, 14 rounds to 16.)
+.comm _check_alignment, 14, 0
+
+#--- test.s
+.comm _check_size, 2, 1
+.comm _end_marker, 1
+.comm _check_alignment, 2, 3
+
+.globl _main
+_main:
+ ret