for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
if (!isec->pieces[i].live)
continue;
- uint32_t pieceAlign = MinAlign(isec->pieces[i].inSecOff, align);
+ // See comment above DeduplicatedCStringSection for how alignment is
+ // handled.
+ uint32_t pieceAlign =
+ 1 << countTrailingZeros(isec->align | isec->pieces[i].inSecOff);
offset = alignTo(offset, pieceAlign);
isec->pieces[i].outSecOff = offset;
isec->isFinal = true;
}
size = offset;
}
+
// Mergeable cstring literals are found under the __TEXT,__cstring section. In
// contrast to ELF, which puts strings that need different alignments into
// different sections, clang's Mach-O backend puts them all in one section.
// Strings that need to be aligned have the .p2align directive emitted before
-// them, which simply translates into zero padding in the object file.
+// them, which simply translates into zero padding in the object file. In other
+// words, we have to infer the desired alignment of these cstrings from their
+// addresses.
//
-// I *think* ld64 extracts the desired per-string alignment from this data by
-// preserving each string's offset from the last section-aligned address. I'm
-// not entirely certain since it doesn't seem consistent about doing this, and
-// in fact doesn't seem to be correct in general: we can in fact can induce ld64
-// to produce a crashing binary just by linking in an additional object file
-// that only contains a duplicate cstring at a different alignment. See PR50563
-// for details.
+// We differ slightly from ld64 in how we've chosen to align these cstrings.
+// Both LLD and ld64 preserve the number of trailing zeros in each cstring's
+// address in the input object files. When deduplicating identical cstrings,
+// both linkers pick the cstring whose address has more trailing zeros, and
+// preserve the alignment of that address in the final binary. However, ld64
+// goes a step further and also preserves the offset of the cstring from the
+// last section-aligned address. I.e. if a cstring is at offset 18 in the
+// input, with a section alignment of 16, then both LLD and ld64 will ensure the
+// final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also
+// ensure that the final address is of the form 16 * k + 2 for some k.
//
-// On x86_64, the cstrings we've seen so far that require special alignment are
-// all accessed by SIMD operations -- x86_64 requires SIMD accesses to be
-// 16-byte-aligned. arm64 also seems to require 16-byte-alignment in some cases
-// (PR50791), but I haven't tracked down the root cause. So for now, I'm just
-// aligning all strings to 16 bytes. This is indeed wasteful, but
-// implementation-wise it's simpler than preserving per-string
-// alignment+offsets. It also avoids the aforementioned crash after
-// deduplication of differently-aligned strings. Finally, the overhead is not
-// huge: using 16-byte alignment (vs no alignment) is only a 0.5% size overhead
-// when linking chromium_framework on x86_64.
-DeduplicatedCStringSection::DeduplicatedCStringSection()
- : builder(StringTableBuilder::RAW, /*Alignment=*/16) {}
-
+// Note that ld64's heuristic means that a dedup'ed cstring's final address is
+// dependent on the order of the input object files. E.g. if in addition to the
+// cstring at offset 18 above, we have a duplicate one in another file with a
+// `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick
+// the cstring from the object file earlier on the command line (since both have
+// the same number of trailing zeros in their address). So the final cstring may
+// either be at some address `16 * k + 2` or at some address `2 * k`.
+//
+// I've opted not to follow this behavior primarily for implementation
+// simplicity, and secondarily to save a few more bytes. It's not clear to me
+// that preserving the section alignment + offset is ever necessary, and there
+// are many cases that are clearly redundant. In particular, if an x86_64 object
+// file contains some strings that are accessed via SIMD instructions, then the
+// .cstring section in the object file will be 16-byte-aligned (since SIMD
+// requires its operand addresses to be 16-byte aligned). However, there will
+// typically also be other cstrings in the same file that aren't used via SIMD
+// and don't need this alignment. They will be emitted at some arbitrary address
+// `A`, but ld64 will treat them as being 16-byte aligned with an offset of
+// `A % 16`.
void DeduplicatedCStringSection::finalizeContents() {
- // Add all string pieces to the string table builder to create section
- // contents.
+ // Find the largest alignment required for each string.
+ for (const CStringInputSection *isec : inputs) {
+ for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
+ const StringPiece &piece = isec->pieces[i];
+ if (!piece.live)
+ continue;
+ auto s = isec->getCachedHashStringRef(i);
+ assert(isec->align != 0);
+ uint8_t trailingZeros = countTrailingZeros(isec->align | piece.inSecOff);
+ auto it = stringOffsetMap.insert(
+ std::make_pair(s, StringOffset(trailingZeros)));
+ if (!it.second && it.first->second.trailingZeros < trailingZeros)
+ it.first->second.trailingZeros = trailingZeros;
+ }
+ }
+
+ // Assign an offset for each string and save it to the corresponding
+ // StringPieces for easy access.
for (CStringInputSection *isec : inputs) {
- for (size_t i = 0, e = isec->pieces.size(); i != e; ++i)
- if (isec->pieces[i].live)
- isec->pieces[i].outSecOff =
- builder.add(isec->getCachedHashStringRef(i));
+ for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
+ if (!isec->pieces[i].live)
+ continue;
+ auto s = isec->getCachedHashStringRef(i);
+ auto it = stringOffsetMap.find(s);
+ assert(it != stringOffsetMap.end());
+ StringOffset &offsetInfo = it->second;
+ if (offsetInfo.outSecOff == UINT64_MAX) {
+ offsetInfo.outSecOff = alignTo(size, 1 << offsetInfo.trailingZeros);
+ size = offsetInfo.outSecOff + s.size();
+ }
+ isec->pieces[i].outSecOff = offsetInfo.outSecOff;
+ }
isec->isFinal = true;
}
+}
- builder.finalizeInOrder();
+void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
+ for (const auto &p : stringOffsetMap) {
+ StringRef data = p.first.val();
+ uint64_t off = p.second.outSecOff;
+ if (!data.empty())
+ memcpy(buf + off, data.data(), data.size());
+ }
}
// This section is actually emitted as __TEXT,__const by ld64, but clang may
class DeduplicatedCStringSection final : public CStringSection {
public:
- DeduplicatedCStringSection();
- uint64_t getSize() const override { return builder.getSize(); }
+ uint64_t getSize() const override { return size; }
void finalizeContents() override;
- void writeTo(uint8_t *buf) const override { builder.write(buf); }
+ void writeTo(uint8_t *buf) const override;
private:
- llvm::StringTableBuilder builder;
+ struct StringOffset {
+ uint8_t trailingZeros;
+ uint64_t outSecOff = UINT64_MAX;
+
+ explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {}
+ };
+ llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap;
+ size_t size = 0;
};
/*
occurring. In particular, programs which compare string literals via pointer
equality must be fixed to use value equality instead.
+String Alignment
+****************
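+LLD is slightly less conservative about aligning cstrings, allowing it to pack
+them more compactly. Both linkers preserve the alignment implied by the number
+of trailing zero bits in each cstring's input address, but ld64 additionally
+preserves the cstring's offset from the last section-aligned address, whereas
+LLD does not. This should not result in any meaningful semantic difference.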
+
``-no_deduplicate`` Flag
**********************
- LD64:
--- /dev/null
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-empty.s -o %t/align-empty.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4-0.s -o %t/align-4-0.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4-2.s -o %t/align-4-2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-16-0.s -o %t/align-16-0.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-16-2.s -o %t/align-16-2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-16-4.s -o %t/align-16-4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-16-8.s -o %t/align-16-8.o
+
+## Check that we preserve the alignment of cstrings. Alignment is determined
+## not by section alignment alone but by the number of trailing zeros of the
+## cstring's address (section alignment combined with its offset within the
+## section) in the input object file.
+
+## The non-dedup case is not particularly interesting since the null bytes don't
+## get dedup'ed, meaning that the output strings get their offsets "naturally"
+## preserved.
+
+# RUN: %lld -dylib %t/align-empty.o %t/align-4-0.o %t/align-16-0.o -o %t/align-4-0-16-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-4-0-16-0 | \
+# RUN: FileCheck %s -D#OFF1=4 -D#OFF2=16
+# RUN: %lld -dylib %t/align-empty.o %t/align-16-0.o %t/align-4-0.o -o %t/align-16-0-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-16-0-4-0 | \
+# RUN: FileCheck %s -D#OFF1=16 -D#OFF2=20
+
+# RUN: %lld -dylib %t/align-empty.o %t/align-4-2.o %t/align-16-0.o -o %t/align-4-2-16-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-4-2-16-0 | \
+# RUN: FileCheck %s -D#OFF1=6 -D#OFF2=16
+# RUN: %lld -dylib %t/align-empty.o %t/align-16-0.o %t/align-4-2.o -o %t/align-16-0-4-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-16-0-4-2 | \
+# RUN: FileCheck %s -D#OFF1=16 -D#OFF2=22
+
+# RUN: %lld -dylib %t/align-empty.o %t/align-4-0.o %t/align-16-2.o -o %t/align-4-0-16-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-4-0-16-2 | \
+# RUN: FileCheck %s -D#OFF1=4 -D#OFF2=18
+# RUN: %lld -dylib %t/align-empty.o %t/align-16-2.o %t/align-4-0.o -o %t/align-16-2-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-16-2-4-0 | \
+# RUN: FileCheck %s -D#OFF1=18 -D#OFF2=20
+
+# CHECK: Contents of (__TEXT,__cstring) section
+# CHECK-NEXT: [[#%.16x,START:]] {{$}}
+# CHECK: [[#%.16x,START+OFF1]] a{{$}}
+# CHECK: [[#%.16x,START+OFF2]] a{{$}}
+# CHECK-EMPTY:
+
+## The dedup cases are more interesting...
+
+## Same offset, different alignments => pick higher alignment
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-0.o %t/align-16-0.o -o %t/dedup-4-0-16-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-0-16-0 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=16
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-0.o %t/align-4-0.o -o %t/dedup-16-0-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-0-4-0 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=16
+
+## 16-byte alignment vs 2-byte offset => align to 16 bytes
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-2.o %t/align-16-0.o -o %t/dedup-4-2-16-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-2-16-0 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=16
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-0.o %t/align-4-2.o -o %t/dedup-16-0-4-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-0-4-2 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=16
+
+## 4-byte alignment vs 2-byte offset => align to 4 bytes
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-0.o %t/align-16-2.o -o %t/dedup-4-0-16-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-0-16-2 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=4
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-2.o %t/align-4-0.o -o %t/dedup-16-2-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-2-4-0 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=4
+
+## Both inputs are 4-byte aligned, one via offset and the other via section alignment
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-0.o %t/align-16-4.o -o %t/dedup-4-0-16-4
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-0-16-4 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=4
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-4.o %t/align-4-0.o -o %t/dedup-16-4-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-4-4-0 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=4
+
+## 8-byte offset vs 4-byte section alignment => align to 8 bytes
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-0.o %t/align-16-8.o -o %t/dedup-4-0-16-8
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-0-16-8 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=8
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-8.o %t/align-4-0.o -o %t/dedup-16-8-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-8-4-0 | \
+# RUN: FileCheck %s --check-prefix=DEDUP -D#OFF=8
+
+# DEDUP: Contents of (__TEXT,__cstring) section
+# DEDUP-NEXT: [[#%.16x,START:]] {{$}}
+# DEDUP: [[#%.16x,START+OFF]] a{{$}}
+# DEDUP-EMPTY:
+
+#--- align-empty.s
+## We use this file to create an empty string at the start of every output
+## file's .cstring section. This makes the test cases more interesting since LLD
+## can't place the string "a" at the trivially-aligned zero offset.
+.cstring
+.p2align 2
+.asciz ""
+
+#--- align-4-0.s
+.cstring
+.p2align 2
+.asciz "a"
+
+#--- align-4-2.s
+.cstring
+.p2align 2
+.zero 0x2
+.asciz "a"
+
+#--- align-16-0.s
+.cstring
+.p2align 4
+.asciz "a"
+
+#--- align-16-2.s
+.cstring
+.p2align 4
+.zero 0x2
+.asciz "a"
+
+#--- align-16-4.s
+.cstring
+.p2align 4
+.zero 0x4
+.asciz "a"
+
+#--- align-16-8.s
+.cstring
+.p2align 4
+.zero 0x8
+.asciz "a"
# RUN: llvm-objdump --macho --section="__DATA,ptrs" --syms %t/test | FileCheck %s
# RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER
-## Make sure we only have 3 deduplicated strings in __cstring, and that they
-## are 16-byte-aligned.
+## Make sure we only have 3 deduplicated strings in __cstring.
# STR: Contents of (__TEXT,__cstring) section
-# STR: {{.*}}0 foo
-# STR: {{.*}}0 barbaz
-# STR: {{.*}}0 {{$}}
+# STR: {{[[:xdigit:]]+}} foo
+# STR: {{[[:xdigit:]]+}} barbaz
+# STR: {{[[:xdigit:]]+}} {{$}}
## Make sure both symbol and section relocations point to the right thing.
# CHECK: Contents of (__DATA,ptrs) section