[lld-macho] Align cstrings less conservatively

author Jez Ng <jezng@fb.com>

Thu, 10 Mar 2022 20:04:31 +0000 (15:04 -0500)

committer Jez Ng <jezng@fb.com>

Thu, 10 Mar 2022 20:18:15 +0000 (15:18 -0500)
author Jez Ng <jezng@fb.com>
Thu, 10 Mar 2022 20:04:31 +0000 (15:04 -0500)
committer Jez Ng <jezng@fb.com>
Thu, 10 Mar 2022 20:18:15 +0000 (15:18 -0500)
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp

index 9d246e9..019481b 100644 (file)
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -1341,7 +1341,10 @@ void CStringSection::finalizeContents() {
      for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
        if (!isec->pieces[i].live)
          continue;
-      uint32_t pieceAlign = MinAlign(isec->pieces[i].inSecOff, align);
+      // See comment above DeduplicatedCStringSection for how alignment is
+      // handled.
+      uint32_t pieceAlign =
+          1 << countTrailingZeros(isec->align | isec->pieces[i].inSecOff);
        offset = alignTo(offset, pieceAlign);
        isec->pieces[i].outSecOff = offset;
        isec->isFinal = true;
@@ -1351,45 +1354,89 @@ void CStringSection::finalizeContents() {
    }
    size = offset;
  }
+
  // Mergeable cstring literals are found under the __TEXT,__cstring section. In
  // contrast to ELF, which puts strings that need different alignments into
  // different sections, clang's Mach-O backend puts them all in one section.
  // Strings that need to be aligned have the .p2align directive emitted before
-// them, which simply translates into zero padding in the object file.
+// them, which simply translates into zero padding in the object file. In other
+// words, we have to infer the desired alignment of these cstrings from their
+// addresses.
  //
-// I *think* ld64 extracts the desired per-string alignment from this data by
-// preserving each string's offset from the last section-aligned address. I'm
-// not entirely certain since it doesn't seem consistent about doing this, and
-// in fact doesn't seem to be correct in general: we can in fact can induce ld64
-// to produce a crashing binary just by linking in an additional object file
-// that only contains a duplicate cstring at a different alignment. See PR50563
-// for details.
+// We differ slightly from ld64 in how we've chosen to align these cstrings.
+// Both LLD and ld64 preserve the number of trailing zeros in each cstring's
+// address in the input object files. When deduplicating identical cstrings,
+// both linkers pick the cstring whose address has more trailing zeros, and
+// preserve the alignment of that address in the final binary. However, ld64
+// goes a step further and also preserves the offset of the cstring from the
+// last section-aligned address.  I.e. if a cstring is at offset 18 in the
+// input, with a section alignment of 16, then both LLD and ld64 will ensure the
+// final address is 2-byte aligned (since 18 == 16 + 2). But ld64 will also
+// ensure that the final address is of the form 16 * k + 2 for some k.
  //
-// On x86_64, the cstrings we've seen so far that require special alignment are
-// all accessed by SIMD operations -- x86_64 requires SIMD accesses to be
-// 16-byte-aligned. arm64 also seems to require 16-byte-alignment in some cases
-// (PR50791), but I haven't tracked down the root cause. So for now, I'm just
-// aligning all strings to 16 bytes.  This is indeed wasteful, but
-// implementation-wise it's simpler than preserving per-string
-// alignment+offsets. It also avoids the aforementioned crash after
-// deduplication of differently-aligned strings.  Finally, the overhead is not
-// huge: using 16-byte alignment (vs no alignment) is only a 0.5% size overhead
-// when linking chromium_framework on x86_64.
-DeduplicatedCStringSection::DeduplicatedCStringSection()
-    : builder(StringTableBuilder::RAW, /*Alignment=*/16) {}
-
+// Note that ld64's heuristic means that a dedup'ed cstring's final address is
+// dependent on the order of the input object files. E.g. if in addition to the
+// cstring at offset 18 above, we have a duplicate one in another file with a
+// `.cstring` section alignment of 2 and an offset of zero, then ld64 will pick
+// the cstring from the object file earlier on the command line (since both have
+// the same number of trailing zeros in their address). So the final cstring may
+// either be at some address `16 * k + 2` or at some address `2 * k`.
+//
+// I've opted not to follow this behavior primarily for implementation
+// simplicity, and secondarily to save a few more bytes. It's not clear to me
+// that preserving the section alignment + offset is ever necessary, and there
+// are many cases that are clearly redundant. In particular, if an x86_64 object
+// file contains some strings that are accessed via SIMD instructions, then the
+// .cstring section in the object file will be 16-byte-aligned (since SIMD
+// requires its operand addresses to be 16-byte aligned). However, there will
+// typically also be other cstrings in the same file that aren't used via SIMD
+// and don't need this alignment. They will be emitted at some arbitrary address
+// `A`, but ld64 will treat them as being 16-byte aligned with an offset of `16
+// % A`.
  void DeduplicatedCStringSection::finalizeContents() {
-  // Add all string pieces to the string table builder to create section
-  // contents.
+  // Find the largest alignment required for each string.
+  for (const CStringInputSection *isec : inputs) {
+    for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
+      const StringPiece &piece = isec->pieces[i];
+      if (!piece.live)
+        continue;
+      auto s = isec->getCachedHashStringRef(i);
+      assert(isec->align != 0);
+      uint8_t trailingZeros = countTrailingZeros(isec->align | piece.inSecOff);
+      auto it = stringOffsetMap.insert(
+          std::make_pair(s, StringOffset(trailingZeros)));
+      if (!it.second && it.first->second.trailingZeros < trailingZeros)
+        it.first->second.trailingZeros = trailingZeros;
+    }
+  }
+
+  // Assign an offset for each string and save it to the corresponding
+  // StringPieces for easy access.
    for (CStringInputSection *isec : inputs) {
-    for (size_t i = 0, e = isec->pieces.size(); i != e; ++i)
-      if (isec->pieces[i].live)
-        isec->pieces[i].outSecOff =
-            builder.add(isec->getCachedHashStringRef(i));
+    for (size_t i = 0, e = isec->pieces.size(); i != e; ++i) {
+      if (!isec->pieces[i].live)
+        continue;
+      auto s = isec->getCachedHashStringRef(i);
+      auto it = stringOffsetMap.find(s);
+      assert(it != stringOffsetMap.end());
+      StringOffset &offsetInfo = it->second;
+      if (offsetInfo.outSecOff == UINT64_MAX) {
+        offsetInfo.outSecOff = alignTo(size, 1 << offsetInfo.trailingZeros);
+        size = offsetInfo.outSecOff + s.size();
+      }
+      isec->pieces[i].outSecOff = offsetInfo.outSecOff;
+    }
      isec->isFinal = true;
    }
+}
  
-  builder.finalizeInOrder();
+void DeduplicatedCStringSection::writeTo(uint8_t *buf) const {
+  for (const auto &p : stringOffsetMap) {
+    StringRef data = p.first.val();
+    uint64_t off = p.second.outSecOff;
+    if (!data.empty())
+      memcpy(buf + off, data.data(), data.size());
+  }
  }
  
  // This section is actually emitted as __TEXT,__const by ld64, but clang may
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h

index 58faa87..1a1a495 100644 (file)
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -531,13 +531,19 @@ private:
  
  class DeduplicatedCStringSection final : public CStringSection {
  public:
-  DeduplicatedCStringSection();
-  uint64_t getSize() const override { return builder.getSize(); }
+  uint64_t getSize() const override { return size; }
    void finalizeContents() override;
-  void writeTo(uint8_t *buf) const override { builder.write(buf); }
+  void writeTo(uint8_t *buf) const override;
  
  private:
-  llvm::StringTableBuilder builder;
+  struct StringOffset {
+    uint8_t trailingZeros;
+    uint64_t outSecOff = UINT64_MAX;
+
+    explicit StringOffset(uint8_t zeros) : trailingZeros(zeros) {}
+  };
+  llvm::DenseMap<llvm::CachedHashStringRef, StringOffset> stringOffsetMap;
+  size_t size = 0;
  };
  
  /*
diff --git a/lld/MachO/ld64-vs-lld.rst b/lld/MachO/ld64-vs-lld.rst

index b31b4bb..4855014 100644 (file)
--- a/lld/MachO/ld64-vs-lld.rst
+++ b/lld/MachO/ld64-vs-lld.rst
@@ -13,6 +13,12 @@ programs which have (incorrectly) relied on string deduplication always
  occurring. In particular, programs which compare string literals via pointer
  equality must be fixed to use value equality instead.
  
+String Alignment
+****************
+LLD is slightly less conservative about aligning cstrings, allowing it to pack
+them more compactly. This should not result in any meaningful semantic
+difference.
+
  ``-no_deduplicate`` Flag
  **********************
  - LD64:
diff --git a/lld/test/MachO/cstring-align.s b/lld/test/MachO/cstring-align.s

new file mode 100644 (file)

index 0000000..f3e995c
--- /dev/null
+++ b/lld/test/MachO/cstring-align.s
@@ -0,0 +1,133 @@
+# REQUIRES: x86
+# RUN: rm -rf %t; split-file %s %t
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-empty.s -o %t/align-empty.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4-0.s -o %t/align-4-0.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-4-2.s -o %t/align-4-2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-16-0.s -o %t/align-16-0.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-16-2.s -o %t/align-16-2.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-16-4.s -o %t/align-16-4.o
+# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-darwin %t/align-16-8.s -o %t/align-16-8.o
+
+## Check that we preserve the alignment of cstrings. Alignment is determined
+## not by section alignment but by the number of trailing zeros of the cstring's
+## address in the input object file.
+
+## The non-dedup case is not particularly interesting since the null bytes don't
+## get dedup'ed, meaning that the output strings get their offsets "naturally"
+## preserved.
+
+# RUN: %lld -dylib %t/align-empty.o %t/align-4-0.o %t/align-16-0.o -o %t/align-4-0-16-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-4-0-16-0 | \
+# RUN:   FileCheck %s -D#OFF1=4 -D#OFF2=16
+# RUN: %lld -dylib %t/align-empty.o %t/align-16-0.o %t/align-4-0.o -o %t/align-16-0-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-16-0-4-0 | \
+# RUN:   FileCheck %s -D#OFF1=16 -D#OFF2=20
+
+# RUN: %lld -dylib %t/align-empty.o %t/align-4-2.o %t/align-16-0.o -o %t/align-4-2-16-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-4-2-16-0 | \
+# RUN:   FileCheck %s -D#OFF1=6 -D#OFF2=16
+# RUN: %lld -dylib %t/align-empty.o %t/align-16-0.o %t/align-4-2.o -o %t/align-16-0-4-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-16-0-4-2 | \
+# RUN:   FileCheck %s -D#OFF1=16 -D#OFF2=22
+
+# RUN: %lld -dylib %t/align-empty.o %t/align-4-0.o %t/align-16-2.o -o %t/align-4-0-16-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-4-0-16-2 | \
+# RUN:   FileCheck %s -D#OFF1=4 -D#OFF2=18
+# RUN: %lld -dylib %t/align-empty.o %t/align-16-2.o %t/align-4-0.o -o %t/align-16-2-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/align-16-2-4-0 | \
+# RUN:   FileCheck %s -D#OFF1=18 -D#OFF2=20
+
+# CHECK:       Contents of (__TEXT,__cstring) section
+# CHECK-NEXT:  [[#%.16x,START:]]     {{$}}
+# CHECK:       [[#%.16x,START+OFF1]] a{{$}}
+# CHECK:       [[#%.16x,START+OFF2]] a{{$}}
+# CHECK-EMPTY:
+
+## The dedup cases are more interesting...
+
+## Same offset, different alignments => pick higher alignment
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-0.o %t/align-16-0.o -o %t/dedup-4-0-16-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-0-16-0 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=16
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-0.o %t/align-4-0.o -o %t/dedup-16-0-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-0-4-0 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=16
+
+## 16 byte alignment vs 2 byte offset => align to 16 bytes
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-2.o %t/align-16-0.o -o %t/dedup-4-2-16-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-2-16-0 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=16
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-0.o %t/align-4-2.o -o %t/dedup-16-0-4-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-0-4-2 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=16
+
+## 4 byte alignment vs 2 byte offset => align to 4 bytes
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-0.o %t/align-16-2.o -o %t/dedup-4-0-16-2
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-0-16-2 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=4
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-2.o %t/align-4-0.o -o %t/dedup-16-2-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-2-4-0 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=4
+
+## Both inputs are 4-byte aligned, one via offset and the other via section alignment
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-0.o %t/align-16-4.o -o %t/dedup-4-0-16-4
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-0-16-4 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=4
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-4.o %t/align-4-0.o -o %t/dedup-16-4-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-4-4-0 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=4
+
+## 8-byte offset vs 4-byte section alignment => align to 8 bytes
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-4-0.o %t/align-16-8.o -o %t/dedup-4-0-16-8
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-4-0-16-8 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=8
+# RUN: %lld -dylib --deduplicate-literals %t/align-empty.o %t/align-16-8.o %t/align-4-0.o -o %t/dedup-16-8-4-0
+# RUN: llvm-objdump --macho --section="__TEXT,__cstring" %t/dedup-16-8-4-0 | \
+# RUN:   FileCheck %s --check-prefix=DEDUP -D#OFF=8
+
+# DEDUP:       Contents of (__TEXT,__cstring) section
+# DEDUP-NEXT:  [[#%.16x,START:]]    {{$}}
+# DEDUP:       [[#%.16x,START+OFF]] a{{$}}
+# DEDUP-EMPTY:
+
+#--- align-empty.s
+## We use this file to create an empty string at the start of every output
+## file's .cstring section. This makes the test cases more interesting since LLD
+## can't place the string "a" at the trivially-aligned zero offset.
+.cstring
+.p2align 2
+.asciz ""
+
+#--- align-4-0.s
+.cstring
+.p2align 2
+.asciz "a"
+
+#--- align-4-2.s
+.cstring
+.p2align 2
+.zero 0x2
+.asciz "a"
+
+#--- align-16-0.s
+.cstring
+.p2align 4
+.asciz "a"
+
+#--- align-16-2.s
+.cstring
+.p2align 4
+.zero 0x2
+.asciz "a"
+
+#--- align-16-4.s
+.cstring
+.p2align 4
+.zero 0x4
+.asciz "a"
+
+#--- align-16-8.s
+.cstring
+.p2align 4
+.zero 0x8
+.asciz "a"
diff --git a/lld/test/MachO/cstring-dedup.s b/lld/test/MachO/cstring-dedup.s

index 86ba812..144e77d 100644 (file)
--- a/lld/test/MachO/cstring-dedup.s
+++ b/lld/test/MachO/cstring-dedup.s
@@ -8,12 +8,11 @@
  # RUN: llvm-objdump --macho --section="__DATA,ptrs" --syms %t/test | FileCheck %s
  # RUN: llvm-readobj --section-headers %t/test | FileCheck %s --check-prefix=HEADER
  
-## Make sure we only have 3 deduplicated strings in __cstring, and that they
-## are 16-byte-aligned.
+## Make sure we only have 3 deduplicated strings in __cstring.
  # STR: Contents of (__TEXT,__cstring) section
-# STR: {{.*}}0 foo
-# STR: {{.*}}0 barbaz
-# STR: {{.*}}0 {{$}}
+# STR: {{[[:xdigit:]]+}} foo
+# STR: {{[[:xdigit:]]+}} barbaz
+# STR: {{[[:xdigit:]]+}} {{$}}
  
  ## Make sure both symbol and section relocations point to the right thing.
  # CHECK:      Contents of (__DATA,ptrs) section
author	Jez Ng <jezng@fb.com>
	Thu, 10 Mar 2022 20:04:31 +0000 (15:04 -0500)
committer	Jez Ng <jezng@fb.com>
	Thu, 10 Mar 2022 20:18:15 +0000 (15:18 -0500)
lld/MachO/SyntheticSections.cpp		patch \| blob \| history
lld/MachO/SyntheticSections.h		patch \| blob \| history
lld/MachO/ld64-vs-lld.rst		patch \| blob \| history
lld/test/MachO/cstring-align.s	[new file with mode: 0644]	patch \| blob
lld/test/MachO/cstring-dedup.s		patch \| blob \| history