[lld-macho] Have ICF operate on all sections at once

author Jez Ng <jezng@fb.com>

Sat, 17 Jul 2021 17:42:26 +0000 (13:42 -0400)

committer Jez Ng <jezng@fb.com>

Sat, 17 Jul 2021 17:42:51 +0000 (13:42 -0400)
author Jez Ng <jezng@fb.com>
Sat, 17 Jul 2021 17:42:26 +0000 (13:42 -0400)
committer Jez Ng <jezng@fb.com>
Sat, 17 Jul 2021 17:42:51 +0000 (13:42 -0400)
diff --git a/lld/MachO/ConcatOutputSection.cpp b/lld/MachO/ConcatOutputSection.cpp

index 87a6b3f..b98af92 100644 (file)
--- a/lld/MachO/ConcatOutputSection.cpp
+++ b/lld/MachO/ConcatOutputSection.cpp
@@ -24,7 +24,10 @@ using namespace llvm::MachO;
  using namespace lld;
  using namespace lld::macho;
  
+MapVector<NamePair, ConcatOutputSection *> macho::concatOutputSections;
+
  void ConcatOutputSection::addInput(ConcatInputSection *input) {
+  assert(input->parent == this);
    if (inputs.empty()) {
      align = input->align;
      flags = input->getFlags();
@@ -33,7 +36,6 @@ void ConcatOutputSection::addInput(ConcatInputSection *input) {
      finalizeFlags(input);
    }
    inputs.push_back(input);
-  input->parent = this;
  }
  
  // Branch-range extension can be implemented in two ways, either through ...
@@ -357,3 +359,22 @@ void ConcatOutputSection::finalizeFlags(InputSection *input) {
      break;
    }
  }
+
+ConcatOutputSection *
+ConcatOutputSection::getOrCreateForInput(const InputSection *isec) {
+  NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
+  ConcatOutputSection *&osec = concatOutputSections[names];
+  if (!osec)
+    osec = make<ConcatOutputSection>(names.second);
+  return osec;
+}
+
+NamePair macho::maybeRenameSection(NamePair key) {
+  auto newNames = config->sectionRenameMap.find(key);
+  if (newNames != config->sectionRenameMap.end())
+    return newNames->second;
+  auto newName = config->segmentRenameMap.find(key.first);
+  if (newName != config->segmentRenameMap.end())
+    return std::make_pair(newName->second, key.second);
+  return key;
+}
diff --git a/lld/MachO/ConcatOutputSection.h b/lld/MachO/ConcatOutputSection.h

index 531983c..ec3d6bf 100644 (file)
--- a/lld/MachO/ConcatOutputSection.h
+++ b/lld/MachO/ConcatOutputSection.h
@@ -31,6 +31,7 @@ public:
  
    const ConcatInputSection *firstSection() const { return inputs.front(); }
    const ConcatInputSection *lastSection() const { return inputs.back(); }
+  bool isNeeded() const override { return !inputs.empty(); }
  
    // These accessors will only be valid after finalizing the section
    uint64_t getSize() const override { return size; }
@@ -50,6 +51,8 @@ public:
      return sec->kind() == ConcatKind;
    }
  
+  static ConcatOutputSection *getOrCreateForInput(const InputSection *);
+
  private:
    void finalizeFlags(InputSection *input);
  
@@ -79,6 +82,12 @@ struct ThunkInfo {
    uint8_t sequence = 0;        // how many thunks created so-far?
  };
  
+NamePair maybeRenameSection(NamePair key);
+
+// Output sections are added to output segments in iteration order
+// of ConcatOutputSection, so must have deterministic iteration order.
+extern llvm::MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
+
  extern llvm::DenseMap<Symbol *, ThunkInfo> thunkMap;
  
  } // namespace macho
diff --git a/lld/MachO/Driver.cpp b/lld/MachO/Driver.cpp

index feb0803..7fcaeac 100644 (file)
--- a/lld/MachO/Driver.cpp
+++ b/lld/MachO/Driver.cpp
@@ -577,6 +577,7 @@ static void compileBitcodeFiles() {
  // any CommonSymbols.
  static void replaceCommonSymbols() {
    TimeTraceScope timeScope("Replace common symbols");
+  ConcatOutputSection *osec = nullptr;
    for (Symbol *sym : symtab->getSymbols()) {
      auto *common = dyn_cast<CommonSymbol>(sym);
      if (common == nullptr)
@@ -589,6 +590,9 @@ static void replaceCommonSymbols() {
      auto *isec = make<ConcatInputSection>(
          segment_names::data, section_names::common, common->getFile(), data,
          common->align, S_ZEROFILL);
+    if (!osec)
+      osec = ConcatOutputSection::getOrCreateForInput(isec);
+    isec->parent = osec;
      inputSections.push_back(isec);
  
      // FIXME: CommonSymbol should store isReferencedDynamically, noDeadStrip
@@ -1037,6 +1041,7 @@ static void gatherInputSections() {
    int inputOrder = 0;
    for (const InputFile *file : inputFiles) {
      for (const SubsectionMap &map : file->subsections) {
+      ConcatOutputSection *osec = nullptr;
        for (const SubsectionEntry &entry : map) {
          if (auto *isec = dyn_cast<ConcatInputSection>(entry.isec)) {
            if (isec->isCoalescedWeak())
@@ -1047,6 +1052,9 @@ static void gatherInputSections() {
              continue;
            }
            isec->outSecOff = inputOrder++;
+          if (!osec)
+            osec = ConcatOutputSection::getOrCreateForInput(isec);
+          isec->parent = osec;
            inputSections.push_back(isec);
          } else if (auto *isec = dyn_cast<CStringInputSection>(entry.isec)) {
            if (in.cStringSection->inputOrder == UnspecifiedInputOrder)
diff --git a/lld/MachO/ICF.cpp b/lld/MachO/ICF.cpp

index a27a0f5..9a41bb2 100644 (file)
--- a/lld/MachO/ICF.cpp
+++ b/lld/MachO/ICF.cpp
@@ -52,22 +52,18 @@ ICF::ICF(std::vector<ConcatInputSection *> &inputs) {
  //
  // Summary of segments & sections:
  //
-// Since folding never occurs across output-section boundaries,
-// ConcatOutputSection is the natural input for ICF.
-//
  // The __TEXT segment is readonly at the MMU. Some sections are already
  // deduplicated elsewhere (__TEXT,__cstring & __TEXT,__literal*) and some are
  // synthetic and inherently free of duplicates (__TEXT,__stubs &
-// __TEXT,__unwind_info). We only run ICF on __TEXT,__text. One might hope ICF
-// could work on __TEXT,__concat, but doing so induces many test failures.
+// __TEXT,__unwind_info). Note that we don't yet run ICF on __TEXT,__const,
+// because doing so induces many test failures.
  //
  // The __LINKEDIT segment is readonly at the MMU, yet entirely synthetic, and
  // thus ineligible for ICF.
  //
  // The __DATA_CONST segment is read/write at the MMU, but is logically const to
-// the application after dyld applies fixups to pointer data. Some sections are
-// deduplicated elsewhere (__DATA_CONST,__cfstring), and some are synthetic
-// (__DATA_CONST,__got). There are no ICF opportunities here.
+// the application after dyld applies fixups to pointer data. We currently
+// fold only the __DATA_CONST,__cfstring section.
  //
  // The __DATA segment is read/write at the MMU, and as application-writeable
  // data, none of its sections are eligible for ICF.
@@ -84,12 +80,13 @@ static std::atomic<bool> icfRepeat{false};
  // Compare everything except the relocation referents
  static bool equalsConstant(const ConcatInputSection *ia,
                             const ConcatInputSection *ib) {
+  // We can only fold within the same OutputSection.
+  if (ia->parent != ib->parent)
+    return false;
    if (ia->data.size() != ib->data.size())
      return false;
    if (ia->data != ib->data)
      return false;
-  if (ia->getFlags() != ib->getFlags())
-    return false;
    if (ia->relocs.size() != ib->relocs.size())
      return false;
    auto f = [&](const Reloc &ra, const Reloc &rb) {
@@ -323,8 +320,6 @@ void macho::foldIdenticalSections() {
    // relocs to find every referenced InputSection, but that precludes easy
    // parallelization. Therefore, we hash every InputSection here where we have
    // them all accessible as simple vectors.
-  std::vector<ConcatInputSection *> codeSections;
-  std::vector<ConcatInputSection *> cfStringSections;
  
    // ICF can't fold functions with unwind info
    DenseSet<const InputSection *> functionsWithUnwindInfo =
@@ -338,32 +333,22 @@ void macho::foldIdenticalSections() {
    // coexist with equivalence-class IDs, this is not necessary, but might help
    // someone keep the numbers straight in case we ever need to debug the
    // ICF::segregate()
+  std::vector<ConcatInputSection *> hashable;
    uint64_t icfUniqueID = inputSections.size();
    for (ConcatInputSection *isec : inputSections) {
+    // FIXME: consider non-code __text sections as hashable?
      bool isHashable = (isCodeSection(isec) || isCfStringSection(isec)) &&
                        !isec->shouldOmitFromOutput() &&
                        !functionsWithUnwindInfo.contains(isec) &&
                        isec->isHashableForICF();
-    if (isHashable) {
-      if (isCodeSection(isec))
-        codeSections.push_back(isec);
-      else {
-        assert(isCfStringSection(isec));
-        cfStringSections.push_back(isec);
-      }
-    } else {
+    if (isHashable)
+      hashable.push_back(isec);
+    else
        isec->icfEqClass[0] = ++icfUniqueID;
-    }
    }
-  std::vector<ConcatInputSection *> hashable(codeSections);
-  hashable.insert(hashable.end(), cfStringSections.begin(),
-                  cfStringSections.end());
    parallelForEach(hashable,
                    [](ConcatInputSection *isec) { isec->hashForICF(); });
    // Now that every input section is either hashed or marked as unique, run the
    // segregation algorithm to detect foldable subsections.
-  // We dedup cfStringSections first since code sections may refer to them, but
-  // not vice-versa.
-  ICF(cfStringSections).run();
-  ICF(codeSections).run();
+  ICF(hashable).run();
  }
diff --git a/lld/MachO/InputSection.cpp b/lld/MachO/InputSection.cpp

index 5762e4e..b28bfc1 100644 (file)
--- a/lld/MachO/InputSection.cpp
+++ b/lld/MachO/InputSection.cpp
@@ -7,6 +7,7 @@
  //===----------------------------------------------------------------------===//
  
  #include "InputSection.h"
+#include "ConcatOutputSection.h"
  #include "Config.h"
  #include "InputFiles.h"
  #include "OutputSegment.h"
diff --git a/lld/MachO/SyntheticSections.cpp b/lld/MachO/SyntheticSections.cpp

index aaed5b8..d65cc9c 100644 (file)
--- a/lld/MachO/SyntheticSections.cpp
+++ b/lld/MachO/SyntheticSections.cpp
@@ -15,7 +15,6 @@
  #include "OutputSegment.h"
  #include "SymbolTable.h"
  #include "Symbols.h"
-#include "Writer.h"
  
  #include "lld/Common/ErrorHandler.h"
  #include "lld/Common/Memory.h"
@@ -596,6 +595,8 @@ void StubHelperSection::setup() {
  
    in.got->addEntry(stubBinder);
  
+  in.imageLoaderCache->parent =
+      ConcatOutputSection::getOrCreateForInput(in.imageLoaderCache);
    inputSections.push_back(in.imageLoaderCache);
    // Since this isn't in the symbol table or in any input file, the noDeadStrip
    // argument doesn't matter. It's kept alive by ImageLoaderCacheSection()
diff --git a/lld/MachO/SyntheticSections.h b/lld/MachO/SyntheticSections.h

index 162e69e..bbb7adc 100644 (file)
--- a/lld/MachO/SyntheticSections.h
+++ b/lld/MachO/SyntheticSections.h
@@ -15,6 +15,7 @@
  #include "OutputSection.h"
  #include "OutputSegment.h"
  #include "Target.h"
+#include "Writer.h"
  
  #include "llvm/ADT/DenseMap.h"
  #include "llvm/ADT/Hashing.h"
diff --git a/lld/MachO/UnwindInfoSection.cpp b/lld/MachO/UnwindInfoSection.cpp

index ea94c8d..3efc646 100644 (file)
--- a/lld/MachO/UnwindInfoSection.cpp
+++ b/lld/MachO/UnwindInfoSection.cpp
@@ -144,6 +144,7 @@ template <class Ptr>
  void UnwindInfoSectionImpl<Ptr>::addInput(ConcatInputSection *isec) {
    assert(isec->getSegName() == segment_names::ld &&
           isec->getName() == section_names::compactUnwind);
+  isec->parent = compactUnwindSection;
    compactUnwindSection->addInput(isec);
  }
  
diff --git a/lld/MachO/Writer.cpp b/lld/MachO/Writer.cpp

index f311a64..8686f11 100644 (file)
--- a/lld/MachO/Writer.cpp
+++ b/lld/MachO/Writer.cpp
@@ -76,10 +76,6 @@ public:
  
    LCUuid *uuidCommand = nullptr;
    OutputSegment *linkEditSegment = nullptr;
-
-  // Output sections are added to output segments in iteration order
-  // of ConcatOutputSection, so must have deterministic iteration order.
-  MapVector<NamePair, ConcatOutputSection *> concatOutputSections;
  };
  
  // LC_DYLD_INFO_ONLY stores the offsets of symbol import/export information.
@@ -879,16 +875,6 @@ static void sortSegmentsAndSections() {
    }
  }
  
-NamePair macho::maybeRenameSection(NamePair key) {
-  auto newNames = config->sectionRenameMap.find(key);
-  if (newNames != config->sectionRenameMap.end())
-    return newNames->second;
-  auto newName = config->segmentRenameMap.find(key.first);
-  if (newName != config->segmentRenameMap.end())
-    return std::make_pair(newName->second, key.second);
-  return key;
-}
-
  template <class LP> void Writer::createOutputSections() {
    TimeTraceScope timeScope("Create output sections");
    // First, create hidden sections
@@ -919,10 +905,7 @@ template <class LP> void Writer::createOutputSections() {
    for (ConcatInputSection *isec : inputSections) {
      if (isec->shouldOmitFromOutput())
        continue;
-    NamePair names = maybeRenameSection({isec->getSegName(), isec->getName()});
-    ConcatOutputSection *&osec = concatOutputSections[names];
-    if (!osec)
-      osec = make<ConcatOutputSection>(names.second);
+    ConcatOutputSection *osec = cast<ConcatOutputSection>(isec->parent);
      osec->addInput(isec);
      osec->inputOrder =
          std::min(osec->inputOrder, static_cast<int>(isec->outSecOff));
@@ -934,7 +917,8 @@ template <class LP> void Writer::createOutputSections() {
      StringRef segname = it.first.first;
      ConcatOutputSection *osec = it.second;
      assert(segname != segment_names::ld);
-    getOrCreateOutputSegment(segname)->addOutputSection(osec);
+    if (osec->isNeeded())
+      getOrCreateOutputSegment(segname)->addOutputSection(osec);
    }
  
    for (SyntheticSection *ssec : syntheticSections) {
diff --git a/lld/MachO/Writer.h b/lld/MachO/Writer.h

index 2a6cc63..56f6f7a 100644 (file)
--- a/lld/MachO/Writer.h
+++ b/lld/MachO/Writer.h
@@ -9,8 +9,6 @@
  #ifndef LLD_MACHO_WRITER_H
  #define LLD_MACHO_WRITER_H
  
-#include "Config.h"
-
  #include <cstdint>
  
  namespace lld {
@@ -29,8 +27,6 @@ public:
  
  template <class LP> void writeResult();
  
-NamePair maybeRenameSection(NamePair key);
-
  void createSyntheticSections();
  
  // Add bindings for symbols that need weak or non-lazy bindings.
diff --git a/lld/test/MachO/icf.s b/lld/test/MachO/icf.s

index 4367646..04b5676 100644 (file)
--- a/lld/test/MachO/icf.s
+++ b/lld/test/MachO/icf.s
@@ -25,15 +25,17 @@
  # CHECK:       [[#%x,SR]]    g   F __TEXT,__text _sr2
  # CHECK:       [[#%x,MR:]]   g   F __TEXT,__text _mr1
  # CHECK:       [[#%x,MR]]    g   F __TEXT,__text _mr2
+# CHECK:       [[#%x,K1:]]   g   O __TEXT,__foo  _k1
+# CHECK:       [[#%x,A:]]    g   F __TEXT,__text _k2
  ### FIXME: Mutually-recursive functions with identical bodies (see below)
  # COM:         [[#%x,XR:]]   g   F __TEXT,__text _xr1
  # COM:         [[#%x,XR]]    g   F __TEXT,__text _xr2
  
  # CHECK-LABEL: Disassembly of section __TEXT,__text:
  # CHECK:       [[#%x,MAIN]] <_main>:
-# CHECK-NEXT:  callq 0x[[#%x,A]]  <_a3>
-# CHECK-NEXT:  callq 0x[[#%x,A]]  <_a3>
-# CHECK-NEXT:  callq 0x[[#%x,A]]  <_a3>
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_k2>
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_k2>
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_k2>
  # CHECK-NEXT:  callq 0x[[#%x,B]]  <_b>
  # CHECK-NEXT:  callq 0x[[#%x,B2]] <_b2>
  # CHECK-NEXT:  callq 0x[[#%x,C]]  <_c>
@@ -44,6 +46,8 @@
  # CHECK-NEXT:  callq 0x[[#%x,H]]  <_h>
  # CHECK-NEXT:  callq 0x[[#%x,I]]  <_i>
  # CHECK-NEXT:  callq 0x[[#%x,J]]  <_j>
+# CHECK-NEXT:  callq 0x[[#%x,K1]] <_k1>
+# CHECK-NEXT:  callq 0x[[#%x,A]]  <_k2>
  # CHECK-NEXT:  callq 0x[[#%x,SR]] <_sr2>
  # CHECK-NEXT:  callq 0x[[#%x,SR]] <_sr2>
  # CHECK-NEXT:  callq 0x[[#%x,MR]] <_mr2>
@@ -232,8 +236,38 @@ _j:
    ret
    .cfi_endproc
  
+### No fold: _k1 is in a different section from _a1
+.section __TEXT,__foo
+.globl _k1
+.p2align 2
+_k1:
+  callq _d
+  mov ___nan@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movabs $_abs1a, %rdx
+  movl $0, %eax
+  ret
+  nopl (%rax)
+
+### Fold: _k2 is in a section that gets renamed and output as __text
+.section __TEXT,__StaticInit
+.globl _k2
+.p2align 2
+_k2:
+  callq _d
+  mov ___nan@GOTPCREL(%rip), %rax
+  callq ___isnan
+  movabs $_abs1a, %rdx
+  movl $0, %eax
+  ret
+## For some reason, llvm-mc generates different nop encodings when adding
+## padding for __StaticInit vs __text functions. So we explicitly specify the
+## nop here to make sure this function can be folded with _a1.
+  nopl (%rax)
+
  ### Fold: Simple recursion
  
+.text
  .globl _sr1
  .p2align 2
  _sr1:
@@ -311,6 +345,8 @@ _main:
    callq _h
    callq _i
    callq _j
+  callq _k1
+  callq _k2
    callq _sr1
    callq _sr2
    callq _mr1
author	Jez Ng <jezng@fb.com>
	Sat, 17 Jul 2021 17:42:26 +0000 (13:42 -0400)
committer	Jez Ng <jezng@fb.com>
	Sat, 17 Jul 2021 17:42:51 +0000 (13:42 -0400)
lld/MachO/ConcatOutputSection.cpp		patch \| blob \| history
lld/MachO/ConcatOutputSection.h		patch \| blob \| history
lld/MachO/Driver.cpp		patch \| blob \| history
lld/MachO/ICF.cpp		patch \| blob \| history
lld/MachO/InputSection.cpp		patch \| blob \| history
lld/MachO/SyntheticSections.cpp		patch \| blob \| history
lld/MachO/SyntheticSections.h		patch \| blob \| history
lld/MachO/UnwindInfoSection.cpp		patch \| blob \| history
lld/MachO/Writer.cpp		patch \| blob \| history
lld/MachO/Writer.h		patch \| blob \| history
lld/test/MachO/icf.s		patch \| blob \| history