COFF: Use (name, output characteristics) as a key when grouping input sections into output sections.
author Peter Collingbourne <peter@pcc.me.uk>
Fri, 20 Apr 2018 21:10:33 +0000 (21:10 +0000)
committer Peter Collingbourne <peter@pcc.me.uk>
Fri, 20 Apr 2018 21:10:33 +0000 (21:10 +0000)
This is what link.exe does and lets us avoid needing to worry about
merging output characteristics while adding input sections to output
sections.

With this change we can't process /merge in the same way as before
because sections with different output characteristics can still
be merged into one another. So this change moves the processing of
/merge to just before we assign addresses. In the case where there
are multiple output sections with the same name, link.exe only merges
the first section with the source name into the first section with
the target name, and we do the same.

At the same time I also implemented transitive merging (which means
that /merge:.c=.b /merge:.b=.a merges both .c and .b into .a).

This isn't quite enough though because link.exe has a special case for
.CRT in 32-bit mode: it processes sections whose output characteristics
are DATA | R | W as though the output characteristics were DATA | R
(so that they get merged into things like constructor lists in the
expected way). Chromium has a few such sections, and it turns out
that those sections were causing the problem that resulted in r318699
(merge .xdata into .rdata) being reverted: because of the previous
permission merging semantics, the .CRT sections were causing the entire
.rdata section to become writable, which caused the SEH runtime to
crash because it apparently requires .xdata to be read-only. This
change also implements the same special case, but applies it on all
architectures rather than only in 32-bit mode.

This should unblock being able to merge .xdata into .rdata by default,
as well as .bss into .data, both of which will be done in followups.

Differential Revision: https://reviews.llvm.org/D45801

llvm-svn: 330479

lld/COFF/Writer.cpp
lld/COFF/Writer.h
lld/test/COFF/crt-chars.test [new file with mode: 0644]
lld/test/COFF/merge.test
lld/test/COFF/output-chars.test [new file with mode: 0644]
lld/test/COFF/unwind.test

index 5911466..63e556b 100644 (file)
@@ -151,6 +151,7 @@ private:
   void createMiscChunks();
   void createImportTables();
   void createExportTable();
+  void mergeSections();
   void assignAddresses();
   void removeEmptySections();
   void createSymbolAndStringTable();
@@ -201,6 +202,7 @@ private:
   OutputSection *TextSec;
   OutputSection *RdataSec;
   OutputSection *DataSec;
+  OutputSection *PdataSec;
   OutputSection *IdataSec;
   OutputSection *EdataSec;
   OutputSection *DidatSec;
@@ -234,14 +236,17 @@ void OutputSection::addChunk(Chunk *C) {
   C->setOutputSection(this);
 }
 
-void OutputSection::addPermissions(uint32_t C) {
-  Header.Characteristics |= C & PermMask;
-}
-
 void OutputSection::setPermissions(uint32_t C) {
   Header.Characteristics = C & PermMask;
 }
 
+void OutputSection::merge(OutputSection *Other) {
+  for (Chunk *C : Other->Chunks)
+    C->setOutputSection(this);
+  Chunks.insert(Chunks.end(), Other->Chunks.begin(), Other->Chunks.end());
+  Other->Chunks.clear();
+}
+
 // Write the section header to a given buffer.
 void OutputSection::writeHeaderTo(uint8_t *Buf) {
   auto *Hdr = reinterpret_cast<coff_section *>(Buf);
@@ -329,6 +334,7 @@ void Writer::run() {
   createMiscChunks();
   createImportTables();
   createExportTable();
+  mergeSections();
   assignAddresses();
   removeEmptySections();
   setSectionPermissions();
@@ -399,17 +405,13 @@ void Writer::createSections() {
   const uint32_t W = IMAGE_SCN_MEM_WRITE;
   const uint32_t X = IMAGE_SCN_MEM_EXECUTE;
 
-  SmallDenseMap<StringRef, OutputSection *> Sections;
-  auto CreateSection = [&](StringRef Name, uint32_t Perms) {
-    auto I = Config->Merge.find(Name);
-    if (I != Config->Merge.end())
-      Name = I->second;
-    OutputSection *&Sec = Sections[Name];
+  SmallDenseMap<std::pair<StringRef, uint32_t>, OutputSection *> Sections;
+  auto CreateSection = [&](StringRef Name, uint32_t OutChars) {
+    OutputSection *&Sec = Sections[{Name, OutChars}];
     if (!Sec) {
-      Sec = make<OutputSection>(Name);
+      Sec = make<OutputSection>(Name, OutChars);
       OutputSections.push_back(Sec);
     }
-    Sec->addPermissions(Perms);
     return Sec;
   };
 
@@ -418,15 +420,15 @@ void Writer::createSections() {
   CreateSection(".bss", BSS | R | W);
   RdataSec = CreateSection(".rdata", DATA | R);
   DataSec = CreateSection(".data", DATA | R | W);
-  CreateSection(".pdata", DATA | R);
+  PdataSec = CreateSection(".pdata", DATA | R);
   IdataSec = CreateSection(".idata", DATA | R);
   EdataSec = CreateSection(".edata", DATA | R);
   DidatSec = CreateSection(".didat", DATA | R);
   RsrcSec = CreateSection(".rsrc", DATA | R);
   RelocSec = CreateSection(".reloc", DATA | DISCARDABLE | R);
 
-  // Then bin chunks by name.
-  std::map<StringRef, std::vector<Chunk *>> Map;
+  // Then bin chunks by name and output characteristics.
+  std::map<std::pair<StringRef, uint32_t>, std::vector<Chunk *>> Map;
   for (Chunk *C : Symtab->getChunks()) {
     auto *SC = dyn_cast<SectionChunk>(C);
     if (SC && !SC->isLive()) {
@@ -434,7 +436,7 @@ void Writer::createSections() {
         SC->printDiscardedMessage();
       continue;
     }
-    Map[C->getSectionName()].push_back(C);
+    Map[{C->getSectionName(), C->getOutputCharacteristics()}].push_back(C);
   }
 
   // Process an /order option.
@@ -447,18 +449,20 @@ void Writer::createSections() {
   // discarded when determining output section. So, .text$foo
   // contributes to .text, for example. See PE/COFF spec 3.2.
   for (auto Pair : Map) {
-    StringRef Name = getOutputSectionName(Pair.first);
-    if (Name == ".pdata") {
-      if (!FirstPdata)
-        FirstPdata = Pair.second.front();
-      LastPdata = Pair.second.back();
-    }
-    OutputSection *Sec = CreateSection(Name, 0);
+    StringRef Name = getOutputSectionName(Pair.first.first);
+    uint32_t OutChars = Pair.first.second;
+
+    // In link.exe, there is a special case for the I386 target where .CRT
+    // sections are treated as if they have output characteristics DATA | R if
+    // their characteristics are DATA | R | W. This implements the same special
+    // case for all architectures.
+    if (Name == ".CRT")
+      OutChars = DATA | R;
+
+    OutputSection *Sec = CreateSection(Name, OutChars);
     std::vector<Chunk *> &Chunks = Pair.second;
-    for (Chunk *C : Chunks) {
+    for (Chunk *C : Chunks)
       Sec->addChunk(C);
-      Sec->addPermissions(C->getOutputCharacteristics());
-    }
   }
 
   // Finally, move some output sections to the end.
@@ -696,6 +700,37 @@ void Writer::createSymbolAndStringTable() {
   FileSize = alignTo(FileOff, SectorSize);
 }
 
+void Writer::mergeSections() {
+  if (!PdataSec->getChunks().empty()) {
+    FirstPdata = PdataSec->getChunks().front();
+    LastPdata = PdataSec->getChunks().back();
+  }
+
+  for (auto &P : Config->Merge) {
+    StringRef ToName = P.second;
+    if (P.first == ToName)
+      continue;
+    StringSet<> Names;
+    while (1) {
+      if (!Names.insert(ToName).second)
+        fatal("/merge: cycle found for section '" + P.first + "'");
+      auto I = Config->Merge.find(ToName);
+      if (I == Config->Merge.end())
+        break;
+      ToName = I->second;
+    }
+    OutputSection *From = findSection(P.first);
+    OutputSection *To = findSection(ToName);
+    if (!From)
+      continue;
+    if (!To) {
+      From->Name = ToName;
+      continue;
+    }
+    To->merge(From);
+  }
+}
+
 // Visits all sections to assign incremental, non-overlapping RVAs and
 // file offsets.
 void Writer::assignAddresses() {
@@ -1100,8 +1135,9 @@ void Writer::setSectionPermissions() {
   for (auto &P : Config->Section) {
     StringRef Name = P.first;
     uint32_t Perm = P.second;
-    if (auto *Sec = findSection(Name))
-      Sec->setPermissions(Perm);
+    for (OutputSection *Sec : OutputSections)
+      if (Sec->Name == Name)
+        Sec->setPermissions(Perm);
   }
 }
 
index a76a792..d37276c 100644 (file)
@@ -30,8 +30,11 @@ void writeResult();
 // non-overlapping file offsets and RVAs.
 class OutputSection {
 public:
-  OutputSection(llvm::StringRef N) : Name(N), Header({}) {}
+  OutputSection(llvm::StringRef N, uint32_t Chars) : Name(N) {
+    Header.Characteristics = Chars;
+  }
   void addChunk(Chunk *C);
+  void merge(OutputSection *Other);
   ArrayRef<Chunk *> getChunks() { return Chunks; }
   void addPermissions(uint32_t C);
   void setPermissions(uint32_t C);
@@ -57,7 +60,7 @@ public:
   uint32_t SectionIndex = 0;
 
   llvm::StringRef Name;
-  llvm::object::coff_section Header;
+  llvm::object::coff_section Header = {};
 
 private:
   uint32_t StringTableOff = 0;
diff --git a/lld/test/COFF/crt-chars.test b/lld/test/COFF/crt-chars.test
new file mode 100644 (file)
index 0000000..e685631
--- /dev/null
@@ -0,0 +1,32 @@
+# RUN: yaml2obj %s > %t.obj
+# RUN: lld-link /out:%t.dll /entry:__ImageBase /dll %t.obj
+# RUN: llvm-readobj -sections -section-data %t.dll | FileCheck %s
+
+# CHECK: Name: .CRT
+# CHECK: Characteristics [
+# CHECK-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+# CHECK-NEXT: IMAGE_SCN_MEM_READ
+# CHECK-NEXT: ]
+# CHECK-NEXT: SectionData (
+# CHECK-NEXT: 010203
+# CHECK-NEXT: )
+
+--- !COFF
+header:
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: [  ]
+sections:
+  - Name:            .CRT$XCZ
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+    Alignment:       1
+    SectionData:     03
+  - Name:            .CRT$XCU
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+    Alignment:       1
+    SectionData:     02
+  - Name:            .CRT$XCA
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+    Alignment:       1
+    SectionData:     01
+symbols:
+...
index f0cf6bb..10a5672 100644 (file)
@@ -3,6 +3,10 @@
 # RUN:   /merge:.foo=.abc /merge:.bar=.def %t.obj /debug
 # RUN: llvm-readobj -sections %t.exe | FileCheck %s
 
+# RUN: lld-link /out:%t.exe /entry:main /subsystem:console /force \
+# RUN:   /merge:.foo=.bar /merge:.bar=.abc %t.obj /debug
+# RUN: llvm-readobj -sections %t.exe | FileCheck --check-prefix=CHECK2 %s
+
 # RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
 # RUN:   /merge:.rsrc=.foo %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-RSRC %s
 # RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
 # RUN:   /merge:.reloc=.foo %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-RELOC %s
 # RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
 # RUN:   /merge:.foo=.reloc %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-RELOC %s
+# RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
+# RUN:   /merge:.foo=.foo1 /merge:.foo1=.foo %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-CYCLE %s
+# RUN: not lld-link /out:%t.exe /entry:main /subsystem:console /force \
+# RUN:   /merge:.foo=.foo1 /merge:.foo1=.foo2 /merge:.foo2=.foo1 %t.obj /debug 2>&1 | FileCheck --check-prefix=NO-CYCLE %s
 
 # CHECK: Name: .def
 # CHECK: Name: .abc
 
+# CHECK2-NOT: Name: .bar
+# CHECK2: Name: .abc
+# CHECK2-NOT: Name: .bar
+
 # NO-RSRC: /merge: cannot merge '.rsrc' with any section
 # NO-RELOC: /merge: cannot merge '.reloc' with any section
 
+# NO-CYCLE: /merge: cycle found for section '.foo'
+
 --- !COFF
 header:
   Machine:         IMAGE_FILE_MACHINE_AMD64
diff --git a/lld/test/COFF/output-chars.test b/lld/test/COFF/output-chars.test
new file mode 100644 (file)
index 0000000..29f6cc1
--- /dev/null
@@ -0,0 +1,106 @@
+# RUN: yaml2obj %s > %t.obj
+# RUN: lld-link /out:%t.dll /entry:__ImageBase /dll %t.obj
+# RUN: llvm-readobj -sections %t.dll | FileCheck %s
+# RUN: lld-link /out:%t.dll /entry:__ImageBase /dll %t.obj /section:.foo,rwe
+# RUN: llvm-readobj -sections %t.dll | FileCheck --check-prefix=SECTION %s
+# RUN: lld-link /out:%t.dll /entry:__ImageBase /dll %t.obj /merge:.foo=.bar
+# RUN: llvm-readobj -sections -section-data %t.dll | FileCheck --check-prefix=MERGE %s
+# RUN: lld-link /out:%t.dll /entry:__ImageBase /dll %t.obj /merge:.foo=.bar /section:.foo,rwe
+# RUN: llvm-readobj -sections %t.dll | FileCheck --check-prefix=MERGE-SECTION %s
+
+# CHECK: Name: .foo
+# CHECK: Characteristics [
+# CHECK-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+# CHECK-NEXT: IMAGE_SCN_MEM_READ
+# CHECK-NEXT: ]
+
+# CHECK: Name: .foo
+# CHECK: Characteristics [
+# CHECK-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+# CHECK-NEXT: IMAGE_SCN_MEM_READ
+# CHECK-NEXT: IMAGE_SCN_MEM_WRITE
+# CHECK-NEXT: ]
+
+# SECTION: Name: .foo
+# SECTION: Characteristics [
+# SECTION-NEXT: IMAGE_SCN_MEM_EXECUTE
+# SECTION-NEXT: IMAGE_SCN_MEM_READ
+# SECTION-NEXT: IMAGE_SCN_MEM_WRITE
+# SECTION-NEXT: ]
+
+# SECTION: Name: .foo
+# SECTION: Characteristics [
+# SECTION-NEXT: IMAGE_SCN_MEM_EXECUTE
+# SECTION-NEXT: IMAGE_SCN_MEM_READ
+# SECTION-NEXT: IMAGE_SCN_MEM_WRITE
+# SECTION-NEXT: ]
+
+# MERGE: Name: .bar
+# MERGE: Characteristics [
+# MERGE-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+# MERGE-NEXT: IMAGE_SCN_MEM_READ
+# MERGE-NEXT: ]
+# MERGE-NEXT: SectionData (
+# MERGE-NEXT: 0000: 0301
+
+# MERGE: Name: .bar
+# MERGE: Characteristics [
+# MERGE-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+# MERGE-NEXT: IMAGE_SCN_MEM_READ
+# MERGE-NEXT: IMAGE_SCN_MEM_WRITE
+# MERGE-NEXT: ]
+# MERGE-NEXT: SectionData (
+# MERGE-NEXT: 0000: 04
+
+# MERGE: Name: .foo
+# MERGE: Characteristics [
+# MERGE-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+# MERGE-NEXT: IMAGE_SCN_MEM_READ
+# MERGE-NEXT: IMAGE_SCN_MEM_WRITE
+# MERGE-NEXT: ]
+# MERGE-NEXT: SectionData (
+# MERGE-NEXT: 0000: 02
+
+# MERGE-SECTION: Name: .bar
+# MERGE-SECTION: Characteristics [
+# MERGE-SECTION-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+# MERGE-SECTION-NEXT: IMAGE_SCN_MEM_READ
+# MERGE-SECTION-NEXT: ]
+
+# MERGE-SECTION: Name: .bar
+# MERGE-SECTION: Characteristics [
+# MERGE-SECTION-NEXT: IMAGE_SCN_CNT_INITIALIZED_DATA
+# MERGE-SECTION-NEXT: IMAGE_SCN_MEM_READ
+# MERGE-SECTION-NEXT: IMAGE_SCN_MEM_WRITE
+# MERGE-SECTION-NEXT: ]
+
+# MERGE-SECTION: Name: .foo
+# MERGE-SECTION: Characteristics [
+# MERGE-SECTION-NEXT: IMAGE_SCN_MEM_EXECUTE
+# MERGE-SECTION-NEXT: IMAGE_SCN_MEM_READ
+# MERGE-SECTION-NEXT: IMAGE_SCN_MEM_WRITE
+# MERGE-SECTION-NEXT: ]
+
+--- !COFF
+header:
+  Machine:         IMAGE_FILE_MACHINE_AMD64
+  Characteristics: [  ]
+sections:
+  - Name:            .foo
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+    Alignment:       1
+    SectionData:     01
+  - Name:            .foo
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+    Alignment:       1
+    SectionData:     02
+  - Name:            .bar
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ ]
+    Alignment:       1
+    SectionData:     03
+  - Name:            .bar
+    Characteristics: [ IMAGE_SCN_CNT_INITIALIZED_DATA, IMAGE_SCN_MEM_READ, IMAGE_SCN_MEM_WRITE ]
+    Alignment:       1
+    SectionData:     04
+symbols:
+...
index c35164b..674e243 100644 (file)
@@ -12,7 +12,7 @@
 # FIXME: llvm-readobj currently does not understand files with .pdata merged
 # into .rdata. But we can at least check that the section headers look correct.
 #
-# HEADER-MERGE: ExceptionTableRVA: 0x2000
+# HEADER-MERGE: ExceptionTableRVA: 0x2004
 # HEADER-MERGE-NEXT: ExceptionTableSize: 0x30
 # HEADER-MERGE: Name: .rdata
 # HEADER-MERGE-NEXT: VirtualSize: 0x34