[WebAssembly] GC constructor functions in otherwise unused archive objects
author Dan Gohman <dev@sunfishcode.online>
Thu, 1 Oct 2020 03:00:04 +0000 (20:00 -0700)
committer Dan Gohman <dev@sunfishcode.online>
Tue, 13 Oct 2020 01:54:57 +0000 (18:54 -0700)
This allows `__wasilibc_populate_libpreopen` to be GC'd in more cases
where it isn't needed, including when linked from Rust's libstd.

Differential Revision: https://reviews.llvm.org/D85062

12 files changed:
lld/test/wasm/Inputs/ctor-ctor.s [new file with mode: 0644]
lld/test/wasm/Inputs/ctor-lib.s [new file with mode: 0644]
lld/test/wasm/Inputs/ctor-setup-call-def.s [new file with mode: 0644]
lld/test/wasm/Inputs/ctor-setup.s [new file with mode: 0644]
lld/test/wasm/Inputs/ctor-start.s [new file with mode: 0644]
lld/test/wasm/ctor-gc-setup.test [new file with mode: 0644]
lld/test/wasm/ctor-gc.test [new file with mode: 0644]
lld/test/wasm/ctor-no-gc.test [new file with mode: 0644]
lld/wasm/InputFiles.h
lld/wasm/MarkLive.cpp
lld/wasm/Symbols.cpp
lld/wasm/Writer.cpp

diff --git a/lld/test/wasm/Inputs/ctor-ctor.s b/lld/test/wasm/Inputs/ctor-ctor.s
new file mode 100644 (file)
index 0000000..cf62182
--- /dev/null
@@ -0,0 +1,15 @@
+       .section        .text.def,"",@
+       .globl def
+def:
+       .functype       def () -> ()
+       end_function
+
+       .section        .text.test_ctor,"",@
+       .globl test_ctor
+test_ctor:
+       .functype       test_ctor () -> ()
+       end_function
+
+       .section        .init_array,"",@
+       .p2align        2
+       .int32 test_ctor
diff --git a/lld/test/wasm/Inputs/ctor-lib.s b/lld/test/wasm/Inputs/ctor-lib.s
new file mode 100644 (file)
index 0000000..67dc0b0
--- /dev/null
@@ -0,0 +1,14 @@
+       .section        .text.lib_func,"",@
+       .globl  lib_func
+lib_func:
+       .functype       lib_func () -> ()
+       end_function
+
+       .section        .text.unused_lib_func,"",@
+       .globl unused_lib_func
+unused_lib_func:
+       .functype       unused_lib_func () -> ()
+       call def
+       end_function
+
+       .functype       def () -> ()
diff --git a/lld/test/wasm/Inputs/ctor-setup-call-def.s b/lld/test/wasm/Inputs/ctor-setup-call-def.s
new file mode 100644 (file)
index 0000000..b0d09dd
--- /dev/null
@@ -0,0 +1,21 @@
+# Like Inputs/ctor-setup.s, except it calls `def` instead of `lib_func`,
+# so it pulls in the .o file containing `test_ctor`.
+
+       .section        .text._start,"",@
+       .globl  _start
+_start:
+       .functype       _start () -> ()
+       end_function
+
+       .section        .text.setup,"",@
+       .globl setup
+setup:
+       .functype       setup () -> ()
+       call def
+       end_function
+
+       .section        .init_array,"",@
+       .p2align        2
+       .int32 setup
+
+.functype       def () -> ()
diff --git a/lld/test/wasm/Inputs/ctor-setup.s b/lld/test/wasm/Inputs/ctor-setup.s
new file mode 100644 (file)
index 0000000..8149548
--- /dev/null
@@ -0,0 +1,19 @@
+# Like Inputs/ctor-start.s, except it calls `lib_func` from a ctor
+# instead of from `_start`.
+
+       .globl  _start
+_start:
+       .functype       _start () -> ()
+       end_function
+
+       .globl  setup
+setup:
+       .functype       setup () -> ()
+       call    lib_func
+       end_function
+
+       .section        .init_array,"",@
+       .p2align        2
+       .int32  setup
+
+        .functype       lib_func () -> ()
diff --git a/lld/test/wasm/Inputs/ctor-start.s b/lld/test/wasm/Inputs/ctor-start.s
new file mode 100644 (file)
index 0000000..8f85fd2
--- /dev/null
@@ -0,0 +1,7 @@
+       .globl _start
+_start:
+       .functype       _start () -> ()
+       call lib_func
+       end_function
+
+       .functype       lib_func () -> ()
diff --git a/lld/test/wasm/ctor-gc-setup.test b/lld/test/wasm/ctor-gc-setup.test
new file mode 100644 (file)
index 0000000..2076a42
--- /dev/null
@@ -0,0 +1,12 @@
+; Like ctor-gc.test, but the main object calls a function from its constructor,
+; which shouldn't matter; `test_ctor` shouldn't be pulled in.
+;
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup.s -o %t.setup.o
+; RUN: rm -f %t.lib.a
+; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
+; RUN: wasm-ld %t.setup.o %t.lib.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK-NOT: Name: test_ctor
diff --git a/lld/test/wasm/ctor-gc.test b/lld/test/wasm/ctor-gc.test
new file mode 100644 (file)
index 0000000..18deab5
--- /dev/null
@@ -0,0 +1,12 @@
+; Verify that constructors from a .o file which is initially pulled into the
+; link but doesn't ultimately contribute to the final output are not included.
+;
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-start.s -o %t.start.o
+; RUN: rm -f %t.lib.a
+; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
+; RUN: wasm-ld %t.start.o %t.lib.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK-NOT: __wasm_call_ctors
diff --git a/lld/test/wasm/ctor-no-gc.test b/lld/test/wasm/ctor-no-gc.test
new file mode 100644 (file)
index 0000000..04e3fdc
--- /dev/null
@@ -0,0 +1,12 @@
+; Like ctor-gc-setup.test, but it calls a different function, so it does pull
+; in the object containing `test_ctor`, so `test_ctor` is linked in.
+;
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-ctor.s -o %t.ctor.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-lib.s -o %t.lib.o
+; RUN: llvm-mc -filetype=obj -triple=wasm32-unknown-unknown %p/Inputs/ctor-setup-call-def.s -o %t.setup-call-def.o
+; RUN: rm -f %t.lib.a
+; RUN: llvm-ar rcs %t.lib.a %t.lib.o %t.ctor.o
+; RUN: wasm-ld %t.setup-call-def.o %t.lib.a -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; CHECK: Name: test_ctor
diff --git a/lld/wasm/InputFiles.h b/lld/wasm/InputFiles.h
index eb32056..0abd47a 100644 (file)
@@ -60,8 +60,14 @@ public:
 
   MutableArrayRef<Symbol *> getMutableSymbols() { return symbols; }
 
+  // An InputFile is considered live if any of the symbols defined by it
+  // are live.
+  void markLive() { live = true; }
+  bool isLive() const { return live; }
+
 protected:
-  InputFile(Kind k, MemoryBufferRef m) : mb(m), fileKind(k) {}
+  InputFile(Kind k, MemoryBufferRef m)
+      : mb(m), fileKind(k), live(!config->gcSections) {}
   MemoryBufferRef mb;
 
   // List of all symbols referenced or defined by this file.
@@ -69,6 +75,7 @@ protected:
 
 private:
   const Kind fileKind;
+  bool live;
 };
 
 // .a file (ar archive)
@@ -92,6 +99,10 @@ public:
   explicit ObjFile(MemoryBufferRef m, StringRef archiveName)
       : InputFile(ObjectKind, m) {
     this->archiveName = std::string(archiveName);
+
+    // If this isn't part of an archive, it's eagerly linked, so mark it live.
+    if (archiveName.empty())
+      markLive();
   }
   static bool classof(const InputFile *f) { return f->kind() == ObjectKind; }
 
@@ -156,6 +167,10 @@ public:
   explicit BitcodeFile(MemoryBufferRef m, StringRef archiveName)
       : InputFile(BitcodeKind, m) {
     this->archiveName = std::string(archiveName);
+
+    // If this isn't part of an archive, it's eagerly linked, so mark it live.
+    if (archiveName.empty())
+      markLive();
   }
   static bool classof(const InputFile *f) { return f->kind() == BitcodeKind; }
 
diff --git a/lld/wasm/MarkLive.cpp b/lld/wasm/MarkLive.cpp
index 2766eec..4bce688 100644 (file)
@@ -42,6 +42,7 @@ public:
 
 private:
   void enqueue(Symbol *sym);
+  void enqueueInitFunctions(const ObjFile *obj); // Enqueue obj's ctors.
   void markSymbol(Symbol *sym);
   void mark();
   bool isCallCtorsLive();
@@ -56,11 +57,35 @@ void MarkLive::enqueue(Symbol *sym) {
   if (!sym || sym->isLive())
     return;
   LLVM_DEBUG(dbgs() << "markLive: " << sym->getName() << "\n");
+
+  InputFile *file = sym->getFile();
+  bool needInitFunctions = file && !file->isLive() && sym->isDefined();
+
   sym->markLive();
+
+  // Mark ctor functions in the object that defines this symbol live.
+  // The ctor functions are all referenced by the synthetic callCtors
+  // function. However, this function does not contain relocations so we
+  // have to manually mark the ctors as live.
+  if (needInitFunctions)
+    enqueueInitFunctions(cast<ObjFile>(file));
+
   if (InputChunk *chunk = sym->getChunk())
     queue.push_back(chunk);
 }
 
+// Enqueue each non-discarded ctor listed in `obj`'s linking data.  The
+// synthetic callCtors function has no relocations, so this is done manually.
+void MarkLive::enqueueInitFunctions(const ObjFile *obj) {
+  const WasmLinkingData &linking = obj->getWasmObj()->linkingData();
+  for (const WasmInitFunc &init : linking.InitFunctions) {
+    auto *ctorSym = obj->getFunctionSymbol(init.Symbol);
+    if (ctorSym->isDiscarded())
+      continue;
+    enqueue(ctorSym);
+  }
+}
+
 void MarkLive::run() {
   // Add GC root symbols.
   if (!config->entry.empty())
@@ -75,31 +100,24 @@ void MarkLive::run() {
   if (Symbol *callDtors = WasmSym::callDtors)
     enqueue(callDtors);
 
-  // The ctor functions are all referenced by the synthetic callCtors
-  // function.  However, this function does not contain relocations so we
-  // have to manually mark the ctors as live.
-  for (const ObjFile *obj : symtab->objectFiles) {
-    const WasmLinkingData &l = obj->getWasmObj()->linkingData();
-    for (const WasmInitFunc &f : l.InitFunctions) {
-      auto *initSym = obj->getFunctionSymbol(f.Symbol);
-      if (!initSym->isDiscarded())
-        enqueue(initSym);
-    }
-  }
-
   // In Emscripten-style PIC, `__wasm_call_ctors` calls `__wasm_apply_relocs`.
   if (config->isPic)
     enqueue(WasmSym::applyRelocs);
 
-  // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
-  // live so that we assign it an index and call it.
-  if (isCallCtorsLive())
-    enqueue(WasmSym::callCtors);
-
   if (config->sharedMemory && !config->shared)
     enqueue(WasmSym::initMemory);
 
+  // Enqueue constructors in objects explicitly live from the command-line.
+  for (const ObjFile *obj : symtab->objectFiles)
+    if (obj->isLive())
+      enqueueInitFunctions(obj);
+
   mark();
+
+  // If we have any non-discarded init functions, mark `__wasm_call_ctors` as
+  // live so that we assign it an index and call it.
+  if (isCallCtorsLive())
+    WasmSym::callCtors->markLive();
 }
 
 void MarkLive::mark() {
@@ -181,9 +199,11 @@ bool MarkLive::isCallCtorsLive() {
   // it can call them.
   for (const ObjFile *file : symtab->objectFiles) {
     const WasmLinkingData &l = file->getWasmObj()->linkingData();
-    for (const WasmInitFunc &f : l.InitFunctions)
-      if (!file->getFunctionSymbol(f.Symbol)->isDiscarded())
+    for (const WasmInitFunc &f : l.InitFunctions) {
+      auto *sym = file->getFunctionSymbol(f.Symbol);
+      if (!sym->isDiscarded() && sym->isLive())
         return true;
+    }
   }
 
   return false;
diff --git a/lld/wasm/Symbols.cpp b/lld/wasm/Symbols.cpp
index d69ef00..e92af6c 100644 (file)
@@ -132,6 +132,8 @@ bool Symbol::isLive() const {
 
 void Symbol::markLive() {
   assert(!isDiscarded());
+  if (file != NULL)
+    file->markLive();
   if (auto *g = dyn_cast<DefinedGlobal>(this))
     g->global->live = true;
   if (auto *e = dyn_cast<DefinedEvent>(this))
diff --git a/lld/wasm/Writer.cpp b/lld/wasm/Writer.cpp
index 3161831..aaa2974 100644 (file)
@@ -1112,9 +1112,8 @@ void Writer::calculateInitFunctions() {
     for (const WasmInitFunc &f : l.InitFunctions) {
       FunctionSymbol *sym = file->getFunctionSymbol(f.Symbol);
       // comdat exclusions can cause init functions be discarded.
-      if (sym->isDiscarded())
+      if (sym->isDiscarded() || !sym->isLive())
         continue;
-      assert(sym->isLive());
       if (sym->signature->Params.size() != 0)
         error("constructor functions cannot take arguments: " + toString(*sym));
       LLVM_DEBUG(dbgs() << "initFunctions: " << toString(*sym) << "\n");