[WebAssembly] Handle weak undefined functions with a synthetic stub
author Nicholas Wilson <nicholas@nicholaswilson.me.uk>
Fri, 9 Mar 2018 17:06:38 +0000 (17:06 +0000)
committer Nicholas Wilson <nicholas@nicholaswilson.me.uk>
Fri, 9 Mar 2018 17:06:38 +0000 (17:06 +0000)
This error case is described in Linking.md. The operand for call requires
generation of a synthetic stub.

Differential Revision: https://reviews.llvm.org/D44028

llvm-svn: 327151

lld/test/wasm/undefined-weak-call.ll [new file with mode: 0644]
lld/wasm/Driver.cpp
lld/wasm/InputFiles.cpp
lld/wasm/MarkLive.cpp
lld/wasm/Symbols.cpp
lld/wasm/Symbols.h

diff --git a/lld/test/wasm/undefined-weak-call.ll b/lld/test/wasm/undefined-weak-call.ll
new file mode 100644 (file)
index 0000000..29630d0
--- /dev/null
@@ -0,0 +1,117 @@
+; RUN: llc -filetype=obj %s -o %t.o
+; RUN: wasm-ld --check-signatures --no-entry %t.o -o %t.wasm
+; RUN: obj2yaml %t.wasm | FileCheck %s
+
+; Check that calling an undefined weak function generates an appropriate stub
+; that will fail at runtime with "unreachable".
+
+target triple = "wasm32-unknown-unknown-wasm"
+
+declare extern_weak void @weakFunc1()
+declare extern_weak void @weakFunc2()         ; same signature
+declare extern_weak void @weakFunc3(i32 %arg) ; different
+declare extern_weak void @weakFunc4()         ; should be GC'd as not called
+
+define i32 @callWeakFuncs() {
+  call void @weakFunc1()
+  call void @weakFunc2()
+  call void @weakFunc3(i32 2)
+  %addr1 = ptrtoint void ()* @weakFunc1 to i32
+  %addr4 = ptrtoint void ()* @weakFunc4 to i32
+  %sum = add i32 %addr1, %addr4
+  ret i32 %sum
+}
+
+; CHECK:      --- !WASM
+; CHECK-NEXT: FileHeader:
+; CHECK-NEXT:   Version:         0x00000001
+; CHECK-NEXT: Sections:
+; CHECK-NEXT:   - Type:            TYPE
+; CHECK-NEXT:     Signatures:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         ReturnType:      I32
+; CHECK-NEXT:         ParamTypes:
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         ReturnType:      NORESULT
+; CHECK-NEXT:         ParamTypes:
+; CHECK-NEXT:       - Index:           2
+; CHECK-NEXT:         ReturnType:      NORESULT
+; CHECK-NEXT:         ParamTypes:
+; CHECK-NEXT:           - I32
+; CHECK-NEXT:   - Type:            FUNCTION
+; CHECK-NEXT:     FunctionTypes:   [ 0, 1, 1, 1, 2 ]
+; CHECK-NEXT:   - Type:            TABLE
+; CHECK-NEXT:     Tables:
+; CHECK-NEXT:       - ElemType:        ANYFUNC
+; CHECK-NEXT:         Limits:
+; CHECK-NEXT:           Flags:           [ HAS_MAX ]
+; CHECK-NEXT:           Initial:         0x00000001
+; CHECK-NEXT:           Maximum:         0x00000001
+; CHECK-NEXT:   - Type:            MEMORY
+; CHECK-NEXT:     Memories:
+; CHECK-NEXT:       - Initial:         0x00000002
+; CHECK-NEXT:   - Type:            GLOBAL
+; CHECK-NEXT:     Globals:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         Type:            I32
+; CHECK-NEXT:         Mutable:         true
+; CHECK-NEXT:         InitExpr:
+; CHECK-NEXT:           Opcode:          I32_CONST
+; CHECK-NEXT:           Value:           66560
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         Type:            I32
+; CHECK-NEXT:         Mutable:         false
+; CHECK-NEXT:         InitExpr:
+; CHECK-NEXT:           Opcode:          I32_CONST
+; CHECK-NEXT:           Value:           66560
+; CHECK-NEXT:       - Index:           2
+; CHECK-NEXT:         Type:            I32
+; CHECK-NEXT:         Mutable:         false
+; CHECK-NEXT:         InitExpr:
+; CHECK-NEXT:           Opcode:          I32_CONST
+; CHECK-NEXT:           Value:           1024
+; CHECK-NEXT:   - Type:            EXPORT
+; CHECK-NEXT:     Exports:
+; CHECK-NEXT:       - Name:            memory
+; CHECK-NEXT:         Kind:            MEMORY
+; CHECK-NEXT:         Index:           0
+; CHECK-NEXT:       - Name:            __heap_base
+; CHECK-NEXT:         Kind:            GLOBAL
+; CHECK-NEXT:         Index:           1
+; CHECK-NEXT:       - Name:            __data_end
+; CHECK-NEXT:         Kind:            GLOBAL
+; CHECK-NEXT:         Index:           2
+; CHECK-NEXT:       - Name:            callWeakFuncs
+; CHECK-NEXT:         Kind:            FUNCTION
+; CHECK-NEXT:         Index:           0
+; CHECK-NEXT:   - Type:            CODE
+; CHECK-NEXT:     Functions:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         Locals:
+; CHECK-NEXT:         Body:            10828080800010838080800041021084808080004180808080004180808080006A0B
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         Locals:
+; CHECK-NEXT:         Body:            0B
+; CHECK-NEXT:       - Index:           2
+; CHECK-NEXT:         Locals:
+; CHECK-NEXT:         Body:            000B
+; CHECK-NEXT:       - Index:           3
+; CHECK-NEXT:         Locals:
+; CHECK-NEXT:         Body:            000B
+; CHECK-NEXT:       - Index:           4
+; CHECK-NEXT:         Locals:
+; CHECK-NEXT:         Body:            000B
+; CHECK-NEXT:   - Type:            CUSTOM
+; CHECK-NEXT:     Name:            name
+; CHECK-NEXT:     FunctionNames:
+; CHECK-NEXT:       - Index:           0
+; CHECK-NEXT:         Name:            callWeakFuncs
+; CHECK-NEXT:       - Index:           1
+; CHECK-NEXT:         Name:            __wasm_call_ctors
+; CHECK-NEXT:       - Index:           2
+; CHECK-NEXT:         Name:            undefined function weakFunc1
+; CHECK-NEXT:       - Index:           3
+; CHECK-NEXT:         Name:            undefined function weakFunc2
+; CHECK-NEXT:       - Index:           4
+; CHECK-NEXT:         Name:            undefined function weakFunc3
+; CHECK-NEXT: ...
index c3f53e5..98d1f5d 100644 (file)
@@ -215,6 +215,44 @@ static StringRef getEntry(opt::InputArgList &Args, StringRef Default) {
   return Arg->getValue();
 }
 
+static const uint8_t UnreachableFn[] = {
+    0x03 /* ULEB length */, 0x00 /* ULEB num locals */,
+    0x00 /* opcode unreachable */, 0x0b /* opcode end */
+};
+
+// For weak undefined functions, there may be "call" instructions that reference
+// the symbol. In this case, we need to synthesise a dummy/stub function that
+// will abort at runtime, so that relocations can still provide an operand to
+// the call instruction that passes Wasm validation.
+static void handleWeakUndefines() {
+  for (Symbol *Sym : Symtab->getSymbols()) {
+    if (!Sym->isUndefined() || !Sym->isWeak())
+      continue;
+    auto *FuncSym = dyn_cast<FunctionSymbol>(Sym);
+    if (!FuncSym)
+      continue;
+
+    // It is possible for undefined functions not to have a signature (e.g. if
+    // added via "--undefined"), but weak undefined ones do have a signature.
+    assert(FuncSym->getFunctionType());
+    const WasmSignature &Sig = *FuncSym->getFunctionType();
+
+    // Add a synthetic dummy for weak undefined functions.  These dummies will
+    // be GC'd if not used as the target of any "call" instructions.
+    StringRef StubName =
+        Saver.save("undefined function " + toString(*Sym, false));
+    SyntheticFunction *Func = make<SyntheticFunction>(Sig, StubName);
+    Func->setBody(UnreachableFn);
+    // Index zero makes it compare equal to the null pointer, and means table
+    // relocs don't pull in the stub body (only call-operand relocs should).
+    Func->setTableIndex(0);
+    Symtab->SyntheticFunctions.emplace_back(Func);
+    // Hide our dummy to prevent export.
+    uint32_t Flags = WASM_SYMBOL_VISIBILITY_HIDDEN;
+    replaceSymbol<DefinedFunction>(Sym, Sym->getName(), Flags, nullptr, Func);
+  }
+}
+
 void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   WasmOptTable Parser;
   opt::InputArgList Args = Parser.parse(ArgsArr.slice(1));
@@ -326,6 +364,10 @@ void LinkerDriver::link(ArrayRef<const char *> ArgsArr) {
   for (InputFile *F : Files)
     Symtab->addFile(F);
 
+  // Add synthetic dummies for weak undefined functions.
+  if (!Config->Relocatable)
+    handleWeakUndefines();
+
   // Make sure we have resolved all symbols.
   if (!Config->Relocatable && !Config->AllowUndefined) {
     Symtab->reportRemainingUndefines();
index 2c65c7b..12ee9ec 100644 (file)
@@ -65,14 +65,8 @@ uint32_t ObjFile::calcNewIndex(const WasmRelocation &Reloc) const {
 uint32_t ObjFile::calcNewValue(const WasmRelocation &Reloc) const {
   switch (Reloc.Type) {
   case R_WEBASSEMBLY_TABLE_INDEX_I32:
-  case R_WEBASSEMBLY_TABLE_INDEX_SLEB: {
-    // The null case is possible, if you take the address of a weak function
-    // that's simply not supplied.
-    FunctionSymbol *Sym = getFunctionSymbol(Reloc.Index);
-    if (Sym->hasTableIndex())
-      return Sym->getTableIndex();
-    return 0;
-  }
+  case R_WEBASSEMBLY_TABLE_INDEX_SLEB:
+    return getFunctionSymbol(Reloc.Index)->getTableIndex();
   case R_WEBASSEMBLY_MEMORY_ADDR_SLEB:
   case R_WEBASSEMBLY_MEMORY_ADDR_I32:
   case R_WEBASSEMBLY_MEMORY_ADDR_LEB:
index 22211c1..9b72697 100644 (file)
@@ -73,8 +73,25 @@ void lld::wasm::markLive() {
     InputChunk *C = Q.pop_back_val();
 
     for (const WasmRelocation Reloc : C->getRelocations()) {
-      if (Reloc.Type != R_WEBASSEMBLY_TYPE_INDEX_LEB)
-        Enqueue(C->File->getSymbol(Reloc.Index));
+      if (Reloc.Type == R_WEBASSEMBLY_TYPE_INDEX_LEB)
+        continue;
+      Symbol *Sym = C->File->getSymbol(Reloc.Index);
+
+      // If the function has been assigned the special index zero in the table,
+      // the relocation doesn't pull in the function body, since the function
+      // won't actually go in the table (the runtime will trap attempts to call
+      // that index, since we don't use it).  A function with a table index of
+      // zero is only reachable via "call", not via "call_indirect".  The stub
+      // functions used for weak-undefined symbols have this behaviour (compare
+      // equal to null pointer, only reachable via direct call).
+      if (Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_SLEB ||
+          Reloc.Type == R_WEBASSEMBLY_TABLE_INDEX_I32) {
+        FunctionSymbol *FuncSym = cast<FunctionSymbol>(Sym);
+        if (FuncSym->hasTableIndex() && FuncSym->getTableIndex() == 0)
+          continue;
+      }
+
+      Enqueue(Sym);
     }
   }
 
index 742c887..b1bf9b3 100644 (file)
@@ -180,10 +180,10 @@ DefinedGlobal::DefinedGlobal(StringRef Name, uint32_t Flags, InputFile *File,
 
 void LazySymbol::fetch() { cast<ArchiveFile>(File)->addMember(&ArchiveSymbol); }
 
-std::string lld::toString(const wasm::Symbol &Sym) {
+std::string lld::toString(const wasm::Symbol &Sym, bool QuoteDemangled) {
   if (Config->Demangle)
     if (Optional<std::string> S = demangleItanium(Sym.getName()))
-      return "`" + *S + "'";
+      return QuoteDemangled ? ("`" + *S + "'") : *S;
   return Sym.getName();
 }
 
index 76a7417..92acabb 100644 (file)
@@ -312,7 +312,7 @@ T *replaceSymbol(Symbol *S, ArgT &&... Arg) {
 } // namespace wasm
 
 // Returns a symbol name for an error message.
-std::string toString(const wasm::Symbol &Sym);
+std::string toString(const wasm::Symbol &Sym, bool QuoteDemangled = true);
 std::string toString(wasm::Symbol::Kind Kind);
 std::string toString(WasmSymbolType Type);