From 53cd7406bb6507f60c14fbbce4b213f0bbd78730 Mon Sep 17 00:00:00 2001 From: Reid Kleckner Date: Fri, 7 Jun 2019 22:05:12 +0000 Subject: [PATCH] [COFF] Fix /export:foo=bar when bar is a weak alias Summary: When handling exports from the command line or from .def files, the linker does a "fuzzy" string lookup to allow finding mangled symbols. However, when the symbol is re-exported under a new name, the linker has to transfer the decorations from the exported symbol over to the new name. This is implemented by taking the mangled symbol that was found in the object and replacing the original symbol name with the export name. Before this patch, LLD implemented the fuzzy search by adding an undefined symbol with the unmangled name, and then during symbol resolution, checking if similar mangled symbols had been added after the last round of symbol resolution. If so, LLD made the original symbol a weak alias of the mangled symbol. Later, to get the original symbol name, LLD would look through the weak alias and forward it on to the import library writer, which copies the symbol decorations. This approach doesn't work when bar is itself a weak alias, as is the case in asan. It's especially bad when the aliasee of bar contains the string "bar"; consider, for example, "bar_default". In this case, we would end up exporting the symbol "foo_default" when we should've exported just "foo". To fix this, don't look through weak aliases to find the mangled name. Save the mangled name earlier during fuzzy symbol lookup. 
Fixes PR42074 Reviewers: mstorsjo, ruiu Subscribers: thakis, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D62984 llvm-svn: 362849 --- lld/COFF/Driver.cpp | 26 +++++++++++--- lld/COFF/Driver.h | 2 ++ lld/COFF/DriverUtils.cpp | 12 ------- lld/COFF/SymbolTable.cpp | 60 +++++++++++++++++-------------- lld/COFF/SymbolTable.h | 6 ++-- lld/test/COFF/export-stdcall.s | 31 ++++++++++++++++ lld/test/COFF/export-weak-alias.s | 32 +++++++++++++++++ llvm/include/llvm/Object/COFFImportFile.h | 12 +++++++ 8 files changed, 136 insertions(+), 45 deletions(-) create mode 100644 lld/test/COFF/export-stdcall.s create mode 100644 lld/test/COFF/export-weak-alias.s diff --git a/lld/COFF/Driver.cpp b/lld/COFF/Driver.cpp index 7f7fde1..ef25fe3 100644 --- a/lld/COFF/Driver.cpp +++ b/lld/COFF/Driver.cpp @@ -138,8 +138,8 @@ static StringRef mangle(StringRef Sym) { } static bool findUnderscoreMangle(StringRef Sym) { - StringRef Entry = Symtab->findMangle(mangle(Sym)); - return !Entry.empty() && !isa(Symtab->find(Entry)); + Symbol *S = Symtab->findMangle(mangle(Sym)); + return S && !isa(S); } MemoryBufferRef LinkerDriver::takeBuffer(std::unique_ptr MB) { @@ -486,6 +486,24 @@ Symbol *LinkerDriver::addUndefined(StringRef Name) { return B; } +StringRef LinkerDriver::mangleMaybe(Symbol *S) { + // If the plain symbol name has already been resolved, do nothing. + Undefined *Unmangled = dyn_cast(S); + if (!Unmangled) + return ""; + + // Otherwise, see if a similar, mangled symbol exists in the symbol table. + Symbol *Mangled = Symtab->findMangle(Unmangled->getName()); + if (!Mangled) + return ""; + + // If we find a similar mangled symbol, make this an alias to it and return + // its name. + log(Unmangled->getName() + " aliased to " + Mangled->getName()); + Unmangled->WeakAlias = Symtab->addUndefined(Mangled->getName()); + return Mangled->getName(); +} + // Windows specific -- find default entry point name. 
// // There are four different entry point functions for Windows executables, @@ -1644,7 +1662,7 @@ void LinkerDriver::link(ArrayRef ArgsArr) { // Windows specific -- if entry point is not found, // search for its mangled names. if (Config->Entry) - Symtab->mangleMaybe(Config->Entry); + mangleMaybe(Config->Entry); // Windows specific -- Make sure we resolve all dllexported symbols. for (Export &E : Config->Exports) { @@ -1652,7 +1670,7 @@ void LinkerDriver::link(ArrayRef ArgsArr) { continue; E.Sym = addUndefined(E.Name); if (!E.Directives) - Symtab->mangleMaybe(E.Sym); + E.SymbolName = mangleMaybe(E.Sym); } // Add weak aliases. Weak aliases is a mechanism to give remaining diff --git a/lld/COFF/Driver.h b/lld/COFF/Driver.h index f0c2ee6..6b4266a 100644 --- a/lld/COFF/Driver.h +++ b/lld/COFF/Driver.h @@ -108,6 +108,8 @@ private: Symbol *addUndefined(StringRef Sym); + StringRef mangleMaybe(Symbol *S); + // Windows specific -- "main" is not the only main function in Windows. // You can choose one from these four -- {w,}{WinMain,main}. 
// There are four different entry point functions for them, diff --git a/lld/COFF/DriverUtils.cpp b/lld/COFF/DriverUtils.cpp index f689cd1..c59027e 100644 --- a/lld/COFF/DriverUtils.cpp +++ b/lld/COFF/DriverUtils.cpp @@ -655,18 +655,6 @@ void fixupExports() { } for (Export &E : Config->Exports) { - Symbol *Sym = E.Sym; - if (!E.ForwardTo.empty() || !Sym) { - E.SymbolName = E.Name; - } else { - if (auto *U = dyn_cast(Sym)) - if (U->WeakAlias) - Sym = U->WeakAlias; - E.SymbolName = Sym->getName(); - } - } - - for (Export &E : Config->Exports) { if (!E.ForwardTo.empty()) { E.ExportName = undecorate(E.Name); } else { diff --git a/lld/COFF/SymbolTable.cpp b/lld/COFF/SymbolTable.cpp index 1c602c6..5180559 100644 --- a/lld/COFF/SymbolTable.cpp +++ b/lld/COFF/SymbolTable.cpp @@ -472,48 +472,56 @@ Symbol *SymbolTable::findUnderscore(StringRef Name) { return find(Name); } -StringRef SymbolTable::findByPrefix(StringRef Prefix) { +// Return all symbols that start with Prefix, possibly ignoring the first +// character of Prefix or the first character symbol. +std::vector SymbolTable::getSymsWithPrefix(StringRef Prefix) { + std::vector Syms; for (auto Pair : SymMap) { StringRef Name = Pair.first.val(); - if (Name.startswith(Prefix)) - return Name; + if (Name.startswith(Prefix) || Name.startswith(Prefix.drop_front()) || + Name.drop_front().startswith(Prefix) || + Name.drop_front().startswith(Prefix.drop_front())) { + Syms.push_back(Pair.second); + } } - return ""; + return Syms; } -StringRef SymbolTable::findMangle(StringRef Name) { +Symbol *SymbolTable::findMangle(StringRef Name) { if (Symbol *Sym = find(Name)) if (!isa(Sym)) - return Name; + return Sym; + + // Efficient fuzzy string lookup is impossible with a hash table, so iterate + // the symbol table once and collect all possibly matching symbols into this + // vector. Then compare each possibly matching symbol with each possible + // mangling. 
+ std::vector Syms = getSymsWithPrefix(Name); + auto FindByPrefix = [&Syms](const Twine &T) -> Symbol * { + std::string Prefix = T.str(); + for (auto *S : Syms) + if (S->getName().startswith(Prefix)) + return S; + return nullptr; + }; + + // For non-x86, just look for C++ functions. if (Config->Machine != I386) - return findByPrefix(("?" + Name + "@@Y").str()); + return FindByPrefix("?" + Name + "@@Y"); + if (!Name.startswith("_")) - return ""; + return nullptr; // Search for x86 stdcall function. - StringRef S = findByPrefix((Name + "@").str()); - if (!S.empty()) + if (Symbol *S = FindByPrefix(Name + "@")) return S; // Search for x86 fastcall function. - S = findByPrefix(("@" + Name.substr(1) + "@").str()); - if (!S.empty()) + if (Symbol *S = FindByPrefix("@" + Name.substr(1) + "@")) return S; // Search for x86 vectorcall function. - S = findByPrefix((Name.substr(1) + "@@").str()); - if (!S.empty()) + if (Symbol *S = FindByPrefix(Name.substr(1) + "@@")) return S; // Search for x86 C++ non-member function. - return findByPrefix(("?" + Name.substr(1) + "@@Y").str()); -} - -void SymbolTable::mangleMaybe(Symbol *B) { - auto *U = dyn_cast(B); - if (!U || U->WeakAlias) - return; - StringRef Alias = findMangle(U->getName()); - if (!Alias.empty()) { - log(U->getName() + " aliased to " + Alias); - U->WeakAlias = addUndefined(Alias); - } + return FindByPrefix("?" + Name.substr(1) + "@@Y"); } Symbol *SymbolTable::addUndefined(StringRef Name) { diff --git a/lld/COFF/SymbolTable.h b/lld/COFF/SymbolTable.h index 7f74863..d01bac1 100644 --- a/lld/COFF/SymbolTable.h +++ b/lld/COFF/SymbolTable.h @@ -68,8 +68,7 @@ public: // mangled symbol. This function tries to find a mangled name // for U from the symbol table, and if found, set the symbol as // a weak alias for U. 
- void mangleMaybe(Symbol *B); - StringRef findMangle(StringRef Name); + Symbol *findMangle(StringRef Name); // Build a set of COFF objects representing the combined contents of // BitcodeFiles and add them to the symbol table. Called after all files are @@ -115,7 +114,8 @@ private: std::pair insert(StringRef Name); /// Same as insert(Name), but also sets IsUsedInRegularObj. std::pair insert(StringRef Name, InputFile *F); - StringRef findByPrefix(StringRef Prefix); + + std::vector getSymsWithPrefix(StringRef Prefix); llvm::DenseMap SymMap; std::unique_ptr LTO; diff --git a/lld/test/COFF/export-stdcall.s b/lld/test/COFF/export-stdcall.s new file mode 100644 index 0000000..b114641 --- /dev/null +++ b/lld/test/COFF/export-stdcall.s @@ -0,0 +1,31 @@ +# RUN: llvm-mc -triple i686-windows-msvc %s -o %t.obj -filetype=obj +# RUN: lld-link %t.obj -out:%t.dll -dll -nodefaultlib -noentry -export:foo_std=bar_std -export:foo_fast=bar_fast +# RUN: llvm-nm %t.lib | FileCheck %s + +# MSVC fudges the lookup of 'bar' to allow it to find the stdcall function +# _bar_std@8, and then exports _foo_std@8. Same for fastcall and other mangling +# schemes. 
+ +# CHECK: export-stdcall.s.tmp.dll: +# CHECK: 00000000 T @foo_fast@8 +# CHECK: 00000000 T __imp_@foo_fast@8 + +# CHECK: export-stdcall.s.tmp.dll: +# CHECK: 00000000 T __imp__foo_std@8 +# CHECK: 00000000 T _foo_std@8 + + .text + .def _bar_std@8; .scl 2; .type 32; .endef + .globl _bar_std@8 +_bar_std@8: + movl 8(%esp), %eax + movl 4(%esp), %ecx + leal 42(%ecx,%eax), %eax + retl $8 + + .def @bar_fast@8; .scl 2; .type 32; .endef + .globl @bar_fast@8 +@bar_fast@8: + leal 42(%ecx,%eax), %eax + retl + diff --git a/lld/test/COFF/export-weak-alias.s b/lld/test/COFF/export-weak-alias.s new file mode 100644 index 0000000..d1c2141 --- /dev/null +++ b/lld/test/COFF/export-weak-alias.s @@ -0,0 +1,32 @@ +# RUN: llvm-mc -triple x86_64-windows-msvc %s -o %t.obj -filetype=obj +# RUN: lld-link %t.obj -out:%t.dll -dll -nodefaultlib -noentry +# RUN: llvm-nm %t.lib | FileCheck %s + +# CHECK: export-weak-alias.s.tmp.dll: +# CHECK: 00000000 T __imp_foo_dll{{$}} +# CHECK: 00000000 T foo_dll{{$}} + + .text + .def @feat.00; + .scl 3; + .type 0; + .endef + .globl @feat.00 +.set @feat.00, 0 + .file "t.c" + .def foo_def; + .scl 2; + .type 32; + .endef + .globl foo_def # -- Begin function foo_def + .p2align 4, 0x90 +foo_def: # @foo_def +# %bb.0: # %entry + movl $42, %eax + retq + # -- End function + .section .drectve,"yn" + .ascii " /alternatename:foo=foo_def" + .ascii " /export:foo_dll=foo" + + .addrsig diff --git a/llvm/include/llvm/Object/COFFImportFile.h b/llvm/include/llvm/Object/COFFImportFile.h index e07060c..5aa8364 100644 --- a/llvm/include/llvm/Object/COFFImportFile.h +++ b/llvm/include/llvm/Object/COFFImportFile.h @@ -69,9 +69,21 @@ private: }; struct COFFShortExport { + /// The name of the export as specified in the .def file or on the command + /// line, i.e. "foo" in "/EXPORT:foo", and "bar" in "/EXPORT:foo=bar". This + /// may lack mangling, such as underscore prefixing and stdcall suffixing. std::string Name; + + /// The external, exported name. 
Only non-empty when export renaming is in + /// effect, i.e. "foo" in "/EXPORT:foo=bar". std::string ExtName; + + /// The real, mangled symbol name from the object file. Given + /// "/export:foo=bar", this could be "_bar@8" if bar is stdcall. std::string SymbolName; + + /// Creates a weak alias. This is the name of the weak aliasee. In a .def + /// file, this is "baz" in "EXPORTS\nfoo = bar == baz". std::string AliasTarget; uint16_t Ordinal = 0; -- 2.7.4