From 82ca390062d11512528e3d357d8d3d7b69477caf Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 11 Oct 2022 20:40:26 -0400 Subject: [PATCH] Reland "[lld/mac] Port typo correction for undefined symbols from ELF port" The only difference in the reland is that the loop at the top of getAlternativeSpelling() now calls dyn_cast_or_null() instead of dyn_cast() -- a file's symbols list can contain null entries. The test for this might be slightly involved, so I'll land it in a follow-up, to make the reland similar to the original commit. Originally reviewed at: Differential Revision: https://reviews.llvm.org/D135038 This reverts commit 317b5582b813c51d1fb6723fd44b227b7f274bc7. --- lld/MachO/SymbolTable.cpp | 148 ++++++++++++++++++++++++++++++- lld/test/MachO/undef-spell-corrector.s | 79 +++++++++++++++++ lld/test/MachO/undef-suggest-extern-c.s | 19 ++++ lld/test/MachO/undef-suggest-extern-c2.s | 21 +++++ 4 files changed, 264 insertions(+), 3 deletions(-) create mode 100644 lld/test/MachO/undef-spell-corrector.s create mode 100644 lld/test/MachO/undef-suggest-extern-c.s create mode 100644 lld/test/MachO/undef-suggest-extern-c2.s diff --git a/lld/MachO/SymbolTable.cpp b/lld/MachO/SymbolTable.cpp index 5173fa0..8ad7dbb 100644 --- a/lld/MachO/SymbolTable.cpp +++ b/lld/MachO/SymbolTable.cpp @@ -392,8 +392,138 @@ void macho::reportPendingDuplicateSymbols() { } } +// Check whether the definition name def is a mangled function name that matches +// the reference name ref. +static bool canSuggestExternCForCXX(StringRef ref, StringRef def) { + llvm::ItaniumPartialDemangler d; + std::string name = def.str(); + if (d.partialDemangle(name.c_str())) + return false; + char *buf = d.getFunctionName(nullptr, nullptr); + if (!buf) + return false; + bool ret = ref == buf; + free(buf); + return ret; +} + +// Suggest an alternative spelling of an "undefined symbol" diagnostic. Returns +// the suggested symbol, which is either in the symbol table, or in the same +// file of sym. +static const Symbol *getAlternativeSpelling(const Undefined &sym, + std::string &pre_hint, + std::string &post_hint) { + DenseMap map; + if (sym.getFile() && sym.getFile()->kind() == InputFile::ObjKind) { + // Build a map of local defined symbols. + for (const Symbol *s : sym.getFile()->symbols) + if (auto *defined = dyn_cast_or_null(s)) + if (!defined->isExternal()) + map.try_emplace(s->getName(), s); + } + + auto suggest = [&](StringRef newName) -> const Symbol * { + // If defined locally. + if (const Symbol *s = map.lookup(newName)) + return s; + + // If in the symbol table and not undefined. + if (const Symbol *s = symtab->find(newName)) + if (dyn_cast(s) == nullptr) + return s; + + return nullptr; + }; + + // This loop enumerates all strings of Levenshtein distance 1 as typo + // correction candidates and suggests the one that exists as a non-undefined + // symbol. + StringRef name = sym.getName(); + for (size_t i = 0, e = name.size(); i != e + 1; ++i) { + // Insert a character before name[i]. + std::string newName = (name.substr(0, i) + "0" + name.substr(i)).str(); + for (char c = '0'; c <= 'z'; ++c) { + newName[i] = c; + if (const Symbol *s = suggest(newName)) + return s; + } + if (i == e) + break; + + // Substitute name[i]. + newName = std::string(name); + for (char c = '0'; c <= 'z'; ++c) { + newName[i] = c; + if (const Symbol *s = suggest(newName)) + return s; + } + + // Transpose name[i] and name[i+1]. This is of edit distance 2 but it is + // common. + if (i + 1 < e) { + newName[i] = name[i + 1]; + newName[i + 1] = name[i]; + if (const Symbol *s = suggest(newName)) + return s; + } + + // Delete name[i]. + newName = (name.substr(0, i) + name.substr(i + 1)).str(); + if (const Symbol *s = suggest(newName)) + return s; + } + + // Case mismatch, e.g. Foo vs FOO. + for (auto &it : map) + if (name.equals_insensitive(it.first)) + return it.second; + for (Symbol *sym : symtab->getSymbols()) + if (dyn_cast(sym) == nullptr && + name.equals_insensitive(sym->getName())) + return sym; + + // The reference may be a mangled name while the definition is not. Suggest a + // missing extern "C". + if (name.startswith("__Z")) { + std::string buf = name.str(); + llvm::ItaniumPartialDemangler d; + if (!d.partialDemangle(buf.c_str())) + if (char *buf = d.getFunctionName(nullptr, nullptr)) { + const Symbol *s = suggest((Twine("_") + buf).str()); + free(buf); + if (s) { + pre_hint = ": extern \"C\" "; + return s; + } + } + } else { + StringRef name_without_underscore = name; + name_without_underscore.consume_front("_"); + const Symbol *s = nullptr; + for (auto &it : map) + if (canSuggestExternCForCXX(name_without_underscore, it.first)) { + s = it.second; + break; + } + if (!s) + for (Symbol *sym : symtab->getSymbols()) + if (canSuggestExternCForCXX(name_without_underscore, sym->getName())) { + s = sym; + break; + } + if (s) { + pre_hint = " to declare "; + post_hint = " as extern \"C\"?"; + return s; + } + } + + return nullptr; +} + static void reportUndefinedSymbol(const Undefined &sym, - const UndefinedDiag &locations) { + const UndefinedDiag &locations, + bool correctSpelling) { std::string message = "undefined symbol"; if (config->archMultiple) message += (" for arch " + getArchitectureName(config->arch())).str(); @@ -426,6 +556,17 @@ static void reportUndefinedSymbol(const Undefined &sym, ("\n>>> referenced " + Twine(totalReferences - i) + " more times") .str(); + if (correctSpelling) { + std::string pre_hint = ": ", post_hint; + if (const Symbol *corrected = + getAlternativeSpelling(sym, pre_hint, post_hint)) { + message += + "\n>>> did you mean" + pre_hint + toString(*corrected) + post_hint; + if (corrected->getFile()) + message += "\n>>> defined in: " + toString(corrected->getFile()); + } + } + if (config->undefinedSymbolTreatment == UndefinedSymbolTreatment::error) error(message); else if (config->undefinedSymbolTreatment == @@ -436,8 +577,9 @@ static void reportUndefinedSymbol(const Undefined &sym, } void macho::reportPendingUndefinedSymbols() { - for (const auto &undef : undefs) - reportUndefinedSymbol(*undef.first, undef.second); + // Enable spell corrector for the first 2 diagnostics. + for (const auto &[i, undef] : llvm::enumerate(undefs)) + reportUndefinedSymbol(*undef.first, undef.second, i < 2); // This function is called multiple times during execution. Clear the printed // diagnostics to avoid printing the same things again the next time. diff --git a/lld/test/MachO/undef-spell-corrector.s b/lld/test/MachO/undef-spell-corrector.s new file mode 100644 index 0000000..cfec062 --- /dev/null +++ b/lld/test/MachO/undef-spell-corrector.s @@ -0,0 +1,79 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o + +## Insert a character. +## The spell corrector is enabled for the first two "undefined symbol" diagnostics. +# RUN: echo 'call bcde; call abcd; call abde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.o + +## Symbols defined in DSO can be suggested. +# RUN: %lld %t.o -dylib -o %t.dylib +# RUN: not %lld %t.dylib %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=INSERT %s -DFILE=%t.dylib + +# INSERT: error: undefined symbol: abde +# INSERT-NEXT: >>> referenced by {{.*}} +# INSERT-NEXT: >>> did you mean: abcde +# INSERT-NEXT: >>> defined in: [[FILE]] +# INSERT: error: undefined symbol: abcd +# INSERT-NEXT: >>> referenced by {{.*}} +# INSERT-NEXT: >>> did you mean: abcde +# INSERT-NEXT: >>> defined in: [[FILE]] +# INSERT: error: undefined symbol: bcde +# INSERT-NEXT: >>> referenced by {{.*}} +# INSERT-NOT: >>> + +## Substitute a character. +# RUN: echo 'call bbcde; call abcdd' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=SUBST %s + +# SUBST: error: undefined symbol: abcdd +# SUBST-NEXT: >>> referenced by {{.*}} +# SUBST-NEXT: >>> did you mean: abcde +# SUBST: error: undefined symbol: bbcde +# SUBST-NEXT: >>> referenced by {{.*}} +# SUBST-NEXT: >>> did you mean: abcde + +## Delete a character. +# RUN: echo 'call aabcde; call abcdee' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=DELETE %s + +# DELETE: error: undefined symbol: abcdee +# DELETE-NEXT: >>> referenced by {{.*}} +# DELETE-NEXT: >>> did you mean: abcde +# DELETE: error: undefined symbol: aabcde +# DELETE-NEXT: >>> referenced by {{.*}} +# DELETE-NEXT: >>> did you mean: abcde + +## Transpose. +# RUN: echo 'call bacde' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t.o %t1.o -o /dev/null 2>&1 | FileCheck --check-prefix=TRANSPOSE %s + +# TRANSPOSE: error: undefined symbol: bacde +# TRANSPOSE-NEXT: >>> referenced by {{.*}} +# TRANSPOSE-NEXT: >>> did you mean: abcde + +## Missing const qualifier. +# RUN: echo 'call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s +## Local defined symbols. +# RUN: echo '__Z3fooPKi: call __Z3fooPi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CONST %s + +# CONST: error: undefined symbol: foo(int*) +# CONST-NEXT: >>> referenced by {{.*}} +# CONST-NEXT: >>> did you mean: foo(int const*) + +## Case mismatch. +# RUN: echo 'call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s +# RUN: echo '__Z3fooPKi: call __Z3FOOPKi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t1.o -demangle -o /dev/null 2>&1 | FileCheck --check-prefix=CASE %s + +# CASE: error: undefined symbol: FOO(int const*) +# CASE-NEXT: >>> referenced by {{.*}} +# CASE-NEXT: >>> did you mean: foo(int const*) + +.globl _main, abcde, __Z3fooPKi +_main: +abcde: +__Z3fooPKi: diff --git a/lld/test/MachO/undef-suggest-extern-c.s b/lld/test/MachO/undef-suggest-extern-c.s new file mode 100644 index 0000000..8c53c8e --- /dev/null +++ b/lld/test/MachO/undef-suggest-extern-c.s @@ -0,0 +1,19 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o + +## The reference is mangled while the definition is not, suggest a missing +## extern "C". +# RUN: echo 'call __Z3fooi' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s + +# CHECK: error: undefined symbol: foo(int) +# CHECK-NEXT: >>> referenced by {{.*}} +# CHECK-NEXT: >>> did you mean: extern "C" foo + +## Don't suggest for nested names like F::foo() and foo::foo(). +# RUN: echo 'call __ZN1F3fooEv; call __ZN3fooC1Ev' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o +# RUN: not ld.lld %t.o %t2.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean' + +.globl _start, _foo +_start: +_foo: diff --git a/lld/test/MachO/undef-suggest-extern-c2.s b/lld/test/MachO/undef-suggest-extern-c2.s new file mode 100644 index 0000000..152fe38 --- /dev/null +++ b/lld/test/MachO/undef-suggest-extern-c2.s @@ -0,0 +1,21 @@ +# REQUIRES: x86 +# RUN: llvm-mc -filetype=obj -triple=x86_64-apple-macos %s -o %t.o + +## The definition is mangled while the reference is not, suggest an arbitrary +## C++ overload. +# RUN: echo '.globl __Z3fooi; __Z3fooi:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t1.o +# RUN: not %lld %t.o %t1.o -demangle -o /dev/null 2>&1 | FileCheck %s + +## Check that we can suggest a local definition. +# RUN: echo '__Z3fooi: call _foo' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t2.o +# RUN: not %lld %t2.o -demangle -o /dev/null 2>&1 | FileCheck %s + +# CHECK: error: undefined symbol: foo +# CHECK-NEXT: >>> referenced by {{.*}} +# CHECK-NEXT: >>> did you mean to declare foo(int) as extern "C"? + +## Don't suggest nested names whose base name is "foo", e.g. F::foo(). +# RUN: echo '.globl __ZN1F3fooEv; __ZN1F3fooEv:' | llvm-mc -filetype=obj -triple=x86_64-apple-macos - -o %t3.o +# RUN: not %lld %t.o %t3.o -o /dev/null 2>&1 | FileCheck /dev/null --implicit-check-not='did you mean' + +call _foo -- 2.7.4