From e63c799a767b0f682af62eba9d1d375c59e58627 Mon Sep 17 00:00:00 2001
From: David Blaikie
Date: Mon, 29 Nov 2021 15:35:56 -0800
Subject: [PATCH] [Demangle] Add support for D simple single qualified names

This patch adds support for simple single qualified names that includes
internal mangled names and normal symbol names.

Differential Revision: https://reviews.llvm.org/D111415
---
 llvm/lib/Demangle/DLangDemangle.cpp           | 217 +++++++++++++++++++++++++-
 llvm/unittests/Demangle/DLangDemangleTest.cpp |   5 +-
 2 files changed, 219 insertions(+), 3 deletions(-)

diff --git a/llvm/lib/Demangle/DLangDemangle.cpp b/llvm/lib/Demangle/DLangDemangle.cpp
index d2f1bf4..73d5ce1 100644
--- a/llvm/lib/Demangle/DLangDemangle.cpp
+++ b/llvm/lib/Demangle/DLangDemangle.cpp
@@ -14,12 +14,214 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Demangle/Demangle.h"
+#include "llvm/Demangle/StringView.h"
 #include "llvm/Demangle/Utility.h"
 
 #include <cstring>
+#include <limits>
 
 using namespace llvm;
 using llvm::itanium_demangle::OutputBuffer;
+using llvm::itanium_demangle::StringView;
+
+namespace {
+
+/// Demangle information structure.
+struct Demangler {
+  /// Initialize the information structure we use to pass around information.
+  ///
+  /// \param Mangled String to demangle.
+  Demangler(const char *Mangled);
+
+  /// Extract and demangle the mangled symbol and append it to the output
+  /// string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling .
+  /// \see https://dlang.org/spec/abi.html#MangledName .
+  const char *parseMangle(OutputBuffer *Demangled);
+
+private:
+  /// Extract and demangle a given mangled symbol and append it to the output
+  /// string.
+  ///
+  /// \param Demangled output buffer to write the demangled name.
+  /// \param Mangled mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#name_mangling .
+  /// \see https://dlang.org/spec/abi.html#MangledName .
+  const char *parseMangle(OutputBuffer *Demangled, const char *Mangled);
+
+  /// Extract the number from a given string.
+  ///
+  /// \param Mangled string to extract the number.
+  /// \param Ret assigned result value.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \note A result larger than UINT_MAX is considered a failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#Number .
+  const char *decodeNumber(const char *Mangled, unsigned long *Ret);
+
+  /// Extract and demangle an identifier from a given mangled symbol append it
+  /// to the output string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#SymbolName .
+  const char *parseIdentifier(OutputBuffer *Demangled, const char *Mangled);
+
+  /// Extract and demangle the plain identifier from a given mangled symbol and
+  /// prepend/append it to the output string, with a special treatment for some
+  /// magic compiler generated symbols.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  /// \param Len Length of the mangled symbol name.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#LName .
+  const char *parseLName(OutputBuffer *Demangled, const char *Mangled,
+                         unsigned long Len);
+
+  /// Extract and demangle the qualified symbol from a given mangled symbol
+  /// append it to the output string.
+  ///
+  /// \param Demangled Output buffer to write the demangled name.
+  /// \param Mangled Mangled symbol to be demangled.
+  ///
+  /// \return The remaining string on success or nullptr on failure.
+  ///
+  /// \see https://dlang.org/spec/abi.html#QualifiedName .
+  const char *parseQualified(OutputBuffer *Demangled, const char *Mangled);
+
+  /// The string we are demangling.
+  const char *Str;
+};
+
+} // namespace
+
+const char *Demangler::decodeNumber(const char *Mangled, unsigned long *Ret) {
+  // Return nullptr if trying to extract something that isn't a digit.
+  if (Mangled == nullptr || !std::isdigit(*Mangled))
+    return nullptr;
+
+  unsigned long Val = 0;
+
+  do {
+    unsigned long Digit = Mangled[0] - '0';
+
+    // Check for overflow.
+    if (Val > (std::numeric_limits<unsigned int>::max() - Digit) / 10)
+      return nullptr;
+
+    Val = Val * 10 + Digit;
+    ++Mangled;
+  } while (std::isdigit(*Mangled));
+
+  if (*Mangled == '\0')
+    return nullptr;
+
+  *Ret = Val;
+  return Mangled;
+}
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled,
+                                   const char *Mangled) {
+  // A D mangled symbol is comprised of both scope and type information.
+  //    MangleName:
+  //        _D QualifiedName Type
+  //        _D QualifiedName Z
+  //        ^
+  // The caller should have guaranteed that the start pointer is at the
+  // above location.
+  // Note that type is never a function type, but only the return type of
+  // a function or the type of a variable.
+  Mangled += 2;
+
+  Mangled = parseQualified(Demangled, Mangled);
+
+  if (Mangled != nullptr) {
+    // Artificial symbols end with 'Z' and have no type.
+    if (*Mangled == 'Z')
+      ++Mangled;
+    else {
+      // TODO: Implement symbols with types.
+      return nullptr;
+    }
+  }
+
+  return Mangled;
+}
+
+const char *Demangler::parseQualified(OutputBuffer *Demangled,
+                                      const char *Mangled) {
+  // Qualified names are identifiers separated by their encoded length.
+  // Nested functions also encode their argument types without specifying
+  // what they return.
+  //    QualifiedName:
+  //        SymbolFunctionName
+  //        SymbolFunctionName QualifiedName
+  //        ^
+  //    SymbolFunctionName:
+  //        SymbolName
+  //        SymbolName TypeFunctionNoReturn
+  //        SymbolName M TypeFunctionNoReturn
+  //        SymbolName M TypeModifiers TypeFunctionNoReturn
+  // The start pointer should be at the above location.
+
+  // TODO: Parse multiple identifiers
+
+  return parseIdentifier(Demangled, Mangled);
+}
+
+const char *Demangler::parseIdentifier(OutputBuffer *Demangled,
+                                       const char *Mangled) {
+  unsigned long Len;
+
+  if (Mangled == nullptr || *Mangled == '\0')
+    return nullptr;
+
+  // TODO: Parse back references and lengthless template instances.
+
+  const char *Endptr = decodeNumber(Mangled, &Len);
+
+  if (Endptr == nullptr || Len == 0)
+    return nullptr;
+
+  if (strlen(Endptr) < Len)
+    return nullptr;
+
+  Mangled = Endptr;
+
+  // TODO: Parse template instances with a length prefix.
+
+  return parseLName(Demangled, Mangled, Len);
+}
+
+const char *Demangler::parseLName(OutputBuffer *Demangled, const char *Mangled,
+                                  unsigned long Len) {
+  *Demangled << StringView(Mangled, Len);
+  Mangled += Len;
+
+  return Mangled;
+}
+
+Demangler::Demangler(const char *Mangled) : Str(Mangled) {}
+
+const char *Demangler::parseMangle(OutputBuffer *Demangled) {
+  return parseMangle(Demangled, this->Str);
+}
 
 char *llvm::dlangDemangle(const char *MangledName) {
   if (MangledName == nullptr || strncmp(MangledName, "_D", 2) != 0)
@@ -29,8 +231,19 @@ char *llvm::dlangDemangle(const char *MangledName) {
   if (!initializeOutputBuffer(nullptr, nullptr, Demangled, 1024))
     return nullptr;
 
-  if (strcmp(MangledName, "_Dmain") == 0)
+  if (strcmp(MangledName, "_Dmain") == 0) {
     Demangled << "D main";
+  } else {
+
+    Demangler D = Demangler(MangledName);
+    MangledName = D.parseMangle(&Demangled);
+
+    // Check that the entire symbol was successfully demangled.
+    if (MangledName == nullptr || *MangledName != '\0') {
+      std::free(Demangled.getBuffer());
+      return nullptr;
+    }
+  }
 
   // OutputBuffer's internal buffer is not null terminated and therefore we need
   // to add it to comply with C null terminated strings.
@@ -40,6 +253,6 @@ char *llvm::dlangDemangle(const char *MangledName) {
     return Demangled.getBuffer();
   }
 
-  free(Demangled.getBuffer());
+  std::free(Demangled.getBuffer());
   return nullptr;
 }
diff --git a/llvm/unittests/Demangle/DLangDemangleTest.cpp b/llvm/unittests/Demangle/DLangDemangleTest.cpp
index 8a324ec..ad814cb 100644
--- a/llvm/unittests/Demangle/DLangDemangleTest.cpp
+++ b/llvm/unittests/Demangle/DLangDemangleTest.cpp
@@ -30,4 +30,7 @@ INSTANTIATE_TEST_SUITE_P(DLangDemangleTest, DLangDemangleTestFixture,
                          testing::Values(std::make_pair("_Dmain", "D main"),
                                          std::make_pair(nullptr, nullptr),
                                          std::make_pair("_Z", nullptr),
-                                         std::make_pair("_DDD", nullptr)));
+                                         std::make_pair("_DDD", nullptr),
+                                         std::make_pair("_D88", nullptr),
+                                         std::make_pair("_D8demangleZ",
+                                                        "demangle")));
-- 
2.7.4