From 7310403e3cdf8a436f94770e1a1498db05d2d091 Mon Sep 17 00:00:00 2001
From: =?utf8?q?Tomasz=20Mi=C4=85sko?=
Date: Mon, 3 May 2021 16:41:30 -0700
Subject: [PATCH] [demangler] Initial support for the new Rust mangling scheme

Add demangling support for a small subset of a new Rust mangling
scheme, with complete support planned as follow-up work.

Integrate Rust demangling into llvm-cxxfilt and use llvm-cxxfilt for
end-to-end testing. The new Rust mangling scheme uses "_R" as a prefix,
which makes it easy to disambiguate it from other mangling schemes.

The public API is modeled after __cxa_demangle / llvm::itaniumDemangle,
since potential candidates for further integration use those.

Reviewed By: dblaikie

Differential Revision: https://reviews.llvm.org/D101444
---
 llvm/include/llvm/Demangle/Demangle.h        |   3 +
 llvm/include/llvm/Demangle/RustDemangle.h    | 118 ++++++++++++
 llvm/lib/Demangle/CMakeLists.txt             |   1 +
 llvm/lib/Demangle/RustDemangle.cpp           | 276 +++++++++++++++++++++++++++
 llvm/test/Demangle/rust.test                 |  43 +++++
 llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp     |   5 +
 llvm/unittests/Demangle/CMakeLists.txt       |   1 +
 llvm/unittests/Demangle/RustDemangleTest.cpp |  90 +++++++++
 8 files changed, 537 insertions(+)
 create mode 100644 llvm/include/llvm/Demangle/RustDemangle.h
 create mode 100644 llvm/lib/Demangle/RustDemangle.cpp
 create mode 100644 llvm/test/Demangle/rust.test
 create mode 100644 llvm/unittests/Demangle/RustDemangleTest.cpp

diff --git a/llvm/include/llvm/Demangle/Demangle.h b/llvm/include/llvm/Demangle/Demangle.h
index b4006a0..c396a1d 100644
--- a/llvm/include/llvm/Demangle/Demangle.h
+++ b/llvm/include/llvm/Demangle/Demangle.h
@@ -57,6 +57,9 @@ char *microsoftDemangle(const char *mangled_name, size_t *n_read, char *buf,
                         size_t *n_buf, int *status,
                         MSDemangleFlags Flags = MSDF_None);
 
+// Demangles a Rust v0 mangled symbol. The API follows that of __cxa_demangle.
+char *rustDemangle(const char *MangledName, char *Buf, size_t *N, int *Status);
+
 /// Attempt to demangle a string using different demangling schemes.
 /// The function uses heuristics to determine which demangling scheme to use.
 /// \param MangledName - reference to string to demangle.
diff --git a/llvm/include/llvm/Demangle/RustDemangle.h b/llvm/include/llvm/Demangle/RustDemangle.h
new file mode 100644
index 0000000..e2286f7
--- /dev/null
+++ b/llvm/include/llvm/Demangle/RustDemangle.h
@@ -0,0 +1,118 @@
+//===--- RustDemangle.h -----------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEMANGLE_RUSTDEMANGLE_H
+#define LLVM_DEMANGLE_RUSTDEMANGLE_H
+
+#include "llvm/Demangle/DemangleConfig.h"
+#include "llvm/Demangle/StringView.h"
+#include "llvm/Demangle/Utility.h"
+
+namespace llvm {
+namespace rust_demangle {
+
+using llvm::itanium_demangle::OutputStream;
+using llvm::itanium_demangle::StringView;
+
+struct Identifier {
+  StringView Name;
+  bool Punycode;
+
+  bool empty() const { return Name.empty(); }
+};
+
+class Demangler {
+  // Maximum recursion level. Used to avoid stack overflow.
+  size_t MaxRecursionLevel;
+  // Current recursion level.
+  size_t RecursionLevel;
+
+  // Input string that is being demangled with "_R" prefix removed.
+  StringView Input;
+  // Position in the input string.
+  size_t Position;
+
+  // True if an error occurred.
+  bool Error;
+
+public:
+  // Demangled output.
+  OutputStream Output;
+
+  Demangler(size_t MaxRecursionLevel = 500);
+
+  bool demangle(StringView MangledName);
+
+private:
+  void demanglePath();
+
+  Identifier parseIdentifier();
+  void parseOptionalBase62Number(char Tag);
+  uint64_t parseBase62Number();
+  uint64_t parseDecimalNumber();
+
+  void print(StringView S) {
+    if (Error)
+      return;
+
+    Output += S;
+  }
+
+  char look() const {
+    if (Error || Position >= Input.size())
+      return 0;
+
+    return Input[Position];
+  }
+
+  char consume() {
+    if (Error || Position >= Input.size()) {
+      Error = true;
+      return 0;
+    }
+
+    return Input[Position++];
+  }
+
+  bool consumeIf(char Prefix) {
+    if (Error || Position >= Input.size() || Input[Position] != Prefix)
+      return false;
+
+    Position += 1;
+    return true;
+  }
+
+  /// Computes A + B. When computation wraps around sets the error and returns
+  /// false. Otherwise assigns the result to A and returns true.
+  bool addAssign(uint64_t &A, const uint64_t B) {
+    if (A > std::numeric_limits<uint64_t>::max() - B) {
+      Error = true;
+      return false;
+    }
+
+    A += B;
+    return true;
+  }
+
+  /// Computes A * B. When computation wraps around sets the error and returns
+  /// false. Otherwise assigns the result to A and returns true.
+  bool mulAssign(uint64_t &A, const uint64_t B) {
+    if (B != 0 && A > std::numeric_limits<uint64_t>::max() / B) {
+      Error = true;
+      return false;
+    }
+
+    A *= B;
+    return true;
+  }
+};
+
+} // namespace rust_demangle
+} // namespace llvm
+
+#endif
diff --git a/llvm/lib/Demangle/CMakeLists.txt b/llvm/lib/Demangle/CMakeLists.txt
index 1368d91..86e2d49 100644
--- a/llvm/lib/Demangle/CMakeLists.txt
+++ b/llvm/lib/Demangle/CMakeLists.txt
@@ -3,6 +3,7 @@ add_llvm_component_library(LLVMDemangle
   ItaniumDemangle.cpp
   MicrosoftDemangle.cpp
   MicrosoftDemangleNodes.cpp
+  RustDemangle.cpp
 
   ADDITIONAL_HEADER_DIRS
   "${LLVM_MAIN_INCLUDE_DIR}/llvm/Demangle"
diff --git a/llvm/lib/Demangle/RustDemangle.cpp b/llvm/lib/Demangle/RustDemangle.cpp
new file mode 100644
index 0000000..d196d66
--- /dev/null
+++ b/llvm/lib/Demangle/RustDemangle.cpp
@@ -0,0 +1,276 @@
+//===--- RustDemangle.cpp ---------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines a demangler for Rust v0 mangled symbols as specified in
+// https://rust-lang.github.io/rfcs/2603-rust-symbol-name-mangling-v0.html
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/RustDemangle.h"
+#include "llvm/Demangle/Demangle.h"
+
+#include <algorithm>
+#include <cassert>
+#include <cstring>
+#include <limits>
+
+using namespace llvm;
+using namespace rust_demangle;
+
+char *llvm::rustDemangle(const char *MangledName, char *Buf, size_t *N,
+                         int *Status) {
+  if (MangledName == nullptr || (Buf != nullptr && N == nullptr)) {
+    if (Status != nullptr)
+      *Status = demangle_invalid_args;
+    return nullptr;
+  }
+
+  // Return early if mangled name doesn't look like a Rust symbol.
+  StringView Mangled(MangledName);
+  if (!Mangled.startsWith("_R")) {
+    if (Status != nullptr)
+      *Status = demangle_invalid_mangled_name;
+    return nullptr;
+  }
+
+  Demangler D;
+  if (!initializeOutputStream(nullptr, nullptr, D.Output, 1024)) {
+    if (Status != nullptr)
+      *Status = demangle_memory_alloc_failure;
+    return nullptr;
+  }
+
+  if (!D.demangle(Mangled)) {
+    if (Status != nullptr)
+      *Status = demangle_invalid_mangled_name;
+    std::free(D.Output.getBuffer());
+    return nullptr;
+  }
+
+  D.Output += '\0';
+  char *Demangled = D.Output.getBuffer();
+  size_t DemangledLen = D.Output.getCurrentPosition();
+
+  if (Buf != nullptr) {
+    if (DemangledLen <= *N) {
+      std::memcpy(Buf, Demangled, DemangledLen);
+      std::free(Demangled);
+      Demangled = Buf;
+    } else {
+      std::free(Buf);
+    }
+  }
+
+  if (N != nullptr)
+    *N = DemangledLen;
+
+  if (Status != nullptr)
+    *Status = demangle_success;
+
+  return Demangled;
+}
+
+Demangler::Demangler(size_t MaxRecursionLevel)
+    : MaxRecursionLevel(MaxRecursionLevel) {}
+
+static inline bool isDigit(const char C) { return '0' <= C && C <= '9'; }
+
+static inline bool isLower(const char C) { return 'a' <= C && C <= 'z'; }
+
+static inline bool isUpper(const char C) { return 'A' <= C && C <= 'Z'; }
+
+/// Returns true if C is a valid mangled character: <0-9a-zA-Z_>.
+static inline bool isValid(const char C) {
+  return isDigit(C) || isLower(C) || isUpper(C) || C == '_';
+}
+
+// Demangles Rust v0 mangled symbol. Returns true when successful, and false
+// otherwise. The demangled symbol is stored in Output field. It is
+// responsibility of the caller to free the memory behind the output stream.
+//
+// <symbol-name> = "_R" [<decimal-number>] <path> [<instantiating-crate>]
+bool Demangler::demangle(StringView Mangled) {
+  Position = 0;
+  Error = false;
+  RecursionLevel = 0;
+
+  if (!Mangled.consumeFront("_R")) {
+    Error = true;
+    return false;
+  }
+  Input = Mangled;
+
+  demanglePath();
+
+  // FIXME parse optional <instantiating-crate>.
+
+  if (Position != Input.size())
+    Error = true;
+
+  return !Error;
+}
+
+// <path> = "C" <identifier>               // crate root
+//        | "M" <impl-path> <type>         // <T> (inherent impl)
+//        | "X" <impl-path> <type> <path>  // <T as Trait> (trait impl)
+//        | "Y" <type> <path>              // <T as Trait> (trait definition)
+//        | "N" <ns> <path> <identifier>   // ...::ident (nested path)
+//        | "I" <path> {<generic-arg>} "E" // ...<T, U> (generic args)
+//        | <backref>
+// <identifier> = [<disambiguator>] <undisambiguated-identifier>
+// <ns> = "C"      // closure
+//      | "S"      // shim
+//      | <A-Z>    // other special namespaces
+//      | <a-z>    // internal namespaces
+void Demangler::demanglePath() {
+  if (Error || RecursionLevel >= MaxRecursionLevel) {
+    Error = true;
+    return;
+  }
+  RecursionLevel += 1;
+
+  switch (consume()) {
+  case 'C': {
+    parseOptionalBase62Number('s');
+    Identifier Ident = parseIdentifier();
+    print(Ident.Name);
+    break;
+  }
+  case 'N': {
+    char NS = consume();
+    if (!isLower(NS) && !isUpper(NS)) {
+      Error = true;
+      break;
+    }
+    demanglePath();
+
+    parseOptionalBase62Number('s');
+    Identifier Ident = parseIdentifier();
+
+    if (!Ident.empty()) {
+      // FIXME print special namespaces:
+      // * "C" closures
+      // * "S" shim
+      print("::");
+      print(Ident.Name);
+    }
+    break;
+  }
+  default:
+    // FIXME parse remaining productions.
+    Error = true;
+    break;
+  }
+
+  RecursionLevel -= 1;
+}
+
+// <undisambiguated-identifier> = ["u"] <decimal-number> ["_"] <bytes>
+Identifier Demangler::parseIdentifier() {
+  bool Punycode = consumeIf('u');
+  uint64_t Bytes = parseDecimalNumber();
+
+  // Underscore resolves the ambiguity when identifier starts with a decimal
+  // digit or another underscore.
+  consumeIf('_');
+
+  if (Error || Bytes > Input.size() - Position) {
+    Error = true;
+    return {};
+  }
+  StringView S = Input.substr(Position, Bytes);
+  Position += Bytes;
+
+  if (!std::all_of(S.begin(), S.end(), isValid)) {
+    Error = true;
+    return {};
+  }
+
+  return {S, Punycode};
+}
+
+// Parses optional base 62 number. The presence of a number is determined using
+// Tag.
+void Demangler::parseOptionalBase62Number(char Tag) {
+  // Parsing result is currently unused.
+  if (consumeIf(Tag))
+    parseBase62Number();
+}
+
+// Parses base 62 number with <0-9a-zA-Z> as digits. Number is terminated by
+// "_". All values are offset by 1, so that "_" encodes 0, "0_" encodes 1,
+// "1_" encodes 2, etc.
+//
+// <base-62-number> = {<0-9a-zA-Z>} "_"
+uint64_t Demangler::parseBase62Number() {
+  if (consumeIf('_'))
+    return 0;
+
+  uint64_t Value = 0;
+
+  while (true) {
+    uint64_t Digit;
+    char C = consume();
+
+    if (C == '_') {
+      break;
+    } else if (isDigit(C)) {
+      Digit = C - '0';
+    } else if (isLower(C)) {
+      Digit = 10 + (C - 'a');
+    } else if (isUpper(C)) {
+      Digit = 10 + 26 + (C - 'A');
+    } else {
+      Error = true;
+      return 0;
+    }
+
+    if (!mulAssign(Value, 62))
+      return 0;
+
+    if (!addAssign(Value, Digit))
+      return 0;
+  }
+
+  if (!addAssign(Value, 1))
+    return 0;
+
+  return Value;
+}
+
+// Parses a decimal number that had been encoded without any leading zeros.
+//
+// <decimal-number> = "0"
+//                  | <1-9> {<0-9>}
+uint64_t Demangler::parseDecimalNumber() {
+  char C = look();
+  if (!isDigit(C)) {
+    Error = true;
+    return 0;
+  }
+
+  if (C == '0') {
+    consume();
+    return 0;
+  }
+
+  uint64_t Value = 0;
+
+  while (isDigit(look())) {
+    if (!mulAssign(Value, 10)) {
+      Error = true;
+      return 0;
+    }
+
+    uint64_t D = consume() - '0';
+    if (!addAssign(Value, D))
+      return 0;
+  }
+
+  return Value;
+}
diff --git a/llvm/test/Demangle/rust.test b/llvm/test/Demangle/rust.test
new file mode 100644
index 0000000..1be0692
--- /dev/null
+++ b/llvm/test/Demangle/rust.test
@@ -0,0 +1,43 @@
+RUN: llvm-cxxfilt -n < %s | FileCheck --match-full-lines %s
+
+CHECK: a::main
+       _RNvC1a4main
+
+CHECK: hello::rust
+       _RNvCshGpAVYOtgW1_5hello4rust
+
+CHECK: a::b::c
+       _RNvNvC1a1b1c
+
+; Invalid mangled characters
+
+CHECK: _RNvC2a.1c
+       _RNvC2a.1c
+
+CHECK: _RNvC2a$1c
+       _RNvC2a$1c
+
+; Invalid identifier length (UINT64_MAX + 3, which happens to be ok after a wraparound).
+
+CHECK: _RNvC2ab18446744073709551618xy
+       _RNvC2ab18446744073709551618xy
+
+; Mangling scheme includes an optional encoding version. When present it would
+; indicate an encoding we don't support yet. Check that it is rejected:
+
+CHECK: _R0NvC1a4main
+       _R0NvC1a4main
+
+; Early EOF
+
+CHECK: _RNv
+       _RNv
+
+CHECK: _RNvC
+       _RNvC
+
+CHECK: _RNvC1a5main
+       _RNvC1a5main
+
+CHECK: _RNvC1a20abc
+       _RNvC1a20abc
diff --git a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
index 93d6322..ac569ab 100644
--- a/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
+++ b/llvm/tools/llvm-cxxfilt/llvm-cxxfilt.cpp
@@ -97,6 +97,11 @@ static std::string demangle(const std::string &Mangled) {
     Undecorated = itaniumDemangle(DecoratedStr + 6, nullptr, nullptr, &Status);
   }
 
+  if (!Undecorated &&
+      (DecoratedLength >= 2 && strncmp(DecoratedStr, "_R", 2) == 0)) {
+    Undecorated = rustDemangle(DecoratedStr, nullptr, nullptr, &Status);
+  }
+
   std::string Result(Undecorated ? Prefix + Undecorated : Mangled);
   free(Undecorated);
   return Result;
diff --git a/llvm/unittests/Demangle/CMakeLists.txt b/llvm/unittests/Demangle/CMakeLists.txt
index 8db2595..4bcc9bb 100644
--- a/llvm/unittests/Demangle/CMakeLists.txt
+++ b/llvm/unittests/Demangle/CMakeLists.txt
@@ -7,5 +7,6 @@ add_llvm_unittest(DemangleTests
   DemangleTest.cpp
   ItaniumDemangleTest.cpp
   PartialDemangleTest.cpp
+  RustDemangleTest.cpp
   StringViewTest.cpp
   )
diff --git a/llvm/unittests/Demangle/RustDemangleTest.cpp b/llvm/unittests/Demangle/RustDemangleTest.cpp
new file mode 100644
index 0000000..670e48e
--- /dev/null
+++ b/llvm/unittests/Demangle/RustDemangleTest.cpp
@@ -0,0 +1,90 @@
+//===------------------ RustDemangleTest.cpp ------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Demangle/Demangle.h"
+#include "gmock/gmock.h"
+#include "gtest/gtest.h"
+
+#include <cstdlib>
+
+TEST(RustDemangle, Success) {
+  char *Demangled =
+      llvm::rustDemangle("_RNvC1a4main", nullptr, nullptr, nullptr);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+
+  // With status.
+  int Status = 0;
+  Demangled = llvm::rustDemangle("_RNvC1a4main", nullptr, nullptr, &Status);
+  EXPECT_EQ(Status, llvm::demangle_success);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+
+  // With status and length.
+  size_t N = 0;
+  Demangled = llvm::rustDemangle("_RNvC1a4main", nullptr, &N, &Status);
+  EXPECT_EQ(Status, llvm::demangle_success);
+  EXPECT_EQ(N, 8u);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+}
+
+TEST(RustDemangle, Invalid) {
+  int Status = 0;
+  char *Demangled = nullptr;
+
+  // Invalid prefix.
+  Demangled = llvm::rustDemangle("_ABCDEF", nullptr, nullptr, &Status);
+  EXPECT_EQ(Status, llvm::demangle_invalid_mangled_name);
+  EXPECT_EQ(Demangled, nullptr);
+
+  // Correct prefix but still invalid.
+  Demangled = llvm::rustDemangle("_RRR", nullptr, nullptr, &Status);
+  EXPECT_EQ(Status, llvm::demangle_invalid_mangled_name);
+  EXPECT_EQ(Demangled, nullptr);
+}
+
+TEST(RustDemangle, OutputBufferWithoutLength) {
+  char *Buffer = static_cast<char *>(std::malloc(1024));
+  ASSERT_NE(Buffer, nullptr);
+
+  int Status = 0;
+  char *Demangled =
+      llvm::rustDemangle("_RNvC1a4main", Buffer, nullptr, &Status);
+
+  EXPECT_EQ(Status, llvm::demangle_invalid_args);
+  EXPECT_EQ(Demangled, nullptr);
+  std::free(Buffer);
+}
+
+TEST(RustDemangle, OutputBuffer) {
+  size_t N = 1024;
+  char *Buffer = static_cast<char *>(std::malloc(N));
+  ASSERT_NE(Buffer, nullptr);
+
+  int Status = 0;
+  char *Demangled = llvm::rustDemangle("_RNvC1a4main", Buffer, &N, &Status);
+
+  EXPECT_EQ(Status, llvm::demangle_success);
+  EXPECT_EQ(Demangled, Buffer);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+}
+
+TEST(RustDemangle, SmallOutputBuffer) {
+  size_t N = 1;
+  char *Buffer = static_cast<char *>(std::malloc(N));
+  ASSERT_NE(Buffer, nullptr);
+
+  int Status = 0;
+  char *Demangled = llvm::rustDemangle("_RNvC1a4main", Buffer, &N, &Status);
+
+  EXPECT_EQ(Status, llvm::demangle_success);
+  EXPECT_STREQ(Demangled, "a::main");
+  std::free(Demangled);
+}
-- 
2.7.4