From 77c397f465f170df8f39f79fde93b724205b8009 Mon Sep 17 00:00:00 2001 From: Pavel Labath Date: Fri, 29 Jun 2018 11:20:29 +0000 Subject: [PATCH] UUID: Add support for arbitrary-sized module IDs Summary: The data structure is optimized for the case where the UUID size is <= 20 bytes (standard length emitted by the GNU linkers), but larger sizes are also possible. I've modified the string conversion function to support the new sizes as well. For standard UUIDs it maintains the traditional formatting (4-2-2-2-6). If a UUID is shorter, we just cut this sequence short, and for longer UUIDs it will just repeat the last 6-byte block as long as necessary. I've also modified ObjectFileELF to take advantage of the new UUIDs and avoid manually padding the UUID to 16 bytes. While there, I also made sure the computed UUID does not depend on host endianness. Reviewers: clayborg, lemo, sas, davide, espindola Subscribers: emaste, arichardson, lldb-commits Differential Revision: https://reviews.llvm.org/D48633 llvm-svn: 335963 --- lldb/include/lldb/Utility/UUID.h | 52 +++++----- lldb/source/Interpreter/OptionValueUUID.cpp | 14 +-- .../Plugins/ObjectFile/ELF/ObjectFileELF.cpp | 34 +++--- lldb/source/Utility/UUID.cpp | 114 +++++++-------------- lldb/unittests/Utility/UUIDTest.cpp | 11 ++ 5 files changed, 97 insertions(+), 128 deletions(-) diff --git a/lldb/include/lldb/Utility/UUID.h b/lldb/include/lldb/Utility/UUID.h index a6ce7de..d42c888 100644 --- a/lldb/include/lldb/Utility/UUID.h +++ b/lldb/include/lldb/Utility/UUID.h @@ -27,12 +27,6 @@ namespace lldb_private { class UUID { public: - // Most UUIDs are 16 bytes, but some Linux build-ids (SHA1) are 20. - typedef uint8_t ValueType[20]; - - //------------------------------------------------------------------ - // Constructors and Destructors - //------------------------------------------------------------------ UUID() = default; /// Creates a UUID from the data pointed to by the bytes argument. No special @@ -64,18 +58,16 @@ public: return UUID(bytes); } - void Clear() { m_num_uuid_bytes = 0; } + void Clear() { m_bytes.clear(); } void Dump(Stream *s) const; - llvm::ArrayRef GetBytes() const { - return {m_uuid, m_num_uuid_bytes}; - } + llvm::ArrayRef GetBytes() const { return m_bytes; } explicit operator bool() const { return IsValid(); } - bool IsValid() const { return m_num_uuid_bytes > 0; } + bool IsValid() const { return !m_bytes.empty(); } - std::string GetAsString(const char *separator = nullptr) const; + std::string GetAsString(llvm::StringRef separator = "-") const; size_t SetFromStringRef(llvm::StringRef str, uint32_t num_uuid_bytes = 16); @@ -99,24 +91,34 @@ public: /// The original string, with all decoded bytes removed. //------------------------------------------------------------------ static llvm::StringRef - DecodeUUIDBytesFromString(llvm::StringRef str, ValueType &uuid_bytes, - uint32_t &bytes_decoded, + DecodeUUIDBytesFromString(llvm::StringRef str, + llvm::SmallVectorImpl &uuid_bytes, uint32_t num_uuid_bytes = 16); private: - UUID(llvm::ArrayRef bytes); - - uint32_t m_num_uuid_bytes = 0; // Should be 0, 16 or 20 - ValueType m_uuid; -}; + UUID(llvm::ArrayRef bytes) : m_bytes(bytes.begin(), bytes.end()) {} -bool operator==(const UUID &lhs, const UUID &rhs); -bool operator!=(const UUID &lhs, const UUID &rhs); -bool operator<(const UUID &lhs, const UUID &rhs); -bool operator<=(const UUID &lhs, const UUID &rhs); -bool operator>(const UUID &lhs, const UUID &rhs); -bool operator>=(const UUID &lhs, const UUID &rhs); + // GNU ld generates 20-byte build-ids. Size chosen to avoid heap allocations + // for this case. + llvm::SmallVector m_bytes; + friend bool operator==(const UUID &LHS, const UUID &RHS) { + return LHS.m_bytes == RHS.m_bytes; + } + friend bool operator!=(const UUID &LHS, const UUID &RHS) { + return !(LHS == RHS); + } + friend bool operator<(const UUID &LHS, const UUID &RHS) { + return LHS.m_bytes < RHS.m_bytes; + } + friend bool operator<=(const UUID &LHS, const UUID &RHS) { + return !(RHS < LHS); + } + friend bool operator>(const UUID &LHS, const UUID &RHS) { return RHS < LHS; } + friend bool operator>=(const UUID &LHS, const UUID &RHS) { + return !(LHS < RHS); + } +}; } // namespace lldb_private #endif // LLDB_UTILITY_UUID_H diff --git a/lldb/source/Interpreter/OptionValueUUID.cpp b/lldb/source/Interpreter/OptionValueUUID.cpp index b5c6600..b940584 100644 --- a/lldb/source/Interpreter/OptionValueUUID.cpp +++ b/lldb/source/Interpreter/OptionValueUUID.cpp @@ -76,21 +76,17 @@ size_t OptionValueUUID::AutoComplete(CommandInterpreter &interpreter, ExecutionContext exe_ctx(interpreter.GetExecutionContext()); Target *target = exe_ctx.GetTargetPtr(); if (target) { - const size_t num_modules = target->GetImages().GetSize(); - if (num_modules > 0) { - UUID::ValueType uuid_bytes; - uint32_t num_bytes_decoded = 0; - UUID::DecodeUUIDBytesFromString(s, uuid_bytes, num_bytes_decoded); + llvm::SmallVector uuid_bytes; + if (UUID::DecodeUUIDBytesFromString(s, uuid_bytes).empty()) { + const size_t num_modules = target->GetImages().GetSize(); for (size_t i = 0; i < num_modules; ++i) { ModuleSP module_sp(target->GetImages().GetModuleAtIndex(i)); if (module_sp) { const UUID &module_uuid = module_sp->GetUUID(); if (module_uuid.IsValid()) { - llvm::ArrayRef decoded_bytes(uuid_bytes, - num_bytes_decoded); llvm::ArrayRef module_bytes = module_uuid.GetBytes(); - if (module_bytes.size() >= num_bytes_decoded && - module_bytes.take_front(num_bytes_decoded) == decoded_bytes) { + if (module_bytes.size() >= uuid_bytes.size() && + module_bytes.take_front(uuid_bytes.size()).equals(uuid_bytes)) { matches.AppendString(module_uuid.GetAsString()); } } diff --git a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp index de7b391..87019083 100644 --- a/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp +++ b/lldb/source/Plugins/ObjectFile/ELF/ObjectFileELF.cpp @@ -730,16 +730,17 @@ size_t ObjectFileELF::GetModuleSpecifications( data.GetDataStart(), data.GetByteSize()); } } + using u32le = llvm::support::ulittle32_t; if (gnu_debuglink_crc) { // Use 4 bytes of crc from the .gnu_debuglink section. - uint32_t uuidt[4] = {gnu_debuglink_crc, 0, 0, 0}; - uuid = UUID::fromData(uuidt, sizeof(uuidt)); + u32le data(gnu_debuglink_crc); + uuid = UUID::fromData(&data, sizeof(data)); } else if (core_notes_crc) { // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make // it look different form .gnu_debuglink crc followed by 4 bytes // of note segments crc. - uint32_t uuidt[4] = {g_core_uuid_magic, core_notes_crc, 0, 0}; - uuid = UUID::fromData(uuidt, sizeof(uuidt)); + u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)}; + uuid = UUID::fromData(data, sizeof(data)); } } @@ -909,6 +910,7 @@ bool ObjectFileELF::GetUUID(lldb_private::UUID *uuid) { if (!ParseSectionHeaders() && GetType() != ObjectFile::eTypeCoreFile) return false; + using u32le = llvm::support::ulittle32_t; if (m_uuid.IsValid()) { // We have the full build id uuid. *uuid = m_uuid; @@ -925,8 +927,8 @@ bool ObjectFileELF::GetUUID(lldb_private::UUID *uuid) { // Use 8 bytes - first 4 bytes for *magic* prefix, mainly to make it look // different form .gnu_debuglink crc - followed by 4 bytes of note // segments crc. - uint32_t uuidt[4] = {g_core_uuid_magic, core_notes_crc, 0, 0}; - m_uuid = UUID::fromData(uuidt, sizeof(uuidt)); + u32le data[] = {u32le(g_core_uuid_magic), u32le(core_notes_crc)}; + m_uuid = UUID::fromData(data, sizeof(data)); } } else { if (!m_gnu_debuglink_crc) @@ -934,8 +936,8 @@ bool ObjectFileELF::GetUUID(lldb_private::UUID *uuid) { calc_gnu_debuglink_crc32(m_data.GetDataStart(), m_data.GetByteSize()); if (m_gnu_debuglink_crc) { // Use 4 bytes of crc from the .gnu_debuglink section. - uint32_t uuidt[4] = {m_gnu_debuglink_crc, 0, 0, 0}; - m_uuid = UUID::fromData(uuidt, sizeof(uuidt)); + u32le data(m_gnu_debuglink_crc); + m_uuid = UUID::fromData(&data, sizeof(data)); } } @@ -1273,18 +1275,16 @@ ObjectFileELF::RefineModuleDetailsFromNote(lldb_private::DataExtractor &data, // Only bother processing this if we don't already have the uuid set. if (!uuid.IsValid()) { // 16 bytes is UUID|MD5, 20 bytes is SHA1. Other linkers may produce a - // build-id of a different - // length. Accept it as long as it's at least 4 bytes as it will be - // better than our own crc32. - if (note.n_descsz >= 4 && note.n_descsz <= 20) { - uint8_t uuidbuf[20]; - if (data.GetU8(&offset, &uuidbuf, note.n_descsz) == nullptr) { + // build-id of a different length. Accept it as long as it's at least + // 4 bytes as it will be better than our own crc32. + if (note.n_descsz >= 4) { + if (const uint8_t *buf = data.PeekData(offset, note.n_descsz)) { + // Save the build id as the UUID for the module. + uuid = UUID::fromData(buf, note.n_descsz); + } else { error.SetErrorString("failed to read GNU_BUILD_ID note payload"); return error; } - - // Save the build id as the UUID for the module. - uuid = UUID::fromData(uuidbuf, note.n_descsz); } } break; diff --git a/lldb/source/Utility/UUID.cpp b/lldb/source/Utility/UUID.cpp index 3dfd5d9..623ad69 100644 --- a/lldb/source/Utility/UUID.cpp +++ b/lldb/source/Utility/UUID.cpp @@ -13,6 +13,7 @@ // Project includes #include "lldb/Utility/Stream.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Format.h" // C Includes #include @@ -21,35 +22,35 @@ using namespace lldb_private; -UUID::UUID(llvm::ArrayRef bytes) { - if (bytes.size() != 20 && bytes.size() != 16) - bytes = {}; - - m_num_uuid_bytes = bytes.size(); - std::memcpy(m_uuid, bytes.data(), bytes.size()); +// Whether to put a separator after count uuid bytes. +// For the first 16 bytes we follow the traditional UUID format. After that, we +// simply put a dash after every 6 bytes. +static inline bool separate(size_t count) { + if (count >= 10) + return (count - 10) % 6 == 0; + + switch (count) { + case 4: + case 6: + case 8: + return true; + default: + return false; + } } -std::string UUID::GetAsString(const char *separator) const { +std::string UUID::GetAsString(llvm::StringRef separator) const { std::string result; - char buf[256]; - if (!separator) - separator = "-"; - const uint8_t *u = GetBytes().data(); - if (sizeof(buf) > - (size_t)snprintf(buf, sizeof(buf), "%2.2X%2.2X%2.2X%2.2X%s%2.2X%2.2X%s%2." - "2X%2.2X%s%2.2X%2.2X%s%2.2X%2.2X%2.2X%" - "2.2X%2.2X%2.2X", - u[0], u[1], u[2], u[3], separator, u[4], u[5], separator, - u[6], u[7], separator, u[8], u[9], separator, u[10], - u[11], u[12], u[13], u[14], u[15])) { - result.append(buf); - if (m_num_uuid_bytes == 20) { - if (sizeof(buf) > (size_t)snprintf(buf, sizeof(buf), - "%s%2.2X%2.2X%2.2X%2.2X", separator, - u[16], u[17], u[18], u[19])) - result.append(buf); - } + llvm::raw_string_ostream os(result); + + for (auto B : llvm::enumerate(GetBytes())) { + if (separate(B.index())) + os << separator; + + os << llvm::format_hex_no_prefix(B.value(), 2, true); } + os.flush(); + return result; } @@ -64,25 +65,24 @@ static inline int xdigit_to_int(char ch) { return ch - '0'; } -llvm::StringRef UUID::DecodeUUIDBytesFromString(llvm::StringRef p, - ValueType &uuid_bytes, - uint32_t &bytes_decoded, - uint32_t num_uuid_bytes) { - ::memset(uuid_bytes, 0, sizeof(uuid_bytes)); - size_t uuid_byte_idx = 0; +llvm::StringRef +UUID::DecodeUUIDBytesFromString(llvm::StringRef p, + llvm::SmallVectorImpl &uuid_bytes, + uint32_t num_uuid_bytes) { + uuid_bytes.clear(); while (!p.empty()) { if (isxdigit(p[0]) && isxdigit(p[1])) { int hi_nibble = xdigit_to_int(p[0]); int lo_nibble = xdigit_to_int(p[1]); // Translate the two hex nibble characters into a byte - uuid_bytes[uuid_byte_idx] = (hi_nibble << 4) + lo_nibble; + uuid_bytes.push_back((hi_nibble << 4) + lo_nibble); // Skip both hex digits p = p.drop_front(2); // Increment the byte that we are decoding within the UUID value and // break out if we are done - if (++uuid_byte_idx == num_uuid_bytes) + if (uuid_bytes.size() == num_uuid_bytes) break; } else if (p.front() == '-') { // Skip dashes @@ -92,11 +92,6 @@ llvm::StringRef UUID::DecodeUUIDBytesFromString(llvm::StringRef p, break; } } - - // Clear trailing bytes to 0. - for (uint32_t i = uuid_byte_idx; i < sizeof(ValueType); i++) - uuid_bytes[i] = 0; - bytes_decoded = uuid_byte_idx; return p; } @@ -106,52 +101,17 @@ size_t UUID::SetFromStringRef(llvm::StringRef str, uint32_t num_uuid_bytes) { // Skip leading whitespace characters p = p.ltrim(); - ValueType bytes; - uint32_t bytes_decoded = 0; + llvm::SmallVector bytes; llvm::StringRef rest = - UUID::DecodeUUIDBytesFromString(p, bytes, bytes_decoded, num_uuid_bytes); + UUID::DecodeUUIDBytesFromString(p, bytes, num_uuid_bytes); // If we successfully decoded a UUID, return the amount of characters that // were consumed - if (bytes_decoded == num_uuid_bytes) { - *this = fromData(bytes, bytes_decoded); + if (bytes.size() == num_uuid_bytes) { + *this = fromData(bytes); return str.size() - rest.size(); } // Else return zero to indicate we were not able to parse a UUID value return 0; } - -bool lldb_private::operator==(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return lhs.GetBytes() == rhs.GetBytes(); -} - -bool lldb_private::operator!=(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return !(lhs == rhs); -} - -bool lldb_private::operator<(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - if (lhs.GetBytes().size() != rhs.GetBytes().size()) - return lhs.GetBytes().size() < rhs.GetBytes().size(); - - return std::memcmp(lhs.GetBytes().data(), rhs.GetBytes().data(), - lhs.GetBytes().size()); -} - -bool lldb_private::operator<=(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return !(lhs > rhs); -} - -bool lldb_private::operator>(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return rhs < lhs; -} - -bool lldb_private::operator>=(const lldb_private::UUID &lhs, - const lldb_private::UUID &rhs) { - return !(lhs < rhs); -} diff --git a/lldb/unittests/Utility/UUIDTest.cpp b/lldb/unittests/Utility/UUIDTest.cpp index 8c64d49..e848441 100644 --- a/lldb/unittests/Utility/UUIDTest.cpp +++ b/lldb/unittests/Utility/UUIDTest.cpp @@ -71,3 +71,14 @@ TEST(UUIDTest, SetFromStringRef) { 32u, u.SetFromStringRef("404142434445464748494a4b4c4d4e4f-50515253", 16)); EXPECT_EQ(UUID::fromData("@ABCDEFGHIJKLMNO", 16), u); } + +TEST(UUIDTest, StringConverion) { + EXPECT_EQ("40414243", UUID::fromData("@ABC", 4).GetAsString()); + EXPECT_EQ("40414243-4445-4647", UUID::fromData("@ABCDEFG", 8).GetAsString()); + EXPECT_EQ("40414243-4445-4647-4849-4A4B", + UUID::fromData("@ABCDEFGHIJK", 12).GetAsString()); + EXPECT_EQ("40414243-4445-4647-4849-4A4B4C4D4E4F", + UUID::fromData("@ABCDEFGHIJKLMNO", 16).GetAsString()); + EXPECT_EQ("40414243-4445-4647-4849-4A4B4C4D4E4F-50515253", + UUID::fromData("@ABCDEFGHIJKLMNOPQRS", 20).GetAsString()); +} -- 2.7.4