From 668ac94ba46d65cdedbe4194243c1d46c5f62ff1 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 24 Oct 2018 06:58:42 +0000 Subject: [PATCH] [clangd] Truncate SymbolID to 16 bytes. Summary: The goal is 8 bytes, which has a nonzero risk of collisions with huge indexes. This patch should shake out any issues with truncation at all, we can lower further later. Reviewers: ioeric Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D53587 llvm-svn: 345113 --- clang-tools-extra/clangd/index/Index.cpp | 7 ++++-- clang-tools-extra/clangd/index/Index.h | 9 +++++--- clang-tools-extra/clangd/index/Serialization.cpp | 4 ++-- .../unittests/clangd/SerializationTests.cpp | 27 +++++++++++----------- 4 files changed, 26 insertions(+), 21 deletions(-) diff --git a/clang-tools-extra/clangd/index/Index.cpp b/clang-tools-extra/clangd/index/Index.cpp index a803be4..69fe771 100644 --- a/clang-tools-extra/clangd/index/Index.cpp +++ b/clang-tools-extra/clangd/index/Index.cpp @@ -43,8 +43,11 @@ raw_ostream &operator<<(raw_ostream &OS, const SymbolLocation &L) { << "-" << L.End.line() << ":" << L.End.column() << ")"; } -SymbolID::SymbolID(StringRef USR) - : HashValue(SHA1::hash(arrayRefFromStringRef(USR))) {} +SymbolID::SymbolID(StringRef USR) { + auto Hash = SHA1::hash(arrayRefFromStringRef(USR)); + static_assert(sizeof(Hash) >= RawSize, "RawSize larger than SHA1"); + memcpy(HashValue.data(), Hash.data(), RawSize); +} raw_ostream &operator<<(raw_ostream &OS, const SymbolID &ID) { return OS << toHex(ID.raw()); diff --git a/clang-tools-extra/clangd/index/Index.h b/clang-tools-extra/clangd/index/Index.h index 9fe6b38..85759ca 100644 --- a/clang-tools-extra/clangd/index/Index.h +++ b/clang-tools-extra/clangd/index/Index.h @@ -89,7 +89,7 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &); // The class identifies a particular C++ symbol (class, function, method, etc). // // As USRs (Unified Symbol Resolution) could be large, especially for functions -// with long type arguments, SymbolID is using 160-bits SHA1(USR) values to +// with long type arguments, SymbolID is using truncated SHA1(USR) values to // guarantee the uniqueness of symbols while using a relatively small amount of // memory (vs storing USRs directly). // @@ -106,13 +106,16 @@ public: return HashValue < Sym.HashValue; } - constexpr static size_t RawSize = 20; + // The stored hash is truncated to RawSize bytes. + // This trades off memory against the number of symbols we can handle. + // FIXME: can we reduce this further to 8 bytes? + constexpr static size_t RawSize = 16; llvm::StringRef raw() const { return StringRef(reinterpret_cast(HashValue.data()), RawSize); } static SymbolID fromRaw(llvm::StringRef); - // Returns a 40-bytes hex encoded string. + // Returns a hex encoded string. std::string str() const; static llvm::Expected fromStr(llvm::StringRef); diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index fa08eb1..7d02e9c 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -300,7 +300,7 @@ Symbol readSymbol(Reader &Data, ArrayRef Strings) { // REFS ENCODING // A refs section has data grouped by Symbol. Each symbol has: -// - SymbolID: 20 bytes +// - SymbolID: 16 bytes // - NumRefs: varint // - Ref[NumRefs] // Fields of Ref are encoded in turn, see implementation. @@ -338,7 +338,7 @@ std::pair> readRefs(Reader &Data, // The current versioning scheme is simple - non-current versions are rejected. // If you make a breaking change, bump this version number to invalidate stored // data. Later we may want to support some backward compatibility. -constexpr static uint32_t Version = 5; +constexpr static uint32_t Version = 6; Expected readRIFF(StringRef Data) { auto RIFF = riff::readFile(Data); diff --git a/clang-tools-extra/unittests/clangd/SerializationTests.cpp b/clang-tools-extra/unittests/clangd/SerializationTests.cpp index 16b16e5..af3290f 100644 --- a/clang-tools-extra/unittests/clangd/SerializationTests.cpp +++ b/clang-tools-extra/unittests/clangd/SerializationTests.cpp @@ -27,7 +27,7 @@ namespace { const char *YAML = R"( --- !Symbol -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 +ID: 057557CEBF6E6B2DD437FBF60CC58F35 Name: 'Foo1' Scope: 'clang::' SymInfo: @@ -53,7 +53,7 @@ IncludeHeaders: ... --- !Symbol -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858 +ID: 057557CEBF6E6B2DD437FBF60CC58F36 Name: 'Foo2' Scope: 'clang::' SymInfo: @@ -72,7 +72,7 @@ Signature: '-sig' CompletionSnippetSuffix: '-snippet' ... !Refs -ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 +ID: 057557CEBF6E6B2DD437FBF60CC58F35 References: - Kind: 4 Location: @@ -98,15 +98,14 @@ TEST(SerializationTest, YAMLConversions) { auto ParsedYAML = readIndexFile(YAML); ASSERT_TRUE(bool(ParsedYAML)) << ParsedYAML.takeError(); ASSERT_TRUE(bool(ParsedYAML->Symbols)); - EXPECT_THAT( - *ParsedYAML->Symbols, - UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF856"), - ID("057557CEBF6E6B2DD437FBF60CC58F352D1DF858"))); + EXPECT_THAT(*ParsedYAML->Symbols, + UnorderedElementsAre(ID("057557CEBF6E6B2DD437FBF60CC58F35"), + ID("057557CEBF6E6B2DD437FBF60CC58F36"))); auto Sym1 = *ParsedYAML->Symbols->find( - cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF856"))); + cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F35"))); auto Sym2 = *ParsedYAML->Symbols->find( - cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F352D1DF858"))); + cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F36"))); EXPECT_THAT(Sym1, QName("clang::Foo1")); EXPECT_EQ(Sym1.Signature, ""); @@ -128,11 +127,11 @@ TEST(SerializationTest, YAMLConversions) { EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); ASSERT_TRUE(bool(ParsedYAML->Refs)); - EXPECT_THAT(*ParsedYAML->Refs, - UnorderedElementsAre( - Pair(cantFail(SymbolID::fromStr( - "057557CEBF6E6B2DD437FBF60CC58F352D1DF856")), - testing::SizeIs(1)))); + EXPECT_THAT( + *ParsedYAML->Refs, + UnorderedElementsAre( + Pair(cantFail(SymbolID::fromStr("057557CEBF6E6B2DD437FBF60CC58F35")), + testing::SizeIs(1)))); auto Ref1 = ParsedYAML->Refs->begin()->second.front(); EXPECT_EQ(Ref1.Kind, RefKind::Reference); EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc"); -- 2.7.4