From b515fabb3bca172f13a52d144824b56180dad158 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Thu, 18 Oct 2018 10:43:50 +0000 Subject: [PATCH] [clangd] Encode Line/Column as a 32-bits integer. Summary: This would buy us more memory. Using a 32-bits integer is enough for most human-readable source code (up to 4M lines and 4K columns). Previsouly, we used 8 bytes for a position, now 4 bytes, it would save us 8 bytes for each Ref and each Symbol instance. For LLVM-project binary index file, we save ~13% memory. | Before | After | | 412MB | 355MB | Reviewers: sammccall Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D53363 llvm-svn: 344735 --- clang-tools-extra/clangd/FindSymbols.cpp | 8 ++--- clang-tools-extra/clangd/XRefs.cpp | 8 ++--- clang-tools-extra/clangd/index/Index.cpp | 24 ++++++++++++-- clang-tools-extra/clangd/index/Index.h | 23 +++++++++++--- clang-tools-extra/clangd/index/Serialization.cpp | 8 ++--- clang-tools-extra/clangd/index/SymbolCollector.cpp | 4 +-- .../clangd/index/YAMLSerialization.cpp | 37 +++++++++++++++++++--- .../unittests/clangd/FileIndexTests.cpp | 8 ++--- clang-tools-extra/unittests/clangd/IndexTests.cpp | 23 +++++++++++--- .../unittests/clangd/SymbolCollectorTests.cpp | 30 +++++++++--------- 10 files changed, 126 insertions(+), 47 deletions(-) diff --git a/clang-tools-extra/clangd/FindSymbols.cpp b/clang-tools-extra/clangd/FindSymbols.cpp index 0b00012..129674f 100644 --- a/clang-tools-extra/clangd/FindSymbols.cpp +++ b/clang-tools-extra/clangd/FindSymbols.cpp @@ -140,10 +140,10 @@ getWorkspaceSymbols(StringRef Query, int Limit, const SymbolIndex *const Index, Location L; L.uri = URIForFile((*Path)); Position Start, End; - Start.line = CD.Start.Line; - Start.character = CD.Start.Column; - End.line = CD.End.Line; - End.character = CD.End.Column; + Start.line = CD.Start.line(); + Start.character = CD.Start.column(); + End.line = CD.End.line(); + End.character = CD.End.column(); L.range = {Start, End}; SymbolKind SK = indexSymbolKindToSymbolKind(Sym.SymInfo.Kind); std::string Scope = Sym.Scope; diff --git a/clang-tools-extra/clangd/XRefs.cpp b/clang-tools-extra/clangd/XRefs.cpp index 6dc65dc..54ab994 100644 --- a/clang-tools-extra/clangd/XRefs.cpp +++ b/clang-tools-extra/clangd/XRefs.cpp @@ -57,10 +57,10 @@ llvm::Optional toLSPLocation(const SymbolLocation &Loc, } Location LSPLoc; LSPLoc.uri = URIForFile(*Path); - LSPLoc.range.start.line = Loc.Start.Line; - LSPLoc.range.start.character = Loc.Start.Column; - LSPLoc.range.end.line = Loc.End.Line; - LSPLoc.range.end.character = Loc.End.Column; + LSPLoc.range.start.line = Loc.Start.line(); + LSPLoc.range.start.character = Loc.Start.column(); + LSPLoc.range.end.line = Loc.End.line(); + LSPLoc.range.end.character = Loc.End.column(); return LSPLoc; } diff --git a/clang-tools-extra/clangd/index/Index.cpp b/clang-tools-extra/clangd/index/Index.cpp index 058be6e..3c61137 100644 --- a/clang-tools-extra/clangd/index/Index.cpp +++ b/clang-tools-extra/clangd/index/Index.cpp @@ -8,6 +8,7 @@ //===----------------------------------------------------------------------===// #include "Index.h" +#include "Logger.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/Error.h" @@ -18,11 +19,30 @@ namespace clang { namespace clangd { using namespace llvm; +constexpr uint32_t SymbolLocation::Position::MaxLine; +constexpr uint32_t SymbolLocation::Position::MaxColumn; +void SymbolLocation::Position::setLine(uint32_t L) { + if (L > MaxLine) { + log("Set an overflowed Line {0}", L); + Line = MaxLine; + return; + } + Line = L; +} +void SymbolLocation::Position::setColumn(uint32_t Col) { + if (Col > MaxColumn) { + log("Set an overflowed Column {0}", Col); + Column = MaxColumn; + return; + } + Column = Col; +} + raw_ostream &operator<<(raw_ostream &OS, const SymbolLocation &L) { if (!L) return OS << "(none)"; - return OS << L.FileURI << "[" << L.Start.Line << ":" << L.Start.Column << "-" - << L.End.Line << ":" << L.End.Column << ")"; + return OS << L.FileURI << "[" << L.Start.line() << ":" << L.Start.column() + << "-" << L.End.line() << ":" << L.End.column() << ")"; } SymbolID::SymbolID(StringRef USR) diff --git a/clang-tools-extra/clangd/index/Index.h b/clang-tools-extra/clangd/index/Index.h index 9f09375..1ec856b 100644 --- a/clang-tools-extra/clangd/index/Index.h +++ b/clang-tools-extra/clangd/index/Index.h @@ -33,10 +33,23 @@ namespace clangd { struct SymbolLocation { // Specify a position (Line, Column) of symbol. Using Line/Column allows us to // build LSP responses without reading the file content. + // + // Position is encoded into 32 bits to save space. + // If Line/Column overflow, the value will be their maximum value. struct Position { - uint32_t Line = 0; // 0-based + void setLine(uint32_t Line); + uint32_t line() const { return Line; } + void setColumn(uint32_t Column); + uint32_t column() const { return Column; } + + static constexpr uint32_t MaxLine = (1 << 20) - 1; + static constexpr uint32_t MaxColumn = (1 << 12) - 1; + + // Clients should use getters and setters to access these members. + // FIXME: hide these members. + uint32_t Line : 20; // 0-based // Using UTF-16 code units. - uint32_t Column = 0; // 0-based + uint32_t Column : 12; // 0-based }; // The URI of the source file where a symbol occurs. @@ -50,11 +63,13 @@ struct SymbolLocation { }; inline bool operator==(const SymbolLocation::Position &L, const SymbolLocation::Position &R) { - return std::tie(L.Line, L.Column) == std::tie(R.Line, R.Column); + return std::make_tuple(L.line(), L.column()) == + std::make_tuple(R.line(), R.column()); } inline bool operator<(const SymbolLocation::Position &L, const SymbolLocation::Position &R) { - return std::tie(L.Line, L.Column) < std::tie(R.Line, R.Column); + return std::make_tuple(L.line(), L.column()) < + std::make_tuple(R.line(), R.column()); } inline bool operator==(const SymbolLocation &L, const SymbolLocation &R) { return std::tie(L.FileURI, L.Start, L.End) == diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index 35cca727..d1f637a 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -232,8 +232,8 @@ void writeLocation(const SymbolLocation &Loc, const StringTableOut &Strings, raw_ostream &OS) { writeVar(Strings.index(Loc.FileURI), OS); for (const auto &Endpoint : {Loc.Start, Loc.End}) { - writeVar(Endpoint.Line, OS); - writeVar(Endpoint.Column, OS); + writeVar(Endpoint.line(), OS); + writeVar(Endpoint.column(), OS); } } @@ -241,8 +241,8 @@ SymbolLocation readLocation(Reader &Data, ArrayRef Strings) { SymbolLocation Loc; Loc.FileURI = Data.consumeString(Strings); for (auto *Endpoint : {&Loc.Start, &Loc.End}) { - Endpoint->Line = Data.consumeVar(); - Endpoint->Column = Data.consumeVar(); + Endpoint->setLine(Data.consumeVar()); + Endpoint->setColumn(Data.consumeVar()); } return Loc; } diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index 75f0eff..0cbc1c8 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -192,8 +192,8 @@ getTokenRange(SourceLocation TokLoc, const SourceManager &SM, auto CreatePosition = [&SM](SourceLocation Loc) { auto LSPLoc = sourceLocToPosition(SM, Loc); SymbolLocation::Position Pos; - Pos.Line = LSPLoc.line; - Pos.Column = LSPLoc.character; + Pos.setLine(LSPLoc.line); + Pos.setColumn(LSPLoc.character); return Pos; }; diff --git a/clang-tools-extra/clangd/index/YAMLSerialization.cpp b/clang-tools-extra/clangd/index/YAMLSerialization.cpp index 06c4ef78..5c0f3b3 100644 --- a/clang-tools-extra/clangd/index/YAMLSerialization.cpp +++ b/clang-tools-extra/clangd/index/YAMLSerialization.cpp @@ -19,6 +19,7 @@ #include "dex/Dex.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Errc.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/YAMLTraits.h" @@ -36,6 +37,13 @@ struct VariantEntry { llvm::Optional Symbol; llvm::Optional Refs; }; +// A class helps YAML to serialize the 32-bit encoded position (Line&Column), +// as YAMLIO can't directly map bitfields. +struct YPosition { + uint32_t Line; + uint32_t Column; +}; + } // namespace namespace llvm { namespace yaml { @@ -94,18 +102,39 @@ struct NormalizedSymbolOrigin { uint8_t Origin = 0; }; -template <> struct MappingTraits { - static void mapping(IO &IO, SymbolLocation::Position &Value) { +template <> struct MappingTraits { + static void mapping(IO &IO, YPosition &Value) { IO.mapRequired("Line", Value.Line); IO.mapRequired("Column", Value.Column); } }; +struct NormalizedPosition { + using Position = clang::clangd::SymbolLocation::Position; + NormalizedPosition(IO &) {} + NormalizedPosition(IO &, const Position &Pos) { + P.Line = Pos.line(); + P.Column = Pos.column(); + } + + Position denormalize(IO &) { + Position Pos; + Pos.setLine(P.Line); + Pos.setColumn(P.Column); + return Pos; + } + YPosition P; +}; + template <> struct MappingTraits { static void mapping(IO &IO, SymbolLocation &Value) { IO.mapRequired("FileURI", Value.FileURI); - IO.mapRequired("Start", Value.Start); - IO.mapRequired("End", Value.End); + MappingNormalization NStart( + IO, Value.Start); + IO.mapRequired("Start", NStart->P); + MappingNormalization NEnd( + IO, Value.End); + IO.mapRequired("End", NEnd->P); } }; diff --git a/clang-tools-extra/unittests/clangd/FileIndexTests.cpp b/clang-tools-extra/unittests/clangd/FileIndexTests.cpp index 1295a5c..1ead937 100644 --- a/clang-tools-extra/unittests/clangd/FileIndexTests.cpp +++ b/clang-tools-extra/unittests/clangd/FileIndexTests.cpp @@ -32,10 +32,10 @@ using testing::Pair; using testing::UnorderedElementsAre; MATCHER_P(RefRange, Range, "") { - return std::tie(arg.Location.Start.Line, arg.Location.Start.Column, - arg.Location.End.Line, arg.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return std::make_tuple(arg.Location.Start.line(), arg.Location.Start.column(), + arg.Location.End.line(), arg.Location.End.column()) == + std::make_tuple(Range.start.line, Range.start.character, + Range.end.line, Range.end.character); } MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; } MATCHER_P(DeclURI, U, "") { return arg.CanonicalDeclaration.FileURI == U; } diff --git a/clang-tools-extra/unittests/clangd/IndexTests.cpp b/clang-tools-extra/unittests/clangd/IndexTests.cpp index 7279291..2e6ce8f 100644 --- a/clang-tools-extra/unittests/clangd/IndexTests.cpp +++ b/clang-tools-extra/unittests/clangd/IndexTests.cpp @@ -31,13 +31,28 @@ namespace { MATCHER_P(Named, N, "") { return arg.Name == N; } MATCHER_P(RefRange, Range, "") { - return std::tie(arg.Location.Start.Line, arg.Location.Start.Column, - arg.Location.End.Line, arg.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return std::make_tuple(arg.Location.Start.line(), arg.Location.Start.column(), + arg.Location.End.line(), arg.Location.End.column()) == + std::make_tuple(Range.start.line, Range.start.character, + Range.end.line, Range.end.character); } MATCHER_P(FileURI, F, "") { return arg.Location.FileURI == F; } +TEST(SymbolLocation, Position) { + using Position = SymbolLocation::Position; + Position Pos; + + Pos.setLine(1); + EXPECT_EQ(1u, Pos.line()); + Pos.setColumn(2); + EXPECT_EQ(2u, Pos.column()); + + Pos.setLine(Position::MaxLine + 1); // overflow + EXPECT_EQ(Pos.line(), Position::MaxLine); + Pos.setColumn(Position::MaxColumn + 1); // overflow + EXPECT_EQ(Pos.column(), Position::MaxColumn); +} + TEST(SymbolSlab, FindAndIterate) { SymbolSlab::Builder B; B.insert(symbol("Z")); diff --git a/clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp b/clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp index 506d355..7e22919 100644 --- a/clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp +++ b/clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp @@ -63,19 +63,19 @@ MATCHER_P2(IncludeHeaderWithRef, IncludeHeader, References, "") { return (arg.IncludeHeader == IncludeHeader) && (arg.References == References); } MATCHER_P(DeclRange, Pos, "") { - return std::tie(arg.CanonicalDeclaration.Start.Line, - arg.CanonicalDeclaration.Start.Column, - arg.CanonicalDeclaration.End.Line, - arg.CanonicalDeclaration.End.Column) == - std::tie(Pos.start.line, Pos.start.character, Pos.end.line, - Pos.end.character); + return std::make_tuple(arg.CanonicalDeclaration.Start.line(), + arg.CanonicalDeclaration.Start.column(), + arg.CanonicalDeclaration.End.line(), + arg.CanonicalDeclaration.End.column()) == + std::make_tuple(Pos.start.line, Pos.start.character, Pos.end.line, + Pos.end.character); } MATCHER_P(DefRange, Pos, "") { - return std::tie(arg.Definition.Start.Line, - arg.Definition.Start.Column, arg.Definition.End.Line, - arg.Definition.End.Column) == - std::tie(Pos.start.line, Pos.start.character, Pos.end.line, - Pos.end.character); + return std::make_tuple( + arg.Definition.Start.line(), arg.Definition.Start.column(), + arg.Definition.End.line(), arg.Definition.End.column()) == + std::make_tuple(Pos.start.line, Pos.start.character, Pos.end.line, + Pos.end.character); } MATCHER_P(RefCount, R, "") { return int(arg.References) == R; } MATCHER_P(ForCodeCompletion, IsIndexedForCodeCompletion, "") { @@ -86,10 +86,10 @@ MATCHER(Deprecated, "") { return arg.Flags & Symbol::Deprecated; } MATCHER(RefRange, "") { const Ref &Pos = testing::get<0>(arg); const Range &Range = testing::get<1>(arg); - return std::tie(Pos.Location.Start.Line, Pos.Location.Start.Column, - Pos.Location.End.Line, Pos.Location.End.Column) == - std::tie(Range.start.line, Range.start.character, Range.end.line, - Range.end.character); + return std::make_tuple(Pos.Location.Start.line(), Pos.Location.Start.column(), + Pos.Location.End.line(), Pos.Location.End.column()) == + std::make_tuple(Range.start.line, Range.start.character, + Range.end.line, Range.end.character); } testing::Matcher &> HaveRanges(const std::vector Ranges) { -- 2.7.4