From d81e3146e30b5b0bb2b5c740405305d58fd18bf3 Mon Sep 17 00:00:00 2001 From: Haojian Wu Date: Fri, 31 Aug 2018 12:54:13 +0000 Subject: [PATCH] [clangd] Collect symbol occurrences in SymbolCollector. SymbolCollector will be used for two cases: - collect Symbol type only, used for indexing preamble AST. - collect Symbol and SymbolOccurrences, used for indexing main AST. For finding local references from the AST, we will implement it in other ways. llvm-svn: 341208 --- clang-tools-extra/clangd/index/Index.cpp | 43 ++++++++++++ clang-tools-extra/clangd/index/Index.h | 81 ++++++++++++++++++++-- clang-tools-extra/clangd/index/SymbolCollector.cpp | 74 ++++++++++++++++---- clang-tools-extra/clangd/index/SymbolCollector.h | 16 +++++ .../unittests/clangd/SymbolCollectorTests.cpp | 78 +++++++++++++++++++-- 5 files changed, 265 insertions(+), 27 deletions(-) diff --git a/clang-tools-extra/clangd/index/Index.cpp b/clang-tools-extra/clangd/index/Index.cpp index eef4360..bee7e0a 100644 --- a/clang-tools-extra/clangd/index/Index.cpp +++ b/clang-tools-extra/clangd/index/Index.cpp @@ -128,5 +128,48 @@ SymbolSlab SymbolSlab::Builder::build() && { return SymbolSlab(std::move(NewArena), std::move(Symbols)); } +raw_ostream &operator<<(raw_ostream &OS, SymbolOccurrenceKind K) { + if (K == SymbolOccurrenceKind::Unknown) + return OS << "Unknown"; + static const std::vector Messages = {"Decl", "Def", "Ref"}; + bool VisitedOnce = false; + for (unsigned I = 0; I < Messages.size(); ++I) { + if (static_cast(K) & 1u << I) { + if (VisitedOnce) + OS << ", "; + OS << Messages[I]; + VisitedOnce = true; + } + } + return OS; +} + +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SymbolOccurrence &Occurrence) { + OS << Occurrence.Location << ":" << Occurrence.Kind; + return OS; +} + +void SymbolOccurrenceSlab::insert(const SymbolID &SymID, + const SymbolOccurrence &Occurrence) { + assert(!Frozen && + "Can't insert a symbol occurrence after the slab has been frozen!"); + auto &SymOccurrences = Occurrences[SymID]; + SymOccurrences.push_back(Occurrence); + SymOccurrences.back().Location.FileURI = + UniqueStrings.save(Occurrence.Location.FileURI); +} + +void SymbolOccurrenceSlab::freeze() { + // Deduplicate symbol occurrenes. + for (auto &IDAndOccurrence : Occurrences) { + auto &Occurrence = IDAndOccurrence.getSecond(); + std::sort(Occurrence.begin(), Occurrence.end()); + Occurrence.erase(std::unique(Occurrence.begin(), Occurrence.end()), + Occurrence.end()); + } + Frozen = true; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/Index.h b/clang-tools-extra/clangd/index/Index.h index c813ab8..f5b96b4 100644 --- a/clang-tools-extra/clangd/index/Index.h +++ b/clang-tools-extra/clangd/index/Index.h @@ -32,9 +32,6 @@ struct SymbolLocation { uint32_t Line = 0; // 0-based // Using UTF-16 code units. uint32_t Column = 0; // 0-based - bool operator==(const Position& P) const { - return Line == P.Line && Column == P.Column; - } }; // The URI of the source file where a symbol occurs. @@ -45,11 +42,23 @@ struct SymbolLocation { Position End; explicit operator bool() const { return !FileURI.empty(); } - bool operator==(const SymbolLocation& Loc) const { - return std::tie(FileURI, Start, End) == - std::tie(Loc.FileURI, Loc.Start, Loc.End); - } }; +inline bool operator==(const SymbolLocation::Position &L, + const SymbolLocation::Position &R) { + return std::tie(L.Line, L.Column) == std::tie(R.Line, R.Column); +} +inline bool operator<(const SymbolLocation::Position &L, + const SymbolLocation::Position &R) { + return std::tie(L.Line, L.Column) < std::tie(R.Line, R.Column); +} +inline bool operator==(const SymbolLocation &L, const SymbolLocation &R) { + return std::tie(L.FileURI, L.Start, L.End) == + std::tie(R.FileURI, R.Start, R.End); +} +inline bool operator<(const SymbolLocation &L, const SymbolLocation &R) { + return std::tie(L.FileURI, L.Start, L.End) < + std::tie(R.FileURI, R.Start, R.End); +} llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolLocation &); // The class identifies a particular C++ symbol (class, function, method, etc). @@ -314,6 +323,9 @@ inline SymbolOccurrenceKind operator&(SymbolOccurrenceKind A, return static_cast(static_cast(A) & static_cast(B)); } +static const SymbolOccurrenceKind AllOccurrenceKinds = + SymbolOccurrenceKind::Declaration | SymbolOccurrenceKind::Definition | + SymbolOccurrenceKind::Reference; // Represents a symbol occurrence in the source file. It could be a // declaration/definition/reference occurrence. @@ -324,6 +336,61 @@ struct SymbolOccurrence { SymbolLocation Location; SymbolOccurrenceKind Kind = SymbolOccurrenceKind::Unknown; }; +inline bool operator<(const SymbolOccurrence &L, const SymbolOccurrence &R) { + return std::tie(L.Location, L.Kind) < std::tie(R.Location, R.Kind); +} +inline bool operator==(const SymbolOccurrence &L, const SymbolOccurrence &R) { + return std::tie(L.Location, L.Kind) == std::tie(R.Location, R.Kind); +} +llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, + const SymbolOccurrence &Occurrence); + +// An efficient structure of storing large set of symbol occurrences in memory. +// Filenames are deduplicated. +class SymbolOccurrenceSlab { +public: + using const_iterator = + llvm::DenseMap>::const_iterator; + using iterator = const_iterator; + + SymbolOccurrenceSlab() : UniqueStrings(Arena) {} + + // Define move semantics for the slab, allowing assignment from an rvalue. + // Implicit move assignment is deleted by the compiler because + // StringSaver has a reference type member. + SymbolOccurrenceSlab(SymbolOccurrenceSlab &&Slab) = default; + SymbolOccurrenceSlab &operator=(SymbolOccurrenceSlab &&RHS) { + assert(RHS.Frozen && + "SymbolOcucrrenceSlab must be frozen when move assigned!"); + Arena = std::move(RHS.Arena); + Frozen = true; + Occurrences = std::move(RHS.Occurrences); + return *this; + } + + const_iterator begin() const { return Occurrences.begin(); } + const_iterator end() const { return Occurrences.end(); } + + // Adds a symbol occurrence. + // This is a deep copy: underlying FileURI will be owned by the slab. + void insert(const SymbolID &SymID, const SymbolOccurrence &Occurrence); + + llvm::ArrayRef find(const SymbolID &ID) const { + assert(Frozen && "SymbolOccurrenceSlab must be frozen before looking up!"); + auto It = Occurrences.find(ID); + if (It == Occurrences.end()) + return {}; + return It->second; + } + + void freeze(); + +private: + bool Frozen = false; + llvm::BumpPtrAllocator Arena; + llvm::UniqueStringSaver UniqueStrings; + llvm::DenseMap> Occurrences; +}; struct FuzzyFindRequest { /// \brief A query string for the fuzzy find. This is matched against symbols' diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index 6c6a792..67c1939 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -182,7 +182,24 @@ getIncludeHeader(llvm::StringRef QName, const SourceManager &SM, return toURI(SM, Header, Opts); } -// Return the symbol location of the token at \p Loc. +// Return the symbol range of the token at \p TokLoc. +std::pair +getTokenRange(SourceLocation TokLoc, const SourceManager &SM, + const LangOptions &LangOpts) { + auto CreatePosition = [&SM](SourceLocation Loc) { + auto LSPLoc = sourceLocToPosition(SM, Loc); + SymbolLocation::Position Pos; + Pos.Line = LSPLoc.line; + Pos.Column = LSPLoc.character; + return Pos; + }; + + auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts); + return {CreatePosition(TokLoc), + CreatePosition(TokLoc.getLocWithOffset(TokenLength))}; +} + +// Return the symbol location of the token at \p TokLoc. llvm::Optional getTokenLocation(SourceLocation TokLoc, const SourceManager &SM, const SymbolCollector::Options &Opts, @@ -194,19 +211,9 @@ getTokenLocation(SourceLocation TokLoc, const SourceManager &SM, FileURIStorage = std::move(*U); SymbolLocation Result; Result.FileURI = FileURIStorage; - auto TokenLength = clang::Lexer::MeasureTokenLength(TokLoc, SM, LangOpts); - - auto CreatePosition = [&SM](SourceLocation Loc) { - auto LSPLoc = sourceLocToPosition(SM, Loc); - SymbolLocation::Position Pos; - Pos.Line = LSPLoc.line; - Pos.Column = LSPLoc.character; - return Pos; - }; - - Result.Start = CreatePosition(TokLoc); - auto EndLoc = TokLoc.getLocWithOffset(TokenLength); - Result.End = CreatePosition(EndLoc); + auto Range = getTokenRange(TokLoc, SM, LangOpts); + Result.Start = Range.first; + Result.End = Range.second; return std::move(Result); } @@ -224,6 +231,11 @@ bool isPreferredDeclaration(const NamedDecl &ND, index::SymbolRoleSet Roles) { match(decl(isExpansionInMainFile()), ND, ND.getASTContext()).empty(); } +SymbolOccurrenceKind toOccurrenceKind(index::SymbolRoleSet Roles) { + return static_cast( + static_cast(AllOccurrenceKinds) & Roles); +} + } // namespace SymbolCollector::SymbolCollector(Options Opts) : Opts(std::move(Opts)) {} @@ -308,11 +320,16 @@ bool SymbolCollector::handleDeclOccurence( // Mark D as referenced if this is a reference coming from the main file. // D may not be an interesting symbol, but it's cheaper to check at the end. auto &SM = ASTCtx->getSourceManager(); + auto SpellingLoc = SM.getSpellingLoc(Loc); if (Opts.CountReferences && (Roles & static_cast(index::SymbolRole::Reference)) && - SM.getFileID(SM.getSpellingLoc(Loc)) == SM.getMainFileID()) + SM.getFileID(SpellingLoc) == SM.getMainFileID()) ReferencedDecls.insert(ND); + if ((static_cast(Opts.OccurrenceFilter) & Roles) && + SM.getFileID(SpellingLoc) == SM.getMainFileID()) + DeclOccurrences[ND].emplace_back(SpellingLoc, Roles); + // Don't continue indexing if this is a mere reference. if (!(Roles & static_cast(index::SymbolRole::Declaration) || Roles & static_cast(index::SymbolRole::Definition))) @@ -436,8 +453,35 @@ void SymbolCollector::finish() { IncRef(SymbolID(USR)); } } + + const auto &SM = ASTCtx->getSourceManager(); + auto* MainFileEntry = SM.getFileEntryForID(SM.getMainFileID()); + + if (auto MainFileURI = toURI(SM, MainFileEntry->getName(), Opts)) { + std::string MainURI = *MainFileURI; + for (const auto &It : DeclOccurrences) { + if (auto ID = getSymbolID(It.first)) { + if (Symbols.find(*ID)) { + for (const auto &LocAndRole : It.second) { + SymbolOccurrence Occurrence; + auto Range = + getTokenRange(LocAndRole.first, SM, ASTCtx->getLangOpts()); + Occurrence.Location.Start = Range.first; + Occurrence.Location.End = Range.second; + Occurrence.Location.FileURI = MainURI; + Occurrence.Kind = toOccurrenceKind(LocAndRole.second); + SymbolOccurrences.insert(*ID, Occurrence); + } + } + } + } + } else { + log("Failed to create URI for main file: {0}", MainFileEntry->getName()); + } + ReferencedDecls.clear(); ReferencedMacros.clear(); + DeclOccurrences.clear(); } const Symbol *SymbolCollector::addDeclaration(const NamedDecl &ND, diff --git a/clang-tools-extra/clangd/index/SymbolCollector.h b/clang-tools-extra/clangd/index/SymbolCollector.h index bc882e4..fb98b68 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.h +++ b/clang-tools-extra/clangd/index/SymbolCollector.h @@ -52,6 +52,10 @@ public: const CanonicalIncludes *Includes = nullptr; // Populate the Symbol.References field. bool CountReferences = false; + /// The symbol occurrence kind that will be collected. + /// If not set (Unknown), SymbolCollector will not collect any symbol + /// occurrences. + SymbolOccurrenceKind OccurrenceFilter = SymbolOccurrenceKind::Unknown; // Every symbol collected will be stamped with this origin. SymbolOrigin Origin = SymbolOrigin::Unknown; /// Collect macros. @@ -86,6 +90,11 @@ public: SymbolSlab takeSymbols() { return std::move(Symbols).build(); } + SymbolOccurrenceSlab takeOccurrences() { + SymbolOccurrences.freeze(); + return std::move(SymbolOccurrences); + } + void finish() override; private: @@ -94,14 +103,21 @@ private: // All Symbols collected from the AST. SymbolSlab::Builder Symbols; + // All symbol occurrences collected from the AST. + // Only symbols declared in preamble (from #inclues) and references from the + // main file will be included. + SymbolOccurrenceSlab SymbolOccurrences; ASTContext *ASTCtx; std::shared_ptr PP; std::shared_ptr CompletionAllocator; std::unique_ptr CompletionTUInfo; Options Opts; + using DeclOccurrence = std::pair; // Symbols referenced from the current TU, flushed on finish(). llvm::DenseSet ReferencedDecls; llvm::DenseSet ReferencedMacros; + llvm::DenseMap> + DeclOccurrences; // Maps canonical declaration provided by clang to canonical declaration for // an index symbol, if clangd prefers a different declaration than that // provided by clang. For example, friend declaration might be considered diff --git a/clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp b/clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp index 666d0bb..279ee0e 100644 --- a/clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp +++ b/clang-tools-extra/unittests/clangd/SymbolCollectorTests.cpp @@ -28,9 +28,15 @@ #include #include +namespace clang { +namespace clangd { + +namespace { + using testing::AllOf; using testing::Eq; using testing::Field; +using testing::IsEmpty; using testing::Not; using testing::UnorderedElementsAre; using testing::UnorderedElementsAreArray; @@ -74,11 +80,18 @@ MATCHER_P(Refs, R, "") { return int(arg.References) == R; } MATCHER_P(ForCodeCompletion, IsIndexedForCodeCompletion, "") { return arg.IsIndexedForCodeCompletion == IsIndexedForCodeCompletion; } - -namespace clang { -namespace clangd { - -namespace { +MATCHER(OccurrenceRange, "") { + const SymbolOccurrence &Pos = testing::get<0>(arg); + const Range &Range = testing::get<1>(arg); + return std::tie(Pos.Location.Start.Line, Pos.Location.Start.Column, + Pos.Location.End.Line, Pos.Location.End.Column) == + std::tie(Range.start.line, Range.start.character, Range.end.line, + Range.end.character); +} +testing::Matcher &> +HaveRanges(const std::vector Ranges) { + return testing::UnorderedPointwise(OccurrenceRange(), Ranges); +} class ShouldCollectSymbolTest : public ::testing::Test { public: @@ -237,6 +250,7 @@ public: llvm::MemoryBuffer::getMemBuffer(MainCode)); Invocation.run(); Symbols = Factory->Collector->takeSymbols(); + SymbolOccurrences = Factory->Collector->takeOccurrences(); return true; } @@ -247,6 +261,7 @@ protected: std::string TestFileName; std::string TestFileURI; SymbolSlab Symbols; + SymbolOccurrenceSlab SymbolOccurrences; SymbolCollector::Options CollectorOpts; std::unique_ptr PragmaHandler; }; @@ -413,6 +428,59 @@ o]](); )); } +TEST_F(SymbolCollectorTest, Occurrences) { + Annotations Header(R"( + class $foo[[Foo]] { + public: + $foo[[Foo]]() {} + $foo[[Foo]](int); + }; + class $bar[[Bar]]; + void $func[[func]](); + )"); + Annotations Main(R"( + class $bar[[Bar]] {}; + + void $func[[func]](); + + void fff() { + $foo[[Foo]] foo; + $bar[[Bar]] bar; + $func[[func]](); + int abc = 0; + $foo[[Foo]] foo2 = abc; + } + )"); + Annotations SymbolsOnlyInMainCode(R"( + int a; + void b() {} + static const int c = 0; + class d {}; + )"); + CollectorOpts.OccurrenceFilter = AllOccurrenceKinds; + runSymbolCollector(Header.code(), + (Main.code() + SymbolsOnlyInMainCode.code()).str()); + auto HeaderSymbols = TestTU::withHeaderCode(Header.code()).headerSymbols(); + + EXPECT_THAT(SymbolOccurrences.find(findSymbol(Symbols, "Foo").ID), + HaveRanges(Main.ranges("foo"))); + EXPECT_THAT(SymbolOccurrences.find(findSymbol(Symbols, "Bar").ID), + HaveRanges(Main.ranges("bar"))); + EXPECT_THAT(SymbolOccurrences.find(findSymbol(Symbols, "func").ID), + HaveRanges(Main.ranges("func"))); + + // Retrieve IDs for symbols *only* in the main file, and verify these symbols + // are not collected. + auto MainSymbols = + TestTU::withHeaderCode(SymbolsOnlyInMainCode.code()).headerSymbols(); + EXPECT_THAT(SymbolOccurrences.find(findSymbol(MainSymbols, "a").ID), + IsEmpty()); + EXPECT_THAT(SymbolOccurrences.find(findSymbol(MainSymbols, "b").ID), + IsEmpty()); + EXPECT_THAT(SymbolOccurrences.find(findSymbol(MainSymbols, "c").ID), + IsEmpty()); +} + TEST_F(SymbolCollectorTest, References) { const std::string Header = R"( class W; -- 2.7.4