From: Sam McCall Date: Thu, 4 Oct 2018 14:09:55 +0000 (+0000) Subject: [clangd] clangd-indexer gathers refs and stores them in index files. X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=cc21779c3c0f7affde7a64a97ae33faf1b0901e5;p=platform%2Fupstream%2Fllvm.git [clangd] clangd-indexer gathers refs and stores them in index files. Reviewers: ioeric Subscribers: ilya-biryukov, MaskRay, jkorous, arphaman, kadircet, cfe-commits Differential Revision: https://reviews.llvm.org/D52531 llvm-svn: 343778 --- diff --git a/clang-tools-extra/clangd/index/IndexAction.cpp b/clang-tools-extra/clangd/index/IndexAction.cpp index 9bc7e06..68f5833 100644 --- a/clang-tools-extra/clangd/index/IndexAction.cpp +++ b/clang-tools-extra/clangd/index/IndexAction.cpp @@ -13,10 +13,11 @@ public: IndexAction(std::shared_ptr C, std::unique_ptr Includes, const index::IndexingOptions &Opts, - std::function &SymbolsCallback) + std::function SymbolsCallback, + std::function RefsCallback) : WrapperFrontendAction(index::createIndexingAction(C, Opts, nullptr)), - SymbolsCallback(SymbolsCallback), Collector(C), - Includes(std::move(Includes)), + SymbolsCallback(SymbolsCallback), RefsCallback(RefsCallback), + Collector(C), Includes(std::move(Includes)), PragmaHandler(collectIWYUHeaderMaps(this->Includes.get())) {} std::unique_ptr CreateASTConsumer(CompilerInstance &CI, @@ -41,10 +42,13 @@ public: return; } SymbolsCallback(Collector->takeSymbols()); + if (RefsCallback != nullptr) + RefsCallback(Collector->takeRefs()); } private: std::function SymbolsCallback; + std::function RefsCallback; std::shared_ptr Collector; std::unique_ptr Includes; std::unique_ptr PragmaHandler; @@ -54,20 +58,23 @@ private: std::unique_ptr createStaticIndexingAction(SymbolCollector::Options Opts, - std::function SymbolsCallback) { + std::function SymbolsCallback, + std::function RefsCallback) { index::IndexingOptions IndexOpts; IndexOpts.SystemSymbolFilter = index::IndexingOptions::SystemSymbolFilterKind::All; 
Opts.CollectIncludePath = true; Opts.CountReferences = true; Opts.Origin = SymbolOrigin::Static; + if (RefsCallback != nullptr) + Opts.RefFilter = RefKind::All; auto Includes = llvm::make_unique(); addSystemHeadersMapping(Includes.get()); Opts.Includes = Includes.get(); return llvm::make_unique( std::make_shared(std::move(Opts)), std::move(Includes), - IndexOpts, SymbolsCallback); + IndexOpts, SymbolsCallback, RefsCallback); } } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/IndexAction.h b/clang-tools-extra/clangd/index/IndexAction.h index b51bfd2..2330afc 100644 --- a/clang-tools-extra/clangd/index/IndexAction.h +++ b/clang-tools-extra/clangd/index/IndexAction.h @@ -21,10 +21,13 @@ namespace clangd { // Only a subset of SymbolCollector::Options are respected: // - include paths are always collected, and canonicalized appropriately // - references are always counted +// - main-file refs are collected (if RefsCallback is non-null) // - the symbol origin is always Static +// FIXME: refs from headers should also be collected. std::unique_ptr createStaticIndexingAction(SymbolCollector::Options Opts, - std::function SymbolsCallback); + std::function SymbolsCallback, + std::function RefsCallback); } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/index/Serialization.cpp b/clang-tools-extra/clangd/index/Serialization.cpp index 2ebd204..8784d73 100644 --- a/clang-tools-extra/clangd/index/Serialization.cpp +++ b/clang-tools-extra/clangd/index/Serialization.cpp @@ -298,17 +298,47 @@ Symbol readSymbol(Reader &Data, ArrayRef Strings) { return Sym; } +// REFS ENCODING +// A refs section has data grouped by Symbol. Each symbol has: +// - SymbolID: 20 bytes +// - NumRefs: varint +// - Ref[NumRefs] +// Fields of Ref are encoded in turn, see implementation. 
+ +void writeRefs(const SymbolID &ID, ArrayRef Refs, + const StringTableOut &Strings, raw_ostream &OS) { + OS << ID.raw(); + writeVar(Refs.size(), OS); + for (const auto &Ref : Refs) { + OS.write(static_cast(Ref.Kind)); + writeLocation(Ref.Location, Strings, OS); + } +} + +std::pair> readRefs(Reader &Data, + ArrayRef Strings) { + std::pair> Result; + Result.first = Data.consumeID(); + Result.second.resize(Data.consumeVar()); + for (auto &Ref : Result.second) { + Ref.Kind = static_cast(Data.consume8()); + Ref.Location = readLocation(Data, Strings); + } + return Result; +} + // FILE ENCODING // A file is a RIFF chunk with type 'CdIx'. // It contains the sections: // - meta: version number // - stri: string table // - symb: symbols +// - refs: references to symbols // The current versioning scheme is simple - non-current versions are rejected. // If you make a breaking change, bump this version number to invalidate stored // data. Later we may want to support some backward compatibility. -constexpr static uint32_t Version = 4; +constexpr static uint32_t Version = 5; Expected readRIFF(StringRef Data) { auto RIFF = riff::readFile(Data); @@ -342,6 +372,18 @@ Expected readRIFF(StringRef Data) { return makeError("malformed or truncated symbol"); Result.Symbols = std::move(Symbols).build(); } + if (Chunks.count("refs")) { + Reader RefsReader(Chunks.lookup("refs")); + RefSlab::Builder Refs; + while (!RefsReader.eof()) { + auto RefsBundle = readRefs(RefsReader, Strings->Strings); + for (const auto &Ref : RefsBundle.second) // FIXME: bulk insert? 
+ Refs.insert(RefsBundle.first, Ref); + } + if (RefsReader.err()) + return makeError("malformed or truncated refs"); + Result.Refs = std::move(Refs).build(); + } return std::move(Result); } @@ -363,6 +405,14 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) { Symbols.emplace_back(Sym); visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); }); } + std::vector>> Refs; + if (Data.Refs) { + for (const auto &Sym : *Data.Refs) { + Refs.emplace_back(Sym); + for (auto &Ref : Refs.back().second) + Strings.intern(Ref.Location.FileURI); + } + } std::string StringSection; { @@ -379,6 +429,16 @@ void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) { } RIFF.Chunks.push_back({riff::fourCC("symb"), SymbolSection}); + std::string RefsSection; + if (Data.Refs) { + { + raw_string_ostream RefsOS(RefsSection); + for (const auto &Sym : Refs) + writeRefs(Sym.first, Sym.second, Strings, RefsOS); + } + RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection}); + } + OS << RIFF; } @@ -428,6 +488,8 @@ std::unique_ptr loadIndex(llvm::StringRef SymbolFilename, if (auto I = readIndexFile(Buffer->get()->getBuffer())) { if (I->Symbols) Symbols = std::move(*I->Symbols); + if (I->Refs) + Refs = std::move(*I->Refs); } else { llvm::errs() << "Bad Index: " << llvm::toString(I.takeError()) << "\n"; return nullptr; diff --git a/clang-tools-extra/clangd/index/Serialization.h b/clang-tools-extra/clangd/index/Serialization.h index 3cb86db..65b53ee 100644 --- a/clang-tools-extra/clangd/index/Serialization.h +++ b/clang-tools-extra/clangd/index/Serialization.h @@ -38,26 +38,29 @@ enum class IndexFileFormat { // Holds the contents of an index file that was read. struct IndexFileIn { llvm::Optional Symbols; + llvm::Optional Refs; }; -// Parse an index file. The input must be a RIFF container chunk. +// Parse an index file. The input must be a RIFF or YAML file. llvm::Expected readIndexFile(llvm::StringRef); // Specifies the contents of an index file to be written. 
struct IndexFileOut { - const SymbolSlab *Symbols; - // TODO: Support serializing symbol occurrences. + const SymbolSlab *Symbols = nullptr; + const RefSlab *Refs = nullptr; // TODO: Support serializing Dex posting lists. IndexFileFormat Format = IndexFileFormat::RIFF; IndexFileOut() = default; IndexFileOut(const IndexFileIn &I) - : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr) {} + : Symbols(I.Symbols ? I.Symbols.getPointer() : nullptr), + Refs(I.Refs ? I.Refs.getPointer() : nullptr) {} }; // Serializes an index file. llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const IndexFileOut &O); // Convert a single symbol to YAML, a nice debug representation. std::string toYAML(const Symbol &); +std::string toYAML(const std::pair> &); // Build an in-memory static index from an index file. // The size should be relatively small, so data can be managed in memory. diff --git a/clang-tools-extra/clangd/index/YAMLSerialization.cpp b/clang-tools-extra/clangd/index/YAMLSerialization.cpp index 73df618..a426cce 100644 --- a/clang-tools-extra/clangd/index/YAMLSerialization.cpp +++ b/clang-tools-extra/clangd/index/YAMLSerialization.cpp @@ -6,6 +6,12 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// +// +// A YAML index file is a sequence of tagged entries. +// Each entry either encodes a Symbol or the list of references to a symbol +// (a "ref bundle"). 
+// +//===----------------------------------------------------------------------===// #include "Index.h" #include "Serialization.h" @@ -20,10 +26,22 @@ #include LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Symbol::IncludeHeaderWithReferences) +LLVM_YAML_IS_SEQUENCE_VECTOR(clang::clangd::Ref) +namespace { +using RefBundle = + std::pair>; +// This is a pale imitation of std::variant +struct VariantEntry { + llvm::Optional Symbol; + llvm::Optional Refs; +}; +} // namespace namespace llvm { namespace yaml { +using clang::clangd::Ref; +using clang::clangd::RefKind; using clang::clangd::Symbol; using clang::clangd::SymbolID; using clang::clangd::SymbolLocation; @@ -179,6 +197,46 @@ template <> struct ScalarEnumerationTraits { } }; +template <> struct MappingTraits { + static void mapping(IO &IO, RefBundle &Refs) { + MappingNormalization NSymbolID(IO, + Refs.first); + IO.mapRequired("ID", NSymbolID->HexString); + IO.mapRequired("References", Refs.second); + } +}; + +struct NormalizedRefKind { + NormalizedRefKind(IO &) {} + NormalizedRefKind(IO &, RefKind O) { Kind = static_cast(O); } + + RefKind denormalize(IO &) { return static_cast(Kind); } + + uint8_t Kind = 0; +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, Ref &R) { + MappingNormalization NKind(IO, R.Kind); + IO.mapRequired("Kind", NKind->Kind); + IO.mapRequired("Location", R.Location); + } +}; + +template <> struct MappingTraits { + static void mapping(IO &IO, VariantEntry &Variant) { + if (IO.mapTag("!Symbol", Variant.Symbol.hasValue())) { + if (!IO.outputting()) + Variant.Symbol.emplace(); + MappingTraits::mapping(IO, *Variant.Symbol); + } else if (IO.mapTag("!Refs", Variant.Refs.hasValue())) { + if (!IO.outputting()) + Variant.Refs.emplace(); + MappingTraits::mapping(IO, *Variant.Refs); + } + } +}; + } // namespace yaml } // namespace llvm @@ -187,23 +245,38 @@ namespace clangd { void writeYAML(const IndexFileOut &O, raw_ostream &OS) { llvm::yaml::Output Yout(OS); - for (Symbol Sym : 
*O.Symbols) // copy: Yout<< requires mutability. - Yout << Sym; + for (const auto &Sym : *O.Symbols) { + VariantEntry Entry; + Entry.Symbol = Sym; + Yout << Entry; + } + if (O.Refs) + for (auto &Sym : *O.Refs) { + VariantEntry Entry; + Entry.Refs = Sym; + Yout << Entry; + } } Expected readYAML(StringRef Data) { SymbolSlab::Builder Symbols; + RefSlab::Builder Refs; llvm::yaml::Input Yin(Data); do { - Symbol S; - Yin >> S; + VariantEntry Variant; + Yin >> Variant; if (Yin.error()) return llvm::errorCodeToError(Yin.error()); - Symbols.insert(S); + if (Variant.Symbol) + Symbols.insert(*Variant.Symbol); + if (Variant.Refs) + for (const auto &Ref : Variant.Refs->second) + Refs.insert(Variant.Refs->first, Ref); } while (Yin.nextDocument()); IndexFileIn Result; Result.Symbols.emplace(std::move(Symbols).build()); + Result.Refs.emplace(std::move(Refs).build()); return std::move(Result); } @@ -218,5 +291,16 @@ std::string toYAML(const Symbol &S) { return Buf; } +std::string toYAML(const std::pair> &Data) { + RefBundle Refs = {Data.first, Data.second}; + std::string Buf; + { + llvm::raw_string_ostream OS(Buf); + llvm::yaml::Output Yout(OS); + Yout << Refs; + } + return Buf; +} + } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/indexer/IndexerMain.cpp b/clang-tools-extra/clangd/indexer/IndexerMain.cpp index 8db7559..10490c0 100644 --- a/clang-tools-extra/clangd/indexer/IndexerMain.cpp +++ b/clang-tools-extra/clangd/indexer/IndexerMain.cpp @@ -67,18 +67,30 @@ public: else Symbols.insert(Sym); } + }, + [&](RefSlab S) { + std::lock_guard Lock(SymbolsMu); + for (const auto &Sym : S) { + // No need to merge as currently all Refs are from main file. + for (const auto &Ref : Sym.second) + Refs.insert(Sym.first, Ref); + } }) .release(); } // Awkward: we write the result in the destructor, because the executor // takes ownership so it's the easiest way to get our data back out. 
- ~IndexActionFactory() { Result.Symbols = std::move(Symbols).build(); } + ~IndexActionFactory() { + Result.Symbols = std::move(Symbols).build(); + Result.Refs = std::move(Refs).build(); + } private: IndexFileIn &Result; std::mutex SymbolsMu; SymbolSlab::Builder Symbols; + RefSlab::Builder Refs; }; } // namespace diff --git a/clang-tools-extra/unittests/clangd/SerializationTests.cpp b/clang-tools-extra/unittests/clangd/SerializationTests.cpp index 751883b..720c7df 100644 --- a/clang-tools-extra/unittests/clangd/SerializationTests.cpp +++ b/clang-tools-extra/unittests/clangd/SerializationTests.cpp @@ -13,6 +13,9 @@ #include "gmock/gmock.h" #include "gtest/gtest.h" +using testing::_; +using testing::AllOf; +using testing::Pair; using testing::UnorderedElementsAre; using testing::UnorderedElementsAreArray; namespace clang { @@ -21,6 +24,7 @@ namespace { const char *YAML = R"( --- +!Symbol ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 Name: 'Foo1' Scope: 'clang::' @@ -46,6 +50,7 @@ IncludeHeaders: References: 3 ... --- +!Symbol ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF858 Name: 'Foo2' Scope: 'clang::' @@ -64,6 +69,18 @@ Flags: 2 Signature: '-sig' CompletionSnippetSuffix: '-snippet' ... 
+!Refs +ID: 057557CEBF6E6B2DD437FBF60CC58F352D1DF856 +References: + - Kind: 4 + Location: + FileURI: file:///path/foo.cc + Start: + Line: 5 + Column: 3 + End: + Line: 5 + Column: 8 )"; MATCHER_P(ID, I, "") { return arg.ID == cantFail(SymbolID::fromStr(I)); } @@ -107,6 +124,16 @@ TEST(SerializationTest, YAMLConversions) { EXPECT_EQ(Sym2.CanonicalDeclaration.FileURI, "file:///path/bar.h"); EXPECT_FALSE(Sym2.Flags & Symbol::IndexedForCodeCompletion); EXPECT_TRUE(Sym2.Flags & Symbol::Deprecated); + + ASSERT_TRUE(bool(ParsedYAML->Refs)); + EXPECT_THAT(*ParsedYAML->Refs, + UnorderedElementsAre( + Pair(cantFail(SymbolID::fromStr( + "057557CEBF6E6B2DD437FBF60CC58F352D1DF856")), + testing::SizeIs(1)))); + auto Ref1 = ParsedYAML->Refs->begin()->second.front(); + EXPECT_EQ(Ref1.Kind, RefKind::Reference); + EXPECT_EQ(Ref1.Location.FileURI, "file:///path/foo.cc"); } std::vector YAMLFromSymbols(const SymbolSlab &Slab) { @@ -115,24 +142,32 @@ std::vector YAMLFromSymbols(const SymbolSlab &Slab) { Result.push_back(toYAML(Sym)); return Result; } +std::vector YAMLFromRefs(const RefSlab &Slab) { + std::vector Result; + for (const auto &Sym : Slab) + Result.push_back(toYAML(Sym)); + return Result; +} TEST(SerializationTest, BinaryConversions) { auto In = readIndexFile(YAML); EXPECT_TRUE(bool(In)) << In.takeError(); // Write to binary format, and parse again. - IndexFileOut Out; - Out.Symbols = In->Symbols.getPointer(); + IndexFileOut Out(*In); Out.Format = IndexFileFormat::RIFF; std::string Serialized = llvm::to_string(Out); auto In2 = readIndexFile(Serialized); - ASSERT_TRUE(bool(In2)) << In.takeError(); - ASSERT_TRUE(In->Symbols); + ASSERT_TRUE(bool(In2)) << In2.takeError(); + ASSERT_TRUE(In2->Symbols); + ASSERT_TRUE(In2->Refs); // Assert the YAML serializations match, for nice comparisons and diffs. 
EXPECT_THAT(YAMLFromSymbols(*In2->Symbols), UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols))); + EXPECT_THAT(YAMLFromRefs(*In2->Refs), + UnorderedElementsAreArray(YAMLFromRefs(*In->Refs))); } } // namespace