};
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Inclusion&);
+// Contains information about one file in the build grpah and its direct
+// dependencies. Doesn't own the strings it references (IncludeGraph is
+// self-contained).
+struct IncludeGraphNode {
+ // True if current file is a main file rather than a header.
+ bool IsTU;
+ llvm::StringRef URI;
+ FileDigest Digest;
+ std::vector<llvm::StringRef> DirectIncludes;
+};
+// FileURI and FileInclusions are references to keys of the map containing
+// them.
+using IncludeGraph = llvm::StringMap<IncludeGraphNode>;
+
// Information captured about the inclusion graph in a translation unit.
// This includes detailed information about the direct #includes, and summary
// information about all transitive includes.
Left.end.character == Right.start.character;
}
+FileDigest digest(StringRef Content) {
+ return llvm::SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
+}
+
+Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID) {
+ bool Invalid = false;
+ StringRef Content = SM.getBufferData(FID, &Invalid);
+ if (Invalid)
+ return None;
+ return digest(Content);
+}
+
} // namespace clangd
} // namespace clang
#include "Protocol.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/SourceLocation.h"
+#include "clang/Basic/SourceManager.h"
#include "clang/Tooling/Core/Replacement.h"
+#include "llvm/Support/SHA1.h"
namespace clang {
class SourceManager;
namespace clangd {
+// We tend to generate digests for source codes in a lot of different places.
+// This represents the type for those digests to prevent us hard coding details
+// of hashing function at every place that needs to store this information.
+using FileDigest = decltype(llvm::SHA1::hash({}));
+FileDigest digest(StringRef Content);
+Optional<FileDigest> digestFile(const SourceManager &SM, FileID FID);
+
// Counts the number of UTF-16 code units needed to represent a string (LSP
// specifies string lengths in UTF-16 code units).
size_t lspLength(StringRef Code);
#include "ClangdUnit.h"
#include "Compiler.h"
#include "Logger.h"
+#include "SourceCode.h"
#include "Threading.h"
#include "Trace.h"
#include "URI.h"
QueueCV.notify_all();
}
-static BackgroundIndex::FileDigest digest(StringRef Content) {
- return SHA1::hash({(const uint8_t *)Content.data(), Content.size()});
-}
-
-static Optional<BackgroundIndex::FileDigest> digestFile(const SourceManager &SM,
- FileID FID) {
- bool Invalid = false;
- StringRef Content = SM.getBufferData(FID, &Invalid);
- if (Invalid)
- return None;
- return digest(Content);
-}
-
// Resolves URI to file paths with cache.
class URIToFileCache {
public:
};
/// Given index results from a TU, only update files in \p FilesToUpdate.
-void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
- RefSlab Refs,
+void BackgroundIndex::update(StringRef MainFile, IndexFileIn Index,
const StringMap<FileDigest> &FilesToUpdate,
BackgroundIndexStorage *IndexStorage) {
// Partition symbols/references into files.
};
StringMap<File> Files;
URIToFileCache URICache(MainFile);
- for (const auto &Sym : Symbols) {
+ for (const auto &Sym : *Index.Symbols) {
if (Sym.CanonicalDeclaration) {
auto DeclPath = URICache.resolve(Sym.CanonicalDeclaration.FileURI);
if (FilesToUpdate.count(DeclPath) != 0)
}
}
DenseMap<const Ref *, SymbolID> RefToIDs;
- for (const auto &SymRefs : Refs) {
+ for (const auto &SymRefs : *Index.Refs) {
for (const auto &R : SymRefs.second) {
auto Path = URICache.resolve(R.Location.FileURI);
if (FilesToUpdate.count(Path) != 0) {
auto Hash = FilesToUpdate.lookup(Path);
// We need to store shards before updating the index, since the latter
// consumes slabs.
- // FIXME: Store Hash in the Shard.
if (IndexStorage) {
IndexFileOut Shard;
Shard.Symbols = SS.get();
Shard.Refs = RS.get();
- Shard.Digest = &Hash;
+
if (auto Error = IndexStorage->storeShard(Path, Shard))
elog("Failed to write background-index shard for file {0}: {1}", Path,
std::move(Error));
// \p FileDigests contains file digests for the current indexed files, and all
// changed files will be added to \p FilesToUpdate.
decltype(SymbolCollector::Options::FileFilter) createFileFilter(
- const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
- llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
+ const llvm::StringMap<FileDigest> &FileDigests,
+ llvm::StringMap<FileDigest> &FilesToUpdate) {
return [&FileDigests, &FilesToUpdate](const SourceManager &SM, FileID FID) {
StringRef Path;
if (const auto *F = SM.getFileEntryForID(FID))
Symbols.size(), Refs.numRefs());
SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
- update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
- IndexStorage);
+ IndexFileIn Index;
+ Index.Symbols = std::move(Symbols);
+ Index.Refs = std::move(Refs);
+
+ update(AbsolutePath, std::move(Index), FilesToUpdate, IndexStorage);
{
// Make sure hash for the main file is always updated even if there is no
// index data in it.
LLVM_NODISCARD bool
blockUntilIdleForTest(llvm::Optional<double> TimeoutSeconds = 10);
- using FileDigest = decltype(llvm::SHA1::hash({}));
-
private:
/// Given index results from a TU, only update files in \p FilesToUpdate.
- void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
+ /// Also stores new index information on IndexStorage.
+ void update(llvm::StringRef MainFile, IndexFileIn Index,
const llvm::StringMap<FileDigest> &FilesToUpdate,
BackgroundIndexStorage *IndexStorage);
return Loc;
}
+IncludeGraphNode readIncludeGraphNode(Reader &Data,
+ llvm::ArrayRef<llvm::StringRef> Strings) {
+ IncludeGraphNode IGN;
+ IGN.IsTU = Data.consume8();
+ IGN.URI = Data.consumeString(Strings);
+ llvm::StringRef Digest = Data.consume(IGN.Digest.size());
+ std::copy(Digest.bytes_begin(), Digest.bytes_end(), IGN.Digest.begin());
+ IGN.DirectIncludes.resize(Data.consumeVar());
+ for (llvm::StringRef &Include : IGN.DirectIncludes)
+ Include = Data.consumeString(Strings);
+ return IGN;
+}
+
+void writeIncludeGraphNode(const IncludeGraphNode &IGN,
+ const StringTableOut &Strings, raw_ostream &OS) {
+ OS.write(IGN.IsTU);
+ writeVar(Strings.index(IGN.URI), OS);
+ llvm::StringRef Hash(reinterpret_cast<const char *>(IGN.Digest.data()),
+ IGN.Digest.size());
+ OS << Hash;
+ writeVar(IGN.DirectIncludes.size(), OS);
+ for (llvm::StringRef Include : IGN.DirectIncludes)
+ writeVar(Strings.index(Include), OS);
+}
+
void writeSymbol(const Symbol &Sym, const StringTableOut &Strings,
raw_ostream &OS) {
OS << Sym.ID.raw(); // TODO: once we start writing xrefs and posting lists,
// A file is a RIFF chunk with type 'CdIx'.
// It contains the sections:
// - meta: version number
-// - srcs: checksum of the source file
+// - srcs: information related to include graph
// - stri: string table
// - symb: symbols
// - refs: references to symbols
IndexFileIn Result;
if (Chunks.count("srcs")) {
- Reader Hash(Chunks.lookup("srcs"));
- Result.Digest.emplace();
- llvm::StringRef Digest = Hash.consume(Result.Digest->size());
- std::copy(Digest.bytes_begin(), Digest.bytes_end(), Result.Digest->begin());
+ Reader SrcsReader(Chunks.lookup("srcs"));
+ Result.Sources.emplace();
+ while (!SrcsReader.eof()) {
+ auto IGN = readIncludeGraphNode(SrcsReader, Strings->Strings);
+ auto Entry = Result.Sources->try_emplace(IGN.URI).first;
+ Entry->getValue() = std::move(IGN);
+ // We change all the strings inside the structure to point at the keys in
+ // the map, since it is the only copy of the string that's going to live.
+ Entry->getValue().URI = Entry->getKey();
+ for (auto &Include : Entry->getValue().DirectIncludes)
+ Include = Result.Sources->try_emplace(Include).first->getKey();
+ }
+ if (SrcsReader.err())
+ return makeError("malformed or truncated include uri");
}
if (Chunks.count("symb")) {
return std::move(Result);
}
+template <class Callback>
+void visitStrings(IncludeGraphNode &IGN, const Callback &CB) {
+ CB(IGN.URI);
+ for (llvm::StringRef &Include : IGN.DirectIncludes)
+ CB(Include);
+}
+
void writeRIFF(const IndexFileOut &Data, raw_ostream &OS) {
assert(Data.Symbols && "An index file without symbols makes no sense!");
riff::File RIFF;
}
RIFF.Chunks.push_back({riff::fourCC("meta"), Meta});
- if (Data.Digest) {
- llvm::StringRef Hash(reinterpret_cast<const char *>(Data.Digest->data()),
- Data.Digest->size());
- RIFF.Chunks.push_back({riff::fourCC("srcs"), Hash});
- }
-
StringTableOut Strings;
std::vector<Symbol> Symbols;
for (const auto &Sym : *Data.Symbols) {
Symbols.emplace_back(Sym);
visitStrings(Symbols.back(), [&](StringRef &S) { Strings.intern(S); });
}
+ std::vector<IncludeGraphNode> Sources;
+ if (Data.Sources)
+ for (const auto &Source : *Data.Sources) {
+ Sources.push_back(Source.getValue());
+ visitStrings(Sources.back(), [&](StringRef &S) { Strings.intern(S); });
+ }
+
std::vector<std::pair<SymbolID, std::vector<Ref>>> Refs;
if (Data.Refs) {
for (const auto &Sym : *Data.Refs) {
RIFF.Chunks.push_back({riff::fourCC("refs"), RefsSection});
}
+ std::string SrcsSection;
+ {
+ {
+ raw_string_ostream SrcsOS(SrcsSection);
+ for (const auto &SF : Sources)
+ writeIncludeGraphNode(SF, Strings, SrcsOS);
+ }
+ RIFF.Chunks.push_back({riff::fourCC("srcs"), SrcsSection});
+ }
+
OS << RIFF;
}
#ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
#define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_RIFF_H
+#include "Headers.h"
#include "Index.h"
#include "llvm/Support/Error.h"
// Holds the contents of an index file that was read.
struct IndexFileIn {
- using FileDigest = std::array<uint8_t, 20>;
llvm::Optional<SymbolSlab> Symbols;
llvm::Optional<RefSlab> Refs;
- // Digest of the source file that generated the contents.
- llvm::Optional<FileDigest> Digest;
+ // Keys are URIs of the source files.
+ llvm::Optional<IncludeGraph> Sources;
};
// Parse an index file. The input must be a RIFF or YAML file.
llvm::Expected<IndexFileIn> readIndexFile(llvm::StringRef);
struct IndexFileOut {
const SymbolSlab *Symbols = nullptr;
const RefSlab *Refs = nullptr;
- // Digest of the source file that generated the contents.
- const IndexFileIn::FileDigest *Digest = nullptr;
+ // Keys are URIs of the source files.
+ const IncludeGraph *Sources = nullptr;
// TODO: Support serializing Dex posting lists.
IndexFileFormat Format = IndexFileFormat::RIFF;
)cpp";
std::string A_CC = "#include \"A.h\"\nvoid g() { (void)common; }";
FS.Files[testPath("root/A.cc")] = A_CC;
- auto Digest = llvm::SHA1::hash(
- {reinterpret_cast<const uint8_t *>(A_CC.data()), A_CC.size()});
llvm::StringMap<std::string> Storage;
size_t CacheHits = 0;
EXPECT_NE(ShardSource, nullptr);
EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre());
EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")}));
- EXPECT_EQ(*ShardSource->Digest, Digest);
}
} // namespace clangd
UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
}
-TEST(SerializationTest, HashTest) {
+TEST(SerializationTest, SrcsTest) {
auto In = readIndexFile(YAML);
EXPECT_TRUE(bool(In)) << In.takeError();
- std::string TestContent("TESTCONTENT");
- auto Digest =
+ std::string TestContent("TestContent");
+ IncludeGraphNode IGN;
+ IGN.Digest =
llvm::SHA1::hash({reinterpret_cast<const uint8_t *>(TestContent.data()),
TestContent.size()});
+ IGN.DirectIncludes = {"inc1", "inc2"};
+ IGN.URI = "URI";
+ IGN.IsTU = true;
+ IncludeGraph Sources;
+ Sources[IGN.URI] = IGN;
// Write to binary format, and parse again.
IndexFileOut Out(*In);
Out.Format = IndexFileFormat::RIFF;
- Out.Digest = &Digest;
- std::string Serialized = to_string(Out);
-
- auto In2 = readIndexFile(Serialized);
- ASSERT_TRUE(bool(In2)) << In.takeError();
- ASSERT_EQ(In2->Digest, Digest);
- ASSERT_TRUE(In2->Symbols);
- ASSERT_TRUE(In2->Refs);
-
- // Assert the YAML serializations match, for nice comparisons and diffs.
- EXPECT_THAT(YAMLFromSymbols(*In2->Symbols),
- UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
- EXPECT_THAT(YAMLFromRefs(*In2->Refs),
- UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+ Out.Sources = &Sources;
+ {
+ std::string Serialized = to_string(Out);
+
+ auto In = readIndexFile(Serialized);
+ ASSERT_TRUE(bool(In)) << In.takeError();
+ ASSERT_TRUE(In->Symbols);
+ ASSERT_TRUE(In->Refs);
+ ASSERT_TRUE(In->Sources);
+ ASSERT_TRUE(In->Sources->count(IGN.URI));
+ // Assert the YAML serializations match, for nice comparisons and diffs.
+ EXPECT_THAT(YAMLFromSymbols(*In->Symbols),
+ UnorderedElementsAreArray(YAMLFromSymbols(*In->Symbols)));
+ EXPECT_THAT(YAMLFromRefs(*In->Refs),
+ UnorderedElementsAreArray(YAMLFromRefs(*In->Refs)));
+ auto IGNDeserialized = In->Sources->lookup(IGN.URI);
+ EXPECT_EQ(IGNDeserialized.Digest, IGN.Digest);
+ EXPECT_EQ(IGNDeserialized.DirectIncludes, IGN.DirectIncludes);
+ EXPECT_EQ(IGNDeserialized.URI, IGN.URI);
+ EXPECT_EQ(IGNDeserialized.IsTU, IGN.IsTU);
+ }
}
} // namespace