Introduce shard storage to auto-index.
author: Kadir Cetinkaya <kadircet@google.com>
Fri, 16 Nov 2018 09:03:56 +0000 (09:03 +0000)
committer: Kadir Cetinkaya <kadircet@google.com>
Fri, 16 Nov 2018 09:03:56 +0000 (09:03 +0000)
Reviewers: sammccall, ioeric

Reviewed By: sammccall

Subscribers: llvm-commits, mgorny, Eugene.Zelenko, ilya-biryukov, jkorous, arphaman, cfe-commits

Differential Revision: https://reviews.llvm.org/D54269

llvm-svn: 347038

clang-tools-extra/clangd/CMakeLists.txt
clang-tools-extra/clangd/index/Background.cpp
clang-tools-extra/clangd/index/Background.h
clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp [new file with mode: 0644]
clang-tools-extra/unittests/clangd/BackgroundIndexTests.cpp

index b3f4fc6..bf59eb9 100644 (file)
@@ -38,6 +38,7 @@ add_clang_library(clangDaemon
   XRefs.cpp
 
   index/Background.cpp
+  index/BackgroundIndexStorage.cpp
   index/CanonicalIncludes.cpp
   index/FileIndex.cpp
   index/Index.cpp
index 9be2257..da96f8c 100644 (file)
@@ -24,6 +24,9 @@
 #include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Support/SHA1.h"
+
+#include <memory>
+#include <queue>
 #include <random>
 #include <string>
 
@@ -31,21 +34,22 @@ using namespace llvm;
 namespace clang {
 namespace clangd {
 
-BackgroundIndex::BackgroundIndex(Context BackgroundContext,
-                                 StringRef ResourceDir,
-                                 const FileSystemProvider &FSProvider,
-                                 ArrayRef<std::string> URISchemes,
-                                 size_t ThreadPoolSize)
+// Stores the configuration and the shard storage factory, then starts
+// \p ThreadPoolSize worker threads that each execute run().
+// \p ThreadPoolSize must be greater than zero and \p IndexStorageFactory must
+// be non-null; both are checked with asserts.
+BackgroundIndex::BackgroundIndex(
+    Context BackgroundContext, StringRef ResourceDir,
+    const FileSystemProvider &FSProvider, ArrayRef<std::string> URISchemes,
+    BackgroundIndexStorage::Factory IndexStorageFactory, size_t ThreadPoolSize)
     : SwapIndex(make_unique<MemIndex>()), ResourceDir(ResourceDir),
       FSProvider(FSProvider), BackgroundContext(std::move(BackgroundContext)),
-      URISchemes(URISchemes) {
+      URISchemes(URISchemes),
+      IndexStorageFactory(std::move(IndexStorageFactory)) {
   assert(ThreadPoolSize > 0 && "Thread pool size can't be zero.");
+  // The parameter shadows the member and has already been moved from in the
+  // initializer list above, so the member must be checked explicitly.
+  assert(this->IndexStorageFactory && "Storage factory can not be null!");
   while (ThreadPoolSize--) {
     ThreadPool.emplace_back([this] { run(); });
     // Set priority to low, since background indexing is a long running task we
     // do not want to eat up cpu when there are any other high priority threads.
     // FIXME: In the future we might want a more general way of handling this to
-    // support tasks with various priorities.
+    // support tasks with various priorities.
     setThreadPriority(ThreadPool.back(), ThreadPriority::Low);
   }
 }
@@ -97,9 +101,10 @@ void BackgroundIndex::blockUntilIdleForTest() {
 
+// Queues \p Cmd for background indexing. The shard storage for \p Directory is
+// obtained from IndexStorageFactory before QueueMu is taken; the task is then
+// added under the lock and all waiters on QueueCV are notified.
 void BackgroundIndex::enqueue(StringRef Directory,
                               tooling::CompileCommand Cmd) {
+  BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Directory);
   {
     std::lock_guard<std::mutex> Lock(QueueMu);
-    enqueueLocked(std::move(Cmd));
+    enqueueLocked(std::move(Cmd), IndexStorage);
   }
   QueueCV.notify_all();
 }
@@ -110,6 +115,7 @@ void BackgroundIndex::enqueueAll(StringRef Directory,
   // FIXME: this function may be slow. Perhaps enqueue a task to re-read the CDB
   // from disk and enqueue the commands asynchronously?
   auto Cmds = CDB.getAllCompileCommands();
+  BackgroundIndexStorage *IndexStorage = IndexStorageFactory(Directory);
   SPAN_ATTACH(Tracer, "commands", int64_t(Cmds.size()));
   std::mt19937 Generator(std::random_device{}());
   std::shuffle(Cmds.begin(), Cmds.end(), Generator);
@@ -117,17 +123,18 @@ void BackgroundIndex::enqueueAll(StringRef Directory,
   {
     std::lock_guard<std::mutex> Lock(QueueMu);
     for (auto &Cmd : Cmds)
-      enqueueLocked(std::move(Cmd));
+      enqueueLocked(std::move(Cmd), IndexStorage);
   }
   QueueCV.notify_all();
 }
 
-void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd) {
+void BackgroundIndex::enqueueLocked(tooling::CompileCommand Cmd,
+                                    BackgroundIndexStorage *IndexStorage) {
   Queue.push_back(Bind(
-      [this](tooling::CompileCommand Cmd) {
+      [this, IndexStorage](tooling::CompileCommand Cmd) {
         std::string Filename = Cmd.Filename;
         Cmd.CommandLine.push_back("-resource-dir=" + ResourceDir);
-        if (auto Error = index(std::move(Cmd)))
+        if (auto Error = index(std::move(Cmd), IndexStorage))
           log("Indexing {0} failed: {1}", Filename, std::move(Error));
       },
       std::move(Cmd)));
@@ -179,7 +186,8 @@ private:
 /// Given index results from a TU, only update files in \p FilesToUpdate.
 void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
                              RefSlab Refs,
-                             const StringMap<FileDigest> &FilesToUpdate) {
+                             const StringMap<FileDigest> &FilesToUpdate,
+                             BackgroundIndexStorage *IndexStorage) {
   // Partition symbols/references into files.
   struct File {
     DenseSet<const Symbol *> Symbols;
@@ -227,20 +235,35 @@ void BackgroundIndex::update(StringRef MainFile, SymbolSlab Symbols,
     for (const auto *R : F.second.Refs)
       Refs.insert(RefToIDs[R], *R);
 
+    auto SS = llvm::make_unique<SymbolSlab>(std::move(Syms).build());
+    auto RS = llvm::make_unique<RefSlab>(std::move(Refs).build());
+
+    auto Hash = FilesToUpdate.lookup(Path);
+    // We need to store shards before updating the index, since the latter
+    // consumes slabs.
+    // FIXME: Store Hash in the Shard.
+    if (IndexStorage) {
+      IndexFileOut Shard;
+      Shard.Symbols = SS.get();
+      Shard.Refs = RS.get();
+      if (auto Error = IndexStorage->storeShard(Path, Shard))
+        elog("Failed to write background-index shard for file {0}: {1}", Path,
+             std::move(Error));
+    }
+
     std::lock_guard<std::mutex> Lock(DigestsMu);
     // This can override a newer version that is added in another thread,
     // if this thread sees the older version but finishes later. This should be
     // rare in practice.
-    IndexedFileDigests[Path] = FilesToUpdate.lookup(Path);
-    IndexedSymbols.update(Path,
-                          make_unique<SymbolSlab>(std::move(Syms).build()),
-                          make_unique<RefSlab>(std::move(Refs).build()));
+    IndexedFileDigests[Path] = Hash;
+    IndexedSymbols.update(Path, std::move(SS), std::move(RS));
   }
 }
 
 // Creates a filter to not collect index results from files with unchanged
 // digests.
-// \p FileDigests contains file digests for the current indexed files, and all changed files will be added to \p FilesToUpdate.
+// \p FileDigests contains file digests for the current indexed files, and all
+// changed files will be added to \p FilesToUpdate.
 decltype(SymbolCollector::Options::FileFilter) createFileFilter(
     const llvm::StringMap<BackgroundIndex::FileDigest> &FileDigests,
     llvm::StringMap<BackgroundIndex::FileDigest> &FilesToUpdate) {
@@ -269,7 +292,8 @@ decltype(SymbolCollector::Options::FileFilter) createFileFilter(
   };
 }
 
-Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
+Error BackgroundIndex::index(tooling::CompileCommand Cmd,
+                             BackgroundIndexStorage *IndexStorage) {
   trace::Span Tracer("BackgroundIndex");
   SPAN_ATTACH(Tracer, "file", Cmd.Filename);
   SmallString<128> AbsolutePath;
@@ -342,7 +366,8 @@ Error BackgroundIndex::index(tooling::CompileCommand Cmd) {
       Symbols.size(), Refs.numRefs());
   SPAN_ATTACH(Tracer, "symbols", int(Symbols.size()));
   SPAN_ATTACH(Tracer, "refs", int(Refs.numRefs()));
-  update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate);
+  update(AbsolutePath, std::move(Symbols), std::move(Refs), FilesToUpdate,
+         IndexStorage);
   {
     // Make sure hash for the main file is always updated even if there is no
     // index data in it.
index 9fc9f54..714303b 100644 (file)
@@ -14,6 +14,7 @@
 #include "FSProvider.h"
 #include "index/FileIndex.h"
 #include "index/Index.h"
+#include "index/Serialization.h"
 #include "clang/Tooling/CompilationDatabase.h"
 #include "llvm/ADT/StringMap.h"
 #include "llvm/Support/SHA1.h"
 namespace clang {
 namespace clangd {
 
+// Handles storage and retrieval of index shards. Both store and load
+// operations can be called from multiple threads concurrently.
+class BackgroundIndexStorage {
+public:
+  // Implementations are owned and destroyed through pointers to this base
+  // class, so the destructor has to be virtual.
+  virtual ~BackgroundIndexStorage() = default;
+
+  // Shards of the index are stored and retrieved independently, keyed by shard
+  // identifier - in practice this is a source file name.
+  // Returns an error if the shard could not be stored.
+  virtual llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                                 IndexFileOut Shard) const = 0;
+
+  // Tries to load the shard with the given identifier, returns nullptr if the
+  // shard couldn't be loaded.
+  virtual std::unique_ptr<IndexFileIn>
+  loadShard(llvm::StringRef ShardIdentifier) const = 0;
+
+  // The factory provides storage for each CDB.
+  // It keeps ownership of the storage instances, and should manage caching
+  // itself. Factory must be threadsafe and never returns nullptr.
+  using Factory =
+      llvm::unique_function<BackgroundIndexStorage *(llvm::StringRef)>;
+
+  // Creates an Index Storage that saves shards into disk. Index storage uses
+  // CDBDirectory + ".clangd-index/" as the folder to save shards.
+  static Factory createDiskBackedStorageFactory();
+};
+
 // Builds an in-memory index by running the static indexer action over
 // all commands in a compilation database. Indexing happens in the background.
 // FIXME: it should also persist its state on disk for fast start.
@@ -34,8 +60,9 @@ namespace clangd {
 class BackgroundIndex : public SwapIndex {
 public:
   // FIXME: resource-dir injection should be hoisted somewhere common.
-  BackgroundIndex(Context BackgroundContext, StringRef ResourceDir,
+  BackgroundIndex(Context BackgroundContext, llvm::StringRef ResourceDir,
                   const FileSystemProvider &, ArrayRef<std::string> URISchemes,
+                  BackgroundIndexStorage::Factory IndexStorageFactory,
                   size_t ThreadPoolSize = llvm::hardware_concurrency());
   ~BackgroundIndex(); // Blocks while the current task finishes.
 
@@ -59,7 +86,8 @@ public:
 private:
   /// Given index results from a TU, only update files in \p FilesToUpdate.
   void update(llvm::StringRef MainFile, SymbolSlab Symbols, RefSlab Refs,
-              const llvm::StringMap<FileDigest> &FilesToUpdate);
+              const llvm::StringMap<FileDigest> &FilesToUpdate,
+              BackgroundIndexStorage *IndexStorage);
 
   // configuration
   std::string ResourceDir;
@@ -68,16 +96,20 @@ private:
   std::vector<std::string> URISchemes;
 
   // index state
-  llvm::Error index(tooling::CompileCommand);
+  llvm::Error index(tooling::CompileCommand,
+                    BackgroundIndexStorage *IndexStorage);
 
   FileSymbols IndexedSymbols;
   llvm::StringMap<FileDigest> IndexedFileDigests; // Key is absolute file path.
   std::mutex DigestsMu;
 
+  BackgroundIndexStorage::Factory IndexStorageFactory;
+
   // queue management
   using Task = std::function<void()>;
   void run(); // Main loop executed by Thread. Runs tasks from Queue.
-  void enqueueLocked(tooling::CompileCommand Cmd);
+  void enqueueLocked(tooling::CompileCommand Cmd,
+                     BackgroundIndexStorage *IndexStorage);
   std::mutex QueueMu;
   unsigned NumActiveTasks = 0; // Only idle when queue is empty *and* no tasks.
   std::condition_variable QueueCV;
diff --git a/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp b/clang-tools-extra/clangd/index/BackgroundIndexStorage.cpp
new file mode 100644 (file)
index 0000000..ee25570
--- /dev/null
@@ -0,0 +1,112 @@
+//== BackgroundIndexStorage.cpp - Provide caching support to BackgroundIndex ==/
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "Logger.h"
+#include "index/Background.h"
+#include "llvm/Support/FileSystem.h"
+#include "llvm/Support/MemoryBuffer.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/SHA1.h"
+
+namespace clang {
+namespace clangd {
+namespace {
+
+using FileDigest = decltype(llvm::SHA1::hash({}));
+
+// Computes the SHA1 digest of the raw bytes of \p Content.
+static FileDigest digest(StringRef Content) {
+  const auto *Bytes = reinterpret_cast<const uint8_t *>(Content.data());
+  return llvm::SHA1::hash({Bytes, Content.size()});
+}
+
+// Returns the path of the shard file for \p FilePath, located directly under
+// \p ShardRoot. The file is named
+// "<basename of FilePath>.<hex SHA1 of FilePath>.idx", so the hash component
+// distinguishes files that share a basename but have different paths.
+std::string getShardPathFromFilePath(llvm::StringRef ShardRoot,
+                                     llvm::StringRef FilePath) {
+  llvm::SmallString<128> ShardRootSS(ShardRoot);
+  llvm::sys::path::append(ShardRootSS, llvm::sys::path::filename(FilePath) +
+                                           "." + llvm::toHex(digest(FilePath)) +
+                                           ".idx");
+  // Return the path that was just assembled, not the unmodified root
+  // directory that was passed in.
+  return ShardRootSS.str().str();
+}
+
+// Index storage that keeps each shard as a separate file on disk, inside a
+// ".clangd-index/" directory under the path provided during construction.
+class DiskBackedIndexStorage : public BackgroundIndexStorage {
+  std::string DiskShardRoot;
+
+public:
+  // Points DiskShardRoot at (Directory + ".clangd-index/"), the base directory
+  // for all shard files, and tries to create that directory. A failure to
+  // create it is logged; construction still completes.
+  DiskBackedIndexStorage(llvm::StringRef Directory) {
+    llvm::SmallString<128> ShardDir(Directory);
+    llvm::sys::path::append(ShardDir, ".clangd-index/");
+    DiskShardRoot = ShardDir.str();
+    const std::error_code EC = llvm::sys::fs::create_directory(DiskShardRoot);
+    if (EC != std::error_code()) {
+      elog("Failed to create directory {0} for index storage: {1}",
+           DiskShardRoot, EC.message());
+    }
+  }
+
+  // Reads and parses the shard file that belongs to \p ShardIdentifier.
+  // Returns nullptr if the file cannot be read, or (after logging the error)
+  // if its contents cannot be parsed.
+  std::unique_ptr<IndexFileIn>
+  loadShard(llvm::StringRef ShardIdentifier) const override {
+    auto Buffer = llvm::MemoryBuffer::getFile(
+        getShardPathFromFilePath(DiskShardRoot, ShardIdentifier));
+    if (!Buffer)
+      return nullptr;
+    auto Index = readIndexFile(Buffer->get()->getBuffer());
+    if (!Index) {
+      elog("Error while reading shard {0}: {1}", ShardIdentifier,
+           Index.takeError());
+      return nullptr;
+    }
+    return llvm::make_unique<IndexFileIn>(std::move(*Index));
+  }
+
+  // Serializes \p Shard into the shard file that belongs to
+  // \p ShardIdentifier. Returns an error if the file cannot be opened, or the
+  // stream's error state after it has been closed.
+  llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                         IndexFileOut Shard) const override {
+    const std::string TargetPath =
+        getShardPathFromFilePath(DiskShardRoot, ShardIdentifier);
+    std::error_code OpenEC;
+    llvm::raw_fd_ostream Out(TargetPath, OpenEC);
+    if (OpenEC)
+      return llvm::errorCodeToError(OpenEC);
+    Out << Shard;
+    Out.close();
+    return llvm::errorCodeToError(Out.error());
+  }
+};
+
+// Creates and owns IndexStorages for multiple CDBs. Storages are created on
+// first request and cached per CDB directory for later requests.
+class DiskBackedIndexStorageManager {
+public:
+  // Allocates the mutex up front: operator() dereferences it unconditionally,
+  // so it must never be null.
+  DiskBackedIndexStorageManager()
+      : IndexStorageMapMu(llvm::make_unique<std::mutex>()) {}
+
+  // Creates or fetches the storage from cache for the specified CDB.
+  // The returned pointer stays owned by this manager.
+  BackgroundIndexStorage *operator()(llvm::StringRef CDBDirectory) {
+    std::lock_guard<std::mutex> Lock(*IndexStorageMapMu);
+    auto &IndexStorage = IndexStorageMap[CDBDirectory];
+    if (!IndexStorage)
+      IndexStorage = llvm::make_unique<DiskBackedIndexStorage>(CDBDirectory);
+    return IndexStorage.get();
+  }
+
+  // Creates or fetches the storage from cache for the specified CDB.
+  // NOTE(review): no definition of this member is visible in this file -
+  // confirm whether it is still needed.
+  BackgroundIndexStorage *createStorage(llvm::StringRef CDBDirectory);
+
+private:
+  llvm::StringMap<std::unique_ptr<BackgroundIndexStorage>> IndexStorageMap;
+  // Guards IndexStorageMap. Held through a pointer, presumably so that the
+  // manager stays movable when it is turned into a Factory - TODO confirm.
+  std::unique_ptr<std::mutex> IndexStorageMapMu;
+};
+
+} // namespace
+
+// Returns a factory backed by a DiskBackedIndexStorageManager, which creates
+// one disk-backed storage per CDB directory and caches it.
+BackgroundIndexStorage::Factory
+BackgroundIndexStorage::createDiskBackedStorageFactory() {
+  return BackgroundIndexStorage::Factory(DiskBackedIndexStorageManager());
+}
+
+} // namespace clangd
+} // namespace clang
index c6f401d..6746e80 100644 (file)
@@ -1,6 +1,7 @@
 #include "SyncAPI.h"
 #include "TestFS.h"
 #include "index/Background.h"
+#include "llvm/Support/ScopedPrinter.h"
 #include "gmock/gmock.h"
 #include "gtest/gtest.h"
 
@@ -24,6 +25,37 @@ RefsAre(std::vector<testing::Matcher<Ref>> Matchers) {
   return ElementsAre(testing::Pair(_, UnorderedElementsAreArray(Matchers)));
 }
 
+// In-memory shard storage for tests. Serialized shards are kept in a
+// caller-owned string map, and every successful load is counted in a
+// caller-owned counter. Both operations lock StorageMu.
+class MemoryShardStorage : public BackgroundIndexStorage {
+  mutable std::mutex StorageMu;
+  llvm::StringMap<std::string> &Storage;
+  size_t &CacheHits;
+
+public:
+  MemoryShardStorage(llvm::StringMap<std::string> &Storage, size_t &CacheHits)
+      : Storage(Storage), CacheHits(CacheHits) {}
+
+  // Serializes \p Shard to a string and stores it under \p ShardIdentifier,
+  // replacing any previous entry. Always succeeds.
+  llvm::Error storeShard(llvm::StringRef ShardIdentifier,
+                         IndexFileOut Shard) const override {
+    std::lock_guard<std::mutex> Guard(StorageMu);
+    Storage[ShardIdentifier] = llvm::to_string(Shard);
+    return llvm::Error::success();
+  }
+
+  // Parses the shard stored under \p ShardIdentifier. Returns nullptr when
+  // there is no such entry, or when parsing fails (which is also reported as
+  // a test failure). CacheHits is incremented only on success.
+  std::unique_ptr<IndexFileIn>
+  loadShard(llvm::StringRef ShardIdentifier) const override {
+    std::lock_guard<std::mutex> Guard(StorageMu);
+    auto Entry = Storage.find(ShardIdentifier);
+    if (Entry == Storage.end())
+      return nullptr;
+    auto Parsed = readIndexFile(Entry->getValue());
+    if (!Parsed) {
+      ADD_FAILURE() << "Error while reading " << ShardIdentifier << ':'
+                    << Parsed.takeError();
+      return nullptr;
+    }
+    ++CacheHits;
+    return llvm::make_unique<IndexFileIn>(std::move(*Parsed));
+  }
+};
+
 TEST(BackgroundIndexTest, IndexTwoFiles) {
   MockFSProvider FS;
   // a.h yields different symbols when included by A.cc vs B.cc.
@@ -45,7 +77,11 @@ TEST(BackgroundIndexTest, IndexTwoFiles) {
       void f_b() {
         (void)common;
       })cpp";
-  BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"});
+  llvm::StringMap<std::string> Storage;
+  size_t CacheHits = 0;
+  MemoryShardStorage MSS(Storage, CacheHits);
+  BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"},
+                      [&](llvm::StringRef) { return &MSS; });
 
   tooling::CompileCommand Cmd;
   Cmd.Filename = testPath("root/A.cc");
@@ -78,5 +114,49 @@ TEST(BackgroundIndexTest, IndexTwoFiles) {
                        FileURI("unittest:///root/B.cc")}));
 }
 
+// Indexes A.cc (which includes A.h) with an in-memory shard storage and
+// verifies that one shard per file was written and that each shard holds the
+// expected symbols and references.
+TEST(BackgroundIndexTest, ShardStorageWriteTest) {
+  MockFSProvider FS;
+  FS.Files[testPath("root/A.h")] = R"cpp(
+      void common();
+      void f_b();
+      class A_CC {};
+      )cpp";
+  FS.Files[testPath("root/A.cc")] =
+      "#include \"A.h\"\nvoid g() { (void)common; }";
+
+  llvm::StringMap<std::string> Storage;
+  size_t CacheHits = 0;
+  MemoryShardStorage MSS(Storage, CacheHits);
+
+  tooling::CompileCommand Cmd;
+  Cmd.Filename = testPath("root/A.cc");
+  Cmd.Directory = testPath("root");
+  Cmd.CommandLine = {"clang++", testPath("root/A.cc")};
+  // Check nothing is loaded from Storage, but A.cc and A.h have been stored.
+  {
+    BackgroundIndex Idx(Context::empty(), "", FS, /*URISchemes=*/{"unittest"},
+                        [&](llvm::StringRef) { return &MSS; });
+    Idx.enqueue(testPath("root"), Cmd);
+    Idx.blockUntilIdleForTest();
+  }
+  EXPECT_EQ(CacheHits, 0U);
+  EXPECT_EQ(Storage.size(), 2U);
+
+  // The shards are dereferenced below, so a missing shard must abort the test
+  // instead of crashing the test binary.
+  auto ShardHeader = MSS.loadShard(testPath("root/A.h"));
+  ASSERT_NE(ShardHeader, nullptr);
+  EXPECT_THAT(
+      *ShardHeader->Symbols,
+      UnorderedElementsAre(Named("common"), Named("A_CC"),
+                           AllOf(Named("f_b"), Declared(), Not(Defined()))));
+  for (const auto &Ref : *ShardHeader->Refs)
+    EXPECT_THAT(Ref.second,
+                UnorderedElementsAre(FileURI("unittest:///root/A.h")));
+
+  auto ShardSource = MSS.loadShard(testPath("root/A.cc"));
+  ASSERT_NE(ShardSource, nullptr);
+  EXPECT_THAT(*ShardSource->Symbols, UnorderedElementsAre());
+  EXPECT_THAT(*ShardSource->Refs, RefsAre({FileURI("unittest:///root/A.cc")}));
+}
+
 } // namespace clangd
 } // namespace clang