From 92dd077af1ff89929f5502c6c887358f51d5afc1 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Tue, 15 Dec 2020 14:00:03 +0100 Subject: [PATCH] Reland [clangd] Extract per-dir CDB cache to its own threadsafe class. NFC This reverts commit 4d956af594c5adc9d566d1846d86dd89c70c9c0b. Assertion failures on windows fixed by 965d71c69acce658e9e3de00b25a351b00937820 --- .../clangd/GlobalCompilationDatabase.cpp | 252 +++++++++++++++------ .../clangd/GlobalCompilationDatabase.h | 30 ++- 2 files changed, 194 insertions(+), 88 deletions(-) diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp index 20139c1..bad9c35 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.cpp @@ -16,11 +16,13 @@ #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallString.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/FileUtilities.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" +#include #include #include #include @@ -72,10 +74,117 @@ GlobalCompilationDatabase::getFallbackCommand(PathRef File) const { return Cmd; } +// Loads and caches the CDB from a single directory. +// +// This class is threadsafe, which is to say we have independent locks for each +// directory we're searching for a CDB. +// Loading is deferred until first access. +// +// The DirectoryBasedCDB keeps a map from path => DirectoryCache. +// Typical usage is to: +// - 1) determine all the paths that might be searched +// - 2) acquire the map lock and get-or-create all the DirectoryCache entries +// - 3) release the map lock and query the caches as desired +// +// FIXME: this should revalidate the cache sometimes +// FIXME: IO should go through a VFS +class DirectoryBasedGlobalCompilationDatabase::DirectoryCache { + // Absolute canonical path that we're the cache for. (Not case-folded). + const std::string Path; + + // True if we've looked for a CDB here and found none. + // (This makes it possible for get() to return without taking a lock) + // FIXME: this should have an expiry time instead of lasting forever. + std::atomic FinalizedNoCDB = {false}; + + // Guards following cache state. + std::mutex Mu; + // Has cache been filled from disk? FIXME: this should be an expiry time. + bool CachePopulated = false; + // Whether a new CDB has been loaded but not broadcast yet. + bool NeedsBroadcast = false; + // Last loaded CDB, meaningful if CachePopulated is set. + // shared_ptr so we can overwrite this when callers are still using the CDB. + std::shared_ptr CDB; + +public: + DirectoryCache(llvm::StringRef Path) : Path(Path) { + assert(llvm::sys::path::is_absolute(Path)); + } + + // Get the CDB associated with this directory. + // ShouldBroadcast: + // - as input, signals whether the caller is willing to broadcast a + // newly-discovered CDB. (e.g. to trigger background indexing) + // - as output, signals whether the caller should do so. + // (If a new CDB is discovered and ShouldBroadcast is false, we mark the + // CDB as needing broadcast, and broadcast it next time we can). + std::shared_ptr + get(bool &ShouldBroadcast) { + // Fast path for common case without taking lock. + if (FinalizedNoCDB.load()) { + ShouldBroadcast = false; + return nullptr; + } + std::lock_guard Lock(Mu); + auto RequestBroadcast = llvm::make_scope_exit([&, OldCDB(CDB.get())] { + // If we loaded a new CDB, it should be broadcast at some point. + if (CDB != nullptr && CDB.get() != OldCDB) + NeedsBroadcast = true; + else if (CDB == nullptr) // nothing to broadcast anymore! + NeedsBroadcast = false; + // If we have something to broadcast, then do so iff allowed. + if (!ShouldBroadcast) + return; + ShouldBroadcast = NeedsBroadcast; + NeedsBroadcast = false; + }); + + // For now, we never actually attempt to revalidate a populated cache. + if (CachePopulated) + return CDB; + assert(CDB == nullptr); + + load(); + CachePopulated = true; + + if (!CDB) + FinalizedNoCDB.store(true); + return CDB; + } + + llvm::StringRef path() const { return Path; } + +private: + // Updates `CDB` from disk state. + void load() { + std::string Error; // ignored, because it's often "didn't find anything". + CDB = tooling::CompilationDatabase::loadFromDirectory(Path, Error); + if (!CDB) { + // Fallback: check for $src/build, the conventional CMake build root. + // Probe existence first to avoid each plugin doing IO if it doesn't + // exist. + llvm::SmallString<256> BuildDir(Path); + llvm::sys::path::append(BuildDir, "build"); + if (llvm::sys::fs::is_directory(BuildDir)) { + vlog("Found candidate build directory {0}", BuildDir); + CDB = tooling::CompilationDatabase::loadFromDirectory(BuildDir, Error); + } + } + if (CDB) { + log("Loaded compilation database from {0}", Path); + } else { + vlog("No compilation database at {0}", Path); + } + } +}; + DirectoryBasedGlobalCompilationDatabase:: DirectoryBasedGlobalCompilationDatabase( - llvm::Optional CompileCommandsDir) - : CompileCommandsDir(std::move(CompileCommandsDir)) {} + llvm::Optional CompileCommandsDir) { + if (CompileCommandsDir) + OnlyDirCache = std::make_unique(*CompileCommandsDir); +} DirectoryBasedGlobalCompilationDatabase:: ~DirectoryBasedGlobalCompilationDatabase() = default; @@ -121,31 +230,26 @@ static bool pathEqual(PathRef A, PathRef B) { #endif } -DirectoryBasedGlobalCompilationDatabase::CachedCDB & -DirectoryBasedGlobalCompilationDatabase::getCDBInDirLocked(PathRef Dir) const { - // FIXME(ibiryukov): Invalidate cached compilation databases on changes - auto Key = maybeCaseFoldPath(Dir); - auto R = CompilationDatabases.try_emplace(Key); - if (R.second) { // Cache miss, try to load CDB. - CachedCDB &Entry = R.first->second; - std::string Error; - Entry.Path = std::string(Dir); - Entry.CDB = tooling::CompilationDatabase::loadFromDirectory(Dir, Error); - // Check for $src/build, the conventional CMake build root. - // Probe existence first to avoid each plugin doing IO if it doesn't exist. - if (!CompileCommandsDir && !Entry.CDB) { - llvm::SmallString<256> BuildDir = Dir; - llvm::sys::path::append(BuildDir, "build"); - if (llvm::sys::fs::is_directory(BuildDir)) { - vlog("Found candidate build directory {0}", BuildDir); - Entry.CDB = - tooling::CompilationDatabase::loadFromDirectory(BuildDir, Error); - } - } - if (Entry.CDB) - log("Loaded compilation database from {0}", Dir); +std::vector +DirectoryBasedGlobalCompilationDatabase::getDirectoryCaches( + llvm::ArrayRef Dirs) const { + std::vector FoldedDirs; + FoldedDirs.reserve(Dirs.size()); + for (const auto &Dir : Dirs) { +#ifndef NDEBUG + if (!llvm::sys::path::is_absolute(Dir)) + elog("Trying to cache CDB for relative {0}"); +#endif + FoldedDirs.push_back(maybeCaseFoldPath(Dir)); } - return R.first->second; + + std::vector Ret; + Ret.reserve(Dirs.size()); + + std::lock_guard Lock(DirCachesMutex); + for (unsigned I = 0; I < Dirs.size(); ++I) + Ret.push_back(&DirCaches.try_emplace(FoldedDirs[I], Dirs[I]).first->second); + return Ret; } llvm::Optional @@ -155,39 +259,40 @@ DirectoryBasedGlobalCompilationDatabase::lookupCDB( "path must be absolute"); bool ShouldBroadcast = false; - CDBLookupResult Result; - - { - std::lock_guard Lock(Mutex); - CachedCDB *Entry = nullptr; - if (CompileCommandsDir) { - Entry = &getCDBInDirLocked(*CompileCommandsDir); - } else { - // Traverse the canonical version to prevent false positives. i.e.: - // src/build/../a.cc can detect a CDB in /src/build if not canonicalized. - // FIXME(sammccall): this loop is hot, use a union-find-like structure. - actOnAllParentDirectories(removeDots(Request.FileName), - [&](PathRef Path) { - Entry = &getCDBInDirLocked(Path); - return Entry->CDB != nullptr; - }); + DirectoryCache *DirCache = nullptr; + std::shared_ptr CDB = nullptr; + if (OnlyDirCache) { + DirCache = OnlyDirCache.get(); + ShouldBroadcast = Request.ShouldBroadcast; + CDB = DirCache->get(ShouldBroadcast); + } else { + // Traverse the canonical version to prevent false positives. i.e.: + // src/build/../a.cc can detect a CDB in /src/build if not canonicalized. + std::string CanonicalPath = removeDots(Request.FileName); + std::vector SearchDirs; + actOnAllParentDirectories(CanonicalPath, [&](PathRef Path) { + SearchDirs.push_back(Path); + return false; + }); + for (DirectoryCache *Candidate : getDirectoryCaches(SearchDirs)) { + bool CandidateShouldBroadcast = Request.ShouldBroadcast; + if ((CDB = Candidate->get(CandidateShouldBroadcast))) { + DirCache = Candidate; + ShouldBroadcast = CandidateShouldBroadcast; + break; + } } + } - if (!Entry || !Entry->CDB) - return llvm::None; - - // Mark CDB as broadcasted to make sure discovery is performed once. - if (Request.ShouldBroadcast && !Entry->SentBroadcast) { - Entry->SentBroadcast = true; - ShouldBroadcast = true; - } + if (!CDB) + return llvm::None; - Result.CDB = Entry->CDB.get(); - Result.PI.SourceRoot = Entry->Path; - } + CDBLookupResult Result; + Result.CDB = std::move(CDB); + Result.PI.SourceRoot = DirCache->path().str(); - // FIXME: Maybe make the following part async, since this can block retrieval - // of compile commands. + // FIXME: Maybe make the following part async, since this can block + // retrieval of compile commands. if (ShouldBroadcast) broadcastCDB(Result); return Result; @@ -200,29 +305,32 @@ void DirectoryBasedGlobalCompilationDatabase::broadcastCDB( std::vector AllFiles = Result.CDB->getAllFiles(); // We assume CDB in CompileCommandsDir owns all of its entries, since we don't // perform any search in parent paths whenever it is set. - if (CompileCommandsDir) { - assert(*CompileCommandsDir == Result.PI.SourceRoot && + if (OnlyDirCache) { + assert(OnlyDirCache->path() == Result.PI.SourceRoot && "Trying to broadcast a CDB outside of CompileCommandsDir!"); OnCommandChanged.broadcast(std::move(AllFiles)); return; } + // Uniquify all parent directories of all files. llvm::StringMap DirectoryHasCDB; - { - std::lock_guard Lock(Mutex); - // Uniquify all parent directories of all files. - for (llvm::StringRef File : AllFiles) { - actOnAllParentDirectories(File, [&](PathRef Path) { - auto It = DirectoryHasCDB.try_emplace(Path); - // Already seen this path, and all of its parents. - if (!It.second) - return true; - - CachedCDB &Entry = getCDBInDirLocked(Path); - It.first->second = Entry.CDB != nullptr; - return pathEqual(Path, Result.PI.SourceRoot); - }); - } + std::vector FileAncestors; + for (llvm::StringRef File : AllFiles) { + actOnAllParentDirectories(File, [&](PathRef Path) { + auto It = DirectoryHasCDB.try_emplace(Path); + // Already seen this path, and all of its parents. + if (!It.second) + return true; + + FileAncestors.push_back(It.first->getKey()); + return pathEqual(Path, Result.PI.SourceRoot); + }); + } + // Work out which ones have CDBs in them. + for (DirectoryCache *Dir : getDirectoryCaches(FileAncestors)) { + bool ShouldBroadcast = false; + if (Dir->get(ShouldBroadcast)) + DirectoryHasCDB.find(Dir->path())->setValue(true); } std::vector GovernedFiles; diff --git a/clang-tools-extra/clangd/GlobalCompilationDatabase.h b/clang-tools-extra/clangd/GlobalCompilationDatabase.h index 95677f9..e654ccb 100644 --- a/clang-tools-extra/clangd/GlobalCompilationDatabase.h +++ b/clang-tools-extra/clangd/GlobalCompilationDatabase.h @@ -81,13 +81,19 @@ public: llvm::Optional getProjectInfo(PathRef File) const override; private: - /// Caches compilation databases loaded from directories. - struct CachedCDB { - std::string Path; // Not case-folded. - std::unique_ptr CDB = nullptr; - bool SentBroadcast = false; - }; - CachedCDB &getCDBInDirLocked(PathRef File) const; + class DirectoryCache; + // If there's an explicit CompileCommandsDir, cache of the CDB found there. + mutable std::unique_ptr OnlyDirCache; + + // Keyed by possibly-case-folded directory path. + // We can hand out pointers as they're stable and entries are never removed. + // Empty if CompileCommandsDir is given (OnlyDirCache is used instead). + mutable llvm::StringMap DirCaches; + // DirCaches access must be locked (unlike OnlyDirCache, which is threadsafe). + mutable std::mutex DirCachesMutex; + + std::vector + getDirectoryCaches(llvm::ArrayRef Dirs) const; struct CDBLookupRequest { PathRef FileName; @@ -95,21 +101,13 @@ private: bool ShouldBroadcast = false; }; struct CDBLookupResult { - tooling::CompilationDatabase *CDB = nullptr; + std::shared_ptr CDB; ProjectInfo PI; }; llvm::Optional lookupCDB(CDBLookupRequest Request) const; // Performs broadcast on governed files. void broadcastCDB(CDBLookupResult Res) const; - - mutable std::mutex Mutex; - // Keyed by possibly-case-folded directory path. - mutable llvm::StringMap CompilationDatabases; - - /// Used for command argument pointing to folder where compile_commands.json - /// is located. - llvm::Optional CompileCommandsDir; }; /// Extracts system include search path from drivers matching QueryDriverGlobs -- 2.7.4