From 561443818a15e7b368ddbb58207a14c60ba55c58 Mon Sep 17 00:00:00 2001 From: Sam McCall Date: Wed, 5 Oct 2022 01:33:36 +0200 Subject: [PATCH] [clangd] Optimize Dex::generateProximityURIs(). Production profiles show that generateProximityURIs is roughly 3.8% of buildPreamble. Of this, the majority (3% of buildPreamble) is parsing and reserializing URIs. We can do this with ugly string manipulation instead. Differential Revision: https://reviews.llvm.org/D135226 --- clang-tools-extra/clangd/index/dex/Dex.cpp | 69 +++++++++++++++++++----------- clang-tools-extra/clangd/index/dex/Dex.h | 2 +- 2 files changed, 45 insertions(+), 26 deletions(-) diff --git a/clang-tools-extra/clangd/index/dex/Dex.cpp b/clang-tools-extra/clangd/index/dex/Dex.cpp index 2d778c1..c66e7f0 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.cpp +++ b/clang-tools-extra/clangd/index/dex/Dex.cpp @@ -163,8 +163,8 @@ std::unique_ptr Dex::createFileProximityIterator( llvm::StringMap Sources; for (const auto &Path : ProximityPaths) { Sources[Path] = SourceParams(); - auto PathURI = URI::create(Path); - const auto PathProximityURIs = generateProximityURIs(PathURI.toString()); + auto PathURI = URI::create(Path).toString(); + const auto PathProximityURIs = generateProximityURIs(PathURI.c_str()); for (const auto &ProximityURI : PathProximityURIs) ParentURIs.insert(ProximityURI); } @@ -353,30 +353,49 @@ size_t Dex::estimateMemoryUsage() const { return Bytes + BackingDataSize; } -std::vector generateProximityURIs(llvm::StringRef URIPath) { - std::vector Result; - auto ParsedURI = URI::parse(URIPath); - assert(ParsedURI && - "Non-empty argument of generateProximityURIs() should be a valid " - "URI."); - llvm::StringRef Body = ParsedURI->body(); - // FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum - // size of resulting vector. Some projects might want to have higher limit if - // the file hierarchy is deeper. For the generic case, it would be useful to - // calculate Limit in the index build stage by calculating the maximum depth - // of the project source tree at runtime. - size_t Limit = 5; - // Insert original URI before the loop: this would save a redundant iteration - // with a URI parse. - Result.emplace_back(ParsedURI->toString()); - while (!Body.empty() && --Limit > 0) { - // FIXME(kbobyrev): Parsing and encoding path to URIs is not necessary and - // could be optimized. - Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix); - if (!Body.empty()) - Result.emplace_back( - URI(ParsedURI->scheme(), ParsedURI->authority(), Body).toString()); +// Given foo://bar/one/two +// Returns ~~~~~~~~ (or empty for bad URI) +llvm::StringRef findPathInURI(llvm::StringRef S) { + S = S.split(':').second; // Eat scheme. + if (S.consume_front("//")) // Eat authority. + S = S.drop_until([](char C) { return C == '/'; }); + return S; +} + +// FIXME(kbobyrev): Currently, this is a heuristic which defines the maximum +// size of resulting vector. Some projects might want to have higher limit if +// the file hierarchy is deeper. For the generic case, it would be useful to +// calculate Limit in the index build stage by calculating the maximum depth +// of the project source tree at runtime. +constexpr unsigned ProximityURILimit = 5; + +llvm::SmallVector +generateProximityURIs(llvm::StringRef URI) { + // This function is hot when indexing, so don't parse/reserialize URIPath, + // just emit substrings of it instead. + // + // foo://bar/one/two + // ~~~~~~~~ + // Path + llvm::StringRef Path = findPathInURI(URI); + if (Path.empty()) + return {}; // Bad URI. + assert(Path.begin() >= URI.begin() && Path.begin() < URI.end() && + Path.end() == URI.end()); + + // The original is a proximity URI. + llvm::SmallVector Result = {URI}; + // Form proximity URIs by chopping before each slash in the path part. + for (auto Slash = Path.rfind('/'); Slash > 0 && Slash != StringRef::npos; + Slash = Path.rfind('/')) { + Path = Path.substr(0, Slash); + Result.push_back(URI.substr(0, Path.end() - URI.data())); + if (Result.size() == ProximityURILimit) + return Result; } + // The root foo://bar/ is a proximity URI. + if (Path.startswith("/")) + Result.push_back(URI.substr(0, Path.begin() + 1 - URI.data())); return Result; } diff --git a/clang-tools-extra/clangd/index/dex/Dex.h b/clang-tools-extra/clangd/index/dex/Dex.h index f29e749..69e161d 100644 --- a/clang-tools-extra/clangd/index/dex/Dex.h +++ b/clang-tools-extra/clangd/index/dex/Dex.h @@ -132,7 +132,7 @@ private: /// Should be used within the index build process. /// /// This function is exposed for testing only. -std::vector generateProximityURIs(llvm::StringRef URIPath); +llvm::SmallVector generateProximityURIs(llvm::StringRef); } // namespace dex } // namespace clangd -- 2.7.4