[clangd] Optimize "don't include me" check.
author Sam McCall <sam.mccall@gmail.com>
Thu, 25 Apr 2019 17:47:07 +0000 (17:47 +0000)
committer Sam McCall <sam.mccall@gmail.com>
Thu, 25 Apr 2019 17:47:07 +0000 (17:47 +0000)
Summary:
llvm::Regex is really slow, and regex evaluation during preamble indexing was
showing up as 25% on a profile of clangd in a codebase with large preambles.

Reviewers: ilya-biryukov

Subscribers: MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D61120

llvm-svn: 359214

clang-tools-extra/clangd/index/SymbolCollector.cpp
clang-tools-extra/clangd/index/SymbolCollector.h

index d4dc29a..05e2c5c 100644 (file)
@@ -639,7 +639,7 @@ bool SymbolCollector::isSelfContainedHeader(FileID FID) {
       return false;
     // This pattern indicates that a header can't be used without
     // particular preprocessor state, usually set up by another header.
-    if (DontIncludeMePattern.match(SM.getBufferData(FID)))
+    if (isDontIncludeMeHeader(SM.getBufferData(FID)))
       return false;
     return true;
   };
@@ -650,5 +650,36 @@ bool SymbolCollector::isSelfContainedHeader(FileID FID) {
   return R.first->second;
 }
 
+// Is Line an #if-family directive? Prefix match: #if, #ifdef, #ifndef, etc.
+static bool isIf(llvm::StringRef Line) {
+  Line = Line.ltrim(); // Allow leading whitespace before the hash.
+  if (!Line.consume_front("#"))
+    return false;
+  Line = Line.ltrim(); // Allow whitespace between the hash and the name.
+  return Line.startswith("if"); // Anything starting with if counts.
+}
+// Is Line an #error directive mentioning includes (matched ignoring case)?
+static bool isErrorAboutInclude(llvm::StringRef Line) {
+  Line = Line.ltrim(); // Allow leading whitespace before the hash.
+  if (!Line.consume_front("#"))
+    return false;
+  Line = Line.ltrim(); // Allow whitespace between the hash and the name.
+  if (! Line.startswith("error"))
+    return false;
+  return Line.contains_lower("includ"); // Matches "include" or "including".
+}
+
+bool SymbolCollector::isDontIncludeMeHeader(llvm::StringRef Content) {
+  llvm::StringRef Line;
+  // Only sniff a bounded prefix: at most 100*100 bytes (~10KB) and 100 lines.
+  Content = Content.take_front(100*100);
+  for (unsigned I = 0; I < 100 && !Content.empty(); ++I) {
+    std::tie(Line, Content) = Content.split('\n'); // Pop one line.
+    if (isIf(Line) && isErrorAboutInclude(Content.split('\n').first))
+      return true; // An #if directly followed by an include-related #error.
+  }
+  return false; // No such pair within the sniffed prefix.
+}
+
 } // namespace clangd
 } // namespace clang
index ac1a57d..689d4a4 100644 (file)
@@ -120,12 +120,8 @@ private:
 
   llvm::Optional<std::string> getIncludeHeader(llvm::StringRef QName, FileID);
   bool isSelfContainedHeader(FileID);
-  // Heuristic to detect headers that aren't self-contained, usually because
-  // they need to be included via an umbrella header. e.g. GTK matches this.
-  llvm::Regex DontIncludeMePattern = {
-      "^[ \t]*#[ \t]*if.*\n"         // An #if, #ifndef etc directive, then
-      "[ \t]*#[ \t]*error.*include", // an #error directive mentioning "include"
-      llvm::Regex::Newline};
+  // Heuristically detect headers that only want to be included via an umbrella.
+  static bool isDontIncludeMeHeader(llvm::StringRef);
 
   // All Symbols collected from the AST.
   SymbolSlab::Builder Symbols;