[clang-tidy] Optimize GlobList::contains
authorAlexander Kornienko <alexfh@google.com>
Thu, 18 May 2017 01:13:51 +0000 (01:13 +0000)
committerAlexander Kornienko <alexfh@google.com>
Thu, 18 May 2017 01:13:51 +0000 (01:13 +0000)
With large lists of checks and large number of warnings GlobList::contains
starts being ridiculously CPU hungry, since it runs regexp match per glob.
Caching results of glob matching in a StringMap significantly speeds up check
filtering even for small GlobLists.

/tmp/q.cc:

void f() {
  int I;
  {int I;}
  {int I;}
  {int I;}
  ... // 200k times
}

Before the patch:

GlobList with 2 entries:
  $ time clang-tidy-old -checks=-*,modernize-use-override /tmp/q.cc -- -Wshadow
  200000 warnings generated.
  Suppressed 200000 warnings (200000 with check filters).

  real    0m3.826s
  user    0m3.176s
  sys     0m0.504s

GlobList with 28 entries:
  $ time clang-tidy-old -checks=-*,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,modernize-use-override /tmp/q.cc -- -Wshadow
  200000 warnings generated.
  Suppressed 200000 warnings (200000 with check filters).

  real    0m5.000s
  user    0m4.744s
  sys     0m0.060s

GlobList with 158 entries:
  $ time clang-tidy-old -checks=-*,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,modernize-use-override /tmp/q.cc -- -Wshadow
  200000 warnings generated.
  Suppressed 200000 warnings (200000 with check filters).

  real    0m13.920s
  user    0m13.636s
  sys     0m0.104s

With the patch runtime is practically independent from the length of the GlobList:
  $ time clang-tidy-new -checks=-*,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,a,b,c,d,e,f,g,h,i,j,k,l,m,n,o,p,q,r,s,t,u,v,w,x,y,z,modernize-use-override /tmp/q.cc -- -Wshadow
  200000 warnings generated.
  Suppressed 200000 warnings (200000 with check filters).

  real    0m2.300s
  user    0m2.104s
  sys     0m0.044s

llvm-svn: 303321

clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.cpp
clang-tools-extra/clang-tidy/ClangTidyDiagnosticConsumer.h

index 741bac6..3a3aa01 100644 (file)
@@ -158,6 +158,26 @@ bool GlobList::contains(StringRef S, bool Contains) {
   return Contains;
 }
 
+class ClangTidyContext::CachedGlobList {
+public:
+  CachedGlobList(StringRef Globs) : Globs(Globs) {}
+
+  bool contains(StringRef S) {
+    switch (auto &Result = Cache[S]) {
+      case Yes: return true;
+      case No: return false;
+      case None:
+        Result = Globs.contains(S) ? Yes : No;
+        return Result == Yes;
+    }
+  }
+
+private:
+  GlobList Globs;
+  enum Tristate { None, Yes, No };
+  llvm::StringMap<Tristate> Cache;
+};
+
 ClangTidyContext::ClangTidyContext(
     std::unique_ptr<ClangTidyOptionsProvider> OptionsProvider)
     : DiagEngine(nullptr), OptionsProvider(std::move(OptionsProvider)),
@@ -167,6 +187,8 @@ ClangTidyContext::ClangTidyContext(
   setCurrentFile("");
 }
 
+ClangTidyContext::~ClangTidyContext() = default;
+
 DiagnosticBuilder ClangTidyContext::diag(
     StringRef CheckName, SourceLocation Loc, StringRef Description,
     DiagnosticIDs::Level Level /* = DiagnosticIDs::Warning*/) {
@@ -188,8 +210,9 @@ void ClangTidyContext::setSourceManager(SourceManager *SourceMgr) {
 void ClangTidyContext::setCurrentFile(StringRef File) {
   CurrentFile = File;
   CurrentOptions = getOptionsForFile(CurrentFile);
-  CheckFilter.reset(new GlobList(*getOptions().Checks));
-  WarningAsErrorFilter.reset(new GlobList(*getOptions().WarningsAsErrors));
+  CheckFilter = llvm::make_unique<CachedGlobList>(*getOptions().Checks);
+  WarningAsErrorFilter =
+      llvm::make_unique<CachedGlobList>(*getOptions().WarningsAsErrors);
 }
 
 void ClangTidyContext::setASTContext(ASTContext *Context) {
@@ -243,9 +266,9 @@ ClangTidyDiagnosticConsumer::ClangTidyDiagnosticConsumer(
       LastErrorRelatesToUserCode(false), LastErrorPassesLineFilter(false),
       LastErrorWasIgnored(false) {
   IntrusiveRefCntPtr<DiagnosticOptions> DiagOpts = new DiagnosticOptions();
-  Diags.reset(new DiagnosticsEngine(
+  Diags = llvm::make_unique<DiagnosticsEngine>(
       IntrusiveRefCntPtr<DiagnosticIDs>(new DiagnosticIDs), &*DiagOpts, this,
-      /*ShouldOwnClient=*/false));
+      /*ShouldOwnClient=*/false);
   Context.setDiagnosticsEngine(Diags.get());
 }
 
@@ -461,8 +484,8 @@ void ClangTidyDiagnosticConsumer::checkFilters(SourceLocation Location) {
 
 llvm::Regex *ClangTidyDiagnosticConsumer::getHeaderFilter() {
   if (!HeaderFilter)
-    HeaderFilter.reset(
-        new llvm::Regex(*Context.getOptions().HeaderFilterRegex));
+    HeaderFilter =
+        llvm::make_unique<llvm::Regex>(*Context.getOptions().HeaderFilterRegex);
   return HeaderFilter.get();
 }
 
index 19dd6f2..b484794 100644 (file)
@@ -106,6 +106,8 @@ public:
   /// \brief Initializes \c ClangTidyContext instance.
   ClangTidyContext(std::unique_ptr<ClangTidyOptionsProvider> OptionsProvider);
 
+  ~ClangTidyContext();
+
   /// \brief Report any errors detected using this method.
   ///
   /// This is still under heavy development and will likely change towards using
@@ -202,8 +204,9 @@ private:
 
   std::string CurrentFile;
   ClangTidyOptions CurrentOptions;
-  std::unique_ptr<GlobList> CheckFilter;
-  std::unique_ptr<GlobList> WarningAsErrorFilter;
+  class CachedGlobList;
+  std::unique_ptr<CachedGlobList> CheckFilter;
+  std::unique_ptr<CachedGlobList> WarningAsErrorFilter;
 
   LangOptions LangOpts;