[llvm-objcopy] Improve performance of long pattern lists
author: Fangrui Song <i@maskray.me>
Mon, 12 Jul 2021 16:03:33 +0000 (09:03 -0700)
committer: Fangrui Song <i@maskray.me>
Mon, 12 Jul 2021 16:03:34 +0000 (09:03 -0700)
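Some users pass a long list of fixed (literal) name patterns (PR50404), and
matching every symbol against every pattern, O(|patterns|*|symbols|), can be
too slow. Such usage typically does not use --regex or --wildcard, so we can
store the literal positive names in a DenseSet<CachedHashStringRef> and answer
name lookups with a hash probe instead of a linear scan. Regex/glob patterns
and negative matchers are still kept in vectors and scanned linearly.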

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D105218

llvm/tools/llvm-objcopy/CommonConfig.h

index 49a77e1..131ce5c 100644 (file)
@@ -10,6 +10,7 @@
 #define LLVM_TOOLS_LLVM_OBJCOPY_COMMONCONFIG_H
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/CachedHashString.h"
 #include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
@@ -113,6 +114,11 @@ public:
          llvm::function_ref<Error(Error)> ErrorCallback);
 
   bool isPositiveMatch() const { return IsPositiveMatch; }
+  Optional<StringRef> getName() const {
+    if (!R && !G)
+      return Name;
+    return None;
+  }
   bool operator==(StringRef S) const {
     return R ? R->match(S) : G ? G->match(S) : Name == S;
   }
@@ -122,23 +128,32 @@ public:
 // Matcher that checks symbol or section names against the command line flags
 // provided for that option.
 class NameMatcher {
-  std::vector<NameOrPattern> PosMatchers;
+  DenseSet<CachedHashStringRef> PosNames;
+  std::vector<NameOrPattern> PosPatterns;
   std::vector<NameOrPattern> NegMatchers;
 
 public:
   Error addMatcher(Expected<NameOrPattern> Matcher) {
     if (!Matcher)
       return Matcher.takeError();
-    if (Matcher->isPositiveMatch())
-      PosMatchers.push_back(std::move(*Matcher));
-    else
+    if (Matcher->isPositiveMatch()) {
+      if (Optional<StringRef> MaybeName = Matcher->getName())
+        PosNames.insert(CachedHashStringRef(*MaybeName));
+      else
+        PosPatterns.push_back(std::move(*Matcher));
+    } else {
       NegMatchers.push_back(std::move(*Matcher));
+    }
     return Error::success();
   }
   bool matches(StringRef S) const {
-    return is_contained(PosMatchers, S) && !is_contained(NegMatchers, S);
+    return (PosNames.contains(CachedHashStringRef(S)) ||
+            is_contained(PosPatterns, S)) &&
+           !is_contained(NegMatchers, S);
+  }
+  bool empty() const {
+    return PosNames.empty() && PosPatterns.empty() && NegMatchers.empty();
   }
-  bool empty() const { return PosMatchers.empty() && NegMatchers.empty(); }
 };
 
 enum class SymbolFlag {