[ASTMatchers] Enhanced support for matchers taking Regex arguments
author Nathan James <n.james93@hotmail.co.uk>
Thu, 2 Jul 2020 13:52:24 +0000 (14:52 +0100)
committer Nathan James <n.james93@hotmail.co.uk>
Thu, 2 Jul 2020 13:52:25 +0000 (14:52 +0100)
Added new macros `AST(_POLYMORPHIC)_MATCHER_REGEX(_OVERLOAD)` that define matchers taking a regular expression string and, optionally, regular expression flags. This lets users match against nodes while ignoring case without having to manually write `[Aa]` or `[A-Fa-f]` in their regex. It also addresses a second problem: currently, matchers that use regular expressions have to compile them for each node they try to match on. Now the regular expression is compiled once, when the matcher is defined, and reused for every node that it tries to match against. If compiling the regular expression fails, an error is logged to stderr showing the bad regex string and the reason it couldn't be compiled. Previously this behaviour was down to each matcher's implementation: some would assert, whereas others would simply never match. Support for this has been added to the documentation script as well as to the dynamic matchers, ensuring functionality is the same between the two use cases.

Reviewed By: aaron.ballman

Differential Revision: https://reviews.llvm.org/D82706

13 files changed:
clang/docs/LibASTMatchersReference.html
clang/docs/tools/dump_ast_matchers.py
clang/include/clang/ASTMatchers/ASTMatchers.h
clang/include/clang/ASTMatchers/ASTMatchersInternal.h
clang/include/clang/ASTMatchers/ASTMatchersMacros.h
clang/lib/ASTMatchers/ASTMatchersInternal.cpp
clang/lib/ASTMatchers/Dynamic/Marshallers.cpp
clang/lib/ASTMatchers/Dynamic/Marshallers.h
clang/lib/ASTMatchers/Dynamic/Registry.cpp
clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp
clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp
llvm/include/llvm/Support/Regex.h
llvm/lib/Support/Regex.cpp

index ae90ba0..9c04322 100644 (file)
@@ -3007,7 +3007,7 @@ passed as a quoted string. e.g., hasAttr("attr::CUDADevice").
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;</td><td class="name" onclick="toggle('isExpansionInFileMatching0')"><a name="isExpansionInFileMatching0Anchor">isExpansionInFileMatching</a></td><td>std::string RegExp</td></tr>
+<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;</td><td class="name" onclick="toggle('isExpansionInFileMatching0')"><a name="isExpansionInFileMatching0Anchor">isExpansionInFileMatching</a></td><td>StringRef RegExp, Regex::RegexFlags Flags = NoFlags</td></tr>
 <tr><td colspan="4" class="doc" id="isExpansionInFileMatching0"><pre>Matches AST nodes that were expanded within files whose name is
 partially matching a given regex.
 
@@ -3019,6 +3019,10 @@ ASTMatcher.h:
   class Y {};
 
 Usable as: Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;, Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;, Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>&gt;
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 </pre></td></tr>
 
 
@@ -3725,7 +3729,7 @@ Example matches X (Name is one of "::a::b::X", "a::b::X", "b::X", "X")
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1NamedDecl.html">NamedDecl</a>&gt;</td><td class="name" onclick="toggle('matchesName0')"><a name="matchesName0Anchor">matchesName</a></td><td>std::string RegExp</td></tr>
+<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1NamedDecl.html">NamedDecl</a>&gt;</td><td class="name" onclick="toggle('matchesName0')"><a name="matchesName0Anchor">matchesName</a></td><td>StringRef RegExp, Regex::RegexFlags Flags = NoFlags</td></tr>
 <tr><td colspan="4" class="doc" id="matchesName0"><pre>Matches NamedDecl nodes whose fully qualified names contain
 a substring matched by the given RegExp.
 
@@ -3738,6 +3742,10 @@ Example matches X (regexp == "::X")
 
 Example matches X (regexp is one of "::X", "^foo::.*X", among others)
   namespace foo { namespace bar { class X; } }
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 </pre></td></tr>
 
 
@@ -3932,12 +3940,16 @@ but not
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1ObjCMessageExpr.html">ObjCMessageExpr</a>&gt;</td><td class="name" onclick="toggle('matchesSelector0')"><a name="matchesSelector0Anchor">matchesSelector</a></td><td>std::string RegExp</td></tr>
+<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1ObjCMessageExpr.html">ObjCMessageExpr</a>&gt;</td><td class="name" onclick="toggle('matchesSelector0')"><a name="matchesSelector0Anchor">matchesSelector</a></td><td>StringRef RegExp, Regex::RegexFlags Flags = NoFlags</td></tr>
 <tr><td colspan="4" class="doc" id="matchesSelector0"><pre>Matches ObjC selectors whose name contains
 a substring matched by the given RegExp.
  matcher = objCMessageExpr(matchesSelector("loadHTMLStringmatches the outer message expr in the code below, but NOT the message
  invocation for self.bodyView.
     [self.bodyView loadHTMLString:html baseURL:NULL];
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 </pre></td></tr>
 
 
@@ -4228,7 +4240,7 @@ node. There's nothing `Stmt`-specific about it.
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('isExpansionInFileMatching1')"><a name="isExpansionInFileMatching1Anchor">isExpansionInFileMatching</a></td><td>std::string RegExp</td></tr>
+<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;</td><td class="name" onclick="toggle('isExpansionInFileMatching1')"><a name="isExpansionInFileMatching1Anchor">isExpansionInFileMatching</a></td><td>StringRef RegExp, Regex::RegexFlags Flags = NoFlags</td></tr>
 <tr><td colspan="4" class="doc" id="isExpansionInFileMatching1"><pre>Matches AST nodes that were expanded within files whose name is
 partially matching a given regex.
 
@@ -4240,6 +4252,10 @@ ASTMatcher.h:
   class Y {};
 
 Usable as: Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;, Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;, Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>&gt;
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 </pre></td></tr>
 
 
@@ -4410,7 +4426,7 @@ classTemplateSpecializationDecl(templateArgumentCountIs(1))
 </pre></td></tr>
 
 
-<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>&gt;</td><td class="name" onclick="toggle('isExpansionInFileMatching2')"><a name="isExpansionInFileMatching2Anchor">isExpansionInFileMatching</a></td><td>std::string RegExp</td></tr>
+<tr><td>Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>&gt;</td><td class="name" onclick="toggle('isExpansionInFileMatching2')"><a name="isExpansionInFileMatching2Anchor">isExpansionInFileMatching</a></td><td>StringRef RegExp, Regex::RegexFlags Flags = NoFlags</td></tr>
 <tr><td colspan="4" class="doc" id="isExpansionInFileMatching2"><pre>Matches AST nodes that were expanded within files whose name is
 partially matching a given regex.
 
@@ -4422,6 +4438,10 @@ ASTMatcher.h:
   class Y {};
 
 Usable as: Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Decl.html">Decl</a>&gt;, Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1Stmt.html">Stmt</a>&gt;, Matcher&lt;<a href="https://clang.llvm.org/doxygen/classclang_1_1TypeLoc.html">TypeLoc</a>&gt;
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 </pre></td></tr>
 
 
index baba4cf..045833b 100755 (executable)
@@ -230,6 +230,28 @@ def act_on_decl(declaration, comment, allowed_types):
         add_matcher(result_type, name, args, comment)
       return
 
+    m = re.match(r"""^\s*AST_POLYMORPHIC_MATCHER_REGEX(?:_OVERLOAD)?\(
+                          \s*([^\s,]+)\s*,
+                          \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\),
+                          \s*([^\s,]+)\s*
+                       (?:,\s*\d+\s*)?
+                      \)\s*{\s*$""", declaration, flags=re.X)
+
+    if m:
+      name, results, arg_name = m.groups()[0:3]
+      result_types = [r.strip() for r in results.split(',')]
+      if allowed_types and allowed_types != result_types:
+        raise Exception('Inconsistent documentation for: %s' % name)
+      arg = "StringRef %s, Regex::RegexFlags Flags = NoFlags" % arg_name
+      comment += """
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example \"IgnoreCase | BasicRegex\"
+"""
+      for result_type in result_types:
+        add_matcher(result_type, name, arg, comment)
+      return
+
     m = re.match(r"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\(
                        (?:\s*([^\s,]+)\s*,)?
                           \s*([^\s,]+)\s*
@@ -275,6 +297,31 @@ def act_on_decl(declaration, comment, allowed_types):
         add_matcher(result_type, name, args, comment)
       return
 
+    m = re.match(r"""^\s*AST_MATCHER_REGEX(?:_OVERLOAD)?\(
+                       \s*([^\s,]+)\s*,
+                       \s*([^\s,]+)\s*,
+                       \s*([^\s,]+)\s*
+                       (?:,\s*\d+\s*)?
+                      \)\s*{""", declaration, flags=re.X)
+    if m:
+      result, name, arg_name = m.groups()[0:3]
+      if not result:
+        if not allowed_types:
+          raise Exception('Did not find allowed result types for: %s' % name)
+        result_types = allowed_types
+      else:
+        result_types = [result]
+      arg = "StringRef %s, Regex::RegexFlags Flags = NoFlags" % arg_name
+      comment += """
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example \"IgnoreCase | BasicRegex\"
+"""
+
+      for result_type in result_types:
+        add_matcher(result_type, name, arg, comment)
+      return
+
     # Parse ArgumentAdapting matchers.
     m = re.match(
         r"""^.*ArgumentAdaptingMatcherFunc<.*>\s*
index 4d5e423..153b517 100644 (file)
@@ -283,9 +283,10 @@ AST_POLYMORPHIC_MATCHER(isExpansionInSystemHeader,
 /// \endcode
 ///
 /// Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
-AST_POLYMORPHIC_MATCHER_P(isExpansionInFileMatching,
-                          AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Stmt, TypeLoc),
-                          std::string, RegExp) {
+AST_POLYMORPHIC_MATCHER_REGEX(isExpansionInFileMatching,
+                              AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Stmt,
+                                                              TypeLoc),
+                              RegExp) {
   auto &SourceManager = Finder->getASTContext().getSourceManager();
   auto ExpansionLoc = SourceManager.getExpansionLoc(Node.getBeginLoc());
   if (ExpansionLoc.isInvalid()) {
@@ -298,8 +299,7 @@ AST_POLYMORPHIC_MATCHER_P(isExpansionInFileMatching,
   }
 
   auto Filename = FileEntry->getName();
-  llvm::Regex RE(RegExp);
-  return RE.match(Filename);
+  return RegExp->match(Filename);
 }
 
 /// Matches statements that are (transitively) expanded from the named macro.
@@ -2748,11 +2748,9 @@ extern const internal::VariadicFunction<internal::Matcher<NamedDecl>, StringRef,
 /// \code
 ///   namespace foo { namespace bar { class X; } }
 /// \endcode
-AST_MATCHER_P(NamedDecl, matchesName, std::string, RegExp) {
-  assert(!RegExp.empty());
+AST_MATCHER_REGEX(NamedDecl, matchesName, RegExp) {
   std::string FullNameString = "::" + Node.getQualifiedNameAsString();
-  llvm::Regex RE(RegExp);
-  return RE.match(FullNameString);
+  return RegExp->match(FullNameString);
 }
 
 /// Matches overloaded operator names.
@@ -3373,11 +3371,9 @@ extern const internal::VariadicFunction<internal::Matcher<ObjCMessageExpr>,
 /// \code
 ///     [self.bodyView loadHTMLString:html baseURL:NULL];
 /// \endcode
-AST_MATCHER_P(ObjCMessageExpr, matchesSelector, std::string, RegExp) {
-  assert(!RegExp.empty());
+AST_MATCHER_REGEX(ObjCMessageExpr, matchesSelector, RegExp) {
   std::string SelectorString = Node.getSelector().getAsString();
-  llvm::Regex RE(RegExp);
-  return RE.match(SelectorString);
+  return RegExp->match(SelectorString);
 }
 
 /// Matches when the selector is the empty selector
index fc41407..3992850 100644 (file)
@@ -40,7 +40,6 @@
 #include "clang/AST/DeclFriend.h"
 #include "clang/AST/DeclTemplate.h"
 #include "clang/AST/Expr.h"
-#include "clang/AST/ExprObjC.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ExprObjC.h"
 #include "clang/AST/NestedNameSpecifier.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Regex.h"
 #include <algorithm>
 #include <cassert>
 #include <cstddef>
 #include <cstdint>
 #include <map>
+#include <memory>
 #include <string>
 #include <tuple>
 #include <type_traits>
@@ -1948,6 +1949,10 @@ bool matchesAnyBase(const CXXRecordDecl &Node,
                     const Matcher<CXXBaseSpecifier> &BaseSpecMatcher,
                     ASTMatchFinder *Finder, BoundNodesTreeBuilder *Builder);
 
+std::shared_ptr<llvm::Regex> createAndVerifyRegex(StringRef Regex,
+                                                  llvm::Regex::RegexFlags Flags,
+                                                  StringRef MatcherID);
+
 } // namespace internal
 
 } // namespace ast_matchers
index 4977bf3..45e8b1a 100644 (file)
       ReturnTypesF>::Func MatcherName##Loc;                                    \
   AST_TYPE_TRAVERSE_MATCHER(MatcherName, FunctionName##Type, ReturnTypesF)
 
+/// AST_MATCHER_REGEX(Type, DefineMatcher, Param) { ... }
+/// defines a function named DefineMatcher() that takes a regular expression
+/// string parameter and an optional RegexFlags parameter and returns a
+/// Matcher<Type> object.
+///
+/// The code between the curly braces has access to the following variables:
+///
+///   Node:                  the AST node being matched; its type is Type.
+///   Param:                 a pointer to an \ref llvm::Regex object
+///   Finder:                an ASTMatchFinder*.
+///   Builder:               a BoundNodesTreeBuilder*.
+///
+/// The code should return true if 'Node' matches.
+#define AST_MATCHER_REGEX(Type, DefineMatcher, Param)                          \
+  AST_MATCHER_REGEX_OVERLOAD(Type, DefineMatcher, Param, 0)
+
+#define AST_MATCHER_REGEX_OVERLOAD(Type, DefineMatcher, Param, OverloadId)     \
+  namespace internal {                                                         \
+  class matcher_##DefineMatcher##OverloadId##Matcher                           \
+      : public ::clang::ast_matchers::internal::MatcherInterface<Type> {       \
+  public:                                                                      \
+    explicit matcher_##DefineMatcher##OverloadId##Matcher(                     \
+        std::shared_ptr<llvm::Regex> RE)                                       \
+        : Param(std::move(RE)) {}                                              \
+    bool matches(const Type &Node,                                             \
+                 ::clang::ast_matchers::internal::ASTMatchFinder *Finder,      \
+                 ::clang::ast_matchers::internal::BoundNodesTreeBuilder        \
+                     *Builder) const override;                                 \
+                                                                               \
+  private:                                                                     \
+    std::shared_ptr<llvm::Regex> const Param;                                  \
+  };                                                                           \
+  }                                                                            \
+  inline ::clang::ast_matchers::internal::Matcher<Type> DefineMatcher(         \
+      llvm::StringRef Param, llvm::Regex::RegexFlags RegexFlags) {             \
+    return ::clang::ast_matchers::internal::makeMatcher(                       \
+        new internal::matcher_##DefineMatcher##OverloadId##Matcher(            \
+            ::clang::ast_matchers::internal::createAndVerifyRegex(             \
+                Param, RegexFlags, #DefineMatcher)));                          \
+  }                                                                            \
+  inline ::clang::ast_matchers::internal::Matcher<Type> DefineMatcher(         \
+      llvm::StringRef Param) {                                                 \
+    return DefineMatcher(Param, llvm::Regex::NoFlags);                         \
+  }                                                                            \
+                                                                               \
+  typedef ::clang::ast_matchers::internal::Matcher<Type> (                     \
+      &DefineMatcher##_Type##OverloadId##Flags)(llvm::StringRef,               \
+                                                llvm::Regex::RegexFlags);      \
+  typedef ::clang::ast_matchers::internal::Matcher<Type> (                     \
+      &DefineMatcher##_Type##OverloadId)(llvm::StringRef);                     \
+  inline bool internal::matcher_##DefineMatcher##OverloadId##Matcher::matches( \
+      const Type &Node,                                                        \
+      ::clang::ast_matchers::internal::ASTMatchFinder *Finder,                 \
+      ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) const
+
+/// AST_POLYMORPHIC_MATCHER_REGEX(DefineMatcher, ReturnTypesF, Param) { ... }
+/// defines a function named DefineMatcher() that takes a regular expression
+/// string parameter and an optional RegexFlags parameter that is polymorphic in
+/// the return type.
+///
+/// The variables are the same as for
+/// AST_MATCHER_REGEX, with the addition of NodeType, which specifies the node
+/// type of the matcher Matcher<NodeType> returned by the function matcher().
+#define AST_POLYMORPHIC_MATCHER_REGEX(DefineMatcher, ReturnTypesF, Param)      \
+  AST_POLYMORPHIC_MATCHER_REGEX_OVERLOAD(DefineMatcher, ReturnTypesF, Param, 0)
+
+#define AST_POLYMORPHIC_MATCHER_REGEX_OVERLOAD(DefineMatcher, ReturnTypesF,    \
+                                               Param, OverloadId)              \
+  namespace internal {                                                         \
+  template <typename NodeType, typename ParamT>                                \
+  class matcher_##DefineMatcher##OverloadId##Matcher                           \
+      : public ::clang::ast_matchers::internal::MatcherInterface<NodeType> {   \
+  public:                                                                      \
+    explicit matcher_##DefineMatcher##OverloadId##Matcher(                     \
+        std::shared_ptr<llvm::Regex> RE)                                       \
+        : Param(std::move(RE)) {}                                              \
+    bool matches(const NodeType &Node,                                         \
+                 ::clang::ast_matchers::internal::ASTMatchFinder *Finder,      \
+                 ::clang::ast_matchers::internal::BoundNodesTreeBuilder        \
+                     *Builder) const override;                                 \
+                                                                               \
+  private:                                                                     \
+    std::shared_ptr<llvm::Regex> const Param;                                  \
+  };                                                                           \
+  }                                                                            \
+  inline ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1<        \
+      internal::matcher_##DefineMatcher##OverloadId##Matcher,                  \
+      std::shared_ptr<llvm::Regex>, ReturnTypesF>                              \
+  DefineMatcher(llvm::StringRef Param, llvm::Regex::RegexFlags RegexFlags) {   \
+    return ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1<      \
+        internal::matcher_##DefineMatcher##OverloadId##Matcher,                \
+        std::shared_ptr<llvm::Regex>, ReturnTypesF>(                           \
+        ::clang::ast_matchers::internal::createAndVerifyRegex(                 \
+            Param, RegexFlags, #DefineMatcher));                               \
+  }                                                                            \
+  inline ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1<        \
+      internal::matcher_##DefineMatcher##OverloadId##Matcher,                  \
+      std::shared_ptr<llvm::Regex>, ReturnTypesF>                              \
+  DefineMatcher(llvm::StringRef Param) {                                       \
+    return DefineMatcher(Param, llvm::Regex::NoFlags);                         \
+  }                                                                            \
+  typedef ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1<       \
+      internal::matcher_##DefineMatcher##OverloadId##Matcher,                  \
+      std::shared_ptr<llvm::Regex>, ReturnTypesF> (                            \
+      &DefineMatcher##_Type##OverloadId##Flags)(                               \
+      llvm::StringRef Param, llvm::Regex::RegexFlags RegexFlags);              \
+  typedef ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1<       \
+      internal::matcher_##DefineMatcher##OverloadId##Matcher,                  \
+      std::shared_ptr<llvm::Regex>, ReturnTypesF> (                            \
+      &DefineMatcher##_Type##OverloadId)(llvm::StringRef Param);               \
+  template <typename NodeType, typename ParamT>                                \
+  bool internal::                                                              \
+      matcher_##DefineMatcher##OverloadId##Matcher<NodeType, ParamT>::matches( \
+          const NodeType &Node,                                                \
+          ::clang::ast_matchers::internal::ASTMatchFinder *Finder,             \
+          ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder)     \
+          const
+
 #endif // LLVM_CLANG_ASTMATCHERS_ASTMATCHERSMACROS_H
index 9b69734..4b9baf7 100644 (file)
@@ -29,6 +29,8 @@
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/ManagedStatic.h"
+#include "llvm/Support/Regex.h"
+#include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
 #include <algorithm>
 #include <cassert>
@@ -682,6 +684,19 @@ getExpansionLocOfMacro(StringRef MacroName, SourceLocation Loc,
   return llvm::None;
 }
 
+std::shared_ptr<llvm::Regex> createAndVerifyRegex(StringRef Regex,
+                                                  llvm::Regex::RegexFlags Flags,
+                                                  StringRef MatcherID) {
+  assert(!Regex.empty() && "Empty regex string");
+  auto SharedRegex = std::make_shared<llvm::Regex>(Regex, Flags);
+  std::string Error;
+  if (!SharedRegex->isValid(Error)) {
+    llvm::WithColor::error()
+        << "building matcher '" << MatcherID << "': " << Error << "\n";
+    llvm::WithColor::note() << " input was '" << Regex << "'\n";
+  }
+  return SharedRegex;
+}
 } // end namespace internal
 
 const internal::VariadicDynCastAllOfMatcher<Stmt, ObjCAutoreleasePoolStmt>
index 78b6ca1..989ee0f 100644 (file)
@@ -10,6 +10,7 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Regex.h"
 #include <string>
 
 static llvm::Optional<std::string>
@@ -110,3 +111,62 @@ clang::ast_matchers::dynamic::internal::ArgTypeTraits<
                           "UETT_");
   return llvm::None;
 }
+
+static constexpr std::pair<llvm::StringRef, llvm::Regex::RegexFlags>
+    RegexMap[] = {
+        {"NoFlags", llvm::Regex::RegexFlags::NoFlags},
+        {"IgnoreCase", llvm::Regex::RegexFlags::IgnoreCase},
+        {"Newline", llvm::Regex::RegexFlags::Newline},
+        {"BasicRegex", llvm::Regex::RegexFlags::BasicRegex},
+};
+
+llvm::Optional<llvm::Regex::RegexFlags> getRegexFlag(llvm::StringRef Flag) {
+  for (const auto &StringFlag : RegexMap) {
+    if (Flag == StringFlag.first)
+      return StringFlag.second;
+  }
+  return llvm::None;
+}
+
+llvm::Optional<llvm::StringRef> getCloseRegexMatch(llvm::StringRef Flag) {
+  for (const auto &StringFlag : RegexMap) {
+    if (Flag.edit_distance(StringFlag.first) < 3)
+      return StringFlag.first;
+  }
+  return llvm::None;
+}
+
+llvm::Optional<llvm::Regex::RegexFlags>
+clang::ast_matchers::dynamic::internal::ArgTypeTraits<
+    llvm::Regex::RegexFlags>::getFlags(llvm::StringRef Flags) {
+  llvm::Optional<llvm::Regex::RegexFlags> Flag;
+  SmallVector<StringRef, 4> Split;
+  Flags.split(Split, '|', -1, false);
+  for (StringRef OrFlag : Split) {
+    if (llvm::Optional<llvm::Regex::RegexFlags> NextFlag =
+            getRegexFlag(OrFlag.trim()))
+      Flag = Flag.getValueOr(llvm::Regex::NoFlags) | *NextFlag;
+    else
+      return None;
+  }
+  return Flag;
+}
+
+llvm::Optional<std::string>
+clang::ast_matchers::dynamic::internal::ArgTypeTraits<
+    llvm::Regex::RegexFlags>::getBestGuess(const VariantValue &Value) {
+  if (!Value.isString())
+    return llvm::None;
+  SmallVector<StringRef, 4> Split;
+  llvm::StringRef(Value.getString()).split(Split, '|', -1, false);
+  for (llvm::StringRef &Flag : Split) {
+    if (llvm::Optional<llvm::StringRef> BestGuess =
+            getCloseRegexMatch(Flag.trim()))
+      Flag = *BestGuess;
+    else
+      return None;
+  }
+  if (Split.empty())
+    return None;
+  return llvm::join(Split, " | ");
+}
index e07db59..33f6d1e 100644 (file)
@@ -35,6 +35,7 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Twine.h"
+#include "llvm/Support/Regex.h"
 #include <cassert>
 #include <cstddef>
 #include <iterator>
@@ -192,6 +193,24 @@ public:
   static llvm::Optional<std::string> getBestGuess(const VariantValue &Value);
 };
 
+template <> struct ArgTypeTraits<llvm::Regex::RegexFlags> {
+private:
+  static Optional<llvm::Regex::RegexFlags> getFlags(llvm::StringRef Flags);
+
+public:
+  static bool is(const VariantValue &Value) {
+    return Value.isString() && getFlags(Value.getString());
+  }
+
+  static llvm::Regex::RegexFlags get(const VariantValue &Value) {
+    return *getFlags(Value.getString());
+  }
+
+  static ArgKind getKind() { return ArgKind(ArgKind::AK_String); }
+
+  static llvm::Optional<std::string> getBestGuess(const VariantValue &Value);
+};
+
 template <> struct ArgTypeTraits<OpenMPClauseKind> {
 private:
   static Optional<OpenMPClauseKind> getClauseKind(llvm::StringRef ClauseKind) {
@@ -711,6 +730,71 @@ private:
   std::vector<std::unique_ptr<MatcherDescriptor>> Overloads;
 };
 
+template <typename ReturnType>
+class RegexMatcherDescriptor : public MatcherDescriptor {
+public:
+  RegexMatcherDescriptor(ReturnType (*WithFlags)(StringRef,
+                                                 llvm::Regex::RegexFlags),
+                         ReturnType (*NoFlags)(StringRef),
+                         ArrayRef<ASTNodeKind> RetKinds)
+      : WithFlags(WithFlags), NoFlags(NoFlags),
+        RetKinds(RetKinds.begin(), RetKinds.end()) {}
+  bool isVariadic() const override { return true; }
+  unsigned getNumArgs() const override { return 0; }
+
+  void getArgKinds(ASTNodeKind ThisKind, unsigned ArgNo,
+                   std::vector<ArgKind> &Kinds) const override {
+    assert(ArgNo < 2);
+    Kinds.push_back(ArgKind::AK_String);
+  }
+
+  bool isConvertibleTo(ASTNodeKind Kind, unsigned *Specificity,
+                       ASTNodeKind *LeastDerivedKind) const override {
+    return isRetKindConvertibleTo(RetKinds, Kind, Specificity,
+                                  LeastDerivedKind);
+  }
+
+  VariantMatcher create(SourceRange NameRange, ArrayRef<ParserValue> Args,
+                        Diagnostics *Error) const override {
+    if (Args.size() < 1 || Args.size() > 2) {
+      Error->addError(NameRange, Diagnostics::ET_RegistryWrongArgCount)
+          << "1 or 2" << Args.size();
+      return VariantMatcher();
+    }
+    if (!ArgTypeTraits<StringRef>::is(Args[0].Value)) {
+      Error->addError(Args[0].Range, Error->ET_RegistryWrongArgType)
+          << 1 << ArgTypeTraits<StringRef>::getKind().asString()
+          << Args[0].Value.getTypeAsString();
+      return VariantMatcher();
+    }
+    if (Args.size() == 1) {
+      return outvalueToVariantMatcher(
+          NoFlags(ArgTypeTraits<StringRef>::get(Args[0].Value)));
+    }
+    if (!ArgTypeTraits<llvm::Regex::RegexFlags>::is(Args[1].Value)) {
+      if (llvm::Optional<std::string> BestGuess =
+              ArgTypeTraits<llvm::Regex::RegexFlags>::getBestGuess(
+                  Args[1].Value)) {
+        Error->addError(Args[1].Range, Error->ET_RegistryUnknownEnumWithReplace)
+            << 2 << Args[1].Value.getString() << *BestGuess;
+      } else {
+        Error->addError(Args[1].Range, Error->ET_RegistryWrongArgType)
+            << 2 << ArgTypeTraits<llvm::Regex::RegexFlags>::getKind().asString()
+            << Args[1].Value.getTypeAsString();
+      }
+      return VariantMatcher();
+    }
+    return outvalueToVariantMatcher(
+        WithFlags(ArgTypeTraits<StringRef>::get(Args[0].Value),
+                  ArgTypeTraits<llvm::Regex::RegexFlags>::get(Args[1].Value)));
+  }
+
+private:
+  ReturnType (*const WithFlags)(StringRef, llvm::Regex::RegexFlags);
+  ReturnType (*const NoFlags)(StringRef);
+  const std::vector<ASTNodeKind> RetKinds;
+};
+
 /// Variadic operator marshaller function.
 class VariadicOperatorMatcherDescriptor : public MatcherDescriptor {
 public:
@@ -814,6 +898,16 @@ makeMatcherAutoMarshall(ReturnType (*Func)(ArgType1, ArgType2),
       reinterpret_cast<void (*)()>(Func), MatcherName, RetTypes, AKs);
 }
 
+template <typename ReturnType>
+std::unique_ptr<MatcherDescriptor> makeMatcherRegexMarshall(
+    ReturnType (*FuncFlags)(llvm::StringRef, llvm::Regex::RegexFlags),
+    ReturnType (*Func)(llvm::StringRef)) {
+  std::vector<ASTNodeKind> RetTypes;
+  BuildReturnTypeVector<ReturnType>::build(RetTypes);
+  return std::make_unique<RegexMatcherDescriptor<ReturnType>>(FuncFlags, Func,
+                                                              RetTypes);
+}
+
 /// Variadic overload.
 template <typename ResultT, typename ArgT,
           ResultT (*Func)(ArrayRef<const ArgT *>)>
index c7f151f..f01c68a 100644 (file)
@@ -90,6 +90,9 @@ void RegistryMaps::registerMatcher(
     REGISTER_MATCHER_OVERLOAD(name);                                           \
   } while (false)
 
+#define REGISTER_REGEX_MATCHER(name)                                           \
+  registerMatcher(#name, internal::makeMatcherRegexMarshall(name, name))
+
 /// Generate a registry map with all the known matchers.
 /// Please keep sorted alphabetically!
 RegistryMaps::RegistryMaps() {
@@ -121,6 +124,10 @@ RegistryMaps::RegistryMaps() {
   };
   REGISTER_MATCHER_OVERLOAD(equals);
 
+  REGISTER_REGEX_MATCHER(isExpansionInFileMatching);
+  REGISTER_REGEX_MATCHER(matchesName);
+  REGISTER_REGEX_MATCHER(matchesSelector);
+
   REGISTER_MATCHER(accessSpecDecl);
   REGISTER_MATCHER(addrLabelExpr);
   REGISTER_MATCHER(alignOfExpr);
@@ -374,7 +381,6 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(isEnum);
   REGISTER_MATCHER(isExceptionVariable);
   REGISTER_MATCHER(isExpandedFromMacro);
-  REGISTER_MATCHER(isExpansionInFileMatching);
   REGISTER_MATCHER(isExpansionInMainFile);
   REGISTER_MATCHER(isExpansionInSystemHeader);
   REGISTER_MATCHER(isExplicit);
@@ -429,8 +435,6 @@ RegistryMaps::RegistryMaps() {
   REGISTER_MATCHER(labelStmt);
   REGISTER_MATCHER(lambdaExpr);
   REGISTER_MATCHER(linkageSpecDecl);
-  REGISTER_MATCHER(matchesName);
-  REGISTER_MATCHER(matchesSelector);
   REGISTER_MATCHER(materializeTemporaryExpr);
   REGISTER_MATCHER(member);
   REGISTER_MATCHER(memberExpr);
index b9f95d8..3e1e2be 100644 (file)
@@ -87,6 +87,12 @@ TEST_P(ASTMatchersTest, MatchesNameRE_CXX) {
   EXPECT_TRUE(matches("namespace x { int kTest; }", StartsWithK));
   EXPECT_TRUE(matches("class C { int k; };", StartsWithK));
   EXPECT_TRUE(notMatches("class C { int ckc; };", StartsWithK));
+  EXPECT_TRUE(notMatches("int K;", StartsWithK));
+
+  DeclarationMatcher StartsWithKIgnoreCase =
+      namedDecl(matchesName(":k[^:]*$", llvm::Regex::IgnoreCase));
+  EXPECT_TRUE(matches("int k;", StartsWithKIgnoreCase));
+  EXPECT_TRUE(matches("int K;", StartsWithKIgnoreCase));
 }
 
 TEST_P(ASTMatchersTest, DeclarationMatcher_MatchClass) {
index f5e324a..3af5574 100644 (file)
@@ -259,6 +259,15 @@ TEST(ParserTest, FullParserTest) {
   EXPECT_TRUE(matches("unsigned X = sizeof(int);", MStmt));
   EXPECT_FALSE(matches("unsigned X = alignof(int);", MStmt));
 
+  Code =
+      R"query(namedDecl(matchesName("^::[ABC]*$", "IgnoreCase | BasicRegex")))query";
+  llvm::Optional<DynTypedMatcher> MatchesName(
+      Parser::parseMatcherExpression(Code, nullptr, nullptr, &Error));
+  EXPECT_EQ("", Error.toStringFull());
+  M = MatchesName->unconditionalConvertTo<Decl>();
+  EXPECT_TRUE(matches("unsigned AAACCBB;", M));
+  EXPECT_TRUE(matches("unsigned aaaccbb;", M));
+
   Code = "hasInitializer(\n    binaryOperator(hasLHS(\"A\")))";
   EXPECT_TRUE(!Parser::parseMatcherExpression(Code, &Error).hasValue());
   EXPECT_EQ("1:1: Error parsing argument 1 for matcher hasInitializer.\n"
@@ -348,6 +357,26 @@ TEST(ParserTest, Errors) {
             "1:14: Incorrect type for arg 1. (Expected = string) != (Actual = "
             "String)",
             ParseMatcherWithError(R"query(decl(hasAttr("unrelated")))query"));
+  EXPECT_EQ(
+      "1:1: Error parsing argument 1 for matcher namedDecl.\n"
+      "1:11: Error building matcher matchesName.\n"
+      "1:33: Unknown value 'Ignorecase' for arg 2; did you mean 'IgnoreCase'",
+      ParseMatcherWithError(
+          R"query(namedDecl(matchesName("[ABC]*", "Ignorecase")))query"));
+  EXPECT_EQ(
+      "1:1: Error parsing argument 1 for matcher namedDecl.\n"
+      "1:11: Error building matcher matchesName.\n"
+      "1:33: Incorrect type for arg 2. (Expected = string) != (Actual = "
+      "String)",
+      ParseMatcherWithError(
+          R"query(namedDecl(matchesName("[ABC]*", "IgnoreCase & BasicRegex")))query"));
+  EXPECT_EQ(
+      "1:1: Error parsing argument 1 for matcher namedDecl.\n"
+      "1:11: Error building matcher matchesName.\n"
+      "1:33: Unknown value 'IgnoreCase | Basicregex' for arg 2; did you mean "
+      "'IgnoreCase | BasicRegex'",
+      ParseMatcherWithError(
+          R"query(namedDecl(matchesName("[ABC]*", "IgnoreCase | Basicregex")))query"));
 }
 
 TEST(ParserTest, OverloadErrors) {
index b2620ab..ae4b951 100644 (file)
@@ -16,6 +16,7 @@
 #ifndef LLVM_SUPPORT_REGEX_H
 #define LLVM_SUPPORT_REGEX_H
 
+#include "llvm/ADT/BitmaskEnum.h"
 #include <string>
 
 struct llvm_regex;
@@ -26,20 +27,22 @@ namespace llvm {
 
   class Regex {
   public:
-    enum {
-      NoFlags=0,
+    enum RegexFlags : unsigned {
+      NoFlags = 0,
       /// Compile for matching that ignores upper/lower case distinctions.
-      IgnoreCase=1,
+      IgnoreCase = 1,
       /// Compile for newline-sensitive matching. With this flag '[^' bracket
       /// expressions and '.' never match newline. A ^ anchor matches the
       /// null string after any newline in the string in addition to its normal
       /// function, and the $ anchor matches the null string before any
       /// newline in the string in addition to its normal function.
-      Newline=2,
+      Newline = 2,
       /// By default, the POSIX extended regular expression (ERE) syntax is
       /// assumed. Pass this flag to turn on basic regular expressions (BRE)
       /// instead.
-      BasicRegex=4
+      BasicRegex = 4,
 
+      LLVM_MARK_AS_BITMASK_ENUM(BasicRegex) // NOTE(review): per llvm/ADT/BitmaskEnum.h the argument should name the largest flag; update it if a higher-valued flag is added
     };
 
     Regex();
@@ -47,7 +50,8 @@ namespace llvm {
     ///
     /// \param Regex - referenced string is no longer needed after this
     /// constructor does finish.  Only its compiled form is kept stored.
-    Regex(StringRef Regex, unsigned Flags = NoFlags);
+    Regex(StringRef Regex, RegexFlags Flags = NoFlags);
+    Regex(StringRef Regex, unsigned Flags);
     Regex(const Regex &) = delete;
     Regex &operator=(Regex regex) {
       std::swap(preg, regex.preg);
index f065ada..0d5cc1c 100644 (file)
@@ -26,7 +26,7 @@ using namespace llvm;
 
 Regex::Regex() : preg(nullptr), error(REG_BADPAT) {}
 
-Regex::Regex(StringRef regex, unsigned Flags) {
+Regex::Regex(StringRef regex, RegexFlags Flags) {
   unsigned flags = 0;
   preg = new llvm_regex();
   preg->re_endp = regex.end();
@@ -39,6 +39,9 @@ Regex::Regex(StringRef regex, unsigned Flags) {
   error = llvm_regcomp(preg, regex.data(), flags|REG_PEND);
 }
 
+Regex::Regex(StringRef regex, unsigned Flags) // Overload keeping the previous (StringRef, unsigned) signature available.
+    : Regex(regex, static_cast<RegexFlags>(Flags)) {} // Delegates to the RegexFlags constructor; the cast does not validate the bits.
+
 Regex::Regex(Regex &&regex) {
   preg = regex.preg;
   error = regex.error;