From f51a319cacd44819b4fb9fa9f005c2445bcee984 Mon Sep 17 00:00:00 2001 From: Nathan James Date: Thu, 2 Jul 2020 14:52:24 +0100 Subject: [PATCH] [ASTMatchers] Enhanced support for matchers taking Regex arguments Added new macros `AST(_POLYMORPHIC)_MATCHER_REGEX(_OVERLOAD)` that define matchers that take a regular expression string and, optionally, regular expression flags. This lets users match against nodes while ignoring case without having to manually use `[Aa]` or `[A-Fa-f]` in their regex. The other point this addresses is that, in the current state, matchers that use regular expressions have to compile them for each node they try to match on. Now the regular expression is compiled once when you define the matcher and used for every node that it tries to match against. If there is an error while compiling the regular expression, an error will be logged to stderr showing the bad regex string and the reason it couldn't be compiled. The old behaviour of this was down to the Matcher implementation and some would assert, whereas others just would never match. Support for this has been added to the documentation script as well. Support for this has been added to dynamic matchers ensuring functionality is the same between the 2 use cases. 
Reviewed By: aaron.ballman Differential Revision: https://reviews.llvm.org/D82706 --- clang/docs/LibASTMatchersReference.html | 30 +++++- clang/docs/tools/dump_ast_matchers.py | 47 ++++++++ clang/include/clang/ASTMatchers/ASTMatchers.h | 22 ++-- .../clang/ASTMatchers/ASTMatchersInternal.h | 7 +- .../include/clang/ASTMatchers/ASTMatchersMacros.h | 118 +++++++++++++++++++++ clang/lib/ASTMatchers/ASTMatchersInternal.cpp | 15 +++ clang/lib/ASTMatchers/Dynamic/Marshallers.cpp | 60 +++++++++++ clang/lib/ASTMatchers/Dynamic/Marshallers.h | 94 ++++++++++++++++ clang/lib/ASTMatchers/Dynamic/Registry.cpp | 10 +- .../unittests/ASTMatchers/ASTMatchersNodeTest.cpp | 6 ++ clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp | 29 +++++ llvm/include/llvm/Support/Regex.h | 16 +-- llvm/lib/Support/Regex.cpp | 5 +- 13 files changed, 430 insertions(+), 29 deletions(-) diff --git a/clang/docs/LibASTMatchersReference.html b/clang/docs/LibASTMatchersReference.html index ae90ba0..9c04322 100644 --- a/clang/docs/LibASTMatchersReference.html +++ b/clang/docs/LibASTMatchersReference.html @@ -3007,7 +3007,7 @@ passed as a quoted string. e.g., hasAttr("attr::CUDADevice"). -Matcher<Decl>isExpansionInFileMatchingstd::string RegExp +Matcher<Decl>isExpansionInFileMatchingStringRef RegExp, Regex::RegexFlags Flags = NoFlags
Matches AST nodes that were expanded within files whose name is
 partially matching a given regex.
 
@@ -3019,6 +3019,10 @@ ASTMatcher.h:
   class Y {};
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 
@@ -3725,7 +3729,7 @@ Example matches X (Name is one of "::a::b::X", "a::b::X", "b::X", "X") -Matcher<NamedDecl>matchesNamestd::string RegExp +Matcher<NamedDecl>matchesNameStringRef RegExp, Regex::RegexFlags Flags = NoFlags
Matches NamedDecl nodes whose fully qualified names contain
 a substring matched by the given RegExp.
 
@@ -3738,6 +3742,10 @@ Example matches X (regexp == "::X")
 
 Example matches X (regexp is one of "::X", "^foo::.*X", among others)
   namespace foo { namespace bar { class X; } }
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 
@@ -3932,12 +3940,16 @@ but not -Matcher<ObjCMessageExpr>matchesSelectorstd::string RegExp +Matcher<ObjCMessageExpr>matchesSelectorStringRef RegExp, Regex::RegexFlags Flags = NoFlags
Matches ObjC selectors whose name contains
 a substring matched by the given RegExp.
  matcher = objCMessageExpr(matchesSelector("loadHTMLString\:baseURL?"));
  matches the outer message expr in the code below, but NOT the message
  invocation for self.bodyView.
     [self.bodyView loadHTMLString:html baseURL:NULL];
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 
@@ -4228,7 +4240,7 @@ node. There's nothing `Stmt`-specific about it. -Matcher<Stmt>isExpansionInFileMatchingstd::string RegExp +Matcher<Stmt>isExpansionInFileMatchingStringRef RegExp, Regex::RegexFlags Flags = NoFlags
Matches AST nodes that were expanded within files whose name is
 partially matching a given regex.
 
@@ -4240,6 +4252,10 @@ ASTMatcher.h:
   class Y {};
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 
@@ -4410,7 +4426,7 @@ classTemplateSpecializationDecl(templateArgumentCountIs(1)) -Matcher<TypeLoc>isExpansionInFileMatchingstd::string RegExp +Matcher<TypeLoc>isExpansionInFileMatchingStringRef RegExp, Regex::RegexFlags Flags = NoFlags
Matches AST nodes that were expanded within files whose name is
 partially matching a given regex.
 
@@ -4422,6 +4438,10 @@ ASTMatcher.h:
   class Y {};
 
 Usable as: Matcher<Decl>, Matcher<Stmt>, Matcher<TypeLoc>
+
+If the matcher is used in clang-query, RegexFlags parameter
+should be passed as a quoted string. e.g: "NoFlags".
+Flags can be combined with '|' example "IgnoreCase | BasicRegex"
 
diff --git a/clang/docs/tools/dump_ast_matchers.py b/clang/docs/tools/dump_ast_matchers.py index baba4cf..045833b 100755 --- a/clang/docs/tools/dump_ast_matchers.py +++ b/clang/docs/tools/dump_ast_matchers.py @@ -230,6 +230,28 @@ def act_on_decl(declaration, comment, allowed_types): add_matcher(result_type, name, args, comment) return + m = re.match(r"""^\s*AST_POLYMORPHIC_MATCHER_REGEX(?:_OVERLOAD)?\( + \s*([^\s,]+)\s*, + \s*AST_POLYMORPHIC_SUPPORTED_TYPES\(([^)]*)\), + \s*([^\s,]+)\s* + (?:,\s*\d+\s*)? + \)\s*{\s*$""", declaration, flags=re.X) + + if m: + name, results, arg_name = m.groups()[0:3] + result_types = [r.strip() for r in results.split(',')] + if allowed_types and allowed_types != result_types: + raise Exception('Inconsistent documentation for: %s' % name) + arg = "StringRef %s, Regex::RegexFlags Flags = NoFlags" % arg_name + comment += """ +If the matcher is used in clang-query, RegexFlags parameter +should be passed as a quoted string. e.g: "NoFlags". +Flags can be combined with '|' example \"IgnoreCase | BasicRegex\" +""" + for result_type in result_types: + add_matcher(result_type, name, arg, comment) + return + m = re.match(r"""^\s*AST_MATCHER_FUNCTION(_P)?(.?)(?:_OVERLOAD)?\( (?:\s*([^\s,]+)\s*,)? \s*([^\s,]+)\s* @@ -275,6 +297,31 @@ def act_on_decl(declaration, comment, allowed_types): add_matcher(result_type, name, args, comment) return + m = re.match(r"""^\s*AST_MATCHER_REGEX(?:_OVERLOAD)?\( + \s*([^\s,]+)\s*, + \s*([^\s,]+)\s*, + \s*([^\s,]+)\s* + (?:,\s*\d+\s*)? + \)\s*{""", declaration, flags=re.X) + if m: + result, name, arg_name = m.groups()[0:3] + if not result: + if not allowed_types: + raise Exception('Did not find allowed result types for: %s' % name) + result_types = allowed_types + else: + result_types = [result] + arg = "StringRef %s, Regex::RegexFlags Flags = NoFlags" % arg_name + comment += """ +If the matcher is used in clang-query, RegexFlags parameter +should be passed as a quoted string. e.g: "NoFlags". 
+Flags can be combined with '|' example \"IgnoreCase | BasicRegex\" +""" + + for result_type in result_types: + add_matcher(result_type, name, arg, comment) + return + # Parse ArgumentAdapting matchers. m = re.match( r"""^.*ArgumentAdaptingMatcherFunc<.*>\s* diff --git a/clang/include/clang/ASTMatchers/ASTMatchers.h b/clang/include/clang/ASTMatchers/ASTMatchers.h index 4d5e423..153b517 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchers.h +++ b/clang/include/clang/ASTMatchers/ASTMatchers.h @@ -283,9 +283,10 @@ AST_POLYMORPHIC_MATCHER(isExpansionInSystemHeader, /// \endcode /// /// Usable as: Matcher, Matcher, Matcher -AST_POLYMORPHIC_MATCHER_P(isExpansionInFileMatching, - AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Stmt, TypeLoc), - std::string, RegExp) { +AST_POLYMORPHIC_MATCHER_REGEX(isExpansionInFileMatching, + AST_POLYMORPHIC_SUPPORTED_TYPES(Decl, Stmt, + TypeLoc), + RegExp) { auto &SourceManager = Finder->getASTContext().getSourceManager(); auto ExpansionLoc = SourceManager.getExpansionLoc(Node.getBeginLoc()); if (ExpansionLoc.isInvalid()) { @@ -298,8 +299,7 @@ AST_POLYMORPHIC_MATCHER_P(isExpansionInFileMatching, } auto Filename = FileEntry->getName(); - llvm::Regex RE(RegExp); - return RE.match(Filename); + return RegExp->match(Filename); } /// Matches statements that are (transitively) expanded from the named macro. @@ -2748,11 +2748,9 @@ extern const internal::VariadicFunction, StringRef, /// \code /// namespace foo { namespace bar { class X; } } /// \endcode -AST_MATCHER_P(NamedDecl, matchesName, std::string, RegExp) { - assert(!RegExp.empty()); +AST_MATCHER_REGEX(NamedDecl, matchesName, RegExp) { std::string FullNameString = "::" + Node.getQualifiedNameAsString(); - llvm::Regex RE(RegExp); - return RE.match(FullNameString); + return RegExp->match(FullNameString); } /// Matches overloaded operator names. 
@@ -3373,11 +3371,9 @@ extern const internal::VariadicFunction, /// \code /// [self.bodyView loadHTMLString:html baseURL:NULL]; /// \endcode -AST_MATCHER_P(ObjCMessageExpr, matchesSelector, std::string, RegExp) { - assert(!RegExp.empty()); +AST_MATCHER_REGEX(ObjCMessageExpr, matchesSelector, RegExp) { std::string SelectorString = Node.getSelector().getAsString(); - llvm::Regex RE(RegExp); - return RE.match(SelectorString); + return RegExp->match(SelectorString); } /// Matches when the selector is the empty selector diff --git a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h index fc41407..3992850 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchersInternal.h +++ b/clang/include/clang/ASTMatchers/ASTMatchersInternal.h @@ -40,7 +40,6 @@ #include "clang/AST/DeclFriend.h" #include "clang/AST/DeclTemplate.h" #include "clang/AST/Expr.h" -#include "clang/AST/ExprObjC.h" #include "clang/AST/ExprCXX.h" #include "clang/AST/ExprObjC.h" #include "clang/AST/NestedNameSpecifier.h" @@ -61,11 +60,13 @@ #include "llvm/ADT/iterator.h" #include "llvm/Support/Casting.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Regex.h" #include #include #include #include #include +#include #include #include #include @@ -1948,6 +1949,10 @@ bool matchesAnyBase(const CXXRecordDecl &Node, const Matcher &BaseSpecMatcher, ASTMatchFinder *Finder, BoundNodesTreeBuilder *Builder); +std::shared_ptr createAndVerifyRegex(StringRef Regex, + llvm::Regex::RegexFlags Flags, + StringRef MatcherID); + } // namespace internal } // namespace ast_matchers diff --git a/clang/include/clang/ASTMatchers/ASTMatchersMacros.h b/clang/include/clang/ASTMatchers/ASTMatchersMacros.h index 4977bf3..45e8b1a 100644 --- a/clang/include/clang/ASTMatchers/ASTMatchersMacros.h +++ b/clang/include/clang/ASTMatchers/ASTMatchersMacros.h @@ -438,4 +438,122 @@ ReturnTypesF>::Func MatcherName##Loc; \ AST_TYPE_TRAVERSE_MATCHER(MatcherName, FunctionName##Type, 
ReturnTypesF) +/// AST_MATCHER_REGEX(Type, DefineMatcher, Param) { ... } +/// defines a function named DefineMatcher() that takes a regular expression +/// string parameter and an optional RegexFlags parameter and returns a +/// Matcher<Type> object. +/// +/// The code between the curly braces has access to the following variables: +/// +/// Node: the AST node being matched; its type is Type. +/// Param: a pointer to an \ref llvm::Regex object +/// Finder: an ASTMatchFinder*. +/// Builder: a BoundNodesTreeBuilder*. +/// +/// The code should return true if 'Node' matches. +#define AST_MATCHER_REGEX(Type, DefineMatcher, Param) \ + AST_MATCHER_REGEX_OVERLOAD(Type, DefineMatcher, Param, 0) + +#define AST_MATCHER_REGEX_OVERLOAD(Type, DefineMatcher, Param, OverloadId) \ + namespace internal { \ + class matcher_##DefineMatcher##OverloadId##Matcher \ + : public ::clang::ast_matchers::internal::MatcherInterface { \ + public: \ + explicit matcher_##DefineMatcher##OverloadId##Matcher( \ + std::shared_ptr RE) \ + : Param(std::move(RE)) {} \ + bool matches(const Type &Node, \ + ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \ + ::clang::ast_matchers::internal::BoundNodesTreeBuilder \ + *Builder) const override; \ + \ + private: \ + std::shared_ptr const Param; \ + }; \ + } \ + inline ::clang::ast_matchers::internal::Matcher DefineMatcher( \ + llvm::StringRef Param, llvm::Regex::RegexFlags RegexFlags) { \ + return ::clang::ast_matchers::internal::makeMatcher( \ + new internal::matcher_##DefineMatcher##OverloadId##Matcher( \ + ::clang::ast_matchers::internal::createAndVerifyRegex( \ + Param, RegexFlags, #DefineMatcher))); \ + } \ + inline ::clang::ast_matchers::internal::Matcher DefineMatcher( \ + llvm::StringRef Param) { \ + return DefineMatcher(Param, llvm::Regex::NoFlags); \ + } \ + \ + typedef ::clang::ast_matchers::internal::Matcher ( \ + &DefineMatcher##_Type##OverloadId##Flags)(llvm::StringRef, \ + llvm::Regex::RegexFlags); \ + typedef 
::clang::ast_matchers::internal::Matcher ( \ + &DefineMatcher##_Type##OverloadId)(llvm::StringRef); \ + inline bool internal::matcher_##DefineMatcher##OverloadId##Matcher::matches( \ + const Type &Node, \ + ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \ + ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) const + +/// AST_POLYMORPHIC_MATCHER_REGEX(DefineMatcher, ReturnTypesF, Param) { ... } +/// defines a function named DefineMatcher() that takes a regular expression +/// string parameter and an optional RegexFlags parameter that is polymorphic in +/// the return type. +/// +/// The variables are the same as for +/// AST_MATCHER_REGEX, with the addition of NodeType, which specifies the node +/// type of the matcher Matcher<NodeType> returned by the function matcher(). +#define AST_POLYMORPHIC_MATCHER_REGEX(DefineMatcher, ReturnTypesF, Param) \ + AST_POLYMORPHIC_MATCHER_REGEX_OVERLOAD(DefineMatcher, ReturnTypesF, Param, 0) + +#define AST_POLYMORPHIC_MATCHER_REGEX_OVERLOAD(DefineMatcher, ReturnTypesF, \ + Param, OverloadId) \ + namespace internal { \ + template \ + class matcher_##DefineMatcher##OverloadId##Matcher \ + : public ::clang::ast_matchers::internal::MatcherInterface { \ + public: \ + explicit matcher_##DefineMatcher##OverloadId##Matcher( \ + std::shared_ptr RE) \ + : Param(std::move(RE)) {} \ + bool matches(const NodeType &Node, \ + ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \ + ::clang::ast_matchers::internal::BoundNodesTreeBuilder \ + *Builder) const override; \ + \ + private: \ + std::shared_ptr const Param; \ + }; \ + } \ + inline ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1< \ + internal::matcher_##DefineMatcher##OverloadId##Matcher, \ + std::shared_ptr, ReturnTypesF> \ + DefineMatcher(llvm::StringRef Param, llvm::Regex::RegexFlags RegexFlags) { \ + return ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1< \ + internal::matcher_##DefineMatcher##OverloadId##Matcher, \ + std::shared_ptr, 
ReturnTypesF>( \ + ::clang::ast_matchers::internal::createAndVerifyRegex( \ + Param, RegexFlags, #DefineMatcher)); \ + } \ + inline ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1< \ + internal::matcher_##DefineMatcher##OverloadId##Matcher, \ + std::shared_ptr, ReturnTypesF> \ + DefineMatcher(llvm::StringRef Param) { \ + return DefineMatcher(Param, llvm::Regex::NoFlags); \ + } \ + typedef ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1< \ + internal::matcher_##DefineMatcher##OverloadId##Matcher, \ + std::shared_ptr, ReturnTypesF> ( \ + &DefineMatcher##_Type##OverloadId##Flags)( \ + llvm::StringRef Param, llvm::Regex::RegexFlags RegexFlags); \ + typedef ::clang::ast_matchers::internal::PolymorphicMatcherWithParam1< \ + internal::matcher_##DefineMatcher##OverloadId##Matcher, \ + std::shared_ptr, ReturnTypesF> ( \ + &DefineMatcher##_Type##OverloadId)(llvm::StringRef Param); \ + template \ + bool internal:: \ + matcher_##DefineMatcher##OverloadId##Matcher::matches( \ + const NodeType &Node, \ + ::clang::ast_matchers::internal::ASTMatchFinder *Finder, \ + ::clang::ast_matchers::internal::BoundNodesTreeBuilder *Builder) \ + const + #endif // LLVM_CLANG_ASTMATCHERS_ASTMATCHERSMACROS_H diff --git a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp index 9b69734..4b9baf7 100644 --- a/clang/lib/ASTMatchers/ASTMatchersInternal.cpp +++ b/clang/lib/ASTMatchers/ASTMatchersInternal.cpp @@ -29,6 +29,8 @@ #include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Regex.h" +#include "llvm/Support/WithColor.h" #include "llvm/Support/raw_ostream.h" #include #include @@ -682,6 +684,19 @@ getExpansionLocOfMacro(StringRef MacroName, SourceLocation Loc, return llvm::None; } +std::shared_ptr createAndVerifyRegex(StringRef Regex, + llvm::Regex::RegexFlags Flags, + StringRef MatcherID) { + assert(!Regex.empty() && "Empty regex string"); + auto 
SharedRegex = std::make_shared(Regex, Flags); + std::string Error; + if (!SharedRegex->isValid(Error)) { + llvm::WithColor::error() + << "building matcher '" << MatcherID << "': " << Error << "\n"; + llvm::WithColor::note() << " input was '" << Regex << "'\n"; + } + return SharedRegex; +} } // end namespace internal const internal::VariadicDynCastAllOfMatcher diff --git a/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp b/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp index 78b6ca1..989ee0f 100644 --- a/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp @@ -10,6 +10,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Regex.h" #include static llvm::Optional @@ -110,3 +111,62 @@ clang::ast_matchers::dynamic::internal::ArgTypeTraits< "UETT_"); return llvm::None; } + +static constexpr std::pair + RegexMap[] = { + {"NoFlags", llvm::Regex::RegexFlags::NoFlags}, + {"IgnoreCase", llvm::Regex::RegexFlags::IgnoreCase}, + {"Newline", llvm::Regex::RegexFlags::Newline}, + {"BasicRegex", llvm::Regex::RegexFlags::BasicRegex}, +}; + +llvm::Optional getRegexFlag(llvm::StringRef Flag) { + for (const auto &StringFlag : RegexMap) { + if (Flag == StringFlag.first) + return StringFlag.second; + } + return llvm::None; +} + +llvm::Optional getCloseRegexMatch(llvm::StringRef Flag) { + for (const auto &StringFlag : RegexMap) { + if (Flag.edit_distance(StringFlag.first) < 3) + return StringFlag.first; + } + return llvm::None; +} + +llvm::Optional +clang::ast_matchers::dynamic::internal::ArgTypeTraits< + llvm::Regex::RegexFlags>::getFlags(llvm::StringRef Flags) { + llvm::Optional Flag; + SmallVector Split; + Flags.split(Split, '|', -1, false); + for (StringRef OrFlag : Split) { + if (llvm::Optional NextFlag = + getRegexFlag(OrFlag.trim())) + Flag = Flag.getValueOr(llvm::Regex::NoFlags) | *NextFlag; + else + return None; + } + return Flag; +} + +llvm::Optional 
+clang::ast_matchers::dynamic::internal::ArgTypeTraits< + llvm::Regex::RegexFlags>::getBestGuess(const VariantValue &Value) { + if (!Value.isString()) + return llvm::None; + SmallVector Split; + llvm::StringRef(Value.getString()).split(Split, '|', -1, false); + for (llvm::StringRef &Flag : Split) { + if (llvm::Optional BestGuess = + getCloseRegexMatch(Flag.trim())) + Flag = *BestGuess; + else + return None; + } + if (Split.empty()) + return None; + return llvm::join(Split, " | "); +} diff --git a/clang/lib/ASTMatchers/Dynamic/Marshallers.h b/clang/lib/ASTMatchers/Dynamic/Marshallers.h index e07db59..33f6d1e 100644 --- a/clang/lib/ASTMatchers/Dynamic/Marshallers.h +++ b/clang/lib/ASTMatchers/Dynamic/Marshallers.h @@ -35,6 +35,7 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSwitch.h" #include "llvm/ADT/Twine.h" +#include "llvm/Support/Regex.h" #include #include #include @@ -192,6 +193,24 @@ public: static llvm::Optional getBestGuess(const VariantValue &Value); }; +template <> struct ArgTypeTraits { +private: + static Optional getFlags(llvm::StringRef Flags); + +public: + static bool is(const VariantValue &Value) { + return Value.isString() && getFlags(Value.getString()); + } + + static llvm::Regex::RegexFlags get(const VariantValue &Value) { + return *getFlags(Value.getString()); + } + + static ArgKind getKind() { return ArgKind(ArgKind::AK_String); } + + static llvm::Optional getBestGuess(const VariantValue &Value); +}; + template <> struct ArgTypeTraits { private: static Optional getClauseKind(llvm::StringRef ClauseKind) { @@ -711,6 +730,71 @@ private: std::vector> Overloads; }; +template +class RegexMatcherDescriptor : public MatcherDescriptor { +public: + RegexMatcherDescriptor(ReturnType (*WithFlags)(StringRef, + llvm::Regex::RegexFlags), + ReturnType (*NoFlags)(StringRef), + ArrayRef RetKinds) + : WithFlags(WithFlags), NoFlags(NoFlags), + RetKinds(RetKinds.begin(), RetKinds.end()) {} + bool isVariadic() const override { return true; } + unsigned 
getNumArgs() const override { return 0; } + + void getArgKinds(ASTNodeKind ThisKind, unsigned ArgNo, + std::vector &Kinds) const override { + assert(ArgNo < 2); + Kinds.push_back(ArgKind::AK_String); + } + + bool isConvertibleTo(ASTNodeKind Kind, unsigned *Specificity, + ASTNodeKind *LeastDerivedKind) const override { + return isRetKindConvertibleTo(RetKinds, Kind, Specificity, + LeastDerivedKind); + } + + VariantMatcher create(SourceRange NameRange, ArrayRef Args, + Diagnostics *Error) const override { + if (Args.size() < 1 || Args.size() > 2) { + Error->addError(NameRange, Diagnostics::ET_RegistryWrongArgCount) + << "1 or 2" << Args.size(); + return VariantMatcher(); + } + if (!ArgTypeTraits::is(Args[0].Value)) { + Error->addError(Args[0].Range, Error->ET_RegistryWrongArgType) + << 1 << ArgTypeTraits::getKind().asString() + << Args[0].Value.getTypeAsString(); + return VariantMatcher(); + } + if (Args.size() == 1) { + return outvalueToVariantMatcher( + NoFlags(ArgTypeTraits::get(Args[0].Value))); + } + if (!ArgTypeTraits::is(Args[1].Value)) { + if (llvm::Optional BestGuess = + ArgTypeTraits::getBestGuess( + Args[1].Value)) { + Error->addError(Args[1].Range, Error->ET_RegistryUnknownEnumWithReplace) + << 2 << Args[1].Value.getString() << *BestGuess; + } else { + Error->addError(Args[1].Range, Error->ET_RegistryWrongArgType) + << 2 << ArgTypeTraits::getKind().asString() + << Args[1].Value.getTypeAsString(); + } + return VariantMatcher(); + } + return outvalueToVariantMatcher( + WithFlags(ArgTypeTraits::get(Args[0].Value), + ArgTypeTraits::get(Args[1].Value))); + } + +private: + ReturnType (*const WithFlags)(StringRef, llvm::Regex::RegexFlags); + ReturnType (*const NoFlags)(StringRef); + const std::vector RetKinds; +}; + /// Variadic operator marshaller function. 
class VariadicOperatorMatcherDescriptor : public MatcherDescriptor { public: @@ -814,6 +898,16 @@ makeMatcherAutoMarshall(ReturnType (*Func)(ArgType1, ArgType2), reinterpret_cast(Func), MatcherName, RetTypes, AKs); } +template +std::unique_ptr makeMatcherRegexMarshall( + ReturnType (*FuncFlags)(llvm::StringRef, llvm::Regex::RegexFlags), + ReturnType (*Func)(llvm::StringRef)) { + std::vector RetTypes; + BuildReturnTypeVector::build(RetTypes); + return std::make_unique>(FuncFlags, Func, + RetTypes); +} + /// Variadic overload. template )> diff --git a/clang/lib/ASTMatchers/Dynamic/Registry.cpp b/clang/lib/ASTMatchers/Dynamic/Registry.cpp index c7f151f..f01c68a 100644 --- a/clang/lib/ASTMatchers/Dynamic/Registry.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Registry.cpp @@ -90,6 +90,9 @@ void RegistryMaps::registerMatcher( REGISTER_MATCHER_OVERLOAD(name); \ } while (false) +#define REGISTER_REGEX_MATCHER(name) \ + registerMatcher(#name, internal::makeMatcherRegexMarshall(name, name)) + /// Generate a registry map with all the known matchers. /// Please keep sorted alphabetically! 
RegistryMaps::RegistryMaps() { @@ -121,6 +124,10 @@ RegistryMaps::RegistryMaps() { }; REGISTER_MATCHER_OVERLOAD(equals); + REGISTER_REGEX_MATCHER(isExpansionInFileMatching); + REGISTER_REGEX_MATCHER(matchesName); + REGISTER_REGEX_MATCHER(matchesSelector); + REGISTER_MATCHER(accessSpecDecl); REGISTER_MATCHER(addrLabelExpr); REGISTER_MATCHER(alignOfExpr); @@ -374,7 +381,6 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(isEnum); REGISTER_MATCHER(isExceptionVariable); REGISTER_MATCHER(isExpandedFromMacro); - REGISTER_MATCHER(isExpansionInFileMatching); REGISTER_MATCHER(isExpansionInMainFile); REGISTER_MATCHER(isExpansionInSystemHeader); REGISTER_MATCHER(isExplicit); @@ -429,8 +435,6 @@ RegistryMaps::RegistryMaps() { REGISTER_MATCHER(labelStmt); REGISTER_MATCHER(lambdaExpr); REGISTER_MATCHER(linkageSpecDecl); - REGISTER_MATCHER(matchesName); - REGISTER_MATCHER(matchesSelector); REGISTER_MATCHER(materializeTemporaryExpr); REGISTER_MATCHER(member); REGISTER_MATCHER(memberExpr); diff --git a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp index b9f95d8..3e1e2be 100644 --- a/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp +++ b/clang/unittests/ASTMatchers/ASTMatchersNodeTest.cpp @@ -87,6 +87,12 @@ TEST_P(ASTMatchersTest, MatchesNameRE_CXX) { EXPECT_TRUE(matches("namespace x { int kTest; }", StartsWithK)); EXPECT_TRUE(matches("class C { int k; };", StartsWithK)); EXPECT_TRUE(notMatches("class C { int ckc; };", StartsWithK)); + EXPECT_TRUE(notMatches("int K;", StartsWithK)); + + DeclarationMatcher StartsWithKIgnoreCase = + namedDecl(matchesName(":k[^:]*$", llvm::Regex::IgnoreCase)); + EXPECT_TRUE(matches("int k;", StartsWithKIgnoreCase)); + EXPECT_TRUE(matches("int K;", StartsWithKIgnoreCase)); } TEST_P(ASTMatchersTest, DeclarationMatcher_MatchClass) { diff --git a/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp b/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp index f5e324a..3af5574 100644 --- 
a/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp +++ b/clang/unittests/ASTMatchers/Dynamic/ParserTest.cpp @@ -259,6 +259,15 @@ TEST(ParserTest, FullParserTest) { EXPECT_TRUE(matches("unsigned X = sizeof(int);", MStmt)); EXPECT_FALSE(matches("unsigned X = alignof(int);", MStmt)); + Code = + R"query(namedDecl(matchesName("^::[ABC]*$", "IgnoreCase | BasicRegex")))query"; + llvm::Optional MatchesName( + Parser::parseMatcherExpression(Code, nullptr, nullptr, &Error)); + EXPECT_EQ("", Error.toStringFull()); + M = MatchesName->unconditionalConvertTo(); + EXPECT_TRUE(matches("unsigned AAACCBB;", M)); + EXPECT_TRUE(matches("unsigned aaaccbb;", M)); + Code = "hasInitializer(\n binaryOperator(hasLHS(\"A\")))"; EXPECT_TRUE(!Parser::parseMatcherExpression(Code, &Error).hasValue()); EXPECT_EQ("1:1: Error parsing argument 1 for matcher hasInitializer.\n" @@ -348,6 +357,26 @@ TEST(ParserTest, Errors) { "1:14: Incorrect type for arg 1. (Expected = string) != (Actual = " "String)", ParseMatcherWithError(R"query(decl(hasAttr("unrelated")))query")); + EXPECT_EQ( + "1:1: Error parsing argument 1 for matcher namedDecl.\n" + "1:11: Error building matcher matchesName.\n" + "1:33: Unknown value 'Ignorecase' for arg 2; did you mean 'IgnoreCase'", + ParseMatcherWithError( + R"query(namedDecl(matchesName("[ABC]*", "Ignorecase")))query")); + EXPECT_EQ( + "1:1: Error parsing argument 1 for matcher namedDecl.\n" + "1:11: Error building matcher matchesName.\n" + "1:33: Incorrect type for arg 2. 
(Expected = string) != (Actual = " + "String)", + ParseMatcherWithError( + R"query(namedDecl(matchesName("[ABC]*", "IgnoreCase & BasicRegex")))query")); + EXPECT_EQ( + "1:1: Error parsing argument 1 for matcher namedDecl.\n" + "1:11: Error building matcher matchesName.\n" + "1:33: Unknown value 'IgnoreCase | Basicregex' for arg 2; did you mean " + "'IgnoreCase | BasicRegex'", + ParseMatcherWithError( + R"query(namedDecl(matchesName("[ABC]*", "IgnoreCase | Basicregex")))query")); } TEST(ParserTest, OverloadErrors) { diff --git a/llvm/include/llvm/Support/Regex.h b/llvm/include/llvm/Support/Regex.h index b2620ab..ae4b951 100644 --- a/llvm/include/llvm/Support/Regex.h +++ b/llvm/include/llvm/Support/Regex.h @@ -16,6 +16,7 @@ #ifndef LLVM_SUPPORT_REGEX_H #define LLVM_SUPPORT_REGEX_H +#include "llvm/ADT/BitmaskEnum.h" #include struct llvm_regex; @@ -26,20 +27,22 @@ namespace llvm { class Regex { public: - enum { - NoFlags=0, + enum RegexFlags : unsigned { + NoFlags = 0, /// Compile for matching that ignores upper/lower case distinctions. - IgnoreCase=1, + IgnoreCase = 1, /// Compile for newline-sensitive matching. With this flag '[^' bracket /// expressions and '.' never match newline. A ^ anchor matches the /// null string after any newline in the string in addition to its normal /// function, and the $ anchor matches the null string before any /// newline in the string in addition to its normal function. - Newline=2, + Newline = 2, /// By default, the POSIX extended regular expression (ERE) syntax is /// assumed. Pass this flag to turn on basic regular expressions (BRE) /// instead. - BasicRegex=4 + BasicRegex = 4, + + LLVM_MARK_AS_BITMASK_ENUM(BasicRegex) }; Regex(); @@ -47,7 +50,8 @@ namespace llvm { /// /// \param Regex - referenced string is no longer needed after this /// constructor does finish. Only its compiled form is kept stored. 
- Regex(StringRef Regex, unsigned Flags = NoFlags); + Regex(StringRef Regex, RegexFlags Flags = NoFlags); + Regex(StringRef Regex, unsigned Flags); Regex(const Regex &) = delete; Regex &operator=(Regex regex) { std::swap(preg, regex.preg); diff --git a/llvm/lib/Support/Regex.cpp b/llvm/lib/Support/Regex.cpp index f065ada..0d5cc1c 100644 --- a/llvm/lib/Support/Regex.cpp +++ b/llvm/lib/Support/Regex.cpp @@ -26,7 +26,7 @@ using namespace llvm; Regex::Regex() : preg(nullptr), error(REG_BADPAT) {} -Regex::Regex(StringRef regex, unsigned Flags) { +Regex::Regex(StringRef regex, RegexFlags Flags) { unsigned flags = 0; preg = new llvm_regex(); preg->re_endp = regex.end(); @@ -39,6 +39,9 @@ Regex::Regex(StringRef regex, unsigned Flags) { error = llvm_regcomp(preg, regex.data(), flags|REG_PEND); } +Regex::Regex(StringRef regex, unsigned Flags) + : Regex(regex, static_cast(Flags)) {} + Regex::Regex(Regex &®ex) { preg = regex.preg; error = regex.error; -- 2.7.4