1 /****************************************************************************
3 ** Copyright (C) 2012 Giuseppe D'Angelo <dangelog@gmail.com>.
4 ** Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies).
5 ** Contact: http://www.qt-project.org/
7 ** This file is part of the QtCore module of the Qt Toolkit.
9 ** $QT_BEGIN_LICENSE:LGPL$
10 ** GNU Lesser General Public License Usage
11 ** This file may be used under the terms of the GNU Lesser General Public
12 ** License version 2.1 as published by the Free Software Foundation and
13 ** appearing in the file LICENSE.LGPL included in the packaging of this
14 ** file. Please review the following information to ensure the GNU Lesser
15 ** General Public License version 2.1 requirements will be met:
16 ** http://www.gnu.org/licenses/old-licenses/lgpl-2.1.html.
18 ** In addition, as a special exception, Nokia gives you certain additional
19 ** rights. These rights are described in the Nokia Qt LGPL Exception
20 ** version 1.1, included in the file LGPL_EXCEPTION.txt in this package.
22 ** GNU General Public License Usage
23 ** Alternatively, this file may be used under the terms of the GNU General
24 ** Public License version 3.0 as published by the Free Software Foundation
25 ** and appearing in the file LICENSE.GPL included in the packaging of this
26 ** file. Please review the following information to ensure the GNU General
27 ** Public License version 3.0 requirements will be met:
28 ** http://www.gnu.org/copyleft/gpl.html.
31 ** Alternatively, this file may be used in accordance with the terms and
32 ** conditions contained in a signed written agreement between you and Nokia.
41 ****************************************************************************/
43 #include "qregularexpression.h"
45 #include <QtCore/qcoreapplication.h>
46 #include <QtCore/qmutex.h>
47 #include <QtCore/qvector.h>
48 #include <QtCore/qstringlist.h>
49 #include <QtCore/qdebug.h>
50 #include <QtCore/qthreadstorage.h>
51 #include <QtCore/qglobal.h>
58 \class QRegularExpression
61 \brief The QRegularExpression class provides pattern matching using regular
69 \keyword regular expression
71 Regular expressions, or \e{regexps}, are a very powerful tool to handle
72 strings and texts. This is useful in many contexts, e.g.,
76 \li A regexp can test whether a substring meets some criteria,
77 e.g. is an integer or contains no whitespace.
79 \li A regexp provides more powerful pattern matching than
80 simple substring matching, e.g., match one of the words
81 \e{mail}, \e{letter} or \e{correspondence}, but none of the
82 words \e{email}, \e{mailman}, \e{mailer}, \e{letterbox}, etc.
83 \row \li Search and Replace
84 \li A regexp can replace all occurrences of a substring with a
85 different substring, e.g., replace all occurrences of \e{&}
86 with \e{\&amp;} except where the \e{&} is already followed by
88 \row \li String Splitting
89 \li A regexp can be used to identify where a string should be
90 split apart, e.g. splitting tab-delimited strings.
93 This document is by no means a complete reference to pattern matching using
94 regular expressions, and the following parts will require the reader to
95 have some basic knowledge about Perl-like regular expressions and their
98 Good references about regular expressions include:
101 \li \e {Mastering Regular Expressions} (Third Edition) by Jeffrey E. F.
102 Friedl, ISBN 0-596-52812-4;
103 \li the \l{http://pcre.org/pcre.txt} {pcrepattern(3)} man page, describing
104 the pattern syntax supported by PCRE (the reference implementation of
105 Perl-compatible regular expressions);
106 \li the \l{http://perldoc.perl.org/perlre.html} {Perl's regular expression
107 documentation} and the \l{http://perldoc.perl.org/perlretut.html} {Perl's
108 regular expression tutorial}.
113 \section1 Introduction
115 QRegularExpression implements Perl-compatible regular expressions. It fully
116 supports Unicode. For an overview of the regular expression syntax
117 supported by QRegularExpression, please refer to the aforementioned
118 pcrepattern(3) man page. A regular expression is made up of two things: a
119 \b{pattern string} and a set of \b{pattern options} that change the
120 meaning of the pattern string.
122 You can set the pattern string by passing a string to the QRegularExpression
125 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 0
127 This sets the pattern string to \c{a pattern}. You can also use the
128 setPattern() function to set a pattern on an existing QRegularExpression
131 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 1
133 Note that due to C++ literal strings rules, you must escape all backslashes
134 inside the pattern string with another backslash:
136 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 2
138 The pattern() function returns the pattern that is currently set for a
139 QRegularExpression object:
141 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 3
143 \section1 Pattern options
145 The meaning of the pattern string can be modified by setting one or more
146 \e{pattern options}. For instance, it is possible to set a pattern to match
147 case insensitively by setting the QRegularExpression::CaseInsensitiveOption.
149 You can set the options by passing them to the QRegularExpression
152 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 4
154 Alternatively, you can use the setPatternOptions() function on an existing
155 QRegularExpression object:
157 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 5
159 It is possible to get the pattern options currently set on a
160 QRegularExpression object by using the patternOptions() function:
162 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 6
164 Please refer to the QRegularExpression::PatternOption enum documentation for
165 more information about each pattern option.
167 \section1 Match type and match options
169 The last two arguments of the match() and the globalMatch() functions set
170 the match type and the match options. The match type is a value of the
171 QRegularExpression::MatchType enum; the "traditional" matching algorithm is
172 chosen by using the NormalMatch match type (the default). It is also
173 possible to enable partial matching of the regular expression against a
174 subject string: see the \l{partial matching} section for more details.
176 The match options are a set of one or more QRegularExpression::MatchOption
177 values. They change the way a specific match of a regular expression
178 against a subject string is done. Please refer to the
179 QRegularExpression::MatchOption enum documentation for more details.
181 \target normal matching
182 \section1 Normal matching
184 In order to perform a match you can simply invoke the match() function
185 passing a string to match against. We refer to this string as the
186 \e{subject string}. The result of the match() function is a
187 QRegularExpressionMatch object that can be used to inspect the results of
188 the match. For instance:
190 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 7
192 If a match is successful, the (implicit) capturing group number 0 can be
193 used to retrieve the substring matched by the entire pattern (see also the
194 section about \l{extracting captured substrings}):
196 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 8
198 It's also possible to start a match at an arbitrary offset inside the
199 subject string by passing the offset as an argument of the
200 match() function. In the following example \c{"12 abc"}
201 is not matched because the match is started at offset 1:
203 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 9
205 \target extracting captured substrings
206 \section2 Extracting captured substrings
208 The QRegularExpressionMatch object also contains information about the
209 substrings captured by the capturing groups in the pattern string. The
210 \l{QRegularExpressionMatch::}{captured()} function will return the string
211 captured by the n-th capturing group:
213 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 10
215 Capturing groups in the pattern are numbered starting from 1, and the
216 implicit capturing group 0 is used to capture the substring that matched
219 It's also possible to retrieve the starting and the ending offsets (inside
220 the subject string) of each captured substring, by using the
221 \l{QRegularExpressionMatch::}{capturedStart()} and the
222 \l{QRegularExpressionMatch::}{capturedEnd()} functions:
224 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 11
226 All of these functions have an overload taking a QString as a parameter
227 in order to extract \e{named} captured substrings. For instance:
229 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 12
231 \target global matching
232 \section1 Global matching
234 \e{Global matching} is useful to find all the occurrences of a given
235 regular expression inside a subject string. Suppose that we want to extract
236 all the words from a given string, where a word is a substring matching
239 QRegularExpression::globalMatch returns a QRegularExpressionMatchIterator,
240 which is a Java-like forward iterator that can be used to iterate over the
241 results. For instance:
243 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 13
245 Since it's a Java-like iterator, the QRegularExpressionMatchIterator will
246 point immediately before the first result. Every result is returned as a
247 QRegularExpressionMatch object. The
248 \l{QRegularExpressionMatchIterator::}{hasNext()} function will return true
249 if there's at least one more result, and
250 \l{QRegularExpressionMatchIterator::}{next()} will return the next result
251 and advance the iterator. Continuing from the previous example:
253 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 14
255 You can also use \l{QRegularExpressionMatchIterator::}{peekNext()} to get
256 the next result without advancing the iterator.
258 It is possible to pass a starting offset and one or more match options to
259 the globalMatch() function, exactly like normal matching with match().
261 \target partial matching
262 \section1 Partial matching
264 A \e{partial match} is obtained when the end of the subject string is
265 reached, but more characters are needed to successfully complete the match.
266 Note that a partial match is usually much less efficient than a normal
267 match because many optimizations of the matching algorithm cannot be
270 A partial match must be explicitly requested by specifying a match type of
271 PartialPreferCompleteMatch or PartialPreferFirstMatch when calling
272 QRegularExpression::match or QRegularExpression::globalMatch. If a partial
273 match is found, then calling the \l{QRegularExpressionMatch::}{hasMatch()}
274 function on the QRegularExpressionMatch object returned by match() will
275 return \c{false}, but \l{QRegularExpressionMatch::}{hasPartialMatch()} will return
278 When a partial match is found, no captured substrings are returned, and the
279 (implicit) capturing group 0 corresponding to the whole match captures the
280 partially matched substring of the subject string.
282 Note that asking for a partial match can still lead to a complete match, if
283 one is found; in this case, \l{QRegularExpressionMatch::}{hasMatch()} will
284 return \c{true} and \l{QRegularExpressionMatch::}{hasPartialMatch()}
285 \c{false}. It never happens that a QRegularExpressionMatch reports both a
286 partial and a complete match.
288 Partial matching is mainly useful in two scenarios: validating user input
289 in real time and incremental/multi-segment matching.
292 \section2 Validating user input
294 Suppose that we would like the user to input a date in a specific
295 format, for instance "MMM dd, yyyy". We can check the input validity with
298 \c{^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec) \d\d?, \d\d\d\d$}
300 (This pattern doesn't catch invalid days, but let's keep it for the
303 We would like to validate the input with this regular expression \e{while}
304 the user is typing it, so that we can report an error in the input as soon
305 as it is committed (for instance, the user typed the wrong key). In order
306 to do so we must distinguish three cases:
309 \li the input cannot possibly match the regular expression;
310 \li the input does match the regular expression;
311 \li the input does not match the regular expression right now,
312 but it will if more characters are added to it.
315 Note that these three cases represent exactly the possible states of a
316 QValidator (see the QValidator::State enum).
318 In particular, in the last case we want the regular expression engine to
319 report a partial match: we are successfully matching the pattern against
320 the subject string but the matching cannot continue because the end of the
321 subject is encountered. Notice, however, that the matching algorithm should
322 continue and try all possibilities, and in case a complete (non-partial)
323 match is found, then this one should be reported, and the input string
324 accepted as fully valid.
326 This behaviour is implemented by the PartialPreferCompleteMatch match type.
329 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 15
331 If matching the same regular expression against the subject string leads to
332 a complete match, it is reported as usual:
334 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 16
336 Another example with a different pattern, showing the behaviour of
337 preferring a complete match over a partial one:
339 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 17
341 In this case, the subpattern \c{abc\\w+X} partially matches the subject
342 string; however, the subpattern \c{def} matches the subject string
343 completely, and therefore a complete match is reported.
345 If multiple partial matches are found when matching (but no complete
346 match), then the QRegularExpressionMatch object will report the first one
347 that is found. For instance:
349 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 18
351 \section2 Incremental/multi-segment matching
353 Incremental matching is another use case of partial matching. Suppose that
354 we want to find the occurrences of a regular expression inside a large text
355 (that is, substrings matching the regular expression). In order to do so we
356 would like to "feed" the large text to the regular expression engine in
357 smaller chunks. The obvious problem is what happens if the substring that
358 matches the regular expression spans across two or more chunks.
360 In this case, the regular expression engine should report a partial match,
361 so that we can match again adding new data and (eventually) get a complete
362 match. This implies that the regular expression engine may assume that
363 there are other characters \e{beyond the end} of the subject string. This
364 is not to be taken literally -- the engine will never try to access
365 any character after the last one in the subject.
367 QRegularExpression implements this behaviour when using the
368 PartialPreferFirstMatch match type. This match type reports a partial match
369 as soon as it is found, and other match alternatives are not tried
370 (even if they could lead to a complete match). For instance:
372 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 19
374 This happens because when matching the first branch of the alternation
375 operator a partial match is found, and therefore matching stops, without
376 trying the second branch. Another example:
378 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 20
380 This shows what could seem a counterintuitive behaviour of quantifiers:
381 since \c{?} is greedy, then the engine tries first to continue the match
382 after having matched \c{"abc"}; but then the matching reaches the end of the
383 subject string, and therefore a partial match is reported. This is
384 even more surprising in the following example:
386 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 21
388 It's easy to understand this behaviour if we remember that the engine
389 expects the subject string to be only a substring of the whole text in which
390 we're looking for a match (that is, as we said before, the engine
391 assumes that there are other characters beyond the end of the subject
394 Since the \c{*} quantifier is greedy, then reporting a complete match could
395 be an error, because after the current subject \c{"abc"} there may be other
396 occurrences of \c{"abc"}. For instance, the complete text could have been
397 "abcabcX", and therefore the \e{right} match to report (in the complete
398 text) would have been \c{"abcabc"}; by matching only against the leading
399 \c{"abc"} we instead get a partial match.
401 \section1 Error handling
403 It is possible for a QRegularExpression object to be invalid because of
404 syntax errors in the pattern string. The isValid() function will return
405 true if the regular expression is valid, or false otherwise:
407 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 22
409 You can get more information about the specific error by calling the
410 errorString() function; moreover, the patternErrorOffset() function
411 will return the offset inside the pattern string
413 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 23
415 If a match is attempted with an invalid QRegularExpression, then the
416 returned QRegularExpressionMatch object will be invalid as well (that is,
417 its \l{QRegularExpressionMatch::}{isValid()} function will return false).
418 The same applies for attempting a global match.
420 \section1 Unsupported Perl-compatible regular expressions features
422 QRegularExpression does not support all the features available in
423 Perl-compatible regular expressions. The most notable one is the fact that
424 duplicated names for capturing groups are not supported, and using them can
425 lead to undefined behaviour.
427 This may change in a future version of Qt.
429 \section1 Notes for QRegExp users
431 The QRegularExpression class introduced in Qt 5 is a big improvement upon
432 QRegExp, in terms of APIs offered, supported pattern syntax and speed of
433 execution. The biggest difference is that QRegularExpression simply holds a
434 regular expression, and it's \e{not} modified when a match is requested.
435 Instead, a QRegularExpressionMatch object is returned, in order to check
436 the result of a match and extract the captured substring. The same applies
437 with global matching and QRegularExpressionMatchIterator.
439 Other differences are outlined below.
441 \section2 Exact matching
443 QRegExp::exactMatch() in Qt 4 served two purposes: it exactly matched
444 a regular expression against a subject string, and it implemented partial
445 matching. In fact, if an exact match was not found, one could still find
446 out how much of the subject string was matched by the regular expression
447 by calling QRegExp::matchedLength(). If the returned length was equal
448 to the subject string's length, then one could deduce that a partial match
451 QRegularExpression supports partial matching explicitly by means of the
452 appropriate MatchType. If instead you simply want to be sure that the
453 subject string matches the regular expression exactly, you can wrap the
454 pattern between a couple of anchoring expressions. Simply
455 putting the pattern between the \c{^} and the \c{$} anchors is enough
458 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 24
460 However, remember that the \c{$} anchor not only matches at the end of the
461 string, but also at a newline character right before the end of the string;
462 that is, the previous pattern matches against the string "this pattern must
463 match exactly\n". Also, the behaviour of both the \c{^} and the \c{$}
464 anchors changes if the MultilineOption is set either explicitly (as a
465 pattern option) or implicitly (as a directive inside the pattern string).
467 Therefore, in the most general case, you should wrap the pattern between
468 the \c{\A} and the \c{\z} anchors:
470 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 25
472 Note the usage of the non-capturing group in order to preserve the meaning
473 of the branch operator inside the pattern.
475 \section2 Global matching
477 Due to limitations of the QRegExp API it was impossible to implement global
478 matching correctly (that is, like Perl does). In particular, patterns that
479 can match 0 characters (like \c{"a*"}) are problematic.
481 QRegularExpression::globalMatch() implements Perl global match correctly, and
482 the returned iterator can be used to examine each result.
484 \section2 Unicode properties support
486 When using QRegExp, character classes such as \c{\w}, \c{\d}, etc. match
487 characters with the corresponding Unicode property: for instance, \c{\d}
488 matches any character with the Unicode Nd (decimal digit) property.
490 Those character classes only match ASCII characters by default when using
491 QRegularExpression: for instance, \c{\d} matches exactly a character in the
492 \c{0-9} ASCII range. It is possible to change this behaviour by using the
493 UseUnicodePropertiesOption pattern option.
495 \section2 Wildcard matching
497 There is no equivalent of wildcard matching in QRegularExpression.
498 Nevertheless, rewriting a regular expression in wildcard syntax to a
499 Perl-compatible regular expression is a very easy task, given the fact
500 that wildcard syntax supported by QRegExp is very simple.
502 \section2 Other pattern syntaxes
504 QRegularExpression supports only Perl-compatible regular expressions.
506 \section2 Minimal matching
508 QRegExp::setMinimal() implemented minimal matching by simply reversing the
509 greediness of the quantifiers (QRegExp did not support lazy quantifiers,
510 like \c{*?}, \c{+?}, etc.). QRegularExpression instead does support greedy,
511 lazy and possessive quantifiers. The InvertedGreedinessOption
512 pattern option can be useful to emulate the effects of QRegExp::setMinimal():
513 if enabled, it inverts the greediness of quantifiers (greedy ones become
514 lazy and vice versa).
516 \section2 Caret modes
518 The AnchoredMatchOption match option can be used to emulate the
519 QRegExp::CaretAtOffset behaviour. There is no equivalent for the other
520 QRegExp::CaretMode modes.
522 \section1 Debugging code that uses QRegularExpression
524 QRegularExpression internally uses a just in time compiler (JIT) to
525 optimize the execution of the matching algorithm. The JIT makes extensive
526 usage of self-modifying code, which can lead debugging tools such as
527 Valgrind to crash. You must enable all checks for self-modifying code if
528 you want to debug programs using QRegularExpression (for instance, see Valgrind's
529 \c{--smc-check} command line option). The downside of enabling such checks
530 is that your program will run considerably slower.
532 To avoid that, the JIT is disabled by default if you compile Qt in debug
533 mode. It is possible to override the default and enable or disable the JIT
534 usage (in both debug and release mode) by setting the
535 \c{QT_ENABLE_REGEXP_JIT} environment variable to a non-zero or zero value
538 \sa QRegularExpressionMatch, QRegularExpressionMatchIterator
542 \class QRegularExpressionMatch
545 \brief The QRegularExpressionMatch class provides the results of matching
546 a QRegularExpression against a string.
553 \keyword regular expression match
555 A QRegularExpressionMatch object can be obtained by calling the
556 QRegularExpression::match() function, or as a single result of a global
557 match from a QRegularExpressionMatchIterator.
559 The success or the failure of a match attempt can be inspected by calling
560 the hasMatch() function. QRegularExpressionMatch also reports a successful
561 partial match through the hasPartialMatch() function.
563 In addition, QRegularExpressionMatch returns the substrings captured by the
564 capturing groups in the pattern string. The implicit capturing group with
565 index 0 captures the result of the whole match. The captured() function
566 returns each substring captured, either by the capturing group's index or
569 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 29
571 For each captured substring it is possible to query its starting and ending
572 offsets in the subject string by calling the capturedStart() and the
573 capturedEnd() function, respectively. The length of each captured
574 substring is available using the capturedLength() function.
576 The convenience function capturedTexts() will return \e{all} the captured
577 substrings at once (including the substring matched by the entire pattern)
578 in the order they have been captured by capturing groups; that is,
579 \c{captured(i) == capturedTexts().at(i)}.
581 You can retrieve the QRegularExpression object the subject string was
582 matched against by calling the regularExpression() function; the
583 match type and the match options are available as well by calling
584 the matchType() and the matchOptions() respectively.
586 Please refer to the QRegularExpression documentation for more information
587 about the Qt regular expression classes.
589 \sa QRegularExpression
593 \class QRegularExpressionMatchIterator
596 \brief The QRegularExpressionMatchIterator class provides an iterator on
597 the results of a global match of a QRegularExpression object against a string.
604 \keyword regular expression iterator
606 A QRegularExpressionMatchIterator object is a forward only Java-like
607 iterator; it can be obtained by calling the
608 QRegularExpression::globalMatch() function. A new
609 QRegularExpressionMatchIterator will be positioned before the first result.
610 You can then call the hasNext() function to check if there are more
611 results available; if so, the next() function will return the next
612 result and advance the iterator.
614 Each result is a QRegularExpressionMatch object holding all the information
615 for that result (including captured substrings).
619 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 30
621 Moreover, QRegularExpressionMatchIterator offers a peekNext() function
622 to get the next result \e{without} advancing the iterator.
624 You can retrieve the QRegularExpression object the subject string was
625 matched against by calling the regularExpression() function; the
626 match type and the match options are available as well by calling
627 the matchType() and the matchOptions() respectively.
629 Please refer to the QRegularExpression documentation for more information
630 about the Qt regular expression classes.
632 \sa QRegularExpression, QRegularExpressionMatch
637 \enum QRegularExpression::PatternOption
639 The PatternOption enum defines modifiers to the way the pattern string
640 should be interpreted, and therefore the way the pattern matches against a
643 \value NoPatternOption
644 No pattern options are set.
646 \value CaseInsensitiveOption
647 The pattern should match against the subject string in a case
648 insensitive way. This option corresponds to the /i modifier in Perl
651 \value DotMatchesEverythingOption
652 The dot metacharacter (\c{.}) in the pattern string is allowed to match
653 any character in the subject string, including newlines (normally, the
654 dot does not match newlines). This option corresponds to the \c{/s}
655 modifier in Perl regular expressions.
657 \value MultilineOption
658 The caret (\c{^}) and the dollar (\c{$}) metacharacters in the pattern
659 string are allowed to match, respectively, immediately after and
660 immediately before any newline in the subject string, as well as at the
661 very beginning and at the very end of the subject string. This option
662 corresponds to the \c{/m} modifier in Perl regular expressions.
664 \value ExtendedPatternSyntaxOption
665 Any whitespace in the pattern string which is not escaped and outside a
666 character class is ignored. Moreover, an unescaped sharp (\b{#})
667 outside a character class causes all the following characters, until
668 the first newline (included), to be ignored. This can be used to
669 increase the readability of a pattern string as well as put comments
670 inside regular expressions; this is particularly useful if the pattern
671 string is loaded from a file or written by the user, because in C++
672 code it is always possible to use the rules for string literals to put
673 comments outside the pattern string. This option corresponds to the \c{/x}
674 modifier in Perl regular expressions.
676 \value InvertedGreedinessOption
677 The greediness of the quantifiers is inverted: \c{*}, \c{+}, \c{?},
678 \c{{m,n}}, etc. become lazy, while their lazy versions (\c{*?},
679 \c{+?}, \c{??}, \c{{m,n}?}, etc.) become greedy. There is no equivalent
680 for this option in Perl regular expressions.
682 \value DontCaptureOption
683 The non-named capturing groups do not capture substrings; named
684 capturing groups still work as intended, as well as the implicit
685 capturing group number 0 corresponding to the entire match. There is no
686 equivalent for this option in Perl regular expressions.
688 \value UseUnicodePropertiesOption
689 The meaning of the \c{\w}, \c{\d}, etc., character classes, as well as
690 the meaning of their counterparts (\c{\W}, \c{\D}, etc.), is changed
691 from matching ASCII characters only to matching any character with the
692 corresponding Unicode property. For instance, \c{\d} is changed to
693 match any character with the Unicode Nd (decimal digit) property;
694 \c{\w} to match any character with either the Unicode L (letter) or N
695 (digit) property, plus underscore, and so on. This option corresponds
696 to the \c{/u} modifier in Perl regular expressions.
700 \enum QRegularExpression::MatchType
702 The MatchType enum defines the type of the match that should be attempted
703 against the subject string.
706 A normal match is done.
708 \value PartialPreferCompleteMatch
709 The pattern string is matched partially against the subject string. If
710 a partial match is found, then it is recorded, and other matching
711 alternatives are tried as usual. If a complete match is then found,
712 then it's preferred to the partial match; in this case only the
713 complete match is reported. If instead no complete match is found (but
714 only the partial one), then the partial one is reported.
716 \value PartialPreferFirstMatch
717 The pattern string is matched partially against the subject string. If
718 a partial match is found, then matching stops and the partial match is
719 reported. In this case, other matching alternatives (potentially
720 leading to a complete match) are not tried. Moreover, this match type
721 assumes that the subject string is only a substring of a larger text, and
722 that (in this text) there are other characters beyond the end of the
723 subject string. This can lead to surprising results; see the discussion
724 in the \l{partial matching} section for more details.
728 \enum QRegularExpression::MatchOption
731 No match options are set.
733 \value AnchoredMatchOption
734 The match is constrained to start exactly at the offset passed to
735 match() in order to be successful, even if the pattern string does not
736 contain any metacharacter that anchors the match at that point.
739 // after how many usages we optimize the regexp
// optimizePattern() compares this threshold against the per-pattern usedCount.
// With QT_BUILD_INTERNAL the value is an exported, non-const variable
// (presumably so that autotests can change it -- confirm); the second
// definition is a file-local constant with the same value.
740 #ifdef QT_BUILD_INTERNAL
741 Q_AUTOTEST_EXPORT unsigned int qt_qregularexpression_optimize_after_use_count = 10;
743 static const unsigned int qt_qregularexpression_optimize_after_use_count = 10;
744 #endif // QT_BUILD_INTERNAL
// Maps QRegularExpression::PatternOptions flags onto PCRE option bits: each
// pattern option that is set ORs its PCRE_* counterpart into `options`.
749 static int convertToPcreOptions(QRegularExpression::PatternOptions patternOptions)
753 if (patternOptions & QRegularExpression::CaseInsensitiveOption)
754 options |= PCRE_CASELESS;
755 if (patternOptions & QRegularExpression::DotMatchesEverythingOption)
756 options |= PCRE_DOTALL;
757 if (patternOptions & QRegularExpression::MultilineOption)
758 options |= PCRE_MULTILINE;
759 if (patternOptions & QRegularExpression::ExtendedPatternSyntaxOption)
760 options |= PCRE_EXTENDED;
761 if (patternOptions & QRegularExpression::InvertedGreedinessOption)
762 options |= PCRE_UNGREEDY;
763 if (patternOptions & QRegularExpression::DontCaptureOption)
764 options |= PCRE_NO_AUTO_CAPTURE;
765 if (patternOptions & QRegularExpression::UseUnicodePropertiesOption)
// Overload for match-time options: AnchoredMatchOption maps to PCRE_ANCHORED.
774 static int convertToPcreOptions(QRegularExpression::MatchOptions matchOptions)
778 if (matchOptions & QRegularExpression::AnchoredMatchOption)
779 options |= PCRE_ANCHORED;
// Shared private data of QRegularExpression: the pattern and its options plus
// the state produced by compiling/studying the pattern with PCRE.
784 struct QRegularExpressionPrivate : QSharedData
786 QRegularExpressionPrivate();
787 ~QRegularExpressionPrivate();
788 QRegularExpressionPrivate(const QRegularExpressionPrivate &other);
// Compilation life cycle helpers.
790 void cleanCompiledPattern();
791 void compilePattern();
792 void getPatternInfo();
793 pcre16_extra *optimizePattern();
// Runs a match on `subject`; `previous` is the match to advance from, if any.
795 QRegularExpressionMatchPrivate *doMatch(const QString &subject,
797 QRegularExpression::MatchType matchType,
798 QRegularExpression::MatchOptions matchOptions,
799 const QRegularExpressionMatchPrivate *previous = 0) const;
801 int captureIndexForName(const QString &name) const;
804 QRegularExpression::PatternOptions patternOptions;
806 // *All* of the following members are set and managed while holding this mutex,
807 // except for isDirty which is set to true by QRegularExpression setters
808 // (right after a detach happened).
809 // On the other hand, after the compilation and studying,
810 // it's safe to *use* (i.e. read) them from multiple threads at the same time.
811 // Therefore, doMatch doesn't need to lock this mutex.
814 // The PCRE pointers are reference-counted by the QRegularExpressionPrivate
815 // objects themselves; when the private is copied (i.e. a detach happened)
817 pcre16 *compiledPattern;
818 pcre16_extra *studyData;
819 const char *errorString;
// Incremented by optimizePattern() and compared against the optimization threshold.
822 unsigned int usedCount;
// Computed by getPatternInfo(); consulted by doMatch() when advancing past an empty match.
823 bool usingCrLfNewlines;
// Shared private data of QRegularExpressionMatch: the inputs of one match
// attempt (expression, subject, type, options) and its outcome.
827 struct QRegularExpressionMatchPrivate : QSharedData
829 QRegularExpressionMatchPrivate(const QRegularExpression &re,
830 const QString &subject,
831 QRegularExpression::MatchType matchType,
832 QRegularExpression::MatchOptions matchOptions,
// Performs the match following this one on the same subject.
835 QRegularExpressionMatch nextMatch() const;
837 const QRegularExpression regularExpression;
838 const QString subject;
839 // the capturedOffsets vector contains pairs of (start, end) positions
840 // for each captured substring
841 QVector<int> capturedOffsets;
843 const QRegularExpression::MatchType matchType;
844 const QRegularExpression::MatchOptions matchOptions;
// Set by doMatch() when PCRE reports PCRE_ERROR_PARTIAL.
849 bool hasPartialMatch;
// Shared private data of QRegularExpressionMatchIterator: the next match to
// hand out, plus the expression, match type and match options in use.
853 struct QRegularExpressionMatchIteratorPrivate : QSharedData
855 QRegularExpressionMatchIteratorPrivate(const QRegularExpression &re,
856 QRegularExpression::MatchType matchType,
857 QRegularExpression::MatchOptions matchOptions,
858 const QRegularExpressionMatch &next);
// True while `next` is a valid result holding a (possibly partial) match.
860 bool hasNext() const;
861 QRegularExpressionMatch next;
862 const QRegularExpression regularExpression;
863 const QRegularExpression::MatchType matchType;
864 const QRegularExpression::MatchOptions matchOptions;
870 QRegularExpression::QRegularExpression(QRegularExpressionPrivate &dd)
// Default state: empty pattern, no options, no compiled pattern or study
// data, no error (error offset -1), CRLF newline handling off.
878 QRegularExpressionPrivate::QRegularExpressionPrivate()
879 : pattern(), patternOptions(0),
881 compiledPattern(0), studyData(0),
882 errorString(0), errorOffset(-1),
885 usingCrLfNewlines(false),
// Releases the PCRE data owned by this object via cleanCompiledPattern().
893 QRegularExpressionPrivate::~QRegularExpressionPrivate()
895 cleanCompiledPattern();
901 Copies the private, which means copying only the pattern and the pattern
902 options. The compiledPattern and the studyData pointers are NOT copied (we
903 do not own them any more), and in general all the members set when
904 compiling a pattern are set to default values. isDirty is set back to true
905 so that the pattern has to be recompiled again.
// Copy constructor: only pattern and patternOptions are taken from `other`.
// The compilation results start from defaults (null PCRE pointers, error
// offset -1, capturing count 0) and isDirty is true.
907 QRegularExpressionPrivate::QRegularExpressionPrivate(const QRegularExpressionPrivate &other)
908 : QSharedData(other),
909 pattern(other.pattern), patternOptions(other.patternOptions),
911 compiledPattern(0), studyData(0),
913 errorOffset(-1), capturingCount(0),
915 usingCrLfNewlines(false), isDirty(true)
// Frees the compiled pattern (pcre16_free) and the study data
// (pcre16_free_study), and resets usingCrLfNewlines to false.
922 void QRegularExpressionPrivate::cleanCompiledPattern()
924 pcre16_free(compiledPattern);
925 pcre16_free_study(studyData);
929 usingCrLfNewlines = false;
// Compiles `pattern` with PCRE while holding the mutex. On failure
// pcre16_compile2 fills in the error code, errorString and errorOffset.
937 void QRegularExpressionPrivate::compilePattern()
939 QMutexLocker lock(&mutex);
// Drop any previous compilation result before compiling again.
945 cleanCompiledPattern();
947 int options = convertToPcreOptions(patternOptions);
// The pattern is handed over as UTF-16 (pattern.utf16()).
948 options |= PCRE_UTF16;
951 compiledPattern = pcre16_compile2(pattern.utf16(), options,
952 &errorCode, &errorString, &errorOffset, 0);
954 if (!compiledPattern)
957 Q_ASSERT(errorCode == 0);
958 Q_ASSERT(studyData == 0); // studying (=>optimizing) is always done later
// Queries the compiled pattern for its capture count and newline convention,
// and derives usingCrLfNewlines from the latter.
967 void QRegularExpressionPrivate::getPatternInfo()
969 Q_ASSERT(compiledPattern);
971 pcre16_fullinfo(compiledPattern, 0, PCRE_INFO_CAPTURECOUNT, &capturingCount);
973 // detect the settings for the newline
974 int patternNewlineSetting;
975 pcre16_fullinfo(compiledPattern, studyData, PCRE_INFO_OPTIONS, &patternNewlineSetting);
// Keep only the newline-related bits of the pattern's options.
976 patternNewlineSetting &= PCRE_NEWLINE_CR | PCRE_NEWLINE_LF | PCRE_NEWLINE_CRLF
977 | PCRE_NEWLINE_ANY | PCRE_NEWLINE_ANYCRLF;
978 if (patternNewlineSetting == 0) {
979 // no option was specified in the regexp, grab PCRE build defaults
980 int pcreNewlineSetting;
981 pcre16_config(PCRE_CONFIG_NEWLINE, &pcreNewlineSetting);
// Translate the value reported by pcre16_config into a PCRE_NEWLINE_* flag.
982 switch (pcreNewlineSetting) {
984 patternNewlineSetting = PCRE_NEWLINE_CR; break;
986 patternNewlineSetting = PCRE_NEWLINE_LF; break;
987 case 3338: // (13<<8 | 10)
988 patternNewlineSetting = PCRE_NEWLINE_CRLF; break;
990 patternNewlineSetting = PCRE_NEWLINE_ANYCRLF; break;
992 patternNewlineSetting = PCRE_NEWLINE_ANY; break;
// NOTE(review): the message below names compilePattern() although it is
// emitted from getPatternInfo().
994 qWarning("QRegularExpressionPrivate::compilePattern(): "
995 "PCRE_CONFIG_NEWLINE returned an unknown newline");
// True for every convention under which "\r\n" counts as a newline.
1000 usingCrLfNewlines = (patternNewlineSetting == PCRE_NEWLINE_CRLF) ||
1001 (patternNewlineSetting == PCRE_NEWLINE_ANY) ||
1002 (patternNewlineSetting == PCRE_NEWLINE_ANYCRLF);
1007 \class QPcreJitStackPointer
1010 Simple "smartpointer" wrapper around a pcre_jit_stack, to be used with
// Non-copyable owner of a PCRE JIT stack: the constructor allocates the
// stack and the destructor frees it.
1013 class QPcreJitStackPointer
1015 Q_DISABLE_COPY(QPcreJitStackPointer);
1021 QPcreJitStackPointer()
1023 // The default JIT stack size in PCRE is 32K,
1024 // we allocate from 32K up to 512K.
1025 stack = pcre16_jit_stack_alloc(32*1024, 512*1024);
1030 ~QPcreJitStackPointer()
1033 pcre16_jit_stack_free(stack);
// The JIT stack owned by this object; read directly by qtPcreCallback().
1036 pcre16_jit_stack *stack;
// Global QThreadStorage of QPcreJitStackPointer pointers, reached through
// jitStacks(); filled in by pcre16SafeExec() and read by qtPcreCallback().
1039 Q_GLOBAL_STATIC(QThreadStorage<QPcreJitStackPointer *>, jitStacks)
// Callback registered with pcre16_assign_jit_stack(): returns the calling
// thread's JIT stack when one has been stored in jitStacks().
1044 static pcre16_jit_stack *qtPcreCallback(void *)
1046 if (jitStacks()->hasLocalData())
1047 return jitStacks()->localData()->stack;
// Decides whether the PCRE JIT is used, based on the QT_ENABLE_REGEXP_JIT
// environment variable when it is set to a non-empty value.
1055 static bool isJitEnabled()
1057 QByteArray jitEnvironment = qgetenv("QT_ENABLE_REGEXP_JIT");
1058 if (!jitEnvironment.isEmpty()) {
1060 int enableJit = jitEnvironment.toInt(&ok);
// A value that parses as an integer enables the JIT iff it is non-zero;
// a value that does not parse enables it.
1061 return ok ? (enableJit != 0) : true;
1074 The purpose of the function is to call pcre16_study (which allows some
1075 optimizations to be performed, including JIT-compiling the pattern), and
1076 setting the studyData member variable to the result of the study. It gets
1077 called by doMatch() every time a match is performed. As of now, the
1078 optimizations on the pattern are performed after a certain number of usages
1079 (i.e. the qt_qregularexpression_optimize_after_use_count constant).
1081 Notice that although the method is protected by a mutex, one thread may
1082 invoke this function and return immediately (i.e. not study the pattern,
1083 leaving studyData to NULL); but before calling pcre16_exec to perform the
1084 match, another thread performs the studying and sets studyData to something
1085 else. Although the assignment to studyData is itself atomic, the release of
1086 the memory pointed by studyData isn't. Therefore, the current studyData
1087 value is returned and used by doMatch.
// Studies (and possibly JIT-compiles) the compiled pattern once usedCount
// reaches qt_qregularexpression_optimize_after_use_count. Runs under the mutex.
1089 pcre16_extra *QRegularExpressionPrivate::optimizePattern()
1091 Q_ASSERT(compiledPattern);
1093 QMutexLocker lock(&mutex);
// Nothing to do when the pattern is already studied or the usage count
// (incremented here) is not exactly at the threshold.
1095 if (studyData || (++usedCount != qt_qregularexpression_optimize_after_use_count))
// The JIT setting is evaluated once and cached in a function-local static.
1098 static const bool enableJit = isJitEnabled();
1100 int studyOptions = 0;
1102 studyOptions |= PCRE_STUDY_JIT_COMPILE;
1105 studyData = pcre16_study(compiledPattern, studyOptions, &err);
// When JIT code was produced, register the callback that supplies a JIT stack.
1107 if (studyData && studyData->flags & PCRE_EXTRA_EXECUTABLE_JIT)
1108 pcre16_assign_jit_stack(studyData, qtPcreCallback, 0);
// No study data together with an error message means the study failed.
1110 if (!studyData && err)
1111 qWarning("QRegularExpressionPrivate::optimizePattern(): pcre_study failed: %s", err);
1119 Returns the capturing group number for the given name. Duplicated names for
1120 capturing groups are not supported.
// Looks up the number of the named capturing group in the compiled pattern.
// `name` must not be empty (asserted).
1122 int QRegularExpressionPrivate::captureIndexForName(const QString &name) const
1124 Q_ASSERT(!name.isEmpty());
1126 if (!compiledPattern)
1129 int index = pcre16_get_stringnumber(compiledPattern, name.utf16());
1139 This is a simple wrapper for pcre16_exec for handling the case in which the
1140 JIT runs out of memory. In that case, we allocate a thread-local JIT stack
1141 and re-run pcre16_exec.
// Calls pcre16_exec and, if it fails with PCRE_ERROR_JIT_STACKLIMIT while the
// current thread has no JIT stack yet, stores a new QPcreJitStackPointer in
// jitStacks() and runs pcre16_exec again with the same arguments.
1143 static int pcre16SafeExec(const pcre16 *code, const pcre16_extra *extra,
1144 const unsigned short *subject, int length,
1145 int startOffset, int options,
1146 int *ovector, int ovecsize)
1148 int result = pcre16_exec(code, extra, subject, length,
1149 startOffset, options, ovector, ovecsize);
1151 if (result == PCRE_ERROR_JIT_STACKLIMIT && !jitStacks()->hasLocalData()) {
1152 QPcreJitStackPointer *p = new QPcreJitStackPointer;
// Ownership of p passes to the thread storage.
1153 jitStacks()->setLocalData(p);
1155 result = pcre16_exec(code, extra, subject, length,
1156 startOffset, options, ovector, ovecsize);
1165 Performs a match of type \a matchType on the given \a subject string with
1166 options \a matchOptions and returns the QRegularExpressionMatchPrivate of
1167 the result. It also advances a match if a previous result is given as \a
1170 Advancing a match is a tricky algorithm. If the previous match matched a
1171 non-empty string, we just do an ordinary match at the offset position.
1173 If the previous match matched an empty string, then an anchored, non-empty
1174 match is attempted at the offset position. If that succeeds, then we got
1175 the next match and we can return it. Otherwise, we advance by 1 position
1176 (which can be one or two code units in UTF-16!) and reattempt a "normal"
1177 match. We also have the problem of detecting the current newline format: if
1178 the new advanced offset is pointing to the beginning of a CRLF sequence, we
1179 must advance over it.
// Runs one match attempt on `subject` and returns a newly allocated result
// object. When `previous` refers to an empty match, the algorithm first tries
// an anchored non-empty match and then falls back to a normal match.
1181 QRegularExpressionMatchPrivate *QRegularExpressionPrivate::doMatch(const QString &subject,
1183 QRegularExpression::MatchType matchType,
1184 QRegularExpression::MatchOptions matchOptions,
1185 const QRegularExpressionMatchPrivate *previous) const
1188 offset += subject.length();
1190 QRegularExpression re(*const_cast<QRegularExpressionPrivate *>(this));
// An offset outside [0, subject.length()] produces a result that keeps the
// constructor defaults (hasMatch, hasPartialMatch and isValid all false).
1192 if (offset < 0 || offset > subject.length())
1193 return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions, 0);
// Same kind of result, plus a warning, when the pattern is not compiled.
1195 if (!compiledPattern) {
1196 qWarning("QRegularExpressionPrivate::doMatch(): called on an invalid QRegularExpression object");
1197 return new QRegularExpressionMatchPrivate(re, subject, matchType, matchOptions, 0);
1200 QRegularExpressionMatchPrivate *priv = new QRegularExpressionMatchPrivate(re, subject,
1201 matchType, matchOptions,
1204 // this is mutex protected
1205 const pcre16_extra *currentStudyData = const_cast<QRegularExpressionPrivate *>(this)->optimizePattern();
1207 int pcreOptions = convertToPcreOptions(matchOptions);
// The partial match types map onto PCRE's soft/hard partial matching flags.
1209 if (matchType == QRegularExpression::PartialPreferCompleteMatch)
1210 pcreOptions |= PCRE_PARTIAL_SOFT;
1211 else if (matchType == QRegularExpression::PartialPreferFirstMatch)
1212 pcreOptions |= PCRE_PARTIAL_HARD;
// The previous match was empty when its start and end offsets coincide.
1214 bool previousMatchWasEmpty = false;
1215 if (previous && previous->hasMatch &&
1216 (previous->capturedOffsets.at(0) == previous->capturedOffsets.at(1))) {
1217 previousMatchWasEmpty = true;
// PCRE writes the capture offsets straight into the result's vector.
1220 int * const captureOffsets = priv->capturedOffsets.data();
1221 const int captureOffsetsCount = priv->capturedOffsets.size();
1223 const unsigned short * const subjectUtf16 = subject.utf16();
1224 const int subjectLength = subject.length();
1228 if (!previousMatchWasEmpty) {
1229 result = pcre16SafeExec(compiledPattern, currentStudyData,
1230 subjectUtf16, subjectLength,
1231 offset, pcreOptions,
1232 captureOffsets, captureOffsetsCount);
// After an empty match: attempt an anchored, non-empty match at the offset.
1234 result = pcre16SafeExec(compiledPattern, currentStudyData,
1235 subjectUtf16, subjectLength,
1236 offset, pcreOptions | PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED,
1237 captureOffsets, captureOffsetsCount);
// If that fails, a normal match is retried. The conditions below detect an
// offset sitting between '\r' and '\n', or on a low surrogate.
// NOTE(review): the statements that adjust `offset` are not visible here.
1239 if (result == PCRE_ERROR_NOMATCH) {
1242 if (usingCrLfNewlines
1243 && offset < subjectLength
1244 && subjectUtf16[offset - 1] == QLatin1Char('\r')
1245 && subjectUtf16[offset] == QLatin1Char('\n')) {
1247 } else if (offset < subjectLength
1248 && QChar::isLowSurrogate(subjectUtf16[offset])) {
1252 result = pcre16SafeExec(compiledPattern, currentStudyData,
1253 subjectUtf16, subjectLength,
1254 offset, pcreOptions,
1255 captureOffsets, captureOffsetsCount);
1259 #ifdef QREGULAREXPRESSION_DEBUG
1260 qDebug() << "Matching" << pattern << "against" << subject
1261 << offset << matchType << matchOptions << previousMatchWasEmpty
1262 << "result" << result;
1265 // result == 0 means not enough space in captureOffsets; should never happen
1266 Q_ASSERT(result != 0);
// Successful match: `result` becomes capturedCount and the offsets vector is
// trimmed to one (start, end) pair per captured substring.
1270 priv->isValid = true;
1271 priv->hasMatch = true;
1272 priv->capturedCount = result;
1273 priv->capturedOffsets.resize(result * 2);
1275 // no match, partial match or error
1276 priv->hasPartialMatch = (result == PCRE_ERROR_PARTIAL);
// Any other error code leaves the result invalid.
1277 priv->isValid = (result == PCRE_ERROR_NOMATCH || result == PCRE_ERROR_PARTIAL);
1279 if (result == PCRE_ERROR_PARTIAL) {
1281 // leave the start and end capture offsets (i.e. cap(0))
1282 priv->capturedCount = 1;
1283 priv->capturedOffsets.resize(2);
1285 // no match or error
1286 priv->capturedCount = 0;
1287 priv->capturedOffsets.clear();
// Stores the inputs of the match, clears all result flags and sizes
// capturedOffsets for the PCRE call.
1297 QRegularExpressionMatchPrivate::QRegularExpressionMatchPrivate(const QRegularExpression &re,
1298 const QString &subject,
1299 QRegularExpression::MatchType matchType,
1300 QRegularExpression::MatchOptions matchOptions,
1302 : regularExpression(re), subject(subject),
1303 matchType(matchType), matchOptions(matchOptions),
1305 hasMatch(false), hasPartialMatch(false), isValid(false)
1307 Q_ASSERT(capturingCount >= 0);
// Three ints per group, counting the implicit group 0.
// NOTE(review): presumably because pcre_exec expects an ovector whose size
// is a multiple of 3 -- confirm against the PCRE API documentation.
1308 const int captureOffsetsCount = (capturingCount + 1) * 3;
1309 capturedOffsets.resize(captureOffsetsCount);
// Continues matching on the same subject, starting at the end offset of
// this match. Requires a complete or partial match (asserted).
1316 QRegularExpressionMatch QRegularExpressionMatchPrivate::nextMatch() const
1319 Q_ASSERT(hasMatch || hasPartialMatch);
1321 QRegularExpressionMatchPrivate *nextPrivate = regularExpression.d->doMatch(subject,
1322 capturedOffsets.at(1),
1326 return QRegularExpressionMatch(*nextPrivate);
// Stores the expression, match type and match options the iterator was
// created with.
1332 QRegularExpressionMatchIteratorPrivate::QRegularExpressionMatchIteratorPrivate(const QRegularExpression &re,
1333 QRegularExpression::MatchType matchType,
1334 QRegularExpression::MatchOptions matchOptions,
1335 const QRegularExpressionMatch &next)
1337 regularExpression(re),
1338 matchType(matchType), matchOptions(matchOptions)
// True when the stored next result is valid and holds a complete or
// partial match.
1345 bool QRegularExpressionMatchIteratorPrivate::hasNext() const
1347 return next.isValid() && (next.hasMatch() || next.hasPartialMatch());
1353 Constructs a QRegularExpression object with an empty pattern and no pattern
1356 \sa setPattern(), setPatternOptions()
// Creates the object with a default-constructed private.
1358 QRegularExpression::QRegularExpression()
1359 : d(new QRegularExpressionPrivate)
1364 Constructs a QRegularExpression object using the given \a pattern as
1365 pattern and the \a options as the pattern options.
1367 \sa setPattern(), setPatternOptions()
// Creates a fresh private and stores the pattern and options in it; nothing
// is compiled by the lines shown here.
1369 QRegularExpression::QRegularExpression(const QString &pattern, PatternOptions options)
1370 : d(new QRegularExpressionPrivate)
1372 d->pattern = pattern;
1373 d->patternOptions = options;
1377 Constructs a QRegularExpression object as a copy of \a re.
1381 QRegularExpression::QRegularExpression(const QRegularExpression &re)
1387 Destroys the QRegularExpression object.
1389 QRegularExpression::~QRegularExpression()
1394 Assigns the regular expression \a re to this object, and returns a reference
1395 to the copy. Both the pattern and the pattern options are copied.
1397 QRegularExpression &QRegularExpression::operator=(const QRegularExpression &re)
1404 \fn void QRegularExpression::swap(QRegularExpression &other)
1406 Swaps the regular expression \a other with this regular expression. This
1407 operation is very fast and never fails.
1411 Returns the pattern string of the regular expression.
1413 \sa setPattern(), patternOptions()
1415 QString QRegularExpression::pattern() const
1421 Sets the pattern string of the regular expression to \a pattern. The
1422 pattern options are left unchanged.
1424 \sa pattern(), setPatternOptions()
// Stores the new pattern string in the private.
1426 void QRegularExpression::setPattern(const QString &pattern)
1430 d->pattern = pattern;
1434 Returns the pattern options for the regular expression.
1436 \sa setPatternOptions(), pattern()
// Plain getter for the stored pattern options.
1438 QRegularExpression::PatternOptions QRegularExpression::patternOptions() const
1440 return d->patternOptions;
1444 Sets the given \a options as the pattern options of the regular expression.
1445 The pattern string is left unchanged.
1447 \sa patternOptions(), setPattern()
// Stores the new pattern options in the private.
1449 void QRegularExpression::setPatternOptions(PatternOptions options)
1453 d->patternOptions = options;
1457 Returns the number of capturing groups inside the pattern string,
1458 or -1 if the regular expression is not valid.
// Number of capturing groups of a valid pattern; isValid() compiles the
// pattern first.
1462 int QRegularExpression::captureCount() const
1464 if (!isValid()) // will compile the pattern
1466 return d->capturingCount;
1470 Returns true if the regular expression is a valid regular expression (that
1471 is, it contains no syntax errors, etc.), or false otherwise. Use
1472 errorString() to obtain a textual description of the error.
1474 \sa errorString(), patternErrorOffset()
// Calls compilePattern(), then reports whether a compiled pattern exists.
1476 bool QRegularExpression::isValid() const
1478 d.data()->compilePattern();
1479 return d->compiledPattern;
1483 Returns a textual description of the error found when checking the validity
1484 of the regular expression, or "no error" if no error was found.
1486 \sa isValid(), patternErrorOffset()
// Calls compilePattern(), then returns a translated message: either the
// stored error string or "no error".
// NOTE(review): the condition choosing between the two returns is not
// visible here.
1488 QString QRegularExpression::errorString() const
1490 d.data()->compilePattern();
1492 return QCoreApplication::translate("QRegularExpression", d->errorString, 0, QCoreApplication::UnicodeUTF8);
1493 return QCoreApplication::translate("QRegularExpression", "no error", 0, QCoreApplication::UnicodeUTF8);
1497 Returns the offset, inside the pattern string, at which an error was found
1498 when checking the validity of the regular expression. If no error was
1499 found, then -1 is returned.
1501 \sa pattern(), isValid(), errorString()
// Calls compilePattern(), then returns the stored error offset.
1503 int QRegularExpression::patternErrorOffset() const
1505 d.data()->compilePattern();
1506 return d->errorOffset;
1510 Attempts to match the regular expression against the given \a subject
1511 string, starting at the position \a offset inside the subject, using a
1512 match of type \a matchType and honoring the given \a matchOptions.
1514 The returned QRegularExpressionMatch object contains the results of the
1517 \sa QRegularExpressionMatch, {normal matching}
// Calls compilePattern(), then delegates to the private's doMatch() and
// wraps the result.
1519 QRegularExpressionMatch QRegularExpression::match(const QString &subject,
1521 MatchType matchType,
1522 MatchOptions matchOptions) const
1524 d.data()->compilePattern();
1526 QRegularExpressionMatchPrivate *priv = d->doMatch(subject, offset, matchType, matchOptions);
1527 return QRegularExpressionMatch(*priv);
1531 Attempts to perform a global match of the regular expression against the
1532 given \a subject string, starting at the position \a offset inside the
1533 subject, using a match of type \a matchType and honoring the given \a
1536 The returned QRegularExpressionMatchIterator is positioned before the
1537 first match result (if any).
1539 \sa QRegularExpressionMatchIterator, {global matching}
// Builds an iterator whose next result is the first match, obtained by
// calling match() with the same arguments.
1541 QRegularExpressionMatchIterator QRegularExpression::globalMatch(const QString &subject,
1543 MatchType matchType,
1544 MatchOptions matchOptions) const
1546 QRegularExpressionMatchIteratorPrivate *priv =
1547 new QRegularExpressionMatchIteratorPrivate(*this,
1550 match(subject, offset, matchType, matchOptions));
1552 return QRegularExpressionMatchIterator(*priv);
1556 Returns true if the regular expression is equal to \a re, or false
1557 otherwise. Two QRegularExpression objects are equal if they have
1558 the same pattern string and the same pattern options.
// Equal when both objects share the same private, or when pattern string
// and pattern options both compare equal.
1562 bool QRegularExpression::operator==(const QRegularExpression &re) const
1564 return (d == re.d) ||
1565 (d->pattern == re.d->pattern && d->patternOptions == re.d->patternOptions);
1569 \fn bool QRegularExpression::operator!=(const QRegularExpression &re) const
1571 Returns true if the regular expression is different from \a re, or
1578 Escapes all characters of \a str so that they no longer have any special
1579 meaning when used as a regular expression pattern string, and returns
1580 the escaped string. For instance:
1582 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 26
1584 This is very convenient in order to build patterns from arbitrary strings:
1586 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 27
1588 \note This function implements Perl's quotemeta algorithm and escapes with
1589 a backslash all characters in \a str, except for the characters in the
1590 \c{[A-Z]}, \c{[a-z]} and \c{[0-9]} ranges, as well as the underscore
1591 (\c{_}) character. The only difference with Perl is that a literal NUL
1592 inside \a str is escaped with the sequence \c{"\\0"} (backslash +
1593 \c{'0'}), instead of \c{"\\\0"} (backslash + \c{NUL}).
// Builds an escaped copy of `str`: NUL becomes backslash + '0', any other
// character outside [a-zA-Z0-9_] gets a backslash prefix, and everything
// else is copied unchanged.
1595 QString QRegularExpression::escape(const QString &str)
1598 const int count = str.size();
// Reserve room for the case where every character needs a backslash.
1599 result.reserve(count * 2);
1601 // everything but [a-zA-Z0-9_] gets escaped,
1602 // cf. perldoc -f quotemeta
1603 for (int i = 0; i < count; ++i) {
1604 const QChar current = str.at(i);
1606 if (current == QChar::Null) {
1607 // unlike Perl, a literal NUL must be escaped with
1608 // "\\0" (backslash + 0) and not "\\\0" (backslash + NUL),
1609 // because pcre16_compile uses a NUL-terminated string
1610 result.append(QLatin1Char('\\'));
1611 result.append(QLatin1Char('0'));
1612 } else if ( (current < QLatin1Char('a') || current > QLatin1Char('z')) &&
1613 (current < QLatin1Char('A') || current > QLatin1Char('Z')) &&
1614 (current < QLatin1Char('0') || current > QLatin1Char('9')) &&
1615 current != QLatin1Char('_') )
1617 result.append(QLatin1Char('\\'));
1618 result.append(current);
// A high surrogate that is not the last code unit is followed by its next
// code unit, so one backslash covers the whole pair.
1619 if (current.isHighSurrogate() && i < (count - 1))
1620 result.append(str.at(++i));
1622 result.append(current);
1631 Destroys the match result.
1633 QRegularExpressionMatch::~QRegularExpressionMatch()
1638 Constructs a match result by copying the result of the given \a match.
1642 QRegularExpressionMatch::QRegularExpressionMatch(const QRegularExpressionMatch &match)
1648 Assigns the match result \a match to this object, and returns a reference
1651 QRegularExpressionMatch &QRegularExpressionMatch::operator=(const QRegularExpressionMatch &match)
1658 \fn void QRegularExpressionMatch::swap(QRegularExpressionMatch &other)
1660 Swaps the match result \a other with this match result. This
1661 operation is very fast and never fails.
1667 QRegularExpressionMatch::QRegularExpressionMatch(QRegularExpressionMatchPrivate &dd)
1673 Returns the QRegularExpression object whose match() function returned this
1676 \sa QRegularExpression::match(), matchType(), matchOptions()
// Plain getter for the expression stored with this match result.
1678 QRegularExpression QRegularExpressionMatch::regularExpression() const
1680 return d->regularExpression;
1685 Returns the match type that was used to get this QRegularExpressionMatch
1686 object, that is, the match type that was passed to
1687 QRegularExpression::match() or QRegularExpression::globalMatch().
1689 \sa QRegularExpression::match(), regularExpression(), matchOptions()
// Plain getter for the match type stored with this match result.
1691 QRegularExpression::MatchType QRegularExpressionMatch::matchType() const
1693 return d->matchType;
1697 Returns the match options that were used to get this
1698 QRegularExpressionMatch object, that is, the match options that were passed
1699 to QRegularExpression::match() or QRegularExpression::globalMatch().
1701 \sa QRegularExpression::match(), regularExpression(), matchType()
// Plain getter for the match options stored with this match result.
1703 QRegularExpression::MatchOptions QRegularExpressionMatch::matchOptions() const
1705 return d->matchOptions;
1709 Returns the index of the last capturing group that captured something,
1710 including the implicit capturing group 0. This can be used to extract all
1711 the substrings that were captured:
1713 \snippet doc/src/snippets/code/src_corelib_tools_qregularexpression.cpp 28
1715 Note that some of the capturing groups with an index less than
1716 lastCapturedIndex() might not have matched, and therefore captured nothing.
1718 If the regular expression did not match, this function returns -1.
1720 \sa captured(), capturedStart(), capturedEnd(), capturedLength()
// capturedCount counts the implicit group 0, so the last index is one less;
// a capturedCount of 0 gives -1.
1722 int QRegularExpressionMatch::lastCapturedIndex() const
1724 return d->capturedCount - 1;
1728 Returns the substring captured by the \a nth capturing group. If the \a nth
1729 capturing group did not capture a string or doesn't exist, returns a null
1732 \sa capturedRef(), lastCapturedIndex(), capturedStart(), capturedEnd(),
1733 capturedLength(), QString::isNull()
// Extracts the substring of group `nth` from the subject after range-checking
// the index and checking that the group captured something.
1735 QString QRegularExpressionMatch::captured(int nth) const
1737 if (nth < 0 || nth > lastCapturedIndex())
1740 int start = capturedStart(nth);
1742 if (start == -1) // didn't capture
1745 return d->subject.mid(start, capturedLength(nth));
1749 Returns a reference to the substring captured by the \a nth capturing group.
1750 If the \a nth capturing group did not capture a string or doesn't exist,
1751 returns a null QStringRef.
1753 \sa captured(), lastCapturedIndex(), capturedStart(), capturedEnd(),
1754 capturedLength(), QStringRef::isNull()
// Like captured(int), but returns a QStringRef into the subject; a null
// QStringRef signals an out-of-range index or a group that did not capture.
1756 QStringRef QRegularExpressionMatch::capturedRef(int nth) const
1758 if (nth < 0 || nth > lastCapturedIndex())
1759 return QStringRef();
1761 int start = capturedStart(nth);
1763 if (start == -1) // didn't capture
1764 return QStringRef();
1766 return d->subject.midRef(start, capturedLength(nth));
1770 Returns the substring captured by the capturing group named \a name. If the
1771 capturing group named \a name did not capture a string or doesn't exist,
1772 returns a null QString.
1774 \sa capturedRef(), capturedStart(), capturedEnd(), capturedLength(),
// Named-group overload: warns on an empty name, resolves the name to a group
// index through captureIndexForName() and delegates to captured(int).
1777 QString QRegularExpressionMatch::captured(const QString &name) const
1779 if (name.isEmpty()) {
1780 qWarning("QRegularExpressionMatch::captured: empty capturing group name passed");
1783 int nth = d->regularExpression.d->captureIndexForName(name);
1786 return captured(nth);
1790 Returns a reference to the string captured by the capturing group named \a
1791 name. If the capturing group named \a name did not capture a string or
1792 doesn't exist, returns a null QStringRef.
1794 \sa captured(), capturedStart(), capturedEnd(), capturedLength(),
1795 QStringRef::isNull()
// Named-group overload: warns and returns a null QStringRef on an empty name,
// otherwise resolves the name and delegates to capturedRef(int).
1797 QStringRef QRegularExpressionMatch::capturedRef(const QString &name) const
1799 if (name.isEmpty()) {
1800 qWarning("QRegularExpressionMatch::capturedRef: empty capturing group name passed");
1801 return QStringRef();
1803 int nth = d->regularExpression.d->captureIndexForName(name);
1805 return QStringRef();
1806 return capturedRef(nth);
1810 Returns a list of all strings captured by capturing groups, in the order
1811 the groups themselves appear in the pattern string.
// Collects captured(i) for every index from 0 up to lastCapturedIndex().
1813 QStringList QRegularExpressionMatch::capturedTexts() const
1816 for (int i = 0; i <= lastCapturedIndex(); ++i)
1817 texts << captured(i);
1822 Returns the offset inside the subject string corresponding to the
1823 starting position of the substring captured by the \a nth capturing group.
1824 If the \a nth capturing group did not capture a string or doesn't exist,
1827 \sa capturedEnd(), capturedLength(), captured()
// Start offsets live at the even positions of capturedOffsets.
1829 int QRegularExpressionMatch::capturedStart(int nth) const
1831 if (nth < 0 || nth > lastCapturedIndex())
1834 return d->capturedOffsets.at(nth * 2);
1838 Returns the length of the substring captured by the \a nth capturing group.
1840 \note This function returns 0 if the \a nth capturing group did not capture
1841 a string or doesn't exist.
1843 \sa capturedStart(), capturedEnd(), captured()
// Length is the difference between the end and start offsets of group `nth`.
1845 int QRegularExpressionMatch::capturedLength(int nth) const
1847 // bound checking performed by these two functions
1848 return capturedEnd(nth) - capturedStart(nth);
1852 Returns the offset inside the subject string immediately after the ending
1853 position of the substring captured by the \a nth capturing group. If the \a
1854 nth capturing group did not capture a string or doesn't exist, returns -1.
1856 \sa capturedStart(), capturedLength(), captured()
// End offsets live at the odd positions of capturedOffsets.
1858 int QRegularExpressionMatch::capturedEnd(int nth) const
1860 if (nth < 0 || nth > lastCapturedIndex())
1863 return d->capturedOffsets.at(nth * 2 + 1);
1867 Returns the offset inside the subject string corresponding to the starting
1868 position of the substring captured by the capturing group named \a name.
1869 If the capturing group named \a name did not capture a string or doesn't
1872 \sa capturedEnd(), capturedLength(), captured()
// Named-group overload: warns on an empty name, resolves the name through
// captureIndexForName() and delegates to capturedStart(int).
1874 int QRegularExpressionMatch::capturedStart(const QString &name) const
1876 if (name.isEmpty()) {
1877 qWarning("QRegularExpressionMatch::capturedStart: empty capturing group name passed");
1880 int nth = d->regularExpression.d->captureIndexForName(name);
1883 return capturedStart(nth);
1887 Returns the length of the substring captured by the capturing group named
1888 \a name.
1890 \note This function returns 0 if the capturing group named \a name did not
1891 capture a string or doesn't exist.
1893 \sa capturedStart(), capturedEnd(), captured()
// Named-group overload: warns on an empty name, resolves the name through
// captureIndexForName() and delegates to capturedLength(int).
1895 int QRegularExpressionMatch::capturedLength(const QString &name) const
1897 if (name.isEmpty()) {
1898 qWarning("QRegularExpressionMatch::capturedLength: empty capturing group name passed");
1901 int nth = d->regularExpression.d->captureIndexForName(name);
1904 return capturedLength(nth);
1908 Returns the offset inside the subject string immediately after the ending
1909 position of the substring captured by the capturing group named \a name. If
1910 the capturing group named \a name did not capture a string or doesn't
1913 \sa capturedStart(), capturedLength(), captured()
// Named-group overload: warns on an empty name, resolves the name through
// captureIndexForName() and delegates to capturedEnd(int).
1915 int QRegularExpressionMatch::capturedEnd(const QString &name) const
1917 if (name.isEmpty()) {
1918 qWarning("QRegularExpressionMatch::capturedEnd: empty capturing group name passed");
1921 int nth = d->regularExpression.d->captureIndexForName(name);
1924 return capturedEnd(nth);
1928 Returns true if the regular expression matched against the subject string,
1931 \sa QRegularExpression::match(), hasPartialMatch()
1933 bool QRegularExpressionMatch::hasMatch() const
1939 Returns true if the regular expression partially matched against the
1940 subject string, or false otherwise.
1942 \note Only a match that explicitly used one of the partial match types
1943 can yield a partial match. Still, if such a match succeeds totally, this
1944 function will return false, while hasMatch() will return true.
1946 \sa QRegularExpression::match(), QRegularExpression::MatchType, hasMatch()
// Plain getter for the partial-match flag set by doMatch().
1948 bool QRegularExpressionMatch::hasPartialMatch() const
1950 return d->hasPartialMatch;
1954 Returns true if the match object was obtained as a result from the
1955 QRegularExpression::match() function invoked on a valid QRegularExpression
1956 object; returns false if the QRegularExpression was invalid.
1958 \sa QRegularExpression::match(), QRegularExpression::isValid()
1960 bool QRegularExpressionMatch::isValid() const
1968 QRegularExpressionMatchIterator::QRegularExpressionMatchIterator(QRegularExpressionMatchIteratorPrivate &dd)
1974 Destroys the QRegularExpressionMatchIterator object.
1976 QRegularExpressionMatchIterator::~QRegularExpressionMatchIterator()
1981 Constructs a QRegularExpressionMatchIterator object as a copy of \a iterator.
1986 QRegularExpressionMatchIterator::QRegularExpressionMatchIterator(const QRegularExpressionMatchIterator &iterator)
1992 Assigns the iterator \a iterator to this object, and returns a reference to
1995 QRegularExpressionMatchIterator &QRegularExpressionMatchIterator::operator=(const QRegularExpressionMatchIterator &iterator)
2002 \fn void QRegularExpressionMatchIterator::swap(QRegularExpressionMatchIterator &other)
2004 Swaps the iterator \a other with this iterator object. This operation is
2005 very fast and never fails.
2009 Returns true if the iterator object was obtained as a result from the
2010 QRegularExpression::globalMatch() function invoked on a valid
2011 QRegularExpression object; returns false if the QRegularExpression was invalid.
2014 \sa QRegularExpression::globalMatch(), QRegularExpression::isValid()
// The iterator's validity is that of the match it currently holds in d->next.
2016 bool QRegularExpressionMatchIterator::isValid() const
2018 return d->next.isValid();
2022 Returns true if there is at least one match result ahead of the iterator;
2023 otherwise it returns false.
// Forwards to hasNext() on the private object (d).
2027 bool QRegularExpressionMatchIterator::hasNext() const
2029 return d->hasNext();
2033 Returns the next match result without moving the iterator.
2035 \note Calling this function when the iterator is at the end of the result
2036 set leads to undefined results.
// Returns a QRegularExpressionMatch by value without being declared
// non-const, i.e. it does not modify the iterator object itself.
// NOTE(review): the condition guarding the warning and the return statements
// are not visible in this excerpt; per the message text, the warning is meant
// for calls made on an iterator that is already at the end.
2038 QRegularExpressionMatch QRegularExpressionMatchIterator::peekNext() const
2041 qWarning("QRegularExpressionMatchIterator::peekNext() called on an iterator already at end");
2047 Returns the next match result and advances the iterator by one position.
2049 \note Calling this function when the iterator is at the end of the result
2050 set leads to undefined results.
// Advances the iterator: the pending match (d->next) is copied aside and
// d->next is replaced by the match that follows it.
// NOTE(review): the condition guarding the warning and the return statements
// are not visible in this excerpt; presumably `current` is what gets returned.
2052 QRegularExpressionMatch QRegularExpressionMatchIterator::next()
2055 qWarning("QRegularExpressionMatchIterator::next() called on an iterator already at end");
// Keep a copy of the match that is about to be handed out...
2059 QRegularExpressionMatch current = d->next;
// ...then ask that match's private data for the following match.
2060 d->next = d->next.d.constData()->nextMatch();
2065 Returns the QRegularExpression object whose globalMatch() function returned this object.
2068 \sa QRegularExpression::globalMatch(), matchType(), matchOptions()
// Plain accessor: returns (by value) the regularExpression member stored in
// the private data (d).
2070 QRegularExpression QRegularExpressionMatchIterator::regularExpression() const
2072 return d->regularExpression;
2076 Returns the match type that was used to get this
2077 QRegularExpressionMatchIterator object, that is, the match type that was
2078 passed to QRegularExpression::globalMatch().
2080 \sa QRegularExpression::globalMatch(), regularExpression(), matchOptions()
// Plain accessor: returns the matchType member stored in the private data (d).
2082 QRegularExpression::MatchType QRegularExpressionMatchIterator::matchType() const
2084 return d->matchType;
2088 Returns the match options that were used to get this
2089 QRegularExpressionMatchIterator object, that is, the match options that
2090 were passed to QRegularExpression::globalMatch().
2092 \sa QRegularExpression::globalMatch(), regularExpression(), matchType()
// Plain accessor: returns the matchOptions member stored in the private data (d).
2094 QRegularExpression::MatchOptions QRegularExpressionMatchIterator::matchOptions() const
2096 return d->matchOptions;
2099 #ifndef QT_NO_DATASTREAM
2101 \relates QRegularExpression
2103 Writes the regular expression \a re to stream \a out.
2105 \sa {Serializing Qt Data Types}
// Serializes a QRegularExpression as two fields: the pattern string, then the
// pattern options converted to a quint32.
2107 QDataStream &operator<<(QDataStream &out, const QRegularExpression &re)
2109 out << re.pattern() << quint32(re.patternOptions());
2114 \relates QRegularExpression
2116 Reads a regular expression from stream \a in into \a re.
2118 \sa {Serializing Qt Data Types}
// Deserializes a QRegularExpression: reads a pattern followed by a quint32
// options value (the order in which the QDataStream operator<< writes them)
// and applies both to re.
// NOTE(review): the declaration of `pattern` and the return statement are not
// visible in this excerpt.
2120 QDataStream &operator>>(QDataStream &in, QRegularExpression &re)
2123 quint32 patternOptions;
2124 in >> pattern >> patternOptions;
2125 re.setPattern(pattern);
// The raw integer is converted back into the PatternOptions flags type.
2126 re.setPatternOptions(QRegularExpression::PatternOptions(patternOptions));
2131 #ifndef QT_NO_DEBUG_STREAM
2133 \relates QRegularExpression
2135 Writes the regular expression \a re into the debug object \a debug for debugging purposes.
2138 \sa {Debugging Techniques}
// Debug output for a regular expression, in the form
// QRegularExpression(<pattern>, <pattern options>).
2140 QDebug operator<<(QDebug debug, const QRegularExpression &re)
// nospace() keeps the pieces of the representation together.
2142 debug.nospace() << "QRegularExpression(" << re.pattern() << ", " << re.patternOptions() << ")";
// Hand the stream back in space() mode.
2143 return debug.space();
2147 \relates QRegularExpression
2149 Writes the pattern options \a patternOptions into the debug object \a debug
2150 for debugging purposes.
2152 \sa {Debugging Techniques}
// Debug output for a set of pattern options, in the form
// QRegularExpression::PatternOptions(<flags>), where <flags> is either
// "NoPatternOption" or the names of the set options, each appended with a
// trailing '|'.
// NOTE(review): the declaration of `flags` and any removal of the final '|'
// are not visible in this excerpt.
2154 QDebug operator<<(QDebug debug, QRegularExpression::PatternOptions patternOptions)
// No option set at all: use the dedicated name.
2158 if (patternOptions == QRegularExpression::NoPatternOption) {
2159 flags = "NoPatternOption";
2161 flags.reserve(200); // worst case...
// One independent test per option; every set option contributes its name.
2162 if (patternOptions & QRegularExpression::CaseInsensitiveOption)
2163 flags.append("CaseInsensitiveOption|");
2164 if (patternOptions & QRegularExpression::DotMatchesEverythingOption)
2165 flags.append("DotMatchesEverythingOption|");
2166 if (patternOptions & QRegularExpression::MultilineOption)
2167 flags.append("MultilineOption|");
2168 if (patternOptions & QRegularExpression::ExtendedPatternSyntaxOption)
2169 flags.append("ExtendedPatternSyntaxOption|");
2170 if (patternOptions & QRegularExpression::InvertedGreedinessOption)
2171 flags.append("InvertedGreedinessOption|");
2172 if (patternOptions & QRegularExpression::DontCaptureOption)
2173 flags.append("DontCaptureOption|");
2174 if (patternOptions & QRegularExpression::UseUnicodePropertiesOption)
2175 flags.append("UseUnicodePropertiesOption|");
// Emit the assembled flag list without automatic spacing.
2179 debug.nospace() << "QRegularExpression::PatternOptions(" << flags << ")";
// Hand the stream back in space() mode.
2181 return debug.space();
2184 \relates QRegularExpressionMatch
2186 Writes the match object \a match into the debug object \a debug for debugging purposes.
2189 \sa {Debugging Techniques}
// Debug output for a match object. The text starts with
// "QRegularExpressionMatch(" and then depends on the state of the match:
// invalid, full match, partial match, or no match.
// NOTE(review): several lines of this function are not visible in this
// excerpt, so the complete output format cannot be confirmed from here.
2191 QDebug operator<<(QDebug debug, const QRegularExpressionMatch &match)
2193 debug.nospace() << "QRegularExpressionMatch(";
// Invalid match objects are reported as such and nothing else is printed.
2195 if (!match.isValid()) {
2196 debug << "Invalid)";
2197 return debug.space();
// Full match: walk every capturing group from 0 up to and including
// lastCapturedIndex(), printing its start offset, end offset and text.
2202 if (match.hasMatch()) {
2203 debug << ", has match: ";
2204 for (int i = 0; i <= match.lastCapturedIndex(); ++i) {
2206 << ":(" << match.capturedStart(i) << ", " << match.capturedEnd(i)
2207 << ", " << match.captured(i) << ")";
2208 if (i < match.lastCapturedIndex())
2211 } else if (match.hasPartialMatch()) {
// Partial match: only group 0 is printed (start offset, end offset, text).
2212 debug << ", has partial match: ("
2213 << match.capturedStart(0) << ", "
2214 << match.capturedEnd(0) << ", "
2215 << match.captured(0) << ")";
2217 debug << ", no match";
// Hand the stream back in space() mode.
2222 return debug.space();
2226 // fool lupdate: make it extract those strings for translation, but don't put them
2227 // inside Qt -- they're already inside libpcre (cf. man 3 pcreapi, pcre_compile.c).
2230 /* PCRE is a library of functions to support regular expressions whose syntax
2231 and semantics are as close as possible to those of the Perl 5 language.
2233 Written by Philip Hazel
2234 Copyright (c) 1997-2012 University of Cambridge
2236 -----------------------------------------------------------------------------
2237 Redistribution and use in source and binary forms, with or without
2238 modification, are permitted provided that the following conditions are met:
2240 * Redistributions of source code must retain the above copyright notice,
2241 this list of conditions and the following disclaimer.
2243 * Redistributions in binary form must reproduce the above copyright
2244 notice, this list of conditions and the following disclaimer in the
2245 documentation and/or other materials provided with the distribution.
2247 * Neither the name of the University of Cambridge nor the names of its
2248 contributors may be used to endorse or promote products derived from
2249 this software without specific prior written permission.
2251 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
2252 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
2253 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
2254 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
2255 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
2256 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
2257 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
2258 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
2259 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
2260 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
2261 POSSIBILITY OF SUCH DAMAGE.
2262 -----------------------------------------------------------------------------
2265 static const char *pcreCompileErrorCodes[] =
2267 QT_TRANSLATE_NOOP("QRegularExpression", "no error"),
2268 QT_TRANSLATE_NOOP("QRegularExpression", "\\ at end of pattern"),
2269 QT_TRANSLATE_NOOP("QRegularExpression", "\\c at end of pattern"),
2270 QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized character follows \\"),
2271 QT_TRANSLATE_NOOP("QRegularExpression", "numbers out of order in {} quantifier"),
2272 QT_TRANSLATE_NOOP("QRegularExpression", "number too big in {} quantifier"),
2273 QT_TRANSLATE_NOOP("QRegularExpression", "missing terminating ] for character class"),
2274 QT_TRANSLATE_NOOP("QRegularExpression", "invalid escape sequence in character class"),
2275 QT_TRANSLATE_NOOP("QRegularExpression", "range out of order in character class"),
2276 QT_TRANSLATE_NOOP("QRegularExpression", "nothing to repeat"),
2277 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unexpected repeat"),
2278 QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized character after (? or (?-"),
2279 QT_TRANSLATE_NOOP("QRegularExpression", "POSIX named classes are supported only within a class"),
2280 QT_TRANSLATE_NOOP("QRegularExpression", "missing )"),
2281 QT_TRANSLATE_NOOP("QRegularExpression", "reference to non-existent subpattern"),
2282 QT_TRANSLATE_NOOP("QRegularExpression", "erroffset passed as NULL"),
2283 QT_TRANSLATE_NOOP("QRegularExpression", "unknown option bit(s) set"),
2284 QT_TRANSLATE_NOOP("QRegularExpression", "missing ) after comment"),
2285 QT_TRANSLATE_NOOP("QRegularExpression", "regular expression is too large"),
2286 QT_TRANSLATE_NOOP("QRegularExpression", "failed to get memory"),
2287 QT_TRANSLATE_NOOP("QRegularExpression", "unmatched parentheses"),
2288 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: code overflow"),
2289 QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized character after (?<"),
2290 QT_TRANSLATE_NOOP("QRegularExpression", "lookbehind assertion is not fixed length"),
2291 QT_TRANSLATE_NOOP("QRegularExpression", "malformed number or name after (?("),
2292 QT_TRANSLATE_NOOP("QRegularExpression", "conditional group contains more than two branches"),
2293 QT_TRANSLATE_NOOP("QRegularExpression", "assertion expected after (?("),
2294 QT_TRANSLATE_NOOP("QRegularExpression", "(?R or (?[+-]digits must be followed by )"),
2295 QT_TRANSLATE_NOOP("QRegularExpression", "unknown POSIX class name"),
2296 QT_TRANSLATE_NOOP("QRegularExpression", "POSIX collating elements are not supported"),
2297 QT_TRANSLATE_NOOP("QRegularExpression", "this version of PCRE is not compiled with PCRE_UTF8 support"),
2298 QT_TRANSLATE_NOOP("QRegularExpression", "character value in \\x{...} sequence is too large"),
2299 QT_TRANSLATE_NOOP("QRegularExpression", "invalid condition (?(0)"),
2300 QT_TRANSLATE_NOOP("QRegularExpression", "\\C not allowed in lookbehind assertion"),
2301 QT_TRANSLATE_NOOP("QRegularExpression", "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u"),
2302 QT_TRANSLATE_NOOP("QRegularExpression", "number after (?C is > 255"),
2303 QT_TRANSLATE_NOOP("QRegularExpression", "closing ) for (?C expected"),
2304 QT_TRANSLATE_NOOP("QRegularExpression", "recursive call could loop indefinitely"),
2305 QT_TRANSLATE_NOOP("QRegularExpression", "unrecognized character after (?P"),
2306 QT_TRANSLATE_NOOP("QRegularExpression", "syntax error in subpattern name (missing terminator)"),
2307 QT_TRANSLATE_NOOP("QRegularExpression", "two named subpatterns have the same name"),
2308 QT_TRANSLATE_NOOP("QRegularExpression", "invalid UTF-8 string"),
2309 QT_TRANSLATE_NOOP("QRegularExpression", "support for \\P, \\p, and \\X has not been compiled"),
2310 QT_TRANSLATE_NOOP("QRegularExpression", "malformed \\P or \\p sequence"),
2311 QT_TRANSLATE_NOOP("QRegularExpression", "unknown property name after \\P or \\p"),
2312 QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name is too long (maximum 32 characters)"),
2313 QT_TRANSLATE_NOOP("QRegularExpression", "too many named subpatterns (maximum 10000)"),
2314 QT_TRANSLATE_NOOP("QRegularExpression", "octal value is greater than \\377 (not in UTF-8 mode)"),
2315 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: overran compiling workspace"),
2316 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: previously-checked referenced subpattern not found"),
2317 QT_TRANSLATE_NOOP("QRegularExpression", "DEFINE group contains more than one branch"),
2318 QT_TRANSLATE_NOOP("QRegularExpression", "repeating a DEFINE group is not allowed"),
2319 QT_TRANSLATE_NOOP("QRegularExpression", "inconsistent NEWLINE options"),
2320 QT_TRANSLATE_NOOP("QRegularExpression", "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number"),
2321 QT_TRANSLATE_NOOP("QRegularExpression", "a numbered reference must not be zero"),
2322 QT_TRANSLATE_NOOP("QRegularExpression", "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)"),
2323 QT_TRANSLATE_NOOP("QRegularExpression", "(*VERB) not recognized"),
2324 QT_TRANSLATE_NOOP("QRegularExpression", "number is too big"),
2325 QT_TRANSLATE_NOOP("QRegularExpression", "subpattern name expected"),
2326 QT_TRANSLATE_NOOP("QRegularExpression", "digit expected after (?+"),
2327 QT_TRANSLATE_NOOP("QRegularExpression", "] is an invalid data character in JavaScript compatibility mode"),
2328 QT_TRANSLATE_NOOP("QRegularExpression", "different names for subpatterns of the same number are not allowed"),
2329 QT_TRANSLATE_NOOP("QRegularExpression", "(*MARK) must have an argument"),
2330 QT_TRANSLATE_NOOP("QRegularExpression", "this version of PCRE is not compiled with PCRE_UCP support"),
2331 QT_TRANSLATE_NOOP("QRegularExpression", "\\c must be followed by an ASCII character"),
2332 QT_TRANSLATE_NOOP("QRegularExpression", "\\k is not followed by a braced, angle-bracketed, or quoted name"),
2333 QT_TRANSLATE_NOOP("QRegularExpression", "internal error: unknown opcode in find_fixedlength()"),
2334 QT_TRANSLATE_NOOP("QRegularExpression", "\\N is not supported in a class"),
2335 QT_TRANSLATE_NOOP("QRegularExpression", "too many forward references"),
2336 QT_TRANSLATE_NOOP("QRegularExpression", "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)"),
2337 QT_TRANSLATE_NOOP("QRegularExpression", "invalid UTF-16 string")