#if defined(PERL_IN_REGEXEC_C) || defined(PERL_DECL_PROT)
ERs |I32 |regmatch |NN regmatch_info *reginfo|NN regnode *prog
-ERs |I32 |regrepeat |NN const regexp *prog|NN const regnode *p|I32 max
+ERs |I32 |regrepeat |NN const regexp *prog|NN const regnode *p|I32 max|int depth
ERs |I32 |regtry |NN regmatch_info *reginfo|NN char **startpos
ERs |bool |reginclass |NULLOK const regexp *prog|NN const regnode *n|NN const U8 *p|NULLOK STRLEN *lenp\
|bool do_utf8sv_is_utf8
#if defined(PERL_IN_REGEXEC_C) || defined(PERL_DECL_PROT)
#if defined(PERL_CORE) || defined(PERL_EXT)
#define regmatch(a,b) S_regmatch(aTHX_ a,b)
-#define regrepeat(a,b,c) S_regrepeat(aTHX_ a,b,c)
+#define regrepeat(a,b,c,d) S_regrepeat(aTHX_ a,b,c,d)
#define regtry(a,b) S_regtry(aTHX_ a,b)
#define reginclass(a,b,c,d,e) S_reginclass(aTHX_ a,b,c,d,e)
#define regcppush(a) S_regcppush(aTHX_ a)
#Guessed: match at offset 0
#%MATCHED%
#Freeing REx: "[q]"
-Got 100 bytes for offset annotations.
-Offsets: [12]
+Got 108 bytes for offset annotations.
+Offsets: [13]
1:1[3] 3:4[0]
%MATCHED%
=item Backtracking control verbs
The regex engine now supports a number of special purpose backtrack
-control verbs: (?COMMIT), (?CUT), (?ERROR) and (?FAIL). See L<perlre>
-for their descriptions.
+control verbs: (*COMMIT), (*MARK), (*CUT), (*ERROR), (*FAIL) and
+(*ACCEPT). See L<perlre> for their descriptions.
=back
Note that if you want to enable a warnings category registered by a module
(e.g. C<use warnings 'File::Find'>), you must have imported this module
+
+=item Unknown verb pattern '%s' in regex; marked by <-- HERE in m/%s/
+
+(F) You either made a typo or have incorrectly put a C<*> quantifier
+after an open brace in your pattern. Check the pattern and review
+L<perlre> for details on legal verb patterns.
+
first.
=item unmatched [ in regex; marked by <-- HERE in m/%s/
compressed integer format and could not be converted to an integer.
See L<perlfunc/pack>.
+=item Unterminated verb pattern in regex; marked by <-- HERE in m/%s/
+
+(F) You used a pattern of the form C<(*VERB)> but did not terminate
+the pattern with a C<)>. Fix the pattern and retry.
+
+=item Unterminated verb pattern argument in regex; marked by <-- HERE in m/%s/
+
+(F) You used a pattern of the form C<(*VERB:ARG)> but did not terminate
+the pattern with a C<)>. Fix the pattern and retry.
+
+
=item Unterminated <> operator
(F) The lexer saw a left angle bracket in a place where it was expecting
reference variables in outer subroutines are created, they
are automatically rebound to the current values of such variables.
+=item Verb pattern '%s' has a mandatory argument in regex; marked by <-- HERE in m/%s/
+
+(F) You used a verb pattern that requires an argument. Supply an argument
+or check that you are using the right verb.
+
+=item Verb pattern '%s' may not have an argument in regex; marked by <-- HERE in m/%s/
+
+(F) You used a verb pattern that is not allowed an argument. Remove the
+argument or check that you are using the right verb.
+
=item Version number must be a constant number
(P) The attempt to translate a C<use Module n.n LIST> statement into
It is an error to refer to a name that is not declared somewhere in the
pattern.
-=item C<(?FAIL)> C<(?F)>
-X<(?FAIL)> X<(?F)>
+=item C<(?(condition)yes-pattern|no-pattern)>
+X<(?()>
-This pattern matches nothing and always fails. It can be used to force the
-engine to backtrack. It is equivalent to C<(?!)>, but easier to read. In
-fact, C<(?!)> gets optimised into C<(?FAIL)> internally.
+=item C<(?(condition)yes-pattern)>
-It is probably useful only when combined with C<(?{})> or C<(??{})>.
+Conditional expression. C<(condition)> should be either an integer in
+parentheses (which is valid if the corresponding pair of parentheses
+matched), a look-ahead/look-behind/evaluate zero-width assertion, a
+name in angle brackets or single quotes (which is valid if a buffer
+with the given name matched), or the special symbol (R) (true when
+evaluated inside of recursion or eval). Additionally the R may be
+followed by a number, (which will be true when evaluated when recursing
+inside of the appropriate group), or by C<&NAME>, in which case it will
+be true only when evaluated during recursion in the named group.
+
+Here's a summary of the possible predicates:
+
+=over 4
+
+=item (1) (2) ...
+
+Checks if the numbered capturing buffer has matched something.
+
+=item (<NAME>) ('NAME')
+
+Checks if a buffer with the given name has matched something.
+
+=item (?{ CODE })
+
+Treats the code block as the condition.
+
+=item (R)
+
+Checks if the expression has been evaluated inside of recursion.
+
+=item (R1) (R2) ...
+
+Checks if the expression has been evaluated while executing directly
+inside of the n-th capture group. This check is the regex equivalent of
+
+ if ((caller(0))[3] eq 'subname') { ... }
+
+In other words, it does not check the full recursion stack.
+
+=item (R&NAME)
+
+Similar to C<(R1)>, this predicate checks to see if we're executing
+directly inside of the leftmost group with a given name (this is the same
+logic used by C<(?&NAME)> to disambiguate). It does not check the full
+stack, but only the name of the innermost active recursion.
+
+=item (DEFINE)
+
+In this case, the yes-pattern is never directly executed, and no
+no-pattern is allowed. Similar in spirit to C<(?{0})> but more efficient.
+See below for details.
+
+=back
+
+For example:
+
+ m{ ( \( )?
+ [^()]+
+ (?(1) \) )
+ }x
+
+matches a chunk of non-parentheses, possibly included in parentheses
+themselves.
+
+A special form is the C<(DEFINE)> predicate, which never executes directly
+its yes-pattern, and does not allow a no-pattern. This allows to define
+subpatterns which will be executed only by using the recursion mechanism.
+This way, you can define a set of regular expression rules that can be
+bundled into any pattern you choose.
+
+It is recommended that for this usage you put the DEFINE block at the
+end of the pattern, and that you name any subpatterns defined within it.
+
+Also, it's worth noting that patterns defined this way probably will
+not be as efficient, as the optimiser is not very clever about
+handling them.
+
+An example of how this might be used is as follows:
+
+ /(?<NAME>(&NAME_PAT))(?<ADDR>(&ADDRESS_PAT))
+ (?(DEFINE)
+ (<NAME_PAT>....)
+ (<ADRESS_PAT>....)
+ )/x
+
+Note that capture buffers matched inside of recursion are not accessible
+after the recursion returns, so the extra layer of capturing buffers are
+necessary. Thus C<$+{NAME_PAT}> would not be defined even though
+C<$+{NAME}> would be.
=item C<< (?>pattern) >>
X<backtrack> X<backtracking> X<atomic> X<possessive>
Consider this pattern:
m{ \(
- (
- [^()]+ # x+
- |
+ (
+ [^()]+ # x+
+ |
\( [^()]* \)
)+
- \)
+ \)
}x
That will efficiently match a nonempty group with matching parentheses
exponential performance will make it appear that your program has
hung. However, a tiny change to this pattern
- m{ \(
- (
- (?> [^()]+ ) # change x+ above to (?> x+ )
- |
+ m{ \(
+ (
+ (?> [^()]+ ) # change x+ above to (?> x+ )
+ |
\( [^()]* \)
)+
- \)
+ \)
}x
which uses C<< (?>...) >> matches exactly when the one above does (verifying
PAT?+ (?>PAT?)
PAT{min,max}+ (?>PAT{min,max})
-=item C<(?COMMIT)>
-X<(?COMMIT)>
+=back
+
+=head2 Special Backtracking Control Verbs
+
+B<WARNING:> These patterns are experimental and subject to change or
+removal in a future version of perl. Their usage in production code should
+be noted to avoid problems during upgrades.
+
+These special patterns are generally of the form C<(*VERB:ARG)>. Unless
+otherwise stated the ARG argument is optional; in some cases, it is
+forbidden.
+
+Any pattern containing a special backtracking verb that allows an argument
+has the special behaviour that when executed it sets the current packages'
+C<$REGERROR> variable. In this case, the following rules apply:
+
+On failure, this variable will be set to the ARG value of the verb
+pattern, if the verb was involved in the failure of the match. If the ARG
+part of the pattern was omitted, then C<$REGERROR> will be set to TRUE.
+
+On a successful match this variable will be set to FALSE.
+
+B<NOTE:> C<$REGERROR> is not a magic variable in the same sense than
+C<$1> and most other regex related variables. It is not local to a
+scope, nor readonly but instead a volatile package variable similar to
+C<$AUTOLOAD>. Use C<local> to localize changes to it to a specific scope
+if necessary.
+
+If a pattern does not contain a special backtracking verb that allows an
+argument, then C<$REGERROR> is not touched at all.
+
+=over 4
+
+=item Verbs that take an argument
+
+=over 4
+
+=item C<(*NOMATCH)> C<(*NOMATCH:NAME)>
+X<(*NOMATCH)> X<(*NOMATCH:NAME)>
This zero-width pattern commits the match at the current point, preventing
-the engine from back-tracking on failure to the left of the commit point.
-Consider the pattern C<A (?COMMIT) B>, where A and B are complex patterns.
-Until the C<(?COMMIT)> is reached, A may backtrack as necessary to match.
+the engine from backtracking on failure to the left of the this point.
+Consider the pattern C<A (*NOMATCH) B>, where A and B are complex patterns.
+Until the C<(*NOMATCH)> is reached, A may backtrack as necessary to match.
Once it is reached, matching continues in B, which may also backtrack as
necessary; however, should B not match, then no further backtracking will
take place, and the pattern will fail outright at that starting position.
The following example counts all the possible matching strings in a
pattern (without actually matching any of them).
- 'aaab'=~/a+b?(?{print "$&\n"; $count++})(?FAIL)/;
+ 'aaab' =~ /a+b?(?{print "$&\n"; $count++})(*FAIL)/;
print "Count=$count\n";
which produces:
a
Count=9
-If we add a C<(?COMMIT)> before the count like the following
+If we add a C<(*NOMATCH)> before the count like the following
- 'aaab'=~/a+b?(?COMMIT)(?{print "$&\n"; $count++})(?FAIL)/;
+ 'aaab' =~ /a+b?(*NOMATCH)(?{print "$&\n"; $count++})(*FAIL)/;
print "Count=$count\n";
we prevent backtracking and find the count of the longest matching
ab
Count=3
-Any number of C<(?COMMIT)> assertions may be used in a pattern.
+Any number of C<(*NOMATCH)> assertions may be used in a pattern.
See also C<< (?>pattern) >> and possessive quantifiers for other
ways to control backtracking.
-=item C<(?CUT)>
-X<(?CUT)>
-
-This zero-width pattern is similar to C<(?COMMIT)>, except that on
-failure it also signifies that whatever text that was matched leading
-up to the C<(?CUT)> pattern cannot match, I<even from another
-starting point>.
-
-Compare the following to the examples in C<(?COMMIT)>, note the string
+=item C<(*MARK)> C<(*MARK:NAME)>
+X<(*MARK)>
+
+This zero-width pattern can be used to mark the point in a string
+reached when a certain part of the pattern has been successfully
+matched. This mark may be given a name. A later C<(*CUT)> pattern
+will then cut at that point if backtracked into on failure. Any
+number of (*MARK) patterns are allowed, and the NAME portion is
+optional and may be duplicated.
+
+See C<*CUT> for more detail.
+
+=item C<(*CUT)> C<(*CUT:NAME)>
+X<(*CUT)>
+
+This zero-width pattern is similar to C<(*NOMATCH)>, except that on
+failure it also signifies that whatever text that was matched leading up
+to the C<(*CUT)> pattern being executed cannot be part of a match, I<even
+if started from a later point>. This effectively means that the regex
+engine moves forward to this position on failure and tries to match
+again, (assuming that there is sufficient room to match).
+
+The name of the C<(*CUT:NAME)> pattern has special significance. If a
+C<(*MARK:NAME)> was encountered while matching, then it is the position
+where that pattern was executed that is used for the "cut point" in the
+string. If no mark of that name was encountered, then the cut is done at
+the point where the C<(*CUT)> was. Similarly if no NAME is specified in
+the C<(*CUT)>, and if a C<(*MARK)> with any name (or none) is encountered,
+then that C<(*MARK)>'s cursor point will be used. If the C<(*CUT)> is not
+preceded by a C<(*MARK)>, then the cut point is where the string was when
+the C<(*CUT)> was encountered.
+
+Compare the following to the examples in C<(*NOMATCH)>, note the string
is twice as long:
- 'aaabaaab'=~/a+b?(?CUT)(?{print "$&\n"; $count++})(?FAIL)/;
+ 'aaabaaab' =~ /a+b?(*CUT)(?{print "$&\n"; $count++})(*FAIL)/;
print "Count=$count\n";
outputs
aaab
Count=2
-Once the 'aaab' at the start of the string has matched and the C<(?CUT)>
-executed the next startpoint will be where the cursor was when the
-C<(?CUT)> was executed.
+Once the 'aaab' at the start of the string has matched, and the C<(*CUT)>
+executed, the next startpoint will be where the cursor was when the
+C<(*CUT)> was executed.
-=item C<(?ERROR)>
-X<(?ERROR)>
+=item C<(*COMMIT)>
+X<(*COMMIT)>
-This zero-width pattern is similar to C<(?CUT)> except that it causes
+This zero-width pattern is similar to C<(*CUT)> except that it causes
the match to fail outright. No attempts to match will occur again.
- 'aaabaaab'=~/a+b?(?ERROR)(?{print "$&\n"; $count++})(?FAIL)/;
+ 'aaabaaab' =~ /a+b?(*COMMIT)(?{print "$&\n"; $count++})(*FAIL)/;
print "Count=$count\n";
outputs
aaab
Count=1
-In other words, once the C<(?ERROR)> has been entered and then pattern
-does not match then the regex engine will not try any further matching at
-all on the rest of the string.
-
-=item C<(?(condition)yes-pattern|no-pattern)>
-X<(?()>
-
-=item C<(?(condition)yes-pattern)>
+In other words, once the C<(*COMMIT)> has been entered, and if the pattern
+does not match, the regex engine will not try any further matching on the
+rest of the string.
-Conditional expression. C<(condition)> should be either an integer in
-parentheses (which is valid if the corresponding pair of parentheses
-matched), a look-ahead/look-behind/evaluate zero-width assertion, a
-name in angle brackets or single quotes (which is valid if a buffer
-with the given name matched), the special symbol (R) (true when
-evaluated inside of recursion or eval). Additionally the R may be
-followed by a number, (which will be true when evaluated when recursing
-inside of the appropriate group), or by C<&NAME> in which case it will
-be true only when evaluated during recursion in the named group.
+=back
-Here's a summary of the possible predicates:
+=item Verbs without an argument
=over 4
-=item (1) (2) ...
-
-Checks if the numbered capturing buffer has matched something.
-
-=item (<NAME>) ('NAME')
+=item C<(*FAIL)> C<(*F)>
+X<(*FAIL)> X<(*F)>
-Checks if a buffer with the given name has matched something.
-
-=item (?{ CODE })
-
-Treats the code block as the condition
-
-=item (R)
-
-Checks if the expression has been evaluated inside of recursion.
-
-=item (R1) (R2) ...
+This pattern matches nothing and always fails. It can be used to force the
+engine to backtrack. It is equivalent to C<(?!)>, but easier to read. In
+fact, C<(?!)> gets optimised into C<(*FAIL)> internally.
-Checks if the expression has been evaluated while executing directly
-inside of the n-th capture group. This check is the regex equivalent of
+It is probably useful only when combined with C<(?{})> or C<(??{})>.
- if ((caller(0))[3] eq 'subname') { .. }
+=item C<(*ACCEPT)>
+X<(*ACCEPT)>
-In other words, it does not check the full recursion stack.
+B<WARNING:> This feature is highly experimental. It is not recommended
+for production code.
-=item (R&NAME)
+This pattern matches nothing and causes the end of successful matching at
+the point at which the C<(*ACCEPT)> pattern was encountered, regardless of
+whether there is actually more to match in the string. When inside of a
+nested pattern, such as recursion or a dynamically generated subbpattern
+via C<(??{})>, only the innermost pattern is ended immediately.
-Similar to C<(R1)>, this predicate checks to see if we're executing
-directly inside of the leftmost group with a given name (this is the same
-logic used by C<(?&NAME)> to disambiguate). It does not check the full
-stack, but only the name of the innermost active recursion.
+If the C<(*ACCEPT)> is inside of capturing buffers then the buffers are
+marked as ended at the point at which the C<(*ACCEPT)> was encountered.
+For instance:
-=item (DEFINE)
+ 'AB' =~ /(A (A|B(*ACCEPT)|C) D)(E)/x;
-In this case, the yes-pattern is never directly executed, and no
-no-pattern is allowed. Similar in spirit to C<(?{0})> but more efficient.
-See below for details.
+will match, and C<$1> will be C<AB> and C<$2> will be C<B>, C<$3> will not
+be set. If another branch in the inner parens were matched, such as in the
+string 'ACDE', then the C<D> and C<E> would have to be matched as well.
=back
-For example:
-
- m{ ( \( )?
- [^()]+
- (?(1) \) )
- }x
-
-matches a chunk of non-parentheses, possibly included in parentheses
-themselves.
-
-A special form is the C<(DEFINE)> predicate, which never executes directly
-its yes-pattern, and does not allow a no-pattern. This allows to define
-subpatterns which will be executed only by using the recursion mechanism.
-This way, you can define a set of regular expression rules that can be
-bundled into any pattern you choose.
-
-It is recommended that for this usage you put the DEFINE block at the
-end of the pattern, and that you name any subpatterns defined within it.
-
-Also, it's worth noting that patterns defined this way probably will
-not be as efficient, as the optimiser is not very clever about
-handling them. YMMV.
-
-An example of how this might be used is as follows:
-
- /(?<NAME>(&NAME_PAT))(?<ADDR>(&ADDRESS_PAT))
- (?(DEFINE)
- (<NAME_PAT>....)
- (<ADRESS_PAT>....)
- )/x
-
-Note that capture buffers matched inside of recursion are not accessible
-after the recursion returns, so the extra layer of capturing buffers are
-necessary. Thus C<$+{NAME_PAT}> would not be defined even though
-C<$+{NAME}> would be.
-
=back
=head2 Backtracking
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2);
-STATIC I32 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
+STATIC I32 S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
__attribute__warn_unused_result__
__attribute__nonnull__(pTHX_1)
__attribute__nonnull__(pTHX_2);
U32 seen;
I32 size; /* Code size. */
I32 npar; /* () count. */
+ I32 nestroot; /* root parens we are in - used by accept */
I32 extralen;
I32 seen_zerolen;
I32 seen_evals;
#define RExC_seen (pRExC_state->seen)
#define RExC_size (pRExC_state->size)
#define RExC_npar (pRExC_state->npar)
+#define RExC_nestroot (pRExC_state->nestroot)
#define RExC_extralen (pRExC_state->extralen)
#define RExC_seen_zerolen (pRExC_state->seen_zerolen)
#define RExC_seen_evals (pRExC_state->seen_evals)
#define SCF_WHILEM_VISITED_POS 0x2000
#define SCF_TRIE_RESTUDY 0x4000 /* Do restudy? */
-
+#define SCF_SEEN_ACCEPT 0x8000
#define UTF (RExC_utf8 != 0)
#define LOC ((RExC_flags & PMf_LOCALE) != 0)
scan_data_t data_fake;
SV *re_trie_maxbuff = NULL;
regnode *first_non_open = scan;
+ I32 stopmin = I32_MAX;
GET_RE_DEBUG_FLAGS_DECL;
#ifdef DEBUGGING
StructCopy(&zero_scan_data, &data_fake, scan_data_t);
scan = next;
if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
pars++;
+ if (data_fake.flags & SCF_SEEN_ACCEPT) {
+ if ( stopmin > minnext)
+ stopmin = min + min1;
+ flags &= ~SCF_DO_SUBSTR;
+ if (data)
+ data->flags |= SCF_SEEN_ACCEPT;
+ }
if (data) {
if (data_fake.flags & SF_HAS_EVAL)
data->flags |= SF_HAS_EVAL;
if (data)
data->flags |= SF_HAS_EVAL;
}
- else if ( OP(scan)==OPFAIL ) {
+ else if ( PL_regkind[OP(scan)] == ENDLIKE ) {
if (flags & SCF_DO_SUBSTR) {
scan_commit(pRExC_state,data,minlenp);
flags &= ~SCF_DO_SUBSTR;
}
+ if (data && OP(scan)==ACCEPT) {
+ data->flags |= SCF_SEEN_ACCEPT;
+ if (stopmin > min)
+ stopmin = min;
+ }
}
else if (OP(scan) == LOGICAL && scan->flags == 2) /* Embedded follows */
{
if (data_fake.flags & (SF_HAS_PAR|SF_IN_PAR))
pars++;
-
+ if (data_fake.flags & SCF_SEEN_ACCEPT) {
+ if ( stopmin > min + min1)
+ stopmin = min + min1;
+ flags &= ~SCF_DO_SUBSTR;
+ if (data)
+ data->flags |= SCF_SEEN_ACCEPT;
+ }
if (data) {
if (data_fake.flags & SF_HAS_EVAL)
data->flags |= SF_HAS_EVAL;
DEBUG_STUDYDATA(data,depth);
- return min;
+ return min < stopmin ? min : stopmin;
}
STATIC I32
RExC_end = xend;
RExC_naughty = 0;
RExC_npar = 1;
+ RExC_nestroot = 0;
RExC_size = 0L;
RExC_emit = &PL_regdummy;
RExC_whilem_seen = 0;
if (RExC_whilem_seen > 15)
RExC_whilem_seen = 15;
+#ifdef DEBUGGING
+ /* Make room for a sentinel value at the end of the program */
+ RExC_size++;
+#endif
+
/* Allocate space and zero-initialize. Note, the two step process
of zeroing when in debug mode, thus anything assigned has to
happen after that */
RExC_npar = 1;
RExC_emit_start = r->program;
RExC_emit = r->program;
+#ifdef DEBUGGING
+ /* put a sentinal on the end of the program so we can check for
+ overwrites */
+ r->program[RExC_size].type = 255;
+#endif
/* Store the count of eval-groups for security checks: */
RExC_emit->next_off = (RExC_seen_evals > (I32)U16_MAX) ? U16_MAX : (U16)RExC_seen_evals;
REGC((U8)REG_MAGIC, (char*) RExC_emit++);
r->reganch |= ROPT_EVAL_SEEN;
if (RExC_seen & REG_SEEN_CANY)
r->reganch |= ROPT_CANY_SEEN;
+ if (RExC_seen & REG_SEEN_VERBARG)
+ r->reganch |= ROPT_VERBARG_SEEN;
if (RExC_paren_names)
r->paren_names = (HV*)SvREFCNT_inc(RExC_paren_names);
else
#define REGTAIL_STUDY(x,y,z) regtail((x),(y),(z),depth+1)
#endif
+/* this idea is borrowed from STR_WITH_LEN in handy.h */
+#define CHECK_WORD(s,v,l) \
+ (((sizeof(s)-1)==(l)) && (strnEQ(start_verb, (s ""), (sizeof(s)-1))))
+
STATIC regnode *
S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth)
/* paren: Parenthesized? 0=top, 1=(, inside: changed to letter. */
/* Make an OPEN node, if parenthesized. */
if (paren) {
+ if ( *RExC_parse == '*') { /* (*VERB:ARG) */
+ char *start_verb = RExC_parse;
+ STRLEN verb_len = 0;
+ char *start_arg = NULL;
+ unsigned char op = 0;
+ int argok = 1;
+ int internal_argval = 0; /* internal_argval is only useful if !argok */
+ while ( *RExC_parse && *RExC_parse != ')' ) {
+ if ( *RExC_parse == ':' ) {
+ start_arg = RExC_parse + 1;
+ break;
+ }
+ RExC_parse++;
+ }
+ ++start_verb;
+ verb_len = RExC_parse - start_verb;
+ if ( start_arg ) {
+ RExC_parse++;
+ while ( *RExC_parse && *RExC_parse != ')' )
+ RExC_parse++;
+ if ( *RExC_parse != ')' )
+ vFAIL("Unterminated verb pattern argument");
+ if ( RExC_parse == start_arg )
+ start_arg = NULL;
+ } else {
+ if ( *RExC_parse != ')' )
+ vFAIL("Unterminated verb pattern");
+ }
+ switch ( *start_verb ) {
+ case 'A': /* (*ACCEPT) */
+ if ( CHECK_WORD("ACCEPT",start_verb,verb_len) ) {
+ op = ACCEPT;
+ internal_argval = RExC_nestroot;
+ }
+ break;
+ case 'C': /* (*COMMIT) */
+ if ( CHECK_WORD("COMMIT",start_verb,verb_len) )
+ op = COMMIT;
+ else if ( CHECK_WORD("CUT",start_verb,verb_len) )
+ op = CUT;
+ break;
+ case 'F': /* (*FAIL) */
+ if ( verb_len==1 || CHECK_WORD("FAIL",start_verb,verb_len) ) {
+ op = OPFAIL;
+ argok = 0;
+ }
+ break;
+ case 'M':
+ if ( CHECK_WORD("MARK",start_verb,verb_len) )
+ op = MARKPOINT;
+ break;
+ case 'N': /* (*NOMATCH) */
+ if ( CHECK_WORD("NOMATCH",start_verb,verb_len) )
+ op = NOMATCH;
+ break;
+ }
+ if ( ! op ) {
+ RExC_parse++;
+ vFAIL3("Unknown verb pattern '%.*s'",
+ verb_len, start_verb);
+ }
+ if ( argok ) {
+ if ( start_arg && internal_argval ) {
+ vFAIL3("Verb pattern '%.*s' may not have an argument",
+ verb_len, start_verb);
+ } else if ( argok < 0 && !start_arg ) {
+ vFAIL3("Verb pattern '%.*s' has a mandatory argument",
+ verb_len, start_verb);
+ } else {
+ ret = reganode(pRExC_state, op, internal_argval);
+ if ( ! internal_argval && ! SIZE_ONLY ) {
+ if (start_arg) {
+ SV *sv = newSVpvn( start_arg, RExC_parse - start_arg);
+ ARG(ret) = add_data( pRExC_state, 1, "S" );
+ RExC_rx->data->data[ARG(ret)]=(void*)sv;
+ ret->flags = 0;
+ } else {
+ ret->flags = 1;
+ }
+ }
+ }
+ if (!internal_argval)
+ RExC_seen |= REG_SEEN_VERBARG;
+ } else if ( start_arg ) {
+ vFAIL3("Verb pattern '%.*s' may not have an argument",
+ verb_len, start_verb);
+ } else {
+ ret = reg_node(pRExC_state, op);
+ }
+ nextchar(pRExC_state);
+ return ret;
+ } else
if (*RExC_parse == '?') { /* (?...) */
U32 posflags = 0, negflags = 0;
U32 *flagsp = &posflags;
RExC_parse++;
case '=': /* (?=...) */
case '!': /* (?!...) */
- if (*RExC_parse == ')')
- goto do_op_fail;
RExC_seen_zerolen++;
+ if (*RExC_parse == ')') {
+ ret=reg_node(pRExC_state, OPFAIL);
+ nextchar(pRExC_state);
+ return ret;
+ }
case ':': /* (?:...) */
case '>': /* (?>...) */
break;
- case 'C': /* (?CUT) and (?COMMIT) */
- if (RExC_parse[0] == 'O' &&
- RExC_parse[1] == 'M' &&
- RExC_parse[2] == 'M' &&
- RExC_parse[3] == 'I' &&
- RExC_parse[4] == 'T' &&
- RExC_parse[5] == ')')
- {
- RExC_parse+=5;
- ret = reg_node(pRExC_state, COMMIT);
- } else if (
- RExC_parse[0] == 'U' &&
- RExC_parse[1] == 'T' &&
- RExC_parse[2] == ')')
- {
- RExC_parse+=2;
- ret = reg_node(pRExC_state, CUT);
- } else {
- vFAIL("Sequence (?C... not terminated");
- }
- nextchar(pRExC_state);
- return ret;
- break;
- case 'E': /* (?ERROR) */
- if (RExC_parse[0] == 'R' &&
- RExC_parse[1] == 'R' &&
- RExC_parse[2] == 'O' &&
- RExC_parse[3] == 'R' &&
- RExC_parse[4] == ')')
- {
- RExC_parse+=4;
- ret = reg_node(pRExC_state, OPERROR);
- } else {
- vFAIL("Sequence (?E... not terminated");
- }
- nextchar(pRExC_state);
- return ret;
- break;
- case 'F':
- if (RExC_parse[0] == 'A' &&
- RExC_parse[1] == 'I' &&
- RExC_parse[2] == 'L')
- RExC_parse+=3;
- if (*RExC_parse != ')')
- vFAIL("Sequence (?FAIL) or (?F) not terminated");
- do_op_fail:
- ret = reg_node(pRExC_state, OPFAIL);
- nextchar(pRExC_state);
- return ret;
- break;
case '$': /* (?$...) */
case '@': /* (?@...) */
vFAIL2("Sequence (?%c...) not implemented", (int)paren);
capturing_parens:
parno = RExC_npar;
RExC_npar++;
+
ret = reganode(pRExC_state, OPEN, parno);
- if (!SIZE_ONLY && RExC_seen & REG_SEEN_RECURSE) {
- DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
+ if (!SIZE_ONLY ){
+ if (!RExC_nestroot)
+ RExC_nestroot = parno;
+ if (RExC_seen & REG_SEEN_RECURSE) {
+ DEBUG_OPTIMISE_MORE_r(PerlIO_printf(Perl_debug_log,
"Setting open paren #%"IVdf" to %d\n",
(IV)parno, REG_NODE_NUM(ret)));
- RExC_open_parens[parno-1]= ret;
+ RExC_open_parens[parno-1]= ret;
+ }
}
Set_Node_Length(ret, 1); /* MJD */
Set_Node_Offset(ret, RExC_parse); /* MJD */
"Setting close paren #%"IVdf" to %d\n",
(IV)parno, REG_NODE_NUM(ender)));
RExC_close_parens[parno-1]= ender;
+ if (RExC_nestroot == parno)
+ RExC_nestroot = 0;
}
Set_Node_Offset(ender,RExC_parse+1); /* MJD */
Set_Node_Length(ender,1); /* MJD */
RExC_size += 1;
return(ret);
}
+#ifdef DEBUGGING
+ if (OP(RExC_emit) == 255)
+ Perl_croak(aTHX_ "panic: reg_node overrun trying to emit %s: %d ",
+ reg_name[op], OP(RExC_emit));
+#endif
NODE_ALIGN_FILL(ret);
ptr = ret;
FILL_ADVANCE_NODE(ptr, op);
}
RExC_emit = ptr;
-
return(ret);
}
*/
return(ret);
}
-
+#ifdef DEBUGGING
+ if (OP(RExC_emit) == 255)
+ Perl_croak(aTHX_ "panic: reganode overwriting end of allocated program space");
+#endif
NODE_ALIGN_FILL(ret);
ptr = ret;
FILL_ADVANCE_NODE_ARG(ptr, op, arg);
}
RExC_emit = ptr;
-
return(ret);
}
}
else if (k == WHILEM && o->flags) /* Ordinal/of */
Perl_sv_catpvf(aTHX_ sv, "[%d/%d]", o->flags & 0xf, o->flags>>4);
- else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP)
+ else if (k == REF || k == OPEN || k == CLOSE || k == GROUPP || OP(o)==ACCEPT)
Perl_sv_catpvf(aTHX_ sv, "%d", (int)ARG(o)); /* Parenth number */
else if (k == GOSUB)
Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o)); /* Paren and offset */
- else if (k == LOGICAL)
+ else if (k == VERB) {
+ if (!o->flags)
+ Perl_sv_catpvf(aTHX_ sv, ":%"SVf,
+ (SV*)prog->data->data[ ARG( o ) ]);
+ } else if (k == LOGICAL)
Perl_sv_catpvf(aTHX_ sv, "[%d]", o->flags); /* 2: embedded, otherwise 1 */
else if (k == ANYOF) {
int i, rangestart = -1;
for (i = 0; i < count; i++) {
d->what[i] = r->data->what[i];
switch (d->what[i]) {
- /* legal options are one of: sfpont
+ /* legal options are one of: sSfpont
see also regcomp.h and pregfree() */
case 's':
case 'S':
#define REG_SEEN_SANY REG_SEEN_CANY /* src bckwrd cmpt */
#define REG_SEEN_RECURSE 0x00000020
#define REG_TOP_LEVEL_BRANCHES 0x00000040
+#define REG_SEEN_VERBARG 0x00000080
START_EXTERN_C
DEFINEP DEFINEP, none 1 Never execute directly.
#*Bactracking
-OPFAIL OPFAIL, none Same as (?!)
-COMMIT COMMIT, none Pattern fails if backtracking through this
-CUT COMMIT, none ... and restarts at the cursor point
-OPERROR OPERROR,none Pattern fails outright if backtracking through this
+ENDLIKE ENDLIKE, none Used only for the type field of verbs
+OPFAIL ENDLIKE, none Same as (?!)
+ACCEPT ENDLIKE, parno 1 Accepts the current matched string.
+VERB VERB, no-sv 1 Used only for the type field of verbs
+NOMATCH VERB, no-sv 1 Pattern fails at this startpoint if no-backtracking through this
+MARKPOINT VERB, no-sv 1 Push the current location for rollback by cut.
+CUT VERB, no-sv 1 On failure cut the string at the mark.
+COMMIT VERB, no-sv 1 Pattern fails outright if backtracking through this
+
+
# NEW STUFF ABOVE THIS LINE -- Please update counts below.
IFMATCH A:FAIL
CURLY B_min_known,B_min,B_max:FAIL
COMMIT next:FAIL
-
+MARKPOINT next:FAIL
+CUT next:FAIL
int nochange_depth = 0; /* depth of GOSUB recursion with nochange*/
regmatch_state *yes_state = NULL; /* state to pop to on success of
subpattern */
+ /* mark_state piggy backs on the yes_state logic so that when we unwind
+ the stack on success we can update the mark_state as we go */
+ regmatch_state *mark_state = NULL; /* last mark state we have seen */
regmatch_state *cur_eval = NULL; /* most recent EVAL_AB state */
struct regmatch_state *cur_curlyx = NULL; /* most recent curlyx */
U32 state_num;
bool no_final = 0;
-
+ char *startpoint = PL_reginput;
+ SV *popmark = NULL;
+ SV *sv_commit = NULL;
+ int lastopen = 0;
/* these three flags are set by various ops to signal information to
* the very next op. They have a useful lifetime of exactly one loop
* iteration, and are not preserved or restored by state pushes/pops
PL_reg_start_tmp[n] = locinput;
if (n > PL_regsize)
PL_regsize = n;
+ lastopen = n;
break;
case CLOSE:
n = ARG(scan); /* which paren pair */
goto fake_end;
}
break;
+ case ACCEPT:
+ if (ARG(scan)){
+ regnode *cursor;
+ for (cursor=scan;
+ cursor && OP(cursor)!=END;
+ cursor=regnext(cursor))
+ {
+ if ( OP(cursor)==CLOSE ){
+ n = ARG(cursor);
+ if ( n <= lastopen ) {
+ PL_regstartp[n] = PL_reg_start_tmp[n] - PL_bostr;
+ PL_regendp[n] = locinput - PL_bostr;
+ /*if (n > PL_regsize)
+ PL_regsize = n;*/
+ if (n > (I32)*PL_reglastparen)
+ *PL_reglastparen = n;
+ *PL_reglastcloseparen = n;
+ if ( n == ARG(scan) || (cur_eval &&
+ cur_eval->u.eval.close_paren == (U32)n))
+ break;
+ }
+ }
+ }
+ }
+ goto fake_end;
+ /*NOTREACHED*/
case GROUPP:
n = ARG(scan); /* which paren pair */
sw = (bool)((I32)*PL_reglastparen >= n && PL_regendp[n] != -1);
PL_reginput = locinput;
if (minmod) {
minmod = 0;
- if (ST.min && regrepeat(rex, ST.A, ST.min) < ST.min)
+ if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
sayNO;
ST.count = ST.min;
locinput = PL_reginput;
}
else {
- ST.count = regrepeat(rex, ST.A, ST.max);
+ ST.count = regrepeat(rex, ST.A, ST.max, depth);
locinput = PL_reginput;
if (ST.count < ST.min)
sayNO;
/* PL_reginput == oldloc now */
if (n) {
ST.count += n;
- if (regrepeat(rex, ST.A, n) < n)
+ if (regrepeat(rex, ST.A, n, depth) < n)
sayNO;
}
PL_reginput = locinput;
REGCP_UNWIND(ST.cp);
/* failed -- move forward one */
PL_reginput = locinput;
- if (regrepeat(rex, ST.A, 1)) {
+ if (regrepeat(rex, ST.A, 1, depth)) {
ST.count++;
locinput = PL_reginput;
if (ST.count <= ST.max || (ST.max == REG_INFTY &&
if (next == scan)
next = NULL;
break;
- case OPERROR:
- reginfo->cutpoint=PL_regeol;
- goto do_commit;
- /* NOTREACHED */
- case CUT:
- if ( locinput > reginfo->bol )
- reginfo->cutpoint = HOPBACKc(locinput, 1);
- /* FALLTHROUGH */
case COMMIT:
- do_commit:
+ reginfo->cutpoint = PL_regeol;
+ /* FALLTHROUGH */
+ case NOMATCH:
PL_reginput = locinput;
+ if (!scan->flags)
+ sv_commit = (SV*)rex->data->data[ ARG( scan ) ];
PUSH_STATE_GOTO(COMMIT_next,next);
/* NOTREACHED */
case COMMIT_next_fail:
/* FALLTHROUGH */
case OPFAIL:
sayNO;
+ /* NOTREACHED */
+
+#define ST st->u.mark
+ case MARKPOINT:
+ ST.prev_mark = mark_state;
+ ST.mark_name = scan->flags ? &PL_sv_yes :
+ (SV*)rex->data->data[ ARG( scan ) ];
+ mark_state = st;
+ ST.mark_loc = PL_reginput = locinput;
+ PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
+ /* NOTREACHED */
+ case MARKPOINT_next:
+ mark_state = ST.prev_mark;
+ sayYES;
+ /* NOTREACHED */
+ case MARKPOINT_next_fail:
+ if (popmark && ( popmark == &PL_sv_yes ||
+ (ST.mark_name != &PL_sv_yes &&
+ sv_eq(ST.mark_name,popmark))))
+ {
+ if (ST.mark_loc > startpoint)
+ reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
+ popmark = NULL; /* we found our mark */
+ sv_commit = ST.mark_name;
+
+ DEBUG_EXECUTE_r({
+ if (sv_commit != &PL_sv_yes)
+ PerlIO_printf(Perl_debug_log,
+ "%*s %ssetting cutpoint to mark:%"SVf"...%s\n",
+ REPORT_CODE_OFF+depth*2, "",
+ PL_colors[4], sv_commit, PL_colors[5]);
+ else
+ PerlIO_printf(Perl_debug_log,
+ "%*s %ssetting cutpoint to mark...%s\n",
+ REPORT_CODE_OFF+depth*2, "",
+ PL_colors[4], PL_colors[5]);
+ });
+ }
+ mark_state = ST.prev_mark;
+ sayNO;
+ /* NOTREACHED */
+ case CUT:
+ ST.mark_name = scan->flags ? &PL_sv_yes :
+ (SV*)rex->data->data[ ARG( scan ) ];
+ if (mark_state) {
+ ST.mark_loc = NULL;
+ } else {
+ ST.mark_loc = locinput;
+ }
+ PL_reginput = locinput;
+ PUSH_STATE_GOTO(CUT_next,next);
+ /* NOTREACHED */
+ case CUT_next_fail:
+ if (ST.mark_loc) {
+ if (ST.mark_loc > startpoint)
+ reginfo->cutpoint = HOPBACKc(ST.mark_loc, 1);
+ sv_commit = ST.mark_name;
+ } else {
+ popmark = ST.mark_name;
+ }
+ no_final = 1;
+ sayNO;
+ /* NOTREACHED */
+#undef ST
+
default:
PerlIO_printf(Perl_error_log, "%"UVxf" %d\n",
PTR2UV(scan), OP(scan));
PL_regmatch_slab = PL_regmatch_slab->prev;
st = SLAB_LAST(PL_regmatch_slab);
}
- DEBUG_STATE_r({
+ DEBUG_STATE_r({
if (no_final) {
DEBUG_STATE_pp("pop (no final)");
} else {
DEBUG_STATE_pp("pop (yes)");
}
- });
+ });
depth--;
}
#else
result = 0;
final_exit:
-
+ if (rex->reganch & ROPT_VERBARG_SEEN) {
+ SV *sv = get_sv("REGERROR", 1);
+ if (result)
+ sv_commit = &PL_sv_no;
+ else if (!sv_commit)
+ sv_commit = &PL_sv_yes;
+ sv_setsv(sv, sv_commit);
+ }
/* restore original high-water mark */
PL_regmatch_slab = orig_slab;
PL_regmatch_state = orig_state;
* rather than incrementing count on every character. [Er, except utf8.]]
*/
STATIC I32
-S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max)
+S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
{
dVAR;
register char *scan;
regprop(prog, prop, p);
PerlIO_printf(Perl_debug_log,
"%*s %s can match %"IVdf" times out of %"IVdf"...\n",
- REPORT_CODE_OFF+1, "", SvPVX_const(prop),(IV)c,(IV)max);
+ REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max);
});
});
#define ROPT_CANY_SEEN 0x00000800
#define ROPT_SANY_SEEN ROPT_CANY_SEEN /* src bckwrd cmpt */
#define ROPT_GPOS_CHECK (ROPT_GPOS_SEEN|ROPT_ANCH_GPOS)
-#define ROPT_RECURSE_SEEN 0x00001000
/* 0xf800 of reganch is used by PMf_COMPILETIME */
#define ROPT_COPY_DONE 0x00040000 /* subbeg is a copy of the string */
#define ROPT_TAINTED_SEEN 0x00080000
#define ROPT_MATCH_UTF8 0x10000000 /* subbeg is utf-8 */
+#define ROPT_RECURSE_SEEN 0x20000000
+#define ROPT_VERBARG_SEEN 0x40000000
#define RE_USE_INTUIT_NOML 0x00100000 /* Best to intuit before matching */
#define RE_USE_INTUIT_ML 0x00200000
I32 logical; /* saved copy of 'logical' var */
regnode *me; /* the IFMATCH/SUSPEND/UNLESSM node */
} ifmatch; /* and SUSPEND/UNLESSM */
+
+ struct {
+ /* this first element must match u.yes */
+ struct regmatch_state *prev_yes_state;
+ struct regmatch_state *prev_mark;
+ SV* mark_name;
+ char *mark_loc;
+ } mark;
} u;
} regmatch_state;
/* Regops and State definitions */
-#define REGNODE_MAX 78
-#define REGMATCH_STATE_MAX 110
+#define REGNODE_MAX 82
+#define REGMATCH_STATE_MAX 118
#define END 0 /* 0000 End of program. */
#define SUCCEED 1 /* 0x01 Return from a subroutine, basically. */
#define NGROUPP 70 /* 0x46 Whether the group matched. */
#define INSUBP 71 /* 0x47 Whether we are in a specific recurse. */
#define DEFINEP 72 /* 0x48 Never execute directly. */
-#define OPFAIL 73 /* 0x49 Same as (?!) */
-#define COMMIT 74 /* 0x4a Pattern fails if backtracking through this */
-#define CUT 75 /* 0x4b ... and restarts at the cursor point */
-#define OPERROR 76 /* 0x4c Pattern fails outright if backtracking through this */
-#define OPTIMIZED 77 /* 0x4d Placeholder for dump. */
-#define PSEUDO 78 /* 0x4e Pseudo opcode for internal use. */
+#define ENDLIKE 73 /* 0x49 Used only for the type field of verbs */
+#define OPFAIL 74 /* 0x4a Same as (?!) */
+#define ACCEPT 75 /* 0x4b Accepts the current matched string. */
+#define VERB 76 /* 0x4c no-sv 1 Used only for the type field of verbs */
+#define NOMATCH 77 /* 0x4d Pattern fails at this startpoint if no-backtracking through this */
+#define MARKPOINT 78 /* 0x4e Push the current location for rollback by cut. */
+#define CUT 79 /* 0x4f On failure cut the string at the mark. */
+#define COMMIT 80 /* 0x50 Pattern fails outright if backtracking through this */
+#define OPTIMIZED 81 /* 0x51 Placeholder for dump. */
+#define PSEUDO 82 /* 0x52 Pseudo opcode for internal use. */
/* ------------ States ------------- */
#define TRIE_next (REGNODE_MAX + 1) /* state for TRIE */
#define TRIE_next_fail (REGNODE_MAX + 2) /* state for TRIE */
#define CURLY_B_max_fail (REGNODE_MAX + 30) /* state for CURLY */
#define COMMIT_next (REGNODE_MAX + 31) /* state for COMMIT */
#define COMMIT_next_fail (REGNODE_MAX + 32) /* state for COMMIT */
+#define MARKPOINT_next (REGNODE_MAX + 33) /* state for MARKPOINT */
+#define MARKPOINT_next_fail (REGNODE_MAX + 34) /* state for MARKPOINT */
+#define CUT_next (REGNODE_MAX + 35) /* state for CUT */
+#define CUT_next_fail (REGNODE_MAX + 36) /* state for CUT */
/* PL_regkind[] What type of regop or state is this. */
EXTCONST U8 PL_regkind[];
#else
EXTCONST U8 PL_regkind[] = {
- END, /* END */
- END, /* SUCCEED */
- BOL, /* BOL */
- BOL, /* MBOL */
- BOL, /* SBOL */
- EOL, /* EOS */
- EOL, /* EOL */
- EOL, /* MEOL */
- EOL, /* SEOL */
- BOUND, /* BOUND */
- BOUND, /* BOUNDL */
- NBOUND, /* NBOUND */
- NBOUND, /* NBOUNDL */
- GPOS, /* GPOS */
- REG_ANY, /* REG_ANY */
- REG_ANY, /* SANY */
- REG_ANY, /* CANY */
- ANYOF, /* ANYOF */
- ALNUM, /* ALNUM */
- ALNUM, /* ALNUML */
- NALNUM, /* NALNUM */
- NALNUM, /* NALNUML */
- SPACE, /* SPACE */
- SPACE, /* SPACEL */
- NSPACE, /* NSPACE */
- NSPACE, /* NSPACEL */
- DIGIT, /* DIGIT */
- DIGIT, /* DIGITL */
- NDIGIT, /* NDIGIT */
- NDIGIT, /* NDIGITL */
- CLUMP, /* CLUMP */
- BRANCH, /* BRANCH */
- BACK, /* BACK */
- EXACT, /* EXACT */
- EXACT, /* EXACTF */
- EXACT, /* EXACTFL */
- NOTHING, /* NOTHING */
- NOTHING, /* TAIL */
- STAR, /* STAR */
- PLUS, /* PLUS */
- CURLY, /* CURLY */
- CURLY, /* CURLYN */
- CURLY, /* CURLYM */
- CURLY, /* CURLYX */
- WHILEM, /* WHILEM */
- OPEN, /* OPEN */
- CLOSE, /* CLOSE */
- REF, /* REF */
- REF, /* REFF */
- REF, /* REFFL */
- BRANCHJ, /* IFMATCH */
- BRANCHJ, /* UNLESSM */
- BRANCHJ, /* SUSPEND */
- BRANCHJ, /* IFTHEN */
- GROUPP, /* GROUPP */
- LONGJMP, /* LONGJMP */
- BRANCHJ, /* BRANCHJ */
- EVAL, /* EVAL */
- MINMOD, /* MINMOD */
- LOGICAL, /* LOGICAL */
- BRANCHJ, /* RENUM */
- TRIE, /* TRIE */
- TRIE, /* TRIEC */
- TRIE, /* AHOCORASICK */
- TRIE, /* AHOCORASICKC */
- GOSUB, /* GOSUB */
- GOSTART, /* GOSTART */
- NREF, /* NREF */
- NREF, /* NREFF */
- NREF, /* NREFFL */
- NGROUPP, /* NGROUPP */
- INSUBP, /* INSUBP */
- DEFINEP, /* DEFINEP */
- OPFAIL, /* OPFAIL */
- COMMIT, /* COMMIT */
- COMMIT, /* CUT */
- OPERROR, /* OPERROR */
- NOTHING, /* OPTIMIZED */
- PSEUDO, /* PSEUDO */
+ END, /* END */
+ END, /* SUCCEED */
+ BOL, /* BOL */
+ BOL, /* MBOL */
+ BOL, /* SBOL */
+ EOL, /* EOS */
+ EOL, /* EOL */
+ EOL, /* MEOL */
+ EOL, /* SEOL */
+ BOUND, /* BOUND */
+ BOUND, /* BOUNDL */
+ NBOUND, /* NBOUND */
+ NBOUND, /* NBOUNDL */
+ GPOS, /* GPOS */
+ REG_ANY, /* REG_ANY */
+ REG_ANY, /* SANY */
+ REG_ANY, /* CANY */
+ ANYOF, /* ANYOF */
+ ALNUM, /* ALNUM */
+ ALNUM, /* ALNUML */
+ NALNUM, /* NALNUM */
+ NALNUM, /* NALNUML */
+ SPACE, /* SPACE */
+ SPACE, /* SPACEL */
+ NSPACE, /* NSPACE */
+ NSPACE, /* NSPACEL */
+ DIGIT, /* DIGIT */
+ DIGIT, /* DIGITL */
+ NDIGIT, /* NDIGIT */
+ NDIGIT, /* NDIGITL */
+ CLUMP, /* CLUMP */
+ BRANCH, /* BRANCH */
+ BACK, /* BACK */
+ EXACT, /* EXACT */
+ EXACT, /* EXACTF */
+ EXACT, /* EXACTFL */
+ NOTHING, /* NOTHING */
+ NOTHING, /* TAIL */
+ STAR, /* STAR */
+ PLUS, /* PLUS */
+ CURLY, /* CURLY */
+ CURLY, /* CURLYN */
+ CURLY, /* CURLYM */
+ CURLY, /* CURLYX */
+ WHILEM, /* WHILEM */
+ OPEN, /* OPEN */
+ CLOSE, /* CLOSE */
+ REF, /* REF */
+ REF, /* REFF */
+ REF, /* REFFL */
+ BRANCHJ, /* IFMATCH */
+ BRANCHJ, /* UNLESSM */
+ BRANCHJ, /* SUSPEND */
+ BRANCHJ, /* IFTHEN */
+ GROUPP, /* GROUPP */
+ LONGJMP, /* LONGJMP */
+ BRANCHJ, /* BRANCHJ */
+ EVAL, /* EVAL */
+ MINMOD, /* MINMOD */
+ LOGICAL, /* LOGICAL */
+ BRANCHJ, /* RENUM */
+ TRIE, /* TRIE */
+ TRIE, /* TRIEC */
+ TRIE, /* AHOCORASICK */
+ TRIE, /* AHOCORASICKC */
+ GOSUB, /* GOSUB */
+ GOSTART, /* GOSTART */
+ NREF, /* NREF */
+ NREF, /* NREFF */
+ NREF, /* NREFFL */
+ NGROUPP, /* NGROUPP */
+ INSUBP, /* INSUBP */
+ DEFINEP, /* DEFINEP */
+ ENDLIKE, /* ENDLIKE */
+ ENDLIKE, /* OPFAIL */
+ ENDLIKE, /* ACCEPT */
+ VERB, /* VERB */
+ VERB, /* NOMATCH */
+ VERB, /* MARKPOINT */
+ VERB, /* CUT */
+ VERB, /* COMMIT */
+ NOTHING, /* OPTIMIZED */
+ PSEUDO, /* PSEUDO */
/* ------------ States ------------- */
- TRIE, /* TRIE_next */
- TRIE, /* TRIE_next_fail */
- EVAL, /* EVAL_AB */
- EVAL, /* EVAL_AB_fail */
- CURLYX, /* CURLYX_end */
- CURLYX, /* CURLYX_end_fail */
- WHILEM, /* WHILEM_A_pre */
- WHILEM, /* WHILEM_A_pre_fail */
- WHILEM, /* WHILEM_A_min */
- WHILEM, /* WHILEM_A_min_fail */
- WHILEM, /* WHILEM_A_max */
- WHILEM, /* WHILEM_A_max_fail */
- WHILEM, /* WHILEM_B_min */
- WHILEM, /* WHILEM_B_min_fail */
- WHILEM, /* WHILEM_B_max */
- WHILEM, /* WHILEM_B_max_fail */
- BRANCH, /* BRANCH_next */
- BRANCH, /* BRANCH_next_fail */
- CURLYM, /* CURLYM_A */
- CURLYM, /* CURLYM_A_fail */
- CURLYM, /* CURLYM_B */
- CURLYM, /* CURLYM_B_fail */
- IFMATCH, /* IFMATCH_A */
- IFMATCH, /* IFMATCH_A_fail */
- CURLY, /* CURLY_B_min_known */
- CURLY, /* CURLY_B_min_known_fail */
- CURLY, /* CURLY_B_min */
- CURLY, /* CURLY_B_min_fail */
- CURLY, /* CURLY_B_max */
- CURLY, /* CURLY_B_max_fail */
- COMMIT, /* COMMIT_next */
- COMMIT, /* COMMIT_next_fail */
+ TRIE, /* TRIE_next */
+ TRIE, /* TRIE_next_fail */
+ EVAL, /* EVAL_AB */
+ EVAL, /* EVAL_AB_fail */
+ CURLYX, /* CURLYX_end */
+ CURLYX, /* CURLYX_end_fail */
+ WHILEM, /* WHILEM_A_pre */
+ WHILEM, /* WHILEM_A_pre_fail */
+ WHILEM, /* WHILEM_A_min */
+ WHILEM, /* WHILEM_A_min_fail */
+ WHILEM, /* WHILEM_A_max */
+ WHILEM, /* WHILEM_A_max_fail */
+ WHILEM, /* WHILEM_B_min */
+ WHILEM, /* WHILEM_B_min_fail */
+ WHILEM, /* WHILEM_B_max */
+ WHILEM, /* WHILEM_B_max_fail */
+ BRANCH, /* BRANCH_next */
+ BRANCH, /* BRANCH_next_fail */
+ CURLYM, /* CURLYM_A */
+ CURLYM, /* CURLYM_A_fail */
+ CURLYM, /* CURLYM_B */
+ CURLYM, /* CURLYM_B_fail */
+ IFMATCH, /* IFMATCH_A */
+ IFMATCH, /* IFMATCH_A_fail */
+ CURLY, /* CURLY_B_min_known */
+ CURLY, /* CURLY_B_min_known_fail */
+ CURLY, /* CURLY_B_min */
+ CURLY, /* CURLY_B_min_fail */
+ CURLY, /* CURLY_B_max */
+ CURLY, /* CURLY_B_max_fail */
+ COMMIT, /* COMMIT_next */
+ COMMIT, /* COMMIT_next_fail */
+ MARKPOINT, /* MARKPOINT_next */
+ MARKPOINT, /* MARKPOINT_next_fail */
+ CUT, /* CUT_next */
+ CUT, /* CUT_next_fail */
};
#endif
EXTRA_SIZE(struct regnode_1), /* NGROUPP */
EXTRA_SIZE(struct regnode_1), /* INSUBP */
EXTRA_SIZE(struct regnode_1), /* DEFINEP */
+ 0, /* ENDLIKE */
0, /* OPFAIL */
- 0, /* COMMIT */
- 0, /* CUT */
- 0, /* OPERROR */
+ EXTRA_SIZE(struct regnode_1), /* ACCEPT */
+ 0, /* VERB */
+ EXTRA_SIZE(struct regnode_1), /* NOMATCH */
+ EXTRA_SIZE(struct regnode_1), /* MARKPOINT */
+ EXTRA_SIZE(struct regnode_1), /* CUT */
+ EXTRA_SIZE(struct regnode_1), /* COMMIT */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
0, /* NGROUPP */
0, /* INSUBP */
0, /* DEFINEP */
+ 0, /* ENDLIKE */
0, /* OPFAIL */
- 0, /* COMMIT */
+ 0, /* ACCEPT */
+ 0, /* VERB */
+ 0, /* NOMATCH */
+ 0, /* MARKPOINT */
0, /* CUT */
- 0, /* OPERROR */
+ 0, /* COMMIT */
0, /* OPTIMIZED */
0, /* PSEUDO */
};
"NGROUPP", /* 0x46 */
"INSUBP", /* 0x47 */
"DEFINEP", /* 0x48 */
- "OPFAIL", /* 0x49 */
- "COMMIT", /* 0x4a */
- "CUT", /* 0x4b */
- "OPERROR", /* 0x4c */
- "OPTIMIZED", /* 0x4d */
- "PSEUDO", /* 0x4e */
+ "ENDLIKE", /* 0x49 */
+ "OPFAIL", /* 0x4a */
+ "ACCEPT", /* 0x4b */
+ "VERB", /* 0x4c */
+ "NOMATCH", /* 0x4d */
+ "MARKPOINT", /* 0x4e */
+ "CUT", /* 0x4f */
+ "COMMIT", /* 0x50 */
+ "OPTIMIZED", /* 0x51 */
+ "PSEUDO", /* 0x52 */
/* ------------ States ------------- */
"TRIE_next", /* REGNODE_MAX +0x01 */
"TRIE_next_fail", /* REGNODE_MAX +0x02 */
"CURLY_B_max_fail", /* REGNODE_MAX +0x1e */
"COMMIT_next", /* REGNODE_MAX +0x1f */
"COMMIT_next_fail", /* REGNODE_MAX +0x20 */
+ "MARKPOINT_next", /* REGNODE_MAX +0x21 */
+ "MARKPOINT_next_fail", /* REGNODE_MAX +0x22 */
+ "CUT_next", /* REGNODE_MAX +0x23 */
+ "CUT_next_fail", /* REGNODE_MAX +0x24 */
};
#endif /* DEBUGGING */
#else
iseq($count,1,"should have matched once only [RT#36046]");
}
-{ # Test the (?COMMIT) pattern
+{ # Test the (*NOMATCH) pattern
our $count = 0;
- 'aaab'=~/a+b?(?{$count++})(?FAIL)/;
- iseq($count,9,"expect 9 for no (?COMMIT)");
+ 'aaab'=~/a+b?(?{$count++})(*FAIL)/;
+ iseq($count,9,"expect 9 for no (*NOMATCH)");
$count = 0;
- 'aaab'=~/a+b?(?COMMIT)(?{$count++})(?FAIL)/;
- iseq($count,3,"expect 3 with (?COMMIT)");
+ 'aaab'=~/a+b?(*NOMATCH)(?{$count++})(*FAIL)/;
+ iseq($count,3,"expect 3 with (*NOMATCH)");
local $_='aaab';
$count=0;
- 1 while /.(?COMMIT)(?{$count++})(?FAIL)/g;
- iseq($count,4,"/.(?COMMIT)/");
+ 1 while /.(*NOMATCH)(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*NOMATCH)/");
$count = 0;
- 'aaab'=~/a+b?(??{'(?COMMIT)'})(?{$count++})(?FAIL)/;
- iseq($count,3,"expect 3 with (?COMMIT)");
+ 'aaab'=~/a+b?(??{'(*NOMATCH)'})(?{$count++})(*FAIL)/;
+ iseq($count,3,"expect 3 with (*NOMATCH)");
local $_='aaab';
$count=0;
- 1 while /.(??{'(?COMMIT)'})(?{$count++})(?FAIL)/g;
- iseq($count,4,"/.(?COMMIT)/");
+ 1 while /.(??{'(*NOMATCH)'})(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*NOMATCH)/");
}
-{ # Test the (?CUT) pattern
+{ # Test the (*CUT) pattern
our $count = 0;
- 'aaab'=~/a+b?(?CUT)(?{$count++})(?FAIL)/;
- iseq($count,1,"expect 1 with (?CUT)");
+ 'aaab'=~/a+b?(*CUT)(?{$count++})(*FAIL)/;
+ iseq($count,1,"expect 1 with (*CUT)");
local $_='aaab';
$count=0;
- 1 while /.(?CUT)(?{$count++})(?FAIL)/g;
- iseq($count,4,"/.(?CUT)/");
+ 1 while /.(*CUT)(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*CUT)/");
$_='aaabaaab';
$count=0;
our @res=();
- 1 while /(a+b?)(?CUT)(?{$count++; push @res,$1})(?FAIL)/g;
- iseq($count,2,"Expect 2 with (?CUT)" );
- iseq("@res","aaab aaab","adjacent (?CUT) works as expected" );
+ 1 while /(a+b?)(*CUT)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,2,"Expect 2 with (*CUT)" );
+ iseq("@res","aaab aaab","adjacent (*CUT) works as expected" );
}
-{ # Test the (?ERROR) pattern
+{ # Test the (*CUT) pattern
our $count = 0;
- 'aaabaaab'=~/a+b?(?ERROR)(?{$count++})(?FAIL)/;
- iseq($count,1,"expect 1 with (?ERROR)");
+ 'aaab'=~/a+b?(*MARK)(*CUT)(?{$count++})(*FAIL)/;
+ iseq($count,1,"expect 1 with (*CUT)");
local $_='aaab';
$count=0;
- 1 while /.(?ERROR)(?{$count++})(?FAIL)/g;
- iseq($count,1,"/.(?ERROR)/");
+ 1 while /.(*MARK)(*CUT)(?{$count++})(*FAIL)/g;
+ iseq($count,4,"/.(*CUT)/");
$_='aaabaaab';
$count=0;
our @res=();
- 1 while /(a+b?)(?ERROR)(?{$count++; push @res,$1})(?FAIL)/g;
- iseq($count,1,"Expect 1 with (?ERROR)" );
- iseq("@res","aaab","adjacent (?ERROR) works as expected" );
+ 1 while /(a+b?)(*MARK)(*CUT)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,2,"Expect 2 with (*CUT)" );
+ iseq("@res","aaab aaab","adjacent (*CUT) works as expected" );
+}
+{ # Test the (*CUT) pattern
+ our $count = 0;
+ 'aaab'=~/a*(*MARK:a)b?(*MARK:b)(*CUT:a)(?{$count++})(*FAIL)/;
+ iseq($count,3,"expect 3 with *MARK:a)b?(*MARK:b)(*CUT:a)");
+ local $_='aaabaaab';
+ $count=0;
+ our @res=();
+ 1 while /(a*(*MARK:a)b?)(*MARK)(*CUT:a)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,5,"Expect 5 with (*MARK:a)b?)(*MARK)(*CUT:a)" );
+ iseq("@res","aaab b aaab b ","adjacent (*MARK:a)b?)(*MARK)(*CUT:a) works as expected" );
+}
+{ # Test the (*COMMIT) pattern
+ our $count = 0;
+ 'aaabaaab'=~/a+b?(*COMMIT)(?{$count++})(*FAIL)/;
+ iseq($count,1,"expect 1 with (*COMMIT)");
+ local $_='aaab';
+ $count=0;
+ 1 while /.(*COMMIT)(?{$count++})(*FAIL)/g;
+ iseq($count,1,"/.(*COMMIT)/");
+ $_='aaabaaab';
+ $count=0;
+ our @res=();
+ 1 while /(a+b?)(*COMMIT)(?{$count++; push @res,$1})(*FAIL)/g;
+ iseq($count,1,"Expect 1 with (*COMMIT)" );
+ iseq("@res","aaab","adjacent (*COMMIT) works as expected" );
+}
+{
+ # Test named commits and the $REGERROR var
+ our $REGERROR;
+ for my $name ('',':foo')
+ {
+ for my $pat ("(*NOMATCH$name)","(*MARK$name)(*CUT)",
+ "(*CUT$name)","(*COMMIT$name)")
+ {
+ for my $suffix ('(*FAIL)','')
+ {
+ 'aaaab'=~/a+b$pat$suffix/;
+ iseq(
+ $REGERROR,
+ ($suffix ? ($name ? 'foo' : "1") : ""),
+ "Test $pat and \$REGERROR $suffix"
+ );
+ }
+ }
+ }
+}
+{
+ # Test named commits and the $REGERROR var
+ package Fnorble;
+ our $REGERROR;
+ for my $name ('',':foo')
+ {
+ for my $pat ("(*NOMATCH$name)","(*MARK$name)(*CUT)",
+ "(*CUT$name)","(*COMMIT$name)")
+ {
+ for my $suffix ('(*FAIL)','')
+ {
+ 'aaaab'=~/a+b$pat$suffix/;
+ ::iseq(
+ $REGERROR,
+ ($suffix ? ($name ? 'foo' : "1") : ""),
+ "Test $pat and \$REGERROR $suffix"
+ );
+ }
+ }
+ }
+}
+{
+ # Test named commits and the $REGERROR var
+ our $REGERROR;
+ for $word (qw(bar baz bop)) {
+ $REGERROR="";
+ "aaaaa$word"=~/a+(?:bar(*COMMIT:bar)|baz(*COMMIT:baz)|bop(*COMMIT:bop))(*FAIL)/;
+ iseq($REGERROR,$word);
+ }
+}
+{ #Regression test for perlbug 40684
+ my $s = "abc\ndef";
+ my $rex = qr'^abc$'m;
+ ok($s =~ m/$rex/);
+ ok($s =~ m/^abc$/m);
}
#-------------------------------------------------------------------
# Put new tests above the line, not here.
# Don't forget to update this!
-BEGIN{print "1..1300\n"};
+BEGIN{print "1..1344\n"};
()ef def y $-[1] 1
()ef def y $+[1] 1
*a - c - Quantifier follows nothing
-(*)b - c - Quantifier follows nothing
+(|*)b - c - Quantifier follows nothing
+(*)b - c - Unknown verb
$b b n - -
a\ - c - Search pattern not terminated
a\(b a(b y $&-$1 a(b-
'ab|cd'i ABCD y $& AB
'()ef'i DEF y $&-$1 EF-
'*a'i - c - Quantifier follows nothing
-'(*)b'i - c - Quantifier follows nothing
+'(|*)b'i - c - Quantifier follows nothing
+'(*)b'i - c - Unknown verb
'$b'i B n - -
'a\'i - c - Search pattern not terminated
'a\(b'i A(B y $&-$1 A(B-
a*(?!) aaaab n - -
-a*(?FAIL) aaaab n - -
-a*(?F) aaaab n - -
+a*(*FAIL) aaaab n - -
+a*(*F) aaaab n - -
+
+(A(A|B(*ACCEPT)|C)D)(E) AB y $1 AB
+(A(A|B(*ACCEPT)|C)D)(E) ACDE y $1$2$3 ACDCE
+