=item C<(?I<PARNO>)> C<(?-I<PARNO>)> C<(?+I<PARNO>)> C<(?R)> C<(?0)>
X<(?PARNO)> X<(?1)> X<(?R)> X<(?0)> X<(?-1)> X<(?+1)> X<(?-PARNO)> X<(?+PARNO)>
X<regex, recursive> X<regexp, recursive> X<regular expression, recursive>
-X<regex, relative recursion>
+X<regex, relative recursion> X<GOSUB> X<GOSTART>
+
+Recursive subpattern. Treat the contents of a given capture buffer in the
+current pattern as an independent subpattern and attempt to match it at
+the current position in the string. Information about capture state from
+the caller for things like backreferences is available to the subpattern,
+but capture buffers set by the subpattern are not visible to the caller.
Similar to C<(??{ code })> except that it does not involve executing any
code or potentially compiling a returned pattern string; instead it treats
the part of the current pattern contained within a specified capture group
-as an independent pattern that must match at the current position.
-Capture groups contained by the pattern will have the value as determined
-by the outermost recursion.
+as an independent pattern that must match at the current position. Also
+different is the treatment of capture buffers, unlike C<(??{ code })>
+recursive patterns have access to their callers match state, so one can
+use backreferences safely.
I<PARNO> is a sequence of digits (not starting with 0) whose value reflects
the paren-number of the capture group to recurse to. C<(?R)> recurses to
PERL_ARGS_ASSERT_REGCPPUSH;
if (paren_elems_to_push < 0)
- Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0",
- paren_elems_to_push);
+ Perl_croak(aTHX_ "panic: paren_elems_to_push, %i < 0, maxopenparen: %i parenfloor: %i REGCP_PAREN_ELEMS: %i",
+ paren_elems_to_push, maxopenparen, parenfloor, REGCP_PAREN_ELEMS);
if ((elems_shifted >> SAVE_TIGHT_SHIFT) != total_elems)
Perl_croak(aTHX_ "panic: paren_elems_to_push offset %"UVuf
startpoint = rei->program+1;
ST.close_paren = 0;
}
+
+ /* Save all the positions seen so far. */
+ ST.cp = regcppush(rex, 0, maxopenparen);
+ REGCP_SET(ST.lastcp);
+
+ /* and then jump to the code we share with EVAL */
goto eval_recurse_doit;
+
assert(0); /* NOTREACHED */
case EVAL: /* /(?{A})B/ /(??{A})B/ and /(?(?{A})X|Y)B/ */
re->sublen = rex->sublen;
re->suboffset = rex->suboffset;
re->subcoffset = rex->subcoffset;
+ re->lastparen = 0;
+ re->lastcloseparen = 0;
rei = RXi_GET(re);
DEBUG_EXECUTE_r(
debug_start_match(re_sv, utf8_target, locinput,
);
startpoint = rei->program + 1;
ST.close_paren = 0; /* only used for GOSUB */
+ /* Save all the seen positions so far. */
+ ST.cp = regcppush(rex, 0, maxopenparen);
+ REGCP_SET(ST.lastcp);
+ /* and set maxopenparen to 0, since we are starting a "fresh" match */
+ maxopenparen = 0;
+ /* run the pattern returned from (??{...}) */
- eval_recurse_doit: /* Share code with GOSUB below this line */
- /* run the pattern returned from (??{...}) */
-
- /* Save *all* the positions. */
- ST.cp = regcppush(rex, 0, maxopenparen);
- REGCP_SET(ST.lastcp);
-
- re->lastparen = 0;
- re->lastcloseparen = 0;
-
- maxopenparen = 0;
+ eval_recurse_doit: /* Share code with GOSUB below this line
+ * At this point we expect the stack context to be
+ * set up correctly */
/* invalidate the S-L poscache. We're now executing a
* different set of WHILEM ops (and their associated
* pattern again */
reginfo->poscache_maxiter = 0;
+ /* the new regexp might have a different is_utf8_pat than we do */
is_utf8_pat = reginfo->is_utf8_pat = cBOOL(RX_UTF8(re_sv));
ST.prev_rex = rex_sv;
'(?-m:^abc)'m x\nabcde n - -
# RT #121248 - segfault
-
/^[+]([^(]+)$/mi li n - -
+# RT #121299 - Inconsistent behavior with backreferences nested inside subpattern references
+/(.)((o)\1)(?2)/ fofof y $2 of
+/(.)(?2)((\1)(?4)(\1))/ fffffff y $1 f
+/(.)(?2)((.)(?4)(\1))/ foffoff y $2 off
+/^(.\2?)(.)(?1)$/ abcb y $2 b
+/^(.\1?)(?1)$/ aba y $1 a
+/^ (\3(?2)\3)? ((.)) (?1) $/x aaba y $2 a
+/^ (a|\3(?1)\2|(?2)) ((b|c)(?4)?) (?1) (d(?1)) $/x abbcdcabbda y $2 b
# Keep these lines at the end of the file
# vim: softtabstop=0 noexpandtab