- regtry - try match at specific point
*/
STATIC I32 /* 0 failure, 1 success */
-S_regtry(pTHX_ regmatch_info *reginfo, char **startpos)
+S_regtry(pTHX_ regmatch_info *reginfo, char **startposp)
{
dVAR;
CHECKPOINT lastcp;
REGEXP *const rx = reginfo->prog;
regexp *const prog = (struct regexp *)SvANY(rx);
+ I32 result;
RXi_GET_DECL(prog,progi);
GET_RE_DEBUG_FLAGS_DECL;
prog->sublen = PL_regeol - PL_bostr; /* strend may have been modified */
}
#ifdef DEBUGGING
- PL_reg_starttry = *startpos;
+ PL_reg_starttry = *startposp;
#endif
- prog->offs[0].start = *startpos - PL_bostr;
- PL_reginput = *startpos;
+ prog->offs[0].start = *startposp - PL_bostr;
prog->lastparen = 0;
prog->lastcloseparen = 0;
PL_regsize = 0;
}
#endif
REGCP_SET(lastcp);
- if (regmatch(reginfo, progi->program + 1)) {
- prog->offs[0].end = PL_reginput - PL_bostr;
+ result = regmatch(reginfo, *startposp, progi->program + 1);
+ if (result != -1) {
+ prog->offs[0].end = result;
return 1;
}
if (reginfo->cutpoint)
- *startpos= reginfo->cutpoint;
+ *startposp= reginfo->cutpoint;
REGCP_UNWIND(lastcp);
return 0;
}
}
-STATIC I32 /* 0 failure, 1 success */
-S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog)
+/* returns -1 on failure, $+[0] on success */
+STATIC I32
+S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog)
{
#if PERL_VERSION < 9 && !defined(PERL_CORE)
dMY_CXT;
regnode *next;
U32 n = 0; /* general value; init to avoid compiler warning */
I32 ln = 0; /* len or last; init to avoid compiler warning */
- char *locinput = PL_reginput;
+ char *reginput = startpos;
+ char *locinput = reginput;
I32 nextchr; /* is always set to UCHARAT(locinput) */
bool result = 0; /* return value of S_regmatch */
U32 state_num;
bool no_final = 0; /* prevent failure from backtracking? */
bool do_cutgroup = 0; /* no_final only until next branch/trie entry */
- char *startpoint = PL_reginput;
+ char *startpoint = reginput;
SV *popmark = NULL; /* are we looking for a mark? */
SV *sv_commit = NULL; /* last mark name seen in failure */
SV *sv_yes_mark = NULL; /* last mark name we have seen
case KEEPS:
/* update the startpoint */
st->u.keeper.val = rex->offs[0].start;
- PL_reginput = locinput;
+ reginput = locinput;
rex->offs[0].start = locinput - PL_bostr;
PUSH_STATE_GOTO(KEEPS_next, next);
/*NOT-REACHED*/
else
uc += chars;
}
- PL_reginput = (char *)uc;
+ reginput = (char *)uc;
}
scan = ST.me + ((ST.jump && ST.jump[ST.nextword])
PL_colors[5] );
});
- locinput = PL_reginput;
+ locinput = reginput;
nextchr = UCHARAT(locinput);
continue; /* execute rest of RE */
assert(0); /* NOTREACHED */
re->lastparen = 0;
re->lastcloseparen = 0;
- PL_reginput = locinput;
+ reginput = locinput;
PL_regsize = 0;
/* XXXX This is too dramatic a measure... */
rex = (struct regexp *)SvANY(rex_sv);
rexi = RXi_GET(rex);
- PL_reginput = locinput;
+ reginput = locinput;
REGCP_UNWIND(ST.lastcp);
regcppop(rex);
cur_eval = ST.prev_eval;
ST.count = -1; /* this will be updated by WHILEM */
ST.lastloc = NULL; /* this will be updated by WHILEM */
- PL_reginput = locinput;
+ reginput = locinput;
PUSH_YES_STATE_GOTO(CURLYX_end, PREVOPER(next));
assert(0); /* NOTREACHED */
}
ST.cache_offset = 0;
ST.cache_mask = 0;
- PL_reginput = locinput;
+ reginput = locinput;
DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log,
"%*s whilem: matched %ld out of %d..%d\n",
case WHILEM_A_max_fail: /* just failed to match A in a maximal match */
REGCP_UNWIND(ST.lastcp);
regcppop(rex); /* Restore some previous $<digit>s? */
- PL_reginput = locinput;
+ reginput = locinput;
DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log,
"%*s whilem: failed, trying continuation...\n",
REPORT_CODE_OFF+depth*2, "")
"%*s trying longer...\n", REPORT_CODE_OFF+depth*2, "")
);
/* Try grabbing another A and see if it helps. */
- PL_reginput = locinput;
+ reginput = locinput;
cur_curlyx->u.curlyx.lastloc = locinput;
ST.cp = regcppush(rex, cur_curlyx->u.curlyx.parenfloor);
REGCP_SET(ST.lastcp);
ST.lastcloseparen = rex->lastcloseparen;
ST.next_branch = next;
REGCP_SET(ST.cp);
- PL_reginput = locinput;
+ reginput = locinput;
/* Now go into the branch */
if (has_cutgroup) {
}
assert(0); /* NOTREACHED */
case CUTGROUP:
- PL_reginput = locinput;
+ reginput = locinput;
sv_yes_mark = st->u.mark.mark_name = scan->flags ? NULL :
MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
PUSH_STATE_GOTO(CUTGROUP_next,next);
goto curlym_do_B;
curlym_do_A: /* execute the A in /A{m,n}B/ */
- PL_reginput = locinput;
+ reginput = locinput;
PUSH_YES_STATE_GOTO(CURLYM_A, ST.A); /* match A */
assert(0); /* NOTREACHED */
if (ST.count == 1) {
if (PL_reg_match_utf8) {
char *s = locinput;
- while (s < PL_reginput) {
+ while (s < reginput) {
ST.alen++;
s += UTF8SKIP(s);
}
}
else {
- ST.alen = PL_reginput - locinput;
+ ST.alen = reginput - locinput;
}
if (ST.alen == 0)
ST.count = ST.minmod ? ARG1(ST.me) : ARG2(ST.me);
(IV) ST.count, (IV)ST.alen)
);
- locinput = PL_reginput;
+ locinput = reginput;
if (cur_eval && cur_eval->u.eval.close_paren &&
cur_eval->u.eval.close_paren == (U32)ST.me->flags)
sayNO;
curlym_do_B: /* execute the B in /A{m,n}B/ */
- PL_reginput = locinput;
+ reginput = locinput;
if (ST.c1 == CHRTEST_UNINIT) {
/* calculate c1 and c2 for possible match of 1st char
* following curly */
"", (IV)ST.count)
);
if (ST.c1 != CHRTEST_VOID
- && UCHARAT(PL_reginput) != ST.c1
- && UCHARAT(PL_reginput) != ST.c2)
+ && UCHARAT(reginput) != ST.c1
+ && UCHARAT(reginput) != ST.c2)
{
/* simulate B failing */
DEBUG_OPTIMISE_r(
I32 paren = ST.me->flags;
if (ST.count) {
rex->offs[paren].start
- = HOPc(PL_reginput, -ST.alen) - PL_bostr;
- rex->offs[paren].end = PL_reginput - PL_bostr;
+ = HOPc(reginput, -ST.alen) - PL_bostr;
+ rex->offs[paren].end = reginput - PL_bostr;
if ((U32)paren > rex->lastparen)
rex->lastparen = paren;
rex->lastcloseparen = paren;
ST.A = scan;
ST.B = next;
- PL_reginput = locinput;
+ reginput = locinput;
if (minmod) {
+ /* avoid taking address of reginput, so it can remain
+ * a register var */
+ char *ri = reginput;
minmod = 0;
- if (ST.min && regrepeat(rex, ST.A, ST.min, depth) < ST.min)
+ if (ST.min && regrepeat(rex, &ri, ST.A, ST.min, depth) < ST.min)
sayNO;
+ reginput = ri;
ST.count = ST.min;
- locinput = PL_reginput;
+ locinput = reginput;
REGCP_SET(ST.cp);
if (ST.c1 == CHRTEST_VOID)
goto curly_try_B_min;
}
else {
- ST.count = regrepeat(rex, ST.A, ST.max, depth);
- locinput = PL_reginput;
+ char *ri = reginput;
+ ST.count = regrepeat(rex, &ri, ST.A, ST.max, depth);
+ reginput = ri;
+ locinput = reginput;
if (ST.count < ST.min)
sayNO;
if ((ST.count > ST.min)
/* ...except that $ and \Z can match before *and* after
newline at the end. Consider "\n\n" =~ /\n+\Z\n/.
We may back off by one in this case. */
- if (UCHARAT(PL_reginput - 1) == '\n' && OP(ST.B) != EOS)
+ if (UCHARAT(reginput - 1) == '\n' && OP(ST.B) != EOS)
ST.min--;
}
REGCP_SET(ST.cp);
case CURLY_B_min_known_fail:
/* failed to find B in a non-greedy match where c1,c2 valid */
- PL_reginput = locinput; /* Could be reset... */
+ reginput = locinput; /* Could be reset... */
REGCP_UNWIND(ST.cp);
if (ST.paren) {
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
}
if (locinput > ST.maxpos)
sayNO;
- /* PL_reginput == oldloc now */
+ /* reginput == oldloc now */
if (n) {
+ char *ri = reginput;
ST.count += n;
- if (regrepeat(rex, ST.A, n, depth) < n)
+ if (regrepeat(rex, &ri, ST.A, n, depth) < n)
sayNO;
+ reginput = ri;
}
- PL_reginput = locinput;
+ reginput = locinput;
CURLY_SETPAREN(ST.paren, ST.count);
if (cur_eval && cur_eval->u.eval.close_paren &&
cur_eval->u.eval.close_paren == (U32)ST.paren) {
UNWIND_PAREN(ST.lastparen, ST.lastcloseparen);
}
/* failed -- move forward one */
- PL_reginput = locinput;
- if (regrepeat(rex, ST.A, 1, depth)) {
+ reginput = locinput;
+ {
+ /* use a temporary so the address of reginput is never taken */
+ char *ri = reginput;
+ if (!regrepeat(rex, &ri, ST.A, 1, depth)) {
+ sayNO;
+ }
+ reginput = ri;
+ }
+ {
ST.count++;
- locinput = PL_reginput;
+ locinput = reginput;
if (ST.count <= ST.max || (ST.max == REG_INFTY &&
ST.count > 0)) /* count overflow ? */
{
PUSH_STATE_GOTO(CURLY_B_min, ST.B);
}
}
+ /* reached when the count check above fails: report failure rather
+ * than falling through to the NOTREACHED assert below */
sayNO;
assert(0); /* NOTREACHED */
{
UV c = 0;
if (ST.c1 != CHRTEST_VOID)
- c = utf8_target ? utf8n_to_uvchr((U8*)PL_reginput,
+ c = utf8_target ? utf8n_to_uvchr((U8*)reginput,
UTF8_MAXBYTES, 0, uniflags)
- : (UV) UCHARAT(PL_reginput);
+ : (UV) UCHARAT(reginput);
/* If it could work, try it. */
if (ST.c1 == CHRTEST_VOID || c == (UV)ST.c1 || c == (UV)ST.c2) {
CURLY_SETPAREN(ST.paren, ST.count);
/* back up. */
if (--ST.count < ST.min)
sayNO;
- PL_reginput = locinput = HOPc(locinput, -1);
+ reginput = locinput = HOPc(locinput, -1);
goto curly_try_B_max;
#undef ST
cur_curlyx = cur_eval->u.eval.prev_curlyx;
REGCP_SET(st->u.eval.lastcp);
- PL_reginput = locinput;
+ reginput = locinput;
/* Restore parens of the outer rex without popping the
* savestack */
sayNO_SILENT; /* Cannot match: too short. */
}
- PL_reginput = locinput; /* put where regtry can find it */
+ reginput = locinput; /* end of match; returned to regtry as an offset from PL_bostr */
sayYES; /* Success! */
case SUCCEED: /* successful SUSPEND/UNLESSM/IFMATCH/CURLYM */
PerlIO_printf(Perl_debug_log,
"%*s %ssubpattern success...%s\n",
REPORT_CODE_OFF+depth*2, "", PL_colors[4], PL_colors[5]));
- PL_reginput = locinput; /* put where regtry can find it */
+ reginput = locinput; /* put where regtry can find it */
sayYES; /* Success! */
#undef ST
case SUSPEND: /* (?>A) */
ST.wanted = 1;
- PL_reginput = locinput;
+ reginput = locinput;
goto do_ifmatch;
case UNLESSM: /* -ve lookaround: (?!A), or with flags, (?<!A) */
next = NULL;
break;
}
- PL_reginput = s;
+ reginput = s;
}
else
- PL_reginput = locinput;
+ reginput = locinput;
do_ifmatch:
ST.me = scan;
sayNO;
if (OP(ST.me) == SUSPEND)
- locinput = PL_reginput;
+ locinput = reginput;
else {
- locinput = PL_reginput = st->locinput;
+ locinput = reginput = st->locinput;
nextchr = UCHARAT(locinput);
}
scan = ST.me + ARG(ST.me);
reginfo->cutpoint = PL_regeol;
/* FALLTHROUGH */
case PRUNE:
- PL_reginput = locinput;
+ reginput = locinput;
if (!scan->flags)
sv_yes_mark = sv_commit = MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
PUSH_STATE_GOTO(COMMIT_next,next);
ST.mark_name = sv_commit = sv_yes_mark
= MUTABLE_SV(rexi->data->data[ ARG( scan ) ]);
mark_state = st;
- ST.mark_loc = PL_reginput = locinput;
+ ST.mark_loc = reginput = locinput;
PUSH_YES_STATE_GOTO(MARKPOINT_next,next);
assert(0); /* NOTREACHED */
case MARKPOINT_next:
sayNO;
assert(0); /* NOTREACHED */
case SKIP:
- PL_reginput = locinput;
+ reginput = locinput;
if (scan->flags) {
/* (*SKIP) : if we fail we cut here*/
ST.mark_name = NULL;
newst = S_push_slab(aTHX);
PL_regmatch_state = newst;
- locinput = PL_reginput;
+ locinput = reginput;
nextchr = UCHARAT(locinput);
st = newst;
continue;
/* clean up; in particular, free all slabs above current one */
LEAVE_SCOPE(oldsave);
- return result;
+ assert(!result || reginput - PL_bostr >= 0);
+ return result ? reginput - PL_bostr : -1;
}
/*
* rather than incrementing count on every character. [Er, except utf8.]]
*/
STATIC I32
-S_regrepeat(pTHX_ const regexp *prog, const regnode *p, I32 max, int depth)
+S_regrepeat(pTHX_ const regexp *prog, char **startposp, const regnode *p, I32 max, int depth)
{
dVAR;
char *scan;
PERL_ARGS_ASSERT_REGREPEAT;
- scan = PL_reginput;
+ scan = *startposp;
if (max == REG_INFTY)
max = I32_MAX;
else if (max < loceol - scan)
if (hardcount)
c = hardcount;
else
- c = scan - PL_reginput;
- PL_reginput = scan;
+ c = scan - *startposp;
+ *startposp = scan;
DEBUG_r({
GET_RE_DEBUG_FLAGS_DECL;