From 2395827c7bba99ac465a81aaf211d6f887d2518e Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Sun, 23 Feb 2014 18:59:48 +0100 Subject: [PATCH] Improve how regprop dumps REF-like nodes during execution We pass in the regmatch_info struct, which allows us to dump what a given REF is going to match. --- embed.fnc | 2 +- embed.h | 2 +- proto.h | 2 +- regcomp.c | 61 +++++++++++++++++++++++++++++++++++++++---------------------- regexec.c | 6 +++--- 5 files changed, 45 insertions(+), 28 deletions(-) diff --git a/embed.fnc b/embed.fnc index f747aae..11b28cb 100644 --- a/embed.fnc +++ b/embed.fnc @@ -1167,7 +1167,7 @@ EXp |I32|reg_numbered_buff_length|NN REGEXP * const rx|NN const SV * const sv|co EXp |SV*|reg_qr_package|NN REGEXP * const rx : FIXME - why the E? -Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o +Ep |void |regprop |NULLOK const regexp *prog|NN SV* sv|NN const regnode* o|NULLOK const regmatch_info *reginfo Anp |void |repeatcpy |NN char* to|NN const char* from|I32 len|IV count AnpP |char* |rninstr |NN const char* big|NN const char* bigend \ |NN const char* little|NN const char* lend diff --git a/embed.h b/embed.h index 2f8aca5..16ebd32 100644 --- a/embed.h +++ b/embed.h @@ -874,7 +874,7 @@ #define reg_numbered_buff_store(a,b,c) Perl_reg_numbered_buff_store(aTHX_ a,b,c) #define reg_qr_package(a) Perl_reg_qr_package(aTHX_ a) #define reg_temp_copy(a,b) Perl_reg_temp_copy(aTHX_ a,b) -#define regprop(a,b,c) Perl_regprop(aTHX_ a,b,c) +#define regprop(a,b,c,d) Perl_regprop(aTHX_ a,b,c,d) #define report_uninit(a) Perl_report_uninit(aTHX_ a) #define sv_magicext_mglob(a) Perl_sv_magicext_mglob(aTHX_ a) #define validate_proto(a,b,c) Perl_validate_proto(aTHX_ a,b,c) diff --git a/proto.h b/proto.h index e032ad6..a2e8a29 100644 --- a/proto.h +++ b/proto.h @@ -3479,7 +3479,7 @@ PERL_CALLCONV void Perl_reginitcolors(pTHX); PERL_CALLCONV regnode* Perl_regnext(pTHX_ regnode* p) __attribute__warn_unused_result__; -PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* o) +PERL_CALLCONV void Perl_regprop(pTHX_ const regexp *prog, SV* sv, const regnode* o, const regmatch_info *reginfo) __attribute__nonnull__(pTHX_2) __attribute__nonnull__(pTHX_3); #define PERL_ARGS_ASSERT_REGPROP \ diff --git a/regcomp.c b/regcomp.c index c59c155..ea9f42b 100644 --- a/regcomp.c +++ b/regcomp.c @@ -3070,7 +3070,7 @@ S_make_trie_failtable(pTHX_ RExC_state_t *pRExC_state, regnode *source, regnode DEBUG_OPTIMISE_r({if (scan){ \ SV * const mysv=sv_newmortal(); \ regnode *Next = regnext(scan); \ - regprop(RExC_rx, mysv, scan); \ + regprop(RExC_rx, mysv, scan, NULL); \ PerlIO_printf(Perl_debug_log, "%*s" str ">%3d: %s (%d)\n", \ (int)depth*2, "", REG_NODE_NUM(scan), SvPV_nolen_const(mysv),\ Next ? (REG_NODE_NUM(Next)) : 0 ); \ @@ -3838,7 +3838,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, DEBUG_TRIE_COMPILE_r({ - regprop(RExC_rx, mysv, tail ); + regprop(RExC_rx, mysv, tail, NULL); PerlIO_printf( Perl_debug_log, "%*s%s%s\n", (int)depth * 2 + 2, "", "Looking for TRIE'able sequences. Tail node is: ", @@ -3919,16 +3919,16 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, #endif DEBUG_TRIE_COMPILE_r({ - regprop(RExC_rx, mysv, cur); + regprop(RExC_rx, mysv, cur, NULL); PerlIO_printf( Perl_debug_log, "%*s- %s (%d)", (int)depth * 2 + 2,"", SvPV_nolen_const( mysv ), REG_NODE_NUM(cur) ); - regprop(RExC_rx, mysv, noper); + regprop(RExC_rx, mysv, noper, NULL); PerlIO_printf( Perl_debug_log, " -> %s", SvPV_nolen_const(mysv)); if ( noper_next ) { - regprop(RExC_rx, mysv, noper_next ); + regprop(RExC_rx, mysv, noper_next, NULL); PerlIO_printf( Perl_debug_log,"\t=> %s\t", SvPV_nolen_const(mysv)); } @@ -4027,7 +4027,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, } /* end handle unmergable node */ } /* loop over branches */ DEBUG_TRIE_COMPILE_r({ - regprop(RExC_rx, mysv, cur); + regprop(RExC_rx, mysv, cur, NULL); PerlIO_printf( Perl_debug_log, "%*s- %s (%d) \n", (int)depth * 2 + 2, @@ -4067,7 +4067,7 @@ S_study_chunk(pTHX_ RExC_state_t *pRExC_state, regnode **scanp, * something like this: (?:|) So we can * turn it into a plain NOTHING op. */ DEBUG_TRIE_COMPILE_r({ - regprop(RExC_rx, mysv, cur); + regprop(RExC_rx, mysv, cur, NULL); PerlIO_printf( Perl_debug_log, "%*s- %s (%d) \n", (int)depth * 2 + 2, "", SvPV_nolen_const( mysv ),REG_NODE_NUM(cur)); @@ -4941,7 +4941,7 @@ PerlIO_printf(Perl_debug_log, "LHS=%"UVdf" RHS=%"UVdf"\n", DEBUG_STUDYDATA("OPFAIL",data,depth); /*DEBUG_PARSE_MSG("opfail");*/ - regprop(RExC_rx, mysv_val, upto); + regprop(RExC_rx, mysv_val, upto, NULL); PerlIO_printf(Perl_debug_log, "~ replace with OPFAIL pointed at %s (%"IVdf") offset %"IVdf"\n", SvPV_nolen_const(mysv_val), @@ -6914,7 +6914,7 @@ reStudy: ri->regstclass = (regnode*)RExC_rxi->data->data[n]; r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ DEBUG_COMPILE_r({ SV *sv = sv_newmortal(); - regprop(r, sv, (regnode*)data.start_class); + regprop(r, sv, (regnode*)data.start_class, NULL); PerlIO_printf(Perl_debug_log, "synthetic stclass \"%s\".\n", SvPVX_const(sv));}); @@ -6994,7 +6994,7 @@ reStudy: ri->regstclass = (regnode*)RExC_rxi->data->data[n]; r->intflags &= ~PREGf_SKIP; /* Used in find_byclass(). */ DEBUG_COMPILE_r({ SV* sv = sv_newmortal(); - regprop(r, sv, (regnode*)data.start_class); + regprop(r, sv, (regnode*)data.start_class, NULL); PerlIO_printf(Perl_debug_log, "synthetic stclass \"%s\".\n", SvPVX_const(sv));}); @@ -10136,8 +10136,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) SV * const mysv_val1=sv_newmortal(); SV * const mysv_val2=sv_newmortal(); DEBUG_PARSE_MSG("lsbr"); - regprop(RExC_rx, mysv_val1, lastbr); - regprop(RExC_rx, mysv_val2, ender); + regprop(RExC_rx, mysv_val1, lastbr, NULL); + regprop(RExC_rx, mysv_val2, ender, NULL); PerlIO_printf(Perl_debug_log, "~ tying lastbr %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n", SvPV_nolen_const(mysv_val1), (IV)REG_NODE_NUM(lastbr), @@ -10177,8 +10177,8 @@ S_reg(pTHX_ RExC_state_t *pRExC_state, I32 paren, I32 *flagp,U32 depth) SV * const mysv_val1=sv_newmortal(); SV * const mysv_val2=sv_newmortal(); DEBUG_PARSE_MSG("NADA"); - regprop(RExC_rx, mysv_val1, ret); - regprop(RExC_rx, mysv_val2, ender); + regprop(RExC_rx, mysv_val1, ret, NULL); + regprop(RExC_rx, mysv_val2, ender, NULL); PerlIO_printf(Perl_debug_log, "~ converting ret %s (%"IVdf") to ender %s (%"IVdf") offset %"IVdf"\n", SvPV_nolen_const(mysv_val1), (IV)REG_NODE_NUM(ret), @@ -15221,7 +15221,7 @@ S_regtail(pTHX_ RExC_state_t *pRExC_state, regnode *p, DEBUG_PARSE_r({ SV * const mysv=sv_newmortal(); DEBUG_PARSE_MSG((scan==p ? "tail" : "")); - regprop(RExC_rx, mysv, scan); + regprop(RExC_rx, mysv, scan, NULL); PerlIO_printf(Perl_debug_log, "~ %s (%d) %s %s\n", SvPV_nolen_const(mysv), REG_NODE_NUM(scan), (temp == NULL ? "->" : ""), @@ -15311,7 +15311,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, DEBUG_PARSE_r({ SV * const mysv=sv_newmortal(); DEBUG_PARSE_MSG((scan==p ? "tsdy" : "")); - regprop(RExC_rx, mysv, scan); + regprop(RExC_rx, mysv, scan, NULL); PerlIO_printf(Perl_debug_log, "~ %s (%d) -> %s\n", SvPV_nolen_const(mysv), REG_NODE_NUM(scan), @@ -15324,7 +15324,7 @@ S_regtail_study(pTHX_ RExC_state_t *pRExC_state, regnode *p, DEBUG_PARSE_r({ SV * const mysv_val=sv_newmortal(); DEBUG_PARSE_MSG(""); - regprop(RExC_rx, mysv_val, val); + regprop(RExC_rx, mysv_val, val, NULL); PerlIO_printf(Perl_debug_log, "~ attach to %s (%"IVdf") offset to %"IVdf"\n", SvPV_nolen_const(mysv_val), @@ -15476,7 +15476,7 @@ Perl_regdump(pTHX_ const regexp *r) PerlIO_printf(Perl_debug_log, ") "); if (ri->regstclass) { - regprop(r, sv, ri->regstclass); + regprop(r, sv, ri->regstclass, NULL); PerlIO_printf(Perl_debug_log, "stclass %s ", SvPVX_const(sv)); } if (r->intflags & PREGf_ANCH) { @@ -15513,11 +15513,11 @@ Perl_regdump(pTHX_ const regexp *r) } /* -- regprop - printable representation of opcode +- regprop - printable representation of opcode, with run time support */ void -Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) +Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o, const regmatch_info *reginfo) { #ifdef DEBUGGING dVAR; @@ -15619,7 +15619,7 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) (UV)trie->maxlen, (UV)TRIE_CHARCOUNT(trie), (UV)trie->uniquecharcount - ) + ); ); if ( IS_ANYOF_TRIE(op) || trie->bitmap ) { sv_catpvs(sv, "["); @@ -15662,6 +15662,20 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) } } } + if ( k == REF && reginfo) { + U32 n = ARG(o); /* which paren pair */ + I32 ln = prog->offs[n].start; + if (prog->lastparen < n || ln == -1) + Perl_sv_catpvf(aTHX_ sv, ": FAIL"); + else if (ln == prog->offs[n].end) + Perl_sv_catpvf(aTHX_ sv, ": ACCEPT - EMPTY STRING"); + else { + const char *s = reginfo->strbeg + ln; + Perl_sv_catpvf(aTHX_ sv, ": "); + Perl_pv_pretty( aTHX_ sv, s, prog->offs[n].end - prog->offs[n].start, 32, 0, 0, + PERL_PV_ESCAPE_UNI_DETECT|PERL_PV_PRETTY_NOCLEAR|PERL_PV_PRETTY_ELLIPSES|PERL_PV_PRETTY_QUOTE ); + } + } } else if (k == GOSUB) /* Paren and offset */ Perl_sv_catpvf(aTHX_ sv, "%d[%+d]", (int)ARG(o),(int)ARG2L(o)); @@ -15826,9 +15840,12 @@ Perl_regprop(pTHX_ const regexp *prog, SV *sv, const regnode *o) PERL_UNUSED_ARG(sv); PERL_UNUSED_ARG(o); PERL_UNUSED_ARG(prog); + PERL_UNUSED_ARG(reginfo); #endif /* DEBUGGING */ } + + SV * Perl_re_intuit_string(pTHX_ REGEXP * const r) { /* Assume that RE_INTUIT is set */ @@ -16566,7 +16583,7 @@ S_dumpuntil(pTHX_ const regexp *r, const regnode *start, const regnode *node, } else CLEAR_OPTSTART; - regprop(r, sv, node); + regprop(r, sv, node, NULL); PerlIO_printf(Perl_debug_log, "%4"IVdf":%*s%s", (IV)(node - start), (int)(2*indent + 1), "", SvPVX_const(sv)); diff --git a/regexec.c b/regexec.c index c61d212..9ceb4e3 100644 --- a/regexec.c +++ b/regexec.c @@ -2866,7 +2866,7 @@ Perl_regexec_flags(pTHX_ REGEXP * const rx, char *stringarg, char *strend, } DEBUG_EXECUTE_r({ SV * const prop = sv_newmortal(); - regprop(prog, prop, c); + regprop(prog, prop, c, reginfo); { RE_PV_QUOTED_DECL(quoted,utf8_target,PERL_DEBUG_PAD_ZERO(1), s,strend-s,60); @@ -3896,7 +3896,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, char *startpos, regnode *prog) SV * const prop = sv_newmortal(); regnode *rnext=regnext(scan); DUMP_EXEC_POS( locinput, scan, utf8_target ); - regprop(rex, prop, scan); + regprop(rex, prop, scan, reginfo); PerlIO_printf(Perl_debug_log, "%3"IVdf":%*s%s(%"IVdf")\n", @@ -7463,7 +7463,7 @@ S_regrepeat(pTHX_ regexp *prog, char **startposp, const regnode *p, GET_RE_DEBUG_FLAGS_DECL; DEBUG_EXECUTE_r({ SV * const prop = sv_newmortal(); - regprop(prog, prop, p); + regprop(prog, prop, p, reginfo); PerlIO_printf(Perl_debug_log, "%*s %s can match %"IVdf" times out of %"IVdf"...\n", REPORT_CODE_OFF + depth*2, "", SvPVX_const(prop),(IV)c,(IV)max); -- 2.7.4