From e7707071e420c5a715c0621d0428dd393503e884 Mon Sep 17 00:00:00 2001 From: Yves Orton Date: Wed, 28 Feb 2007 18:45:33 +0100 Subject: [PATCH] Re: New file: t/op/regexp_email.t Message-ID: <9b18b3110702280845p7860ca08taf1aead39a178aa4@mail.gmail.com> p4raw-id: //depot/perl@30436 --- MANIFEST | 1 + ext/re/re.pm | 10 ++++-- regcomp.h | 4 +++ regexec.c | 23 +++++++------ t/op/regexp_email.t | 94 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 118 insertions(+), 14 deletions(-) create mode 100644 t/op/regexp_email.t diff --git a/MANIFEST b/MANIFEST index 02db239..b70220b 100644 --- a/MANIFEST +++ b/MANIFEST @@ -3616,6 +3616,7 @@ t/op/regexp_qr_embed.t See if regular expressions work with embedded qr// t/op/regexp_qr.t See if regular expressions work as qr// t/op/regexp.t See if regular expressions work t/op/regexp_trielist.t See if regular expressions work with trie optimisation +t/op/regexp_email.t See if regex recursion works by parsing email addresses t/op/regmesg.t See if one can get regular expression errors t/op/repeat.t See if x operator works t/op/re_tests Regular expressions for regexp.t diff --git a/ext/re/re.pm b/ext/re/re.pm index 4f8d410..c33ca3c 100644 --- a/ext/re/re.pm +++ b/ext/re/re.pm @@ -67,8 +67,9 @@ my %flags = ( STATE => 0x080000, OPTIMISEM => 0x100000, STACK => 0x280000, + BUFFERS => 0x400000, ); -$flags{ALL} = -1; +$flags{ALL} = -1 & ~($flags{OFFSETS}|$flags{OFFSETSDBG}|$flags{BUFFERS}); $flags{All} = $flags{all} = $flags{DUMP} | $flags{EXECUTE}; $flags{Extra} = $flags{EXECUTE} | $flags{COMPILE}; $flags{More} = $flags{MORE} = $flags{All} | $flags{TRIEC} | $flags{TRIEM} | $flags{STATE}; @@ -323,6 +324,11 @@ Enable debugging of start point optimisations. Turns on all "extra" debugging options. +=item BUFFERS + +Enable debugging the capture buffer storage during match. Warning, +this can potentially produce extremely large output. + =item TRIEM Enable enhanced TRIE debugging. Enhances both TRIEE @@ -373,7 +379,7 @@ These are useful shortcuts to save on the typing. =item ALL -Enable all compile and execute options at once. +Enable all options at once except OFFSETS, OFFSETSDBG and BUFFERS =item All diff --git a/regcomp.h b/regcomp.h index b07a63f..72f415a 100644 --- a/regcomp.h +++ b/regcomp.h @@ -695,6 +695,7 @@ re.pm, especially to the documentation. #define RE_DEBUG_EXTRA_OFFDEBUG 0x040000 #define RE_DEBUG_EXTRA_STATE 0x080000 #define RE_DEBUG_EXTRA_OPTIMISE 0x100000 +#define RE_DEBUG_EXTRA_BUFFERS 0x400000 /* combined */ #define RE_DEBUG_EXTRA_STACK 0x280000 @@ -732,6 +733,9 @@ re.pm, especially to the documentation. if (re_debug_flags & RE_DEBUG_EXTRA_STATE) x ) #define DEBUG_STACK_r(x) DEBUG_r( \ if (re_debug_flags & RE_DEBUG_EXTRA_STACK) x ) +#define DEBUG_BUFFERS_r(x) DEBUG_r( \ + if (re_debug_flags & RE_DEBUG_EXTRA_BUFFERS) x ) + #define DEBUG_OPTIMISE_MORE_r(x) DEBUG_r( \ if ((RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE) == \ (re_debug_flags & (RE_DEBUG_EXTRA_OPTIMISE|RE_DEBUG_COMPILE_OPTIMISE)) ) x ) diff --git a/regexec.c b/regexec.c index 1ae9842..d1f6b8e 100644 --- a/regexec.c +++ b/regexec.c @@ -193,7 +193,7 @@ S_regcppush(pTHX_ I32 parenfloor) SSPUSHINT(PL_regstartp[p]); SSPUSHPTR(PL_reg_start_tmp[p]); SSPUSHINT(p); - DEBUG_EXECUTE_r(PerlIO_printf(Perl_debug_log, + DEBUG_BUFFERS_r(PerlIO_printf(Perl_debug_log, " saving \\%"UVuf" %"IVdf"(%"IVdf")..%"IVdf"\n", (UV)p, (IV)PL_regstartp[p], (IV)(PL_reg_start_tmp[p] - PL_bostr), @@ -263,7 +263,7 @@ S_regcppop(pTHX_ const regexp *rex) tmps = SSPOPINT; if (paren <= *PL_reglastparen) PL_regendp[paren] = tmps; - DEBUG_EXECUTE_r( + DEBUG_BUFFERS_r( PerlIO_printf(Perl_debug_log, " restoring \\%"UVuf" to %"IVdf"(%"IVdf")..%"IVdf"%s\n", (UV)paren, (IV)PL_regstartp[paren], @@ -272,7 +272,7 @@ S_regcppop(pTHX_ const regexp *rex) (paren > *PL_reglastparen ? "(no)" : "")); ); } - DEBUG_EXECUTE_r( + DEBUG_BUFFERS_r( if (*PL_reglastparen + 1 <= rex->nparens) { PerlIO_printf(Perl_debug_log, " restoring \\%"IVdf"..\\%"IVdf" to undef\n", @@ -3568,8 +3568,8 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) regnode *startpoint; case GOSTART: - case GOSUB: /* /(...(?1))/ */ - if (cur_eval && cur_eval->locinput==locinput) { + case GOSUB: /* /(...(?1))/ /(...(?&foo))/ */ + if (cur_eval && cur_eval->locinput==locinput) { if (cur_eval->u.eval.close_paren == (U32)ARG(scan)) Perl_croak(aTHX_ "Infinite recursion in regex"); if ( ++nochange_depth > max_nochange_depth ) @@ -3742,7 +3742,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) cur_curlyx = ST.prev_curlyx; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; - if ( nochange_depth > 0 ); + if ( nochange_depth ) nochange_depth--; sayYES; @@ -3760,7 +3760,7 @@ S_regmatch(pTHX_ regmatch_info *reginfo, regnode *prog) cur_curlyx = ST.prev_curlyx; /* XXXX This is too dramatic a measure... */ PL_reg_maxiter = 0; - if ( nochange_depth > 0 ); + if ( nochange_depth ) nochange_depth--; sayNO_SILENT; #undef ST @@ -4755,8 +4755,6 @@ NULL if (cur_eval) { /* we've just finished A in /(??{A})B/; now continue with B */ I32 tmpix; - - st->u.eval.toggle_reg_flags = cur_eval->u.eval.toggle_reg_flags; PL_reg_flags ^= st->u.eval.toggle_reg_flags; @@ -4782,9 +4780,10 @@ NULL DEBUG_EXECUTE_r( PerlIO_printf(Perl_debug_log, "%*s EVAL trying tail ... %"UVxf"\n", REPORT_CODE_OFF+depth*2, "",PTR2UV(cur_eval));); - if ( nochange_depth > 0 ); - nochange_depth++; - PUSH_YES_STATE_GOTO(EVAL_AB, + if ( nochange_depth ) + nochange_depth--; + + PUSH_YES_STATE_GOTO(EVAL_AB, st->u.eval.prev_eval->u.eval.B); /* match B */ } diff --git a/t/op/regexp_email.t b/t/op/regexp_email.t new file mode 100644 index 0000000..c53dd82 --- /dev/null +++ b/t/op/regexp_email.t @@ -0,0 +1,94 @@ +#!./perl +# +# Tests to make sure the regexp engine doesn't run into limits too soon. +# + +BEGIN { + chdir 't' if -d 't'; + @INC = '../lib'; +} + +print "1..13\n"; + +my $email = qr { + (?(DEFINE) + (?
(?&mailbox) | (?&group)) + (? (?&name_addr) | (?&addr_spec)) + (? (?&display_name)? (?&angle_addr)) + (? (?&CFWS)? < (?&addr_spec) > (?&CFWS)?) + (? (?&display_name) : (?:(?&mailbox_list) | (?&CFWS))? ; + (?&CFWS)?) + (? (?&phrase)) + (? (?&mailbox) (?: , (?&mailbox))*) + + (? (?&local_part) \@ (?&domain)) + (? (?&dot_atom) | (?"ed_string)) + (? (?&dot_atom) | (?&domain_literal)) + (? (?&CFWS)? \[ (?: (?&FWS)? (?&dcontent))* (?&FWS)? + \] (?&CFWS)?) + (? (?&dtext) | (?"ed_pair)) + (? (?&NO_WS_CTL) | [\x21-\x5a\x5e-\x7e]) + + (? (?&ALPHA) | (?&DIGIT) | [!#\$%&'*+-/=?^_`{|}~]) + (? (?&CFWS)? (?&atext)+ (?&CFWS)?) + (? (?&CFWS)? (?&dot_atom_text) (?&CFWS)?) + (? (?&atext)+ (?: \. (?&atext)+)*) + + (? [\x01-\x09\x0b\x0c\x0e-\x7f]) + (? \\ (?&text)) + + (? (?&NO_WS_CTL) | [\x21\x23-\x5b\x5d-\x7e]) + (? (?&qtext) | (?"ed_pair)) + (? (?&CFWS)? (?&DQUOTE) (?:(?&FWS)? (?&qcontent))* + (?&FWS)? (?&DQUOTE) (?&CFWS)?) + + (? (?&atom) | (?"ed_string)) + (? (?&word)+) + + # Folding white space + (? (?: (?&WSP)* (?&CRLF))? (?&WSP)+) + (? (?&NO_WS_CTL) | [\x21-\x27\x2a-\x5b\x5d-\x7e]) + (? (?&ctext) | (?"ed_pair) | (?&comment)) + (? \( (?: (?&FWS)? (?&ccontent))* (?&FWS)? \) ) + (? (?: (?&FWS)? (?&comment))* + (?: (?:(?&FWS)? (?&comment)) | (?&FWS))) + + # No whitespace control + (? [\x01-\x08\x0b\x0c\x0e-\x1f\x7f]) + + (? [A-Za-z]) + (? [0-9]) + (? \x0d \x0a) + (? ") + (? [\x20\x09]) + ) + + (?&address) +}x; + +my $count = 0; + +$| = 1; +while () { + chomp; + next if /^#/; + print /^$email$/ ? "ok " : "not ok ", ++ $count, "\n"; +} + +# +# Acme::MetaSyntactic ++ +# +__DATA__ +Jeff_Tracy@thunderbirds.org +"Lady Penelope"@thunderbirds.org +"The\ Hood"@thunderbirds.org +fred @ flintstones.net +barney (rubble) @ flintstones.org +bammbamm (bam! bam! (bam! bam! (bam!)) bam!) @ flintstones.org +Michelangelo@[127.0.0.1] +Donatello @ [127.0.0.1] +Raphael (He as well) @ [127.0.0.1] +"Leonardo" @ [127.0.0.1] +Barbapapa +"Barba Mama" +Barbalala (lalalalalalalala) -- 2.7.4