From: David Mitchell Date: Sun, 26 Jan 2014 14:19:47 +0000 (+0000) Subject: regex substrs: record index of check substr X-Git-Tag: upstream/5.20.0~464^2~42 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6480a6c448dec40aad54025b06ea6b8bdbc54527;p=platform%2Fupstream%2Fperl.git regex substrs: record index of check substr Currently prog->substrs->data[] is a 3-element array of structures. Elements 0 and 1 record the longest anchored and floating substrings, while element 2 ('check') is a copy of the longer of 0 and 1. Record in a new field, prog->substrs->check_ix, the index of which element was copied. (Eventually I intend to remove the copy altogether.) Also for the anchored substr, set max_offset equal to min_offset. Previously it was left as zero and ignored, although if copied to check, the check copy of max *was* set equal to min. Having this always set will allow us to make the code simpler. --- diff --git a/regcomp.c b/regcomp.c index a657252..a82171a 100644 --- a/regcomp.c +++ b/regcomp.c @@ -6953,6 +6953,7 @@ reStudy: /* A temporary algorithm prefers floated substr to fixed one to dig * more info. */ if (longest_fixed_length > longest_float_length) { + r->substrs->check_ix = 0; r->check_end_shift = r->anchored_end_shift; r->check_substr = r->anchored_substr; r->check_utf8 = r->anchored_utf8; @@ -6961,6 +6962,7 @@ reStudy: r->intflags |= PREGf_NOSCAN; } else { + r->substrs->check_ix = 1; r->check_end_shift = r->float_end_shift; r->check_substr = r->float_substr; r->check_utf8 = r->float_utf8; @@ -6972,6 +6974,8 @@ reStudy: if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8)) r->extflags |= RXf_INTUIT_TAIL; } + r->substrs->data[0].max_offset = r->substrs->data[0].min_offset; + /* XXX Unneeded? 
dmq (shouldn't as this is handled elsewhere) if ( (STRLEN)minlen < longest_float_length ) minlen= longest_float_length; diff --git a/regexec.c b/regexec.c index 140bf8b..e1b4e8b 100644 --- a/regexec.c +++ b/regexec.c @@ -644,6 +644,7 @@ Perl_re_intuit_start(pTHX_ SV *check; char *t; const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */ + U8 other_ix = 1 - prog->substrs->check_ix; bool ml_anch = 0; char *other_last = NULL; /* other substr already checked this high */ char *check_at = NULL; /* check substr found at this pos */ @@ -899,14 +900,15 @@ Perl_re_intuit_start(pTHX_ Probably it is right to do no SCREAM here... */ - if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8) - : (prog->float_substr && prog->anchored_substr)) + if (utf8_target ? prog->substrs->data[other_ix].utf8_substr + : prog->substrs->data[other_ix].substr) { /* Take into account the "other" substring. */ /* XXXX May be hopelessly wrong for UTF... */ if (!other_last) other_last = strpos; - if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) { + + if (prog->substrs->check_ix) { do_other_anchored: { char * last; diff --git a/regexp.h b/regexp.h index eba5974..d32e669 100644 --- a/regexp.h +++ b/regexp.h @@ -43,6 +43,7 @@ struct reg_substr_datum { SSize_t end_shift; /* how many fixed chars must end the string */ }; struct reg_substr_data { + U8 check_ix; /* index into data[] of check substr */ struct reg_substr_datum data[3]; /* Actual array */ };