From: David Mitchell <davem@iabyn.com>
Date: Sun, 26 Jan 2014 14:19:47 +0000 (+0000)
Subject: regex substrs: record index of check substr
X-Git-Tag: upstream/5.20.0~464^2~42
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=6480a6c448dec40aad54025b06ea6b8bdbc54527;p=platform%2Fupstream%2Fperl.git

regex substrs: record index of check substr

Currently prog->substrs->data[] is a 3-element array of structures.
Elements 0 and 1 record the longest anchored and floating substrings,
while element 2 ('check') is a copy of the longer of 0 and 1.

Record in a new field, prog->substrs->check_ix, the index of which element
was copied. (Eventually I intend to remove the copy altogether.)

Also, for the anchored substr, set max_offset equal to min_offset.
Previously it was left as zero and ignored, although if copied to check,
the check copy of max_offset *was* set equal to min_offset. Having this
always set will allow us to make the code simpler.
---

diff --git a/regcomp.c b/regcomp.c
index a657252..a82171a 100644
--- a/regcomp.c
+++ b/regcomp.c
@@ -6953,6 +6953,7 @@ reStudy:
         /* A temporary algorithm prefers floated substr to fixed one to dig
          * more info. */
 	if (longest_fixed_length > longest_float_length) {
+	    r->substrs->check_ix = 0;
 	    r->check_end_shift = r->anchored_end_shift;
 	    r->check_substr = r->anchored_substr;
 	    r->check_utf8 = r->anchored_utf8;
@@ -6961,6 +6962,7 @@ reStudy:
                 r->intflags |= PREGf_NOSCAN;
 	}
 	else {
+	    r->substrs->check_ix = 1;
 	    r->check_end_shift = r->float_end_shift;
 	    r->check_substr = r->float_substr;
 	    r->check_utf8 = r->float_utf8;
@@ -6972,6 +6974,8 @@ reStudy:
 	    if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
 		r->extflags |= RXf_INTUIT_TAIL;
 	}
+        r->substrs->data[0].max_offset = r->substrs->data[0].min_offset;
+
 	/* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
 	if ( (STRLEN)minlen < longest_float_length )
             minlen= longest_float_length;
diff --git a/regexec.c b/regexec.c
index 140bf8b..e1b4e8b 100644
--- a/regexec.c
+++ b/regexec.c
@@ -644,6 +644,7 @@ Perl_re_intuit_start(pTHX_
     SV *check;
     char *t;
     const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
+    U8   other_ix = 1 - prog->substrs->check_ix;
     bool ml_anch = 0;
     char *other_last = NULL;	/* other substr already checked this high */
     char *check_at = NULL;		/* check substr found at this pos */
@@ -899,14 +900,15 @@ Perl_re_intuit_start(pTHX_
        Probably it is right to do no SCREAM here...
      */
 
-    if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
-                : (prog->float_substr && prog->anchored_substr)) 
+    if (utf8_target ? prog->substrs->data[other_ix].utf8_substr
+                    : prog->substrs->data[other_ix].substr)
     {
 	/* Take into account the "other" substring. */
 	/* XXXX May be hopelessly wrong for UTF... */
 	if (!other_last)
 	    other_last = strpos;
-	if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
+
+	if (prog->substrs->check_ix) {
 	  do_other_anchored:
 	    {
 		char * last;
diff --git a/regexp.h b/regexp.h
index eba5974..d32e669 100644
--- a/regexp.h
+++ b/regexp.h
@@ -43,6 +43,7 @@ struct reg_substr_datum {
     SSize_t end_shift;  /* how many fixed chars must end the string */
 };
 struct reg_substr_data {
+    U8      check_ix;   /* data[] index of check substr: 0=anchored, 1=float */
     struct reg_substr_datum data[3];	/* Actual array */
 };