regex substrs: record index of check substr

author David Mitchell <davem@iabyn.com>

Sun, 26 Jan 2014 14:19:47 +0000 (14:19 +0000)

committer David Mitchell <davem@iabyn.com>

Fri, 7 Feb 2014 22:39:37 +0000 (22:39 +0000)
author David Mitchell <davem@iabyn.com>
Sun, 26 Jan 2014 14:19:47 +0000 (14:19 +0000)
committer David Mitchell <davem@iabyn.com>
Fri, 7 Feb 2014 22:39:37 +0000 (22:39 +0000)
diff --git a/regcomp.c b/regcomp.c

index a657252..a82171a 100644 (file)
--- a/regcomp.c
+++ b/regcomp.c
@@ -6953,6 +6953,7 @@ reStudy:
          /* A temporary algorithm prefers floated substr to fixed one to dig
           * more info. */
         if (longest_fixed_length > longest_float_length) {
+           r->substrs->check_ix = 0;
             r->check_end_shift = r->anchored_end_shift;
             r->check_substr = r->anchored_substr;
             r->check_utf8 = r->anchored_utf8;
@@ -6961,6 +6962,7 @@ reStudy:
                  r->intflags |= PREGf_NOSCAN;
         }
         else {
+           r->substrs->check_ix = 1;
             r->check_end_shift = r->float_end_shift;
             r->check_substr = r->float_substr;
             r->check_utf8 = r->float_utf8;
@@ -6972,6 +6974,8 @@ reStudy:
             if (SvTAIL(r->check_substr ? r->check_substr : r->check_utf8))
                 r->extflags |= RXf_INTUIT_TAIL;
         }
+        r->substrs->data[0].max_offset = r->substrs->data[0].min_offset;
+
         /* XXX Unneeded? dmq (shouldn't as this is handled elsewhere)
         if ( (STRLEN)minlen < longest_float_length )
              minlen= longest_float_length;
diff --git a/regexec.c b/regexec.c

index 140bf8b..e1b4e8b 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -644,6 +644,7 @@ Perl_re_intuit_start(pTHX_
      SV *check;
      char *t;
      const bool utf8_target = (sv && SvUTF8(sv)) ? 1 : 0; /* if no sv we have to assume bytes */
+    U8   other_ix = 1 - prog->substrs->check_ix;
      bool ml_anch = 0;
      char *other_last = NULL;   /* other substr already checked this high */
      char *check_at = NULL;             /* check substr found at this pos */
@@ -899,14 +900,15 @@ Perl_re_intuit_start(pTHX_
         Probably it is right to do no SCREAM here...
       */
  
-    if (utf8_target ? (prog->float_utf8 && prog->anchored_utf8)
-                : (prog->float_substr && prog->anchored_substr)) 
+    if (utf8_target ? prog->substrs->data[other_ix].utf8_substr
+                    : prog->substrs->data[other_ix].substr)
      {
         /* Take into account the "other" substring. */
         /* XXXX May be hopelessly wrong for UTF... */
         if (!other_last)
             other_last = strpos;
-       if (check == (utf8_target ? prog->float_utf8 : prog->float_substr)) {
+
+       if (prog->substrs->check_ix) {
           do_other_anchored:
             {
                 char * last;
diff --git a/regexp.h b/regexp.h

index eba5974..d32e669 100644 (file)
--- a/regexp.h
+++ b/regexp.h
@@ -43,6 +43,7 @@ struct reg_substr_datum {
      SSize_t end_shift;  /* how many fixed chars must end the string */
  };
  struct reg_substr_data {
+    U8      check_ix;   /* index into data[] of check substr */
      struct reg_substr_datum data[3];   /* Actual array */
  };
author	David Mitchell <davem@iabyn.com>
	Sun, 26 Jan 2014 14:19:47 +0000 (14:19 +0000)
committer	David Mitchell <davem@iabyn.com>
	Fri, 7 Feb 2014 22:39:37 +0000 (22:39 +0000)
regcomp.c		patch | blob | history
regexec.c		patch | blob | history
regexp.h		patch | blob | history