re_intuit_start(): fix another utf8 slowdown

author David Mitchell <davem@iabyn.com>

Thu, 16 Jan 2014 16:00:41 +0000 (16:00 +0000)

committer David Mitchell <davem@iabyn.com>

Fri, 7 Feb 2014 22:39:36 +0000 (22:39 +0000)
author David Mitchell <davem@iabyn.com>
Thu, 16 Jan 2014 16:00:41 +0000 (16:00 +0000)
committer David Mitchell <davem@iabyn.com>
Fri, 7 Feb 2014 22:39:36 +0000 (22:39 +0000)
diff --git a/regexec.c b/regexec.c

index 627b16f..5736625 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -129,6 +129,12 @@ static const char* const non_utf8_target_but_utf8_required
  #define HOP3(pos,off,lim) (reginfo->is_utf8_target  ? reghop3((U8*)(pos), off, (U8*)(lim)) : (U8*)(pos + off))
  #define HOP3c(pos,off,lim) ((char*)HOP3(pos,off,lim))
  
+/* like HOP3, but limits the result to <= lim even for the non-utf8 case.
+ * off must be >=0; args should be vars rather than expressions */
+#define HOP3lim(pos,off,lim) (reginfo->is_utf8_target \
+    ? reghop3((U8*)(pos), off, (U8*)(lim)) \
+    : (U8*)((pos + off) > lim ? lim : (pos + off)))
+
  
  #define NEXTCHR_EOS -10 /* nextchr has fallen off the end */
  #define NEXTCHR_IS_EOS (nextchr < 0)
@@ -1004,10 +1010,16 @@ Perl_re_intuit_start(pTHX_
               * <= float_max_offset chars from the regex origin (t).
               * If this value is less than last1, use it instead.
               */
+            assert(t <= last1);
              last = 
-                CHR_DIST((U8*)last1, (U8*)t) > prog->float_max_offset
-                    ? HOP3c(t, prog->float_max_offset, strend)
-                    : last1;
+                /* this condition handles the offset==infinity case, and
+                 * is a short-cut otherwise. Although it's comparing a
+                 * byte offset to a char length, it does so in a safe way,
+                 * meaning it errs towards doing the accurate HOP3 rather
+                 * than just using last1 */
+                (last1 - t) < prog->float_max_offset
+                    ? last1
+                    : (char*)HOP3lim(t, prog->float_max_offset, last1);
  
             s = HOP3c(t, prog->float_min_offset, strend);
             if (s < other_last)
diff --git a/t/re/pat.t b/t/re/pat.t

index a052ee7..9296808 100644 (file)
--- a/t/re/pat.t
+++ b/t/re/pat.t
@@ -20,7 +20,7 @@ BEGIN {
      require './test.pl';
  }
  
-plan tests => 714;  # Update this when adding/deleting tests.
+plan tests => 715;  # Update this when adding/deleting tests.
  
  run_tests() unless caller;
  
@@ -1525,6 +1525,9 @@ EOP
  
          $s=~ /^a{1,2}x/ for  1..10_000;
          pass("RT#120692 a{1,2} mustn't run slowly");
+
+        $s=~ /ab.{1,2}x/;
+        pass("RT#120692 ab.{1,2} mustn't run slowly");
      }
  
      # These are based on looking at the code in regcomp.c
author	David Mitchell <davem@iabyn.com>
	Thu, 16 Jan 2014 16:00:41 +0000 (16:00 +0000)
committer	David Mitchell <davem@iabyn.com>
	Fri, 7 Feb 2014 22:39:36 +0000 (22:39 +0000)
regexec.c		patch | blob | history
t/re/pat.t		patch | blob | history