utf8 pos cache: always keep most recent value

author David Mitchell <davem@iabyn.com>

Tue, 21 Aug 2012 09:55:00 +0000 (10:55 +0100)

committer David Mitchell <davem@iabyn.com>

Tue, 21 Aug 2012 09:55:00 +0000 (10:55 +0100)
author David Mitchell <davem@iabyn.com>
Tue, 21 Aug 2012 09:55:00 +0000 (10:55 +0100)
committer David Mitchell <davem@iabyn.com>
Tue, 21 Aug 2012 09:55:00 +0000 (10:55 +0100)
diff --git a/sv.c b/sv.c

index 2004224..904f4bd 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -6928,7 +6928,6 @@ S_utf8_mg_pos_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, const STRLEN b
            calculation in bytes simply because we always know the byte
            length.  squareroot has the same ordering as the positive value,
            so don't bother with the actual square root.  */
-       const float existing = THREEWAY_SQUARE(0, cache[3], cache[1], blen);
         if (byte > cache[1]) {
             /* New position is after the existing pair of pairs.  */
             const float keep_earlier
@@ -6937,18 +6936,14 @@ S_utf8_mg_pos_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, const STRLEN b
                 = THREEWAY_SQUARE(0, cache[1], byte, blen);
  
             if (keep_later < keep_earlier) {
-               if (keep_later < existing) {
-                   cache[2] = cache[0];
-                   cache[3] = cache[1];
-                   cache[0] = utf8;
-                   cache[1] = byte;
-               }
+                cache[2] = cache[0];
+                cache[3] = cache[1];
+                cache[0] = utf8;
+                cache[1] = byte;
             }
             else {
-               if (keep_earlier < existing) {
-                   cache[0] = utf8;
-                   cache[1] = byte;
-               }
+                cache[0] = utf8;
+                cache[1] = byte;
             }
         }
         else if (byte > cache[3]) {
@@ -6959,16 +6954,12 @@ S_utf8_mg_pos_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, const STRLEN b
                 = THREEWAY_SQUARE(0, byte, cache[1], blen);
  
             if (keep_later < keep_earlier) {
-               if (keep_later < existing) {
-                   cache[2] = utf8;
-                   cache[3] = byte;
-               }
+                cache[2] = utf8;
+                cache[3] = byte;
             }
             else {
-               if (keep_earlier < existing) {
-                   cache[0] = utf8;
-                   cache[1] = byte;
-               }
+                cache[0] = utf8;
+                cache[1] = byte;
             }
         }
         else {
@@ -6979,18 +6970,14 @@ S_utf8_mg_pos_cache_update(pTHX_ SV *const sv, MAGIC **const mgp, const STRLEN b
                 = THREEWAY_SQUARE(0, byte, cache[1], blen);
  
             if (keep_later < keep_earlier) {
-               if (keep_later < existing) {
-                   cache[2] = utf8;
-                   cache[3] = byte;
-               }
+                cache[2] = utf8;
+                cache[3] = byte;
             }
             else {
-               if (keep_earlier < existing) {
-                   cache[0] = cache[2];
-                   cache[1] = cache[3];
-                   cache[2] = utf8;
-                   cache[3] = byte;
-               }
+                cache[0] = cache[2];
+                cache[1] = cache[3];
+                cache[2] = utf8;
+                cache[3] = byte;
             }
         }
      }
diff --git a/t/op/utf8cache.t b/t/op/utf8cache.t

index 7ac0011..83ad4e8 100644 (file)
--- a/t/op/utf8cache.t
+++ b/t/op/utf8cache.t
@@ -10,7 +10,7 @@ BEGIN {
  
  use strict;
  
-plan(tests => 1);
+plan(tests => 2);
  
  my $pid = open CHILD, '-|';
  die "kablam: $!\n" unless defined $pid;
@@ -35,3 +35,15 @@ my $utf8magic = qr{ ^ \s+ MAGIC \s = .* \n
                        \s+ MG_LEN \s = .* \n }xm;
  
  unlike($_, qr{ $utf8magic $utf8magic }x);
+
+# With bad caching, this code used to go quadratic and take 10s of minutes.
+# The 'test' in this case is simply that it doesn't hang.
+
+{
+    local ${^UTF8CACHE} = 1; # enable cache, disable debugging
+    my $x = "\x{100}" x 1000000;
+    while ($x =~ /./g) {
+       my $p = pos($x);
+    }
+    pass("quadratic pos");
+}
author	David Mitchell <davem@iabyn.com>
	Tue, 21 Aug 2012 09:55:00 +0000 (10:55 +0100)
committer	David Mitchell <davem@iabyn.com>
	Tue, 21 Aug 2012 09:55:00 +0000 (10:55 +0100)
sv.c		patch | blob | history
t/op/utf8cache.t		patch | blob | history