One more sharp s case found by Jeffrey.

author Jarkko Hietaniemi <jhi@iki.fi>

Sun, 13 Jan 2002 05:13:03 +0000 (05:13 +0000)

committer Jarkko Hietaniemi <jhi@iki.fi>

Sun, 13 Jan 2002 05:13:03 +0000 (05:13 +0000)
author Jarkko Hietaniemi <jhi@iki.fi>
Sun, 13 Jan 2002 05:13:03 +0000 (05:13 +0000)
committer Jarkko Hietaniemi <jhi@iki.fi>
Sun, 13 Jan 2002 05:13:03 +0000 (05:13 +0000)
diff --git a/regexec.c b/regexec.c

index 3dba4c9..3380ad5 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -2373,8 +2373,20 @@ S_regmatch(pTHX_ regnode *prog)
                 char *e = PL_regeol;
  
                 if (ibcmp_utf8(s, 0,  ln, do_utf8,
-                              l, &e, 0,  UTF))
-                    sayNO;
+                              l, &e, 0,  UTF)) {
+                    /* One more case for the sharp s:
+                     * pack("U0U*", 0xDF) =~ /ss/i,
+                     * the 0xC3 0x9F are the UTF-8
+                     * byte sequence for the U+00DF. */
+                    if (!(do_utf8 &&
+                          toLOWER(s[0]) == 's' &&
+                          ln >= 2 &&
+                          toLOWER(s[1]) == 's' &&
+                          (U8)l[0] == 0xC3 &&
+                          e - l >= 2 &&
+                          (U8)l[1] == 0x9F))
+                         sayNO;
+               }
                 locinput = e;
                 nextchr = UCHARAT(locinput);
                 break;
diff --git a/t/op/pat.t b/t/op/pat.t

index 8de9b82..39bdbee 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
  
  $| = 1;
  
-print "1..852\n";
+print "1..853\n";
  
  BEGIN {
      chdir 't' if -d 't';
@@ -2607,15 +2607,21 @@ print "# some Unicode properties\n";
  
      print "SS" =~
         /[\N{LATIN SMALL LETTER SHARP S}]/i ? "ok 842\n" : "not ok 842\n";
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~ /ss/i ?
+       "ok 843\n" : "not ok 843\n";
+
+    print "\N{LATIN SMALL LETTER SHARP S}" =~ /SS/i ?
+       "ok 844\n" : "not ok 844\n";
  }
  
  {
      print "# more whitespace: U+0085, U+2028, U+2029\n";
  
      # U+0085 needs to be forced to be Unicode, the \x{100} does that.
-    print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 843\n" : "not ok 843\n";
-    print "<\x{2028}>" =~ /<\s>/ ? "ok 844\n" : "not ok 844\n";
-    print "<\x{2029}>" =~ /<\s>/ ? "ok 845\n" : "not ok 845\n";
+    print "<\x{100}\x{0085}>" =~ /<\x{100}\s>/ ? "ok 845\n" : "not ok 845\n";
+    print "<\x{2028}>" =~ /<\s>/ ? "ok 846\n" : "not ok 846\n";
+    print "<\x{2029}>" =~ /<\s>/ ? "ok 847\n" : "not ok 847\n";
  }
  
  {
@@ -2626,7 +2632,7 @@ print "# some Unicode properties\n";
      # This is not expected to match: the point is that
      # neither should we get "Malformed UTF-8" warnings.
      print $s =~ /\G(.+?)\n/gcs ?
-       "not ok 846\n" : "ok 846\n";
+       "not ok 848\n" : "ok 848\n";
  
      my @c;
  
@@ -2634,7 +2640,7 @@ print "# some Unicode properties\n";
         push @c, $1;
      }
  
-    print join("", @c) eq $s ? "ok 847\n" : "not ok 847\n";
+    print join("", @c) eq $s ? "ok 849\n" : "not ok 849\n";
  
      my $t1 = "Q003\n\n\x{e4}\x{f6}\n\nQ004\n\n\x{e7}"; # test only chars < 256
      my $r1 = "";
@@ -2648,14 +2654,14 @@ print "# some Unicode properties\n";
         $r2 .= $1 . $2;
      }
      $r2 =~ s/\x{100}//;
-    print $r1 eq $r2 ? "ok 848\n" : "not ok 848\n";
+    print $r1 eq $r2 ? "ok 850\n" : "not ok 850\n";
  }
  
  {
      print "# Unicode lookbehind\n";
  
-    print "A\x{100}B"        =~ /(?<=A.)B/  ? "ok 849\n" : "not ok 849\n";
-    print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 850\n" : "not ok 850\n";
-    print "\x{400}AB"        =~ /(?<=\x{400}.)B/ ? "ok 851\n" : "not ok 851\n";
-    print "\x{500\x{600}}B"  =~ /(?<=\x{500}.)B/ ? "ok 852\n" : "not ok 852\n";
+    print "A\x{100}B"        =~ /(?<=A.)B/  ? "ok 851\n" : "not ok 851\n";
+    print "A\x{200}\x{300}B" =~ /(?<=A..)B/ ? "ok 852\n" : "not ok 852\n";
+    print "\x{400}AB"        =~ /(?<=\x{400}.)B/ ? "ok 853\n" : "not ok 853\n";
+    print "\x{500\x{600}}B"  =~ /(?<=\x{500}.)B/ ? "ok 854\n" : "not ok 854\n";
  }
author	Jarkko Hietaniemi <jhi@iki.fi>
	Sun, 13 Jan 2002 05:13:03 +0000 (05:13 +0000)
committer	Jarkko Hietaniemi <jhi@iki.fi>
	Sun, 13 Jan 2002 05:13:03 +0000 (05:13 +0000)
regexec.c		patch \| blob \| history
t/op/pat.t		patch \| blob \| history