Unnecessary/Lingering UTF8 flag might mess up caseless matching

author Jarkko Hietaniemi <jhi@iki.fi>

Tue, 1 Jan 2002 03:35:01 +0000 (03:35 +0000)

committer Jarkko Hietaniemi <jhi@iki.fi>

Tue, 1 Jan 2002 03:35:01 +0000 (03:35 +0000)
author Jarkko Hietaniemi <jhi@iki.fi>
Tue, 1 Jan 2002 03:35:01 +0000 (03:35 +0000)
committer Jarkko Hietaniemi <jhi@iki.fi>
Tue, 1 Jan 2002 03:35:01 +0000 (03:35 +0000)
diff --git a/regexec.c b/regexec.c

index 3aed549..0f738d1 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -995,7 +995,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                         c = utf8_to_uvchr((U8*)s, &len);
                         if ( c == c1
                              && (ln == len ||
-                                !ibcmp_utf8(s, do_utf8, strend - s,
+                                !ibcmp_utf8(s, do_utf8,
+                                            strend - s > ln ? ln : strend - s,
                                              m, UTF, ln))
                              && (norun || regtry(prog, s)) )
                             goto got_it;
@@ -1007,7 +1008,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                                   && (f == c1 || f == c2)
                                   && (ln == foldlen ||
                                       !ibcmp_utf8((char *)foldbuf,
-                                                 do_utf8, foldlen,
+                                                 do_utf8,
+                                                 foldlen > ln ? ln : foldlen,
                                                   m, UTF, ln))
                                   && (norun || regtry(prog, s)) )
                                   goto got_it;
@@ -1032,7 +1034,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
  
                         if ( (c == c1 || c == c2)
                              && (ln == len ||
-                                !ibcmp_utf8(s, do_utf8, strend - s,
+                                !ibcmp_utf8(s, do_utf8,
+                                            strend - s > ln ? ln : strend - s,
                                              m, UTF, ln))
                              && (norun || regtry(prog, s)) )
                             goto got_it;
@@ -1044,7 +1047,8 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                                   && (f == c1 || f == c2)
                                   && (ln == foldlen ||
                                       !ibcmp_utf8((char *)foldbuf,
-                                                 do_utf8, foldlen,
+                                                 do_utf8,
+                                                 foldlen > ln ? ln : foldlen,
                                                   m, UTF, ln))
                                   && (norun || regtry(prog, s)) )
                                   goto got_it;
diff --git a/t/op/pat.t b/t/op/pat.t

index 0eda689..b797bdf 100755 (executable)
--- a/t/op/pat.t
+++ b/t/op/pat.t
@@ -6,7 +6,7 @@
  
  $| = 1;
  
-print "1..825\n";
+print "1..828\n";
  
  BEGIN {
      chdir 't' if -d 't';
@@ -2520,3 +2520,41 @@ print "# some Unicode properties\n";
         $& eq "franc\N{COMBINING CEDILLA}ais" ?
         "ok 825\n" : "not ok 825\n";
  }
+
+{
+    print "# Does lingering (and useless) UTF8 flag mess up /i matching?\n";
+
+    {
+       my $regex  = "ABcde";
+       my $string = "abcDE\x{100}";
+       chop($string);
+       if ($string =~ m/$regex/i) {
+           print "ok 826\n";
+       } else {
+           print "not ok 826\n";
+       }
+    }
+
+    {
+       my $regex  = "ABcde\x{100}";
+       my $string = "abcDE";
+       chop($regex);
+       if ($string =~ m/$regex/i) {
+           print "ok 827\n";
+       } else {
+           print "not ok 827\n";
+       }
+    }
+
+    {
+       my $regex  = "ABcde\x{100}";
+       my $string = "abcDE\x{100}";
+       chop($regex);
+       chop($string);
+       if ($string =~ m/$regex/i) {
+           print "ok 828\n";
+       } else {
+           print "not ok 828\n";
+       }
+    }
+}
diff --git a/utf8.c b/utf8.c

index 54ab529..0051796 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1672,9 +1672,9 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2
       register U8 *be = b + len2;
       STRLEN la, lb;
       UV ca, cb;
-     STRLEN ulen1, ulen2;
-     U8 tmpbuf1[UTF8_MAXLEN_FOLD+1];
-     U8 tmpbuf2[UTF8_MAXLEN_FOLD+1];
+     STRLEN foldlen1, foldlen2;
+     U8 foldbuf1[UTF8_MAXLEN_FOLD+1];
+     U8 foldbuf2[UTF8_MAXLEN_FOLD+1];
       
       while (a < ae && b < be) {
           if (u1) {
@@ -1682,7 +1682,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2
                     break;
                ca = utf8_to_uvchr((U8*)a, &la);
           } else {
-              ca = *a;
+              ca = NATIVE_TO_UNI(*a);
                la = 1;
           }
           if (u2) {
@@ -1690,21 +1690,17 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, bool u1, register I32 len1, const char *s2
                     break;
                cb = utf8_to_uvchr((U8*)b, &lb);
           } else {
-              cb = *b;
+              cb = NATIVE_TO_UNI(*b);
                lb = 1;
           }
           if (ca != cb) {
-              if (u1)
-                   to_uni_fold(NATIVE_TO_UNI(ca), tmpbuf1, &ulen1);
-              else
-                   ulen1 = 1;
-              if (u2)
-                   to_uni_fold(NATIVE_TO_UNI(cb), tmpbuf2, &ulen2);
-              else
-                   ulen2 = 1;
-              if (ulen1 != ulen2
-                  || (ca < 256 && cb < 256 && ca != PL_fold[cb])
-                  || memNE((char *)tmpbuf1, (char *)tmpbuf2, ulen1))
+              to_uni_fold(ca, foldbuf1, &foldlen1);
+              ca = utf8_to_uvchr(foldbuf1, 0);
+              
+              to_uni_fold(cb, foldbuf2, &foldlen2);
+              cb = utf8_to_uvchr(foldbuf2, 0);
+
+              if (ca != cb || foldlen1 != foldlen2)
                     return 1; /* mismatch */
           }
           a += la;
author	Jarkko Hietaniemi <jhi@iki.fi>
	Tue, 1 Jan 2002 03:35:01 +0000 (03:35 +0000)
committer	Jarkko Hietaniemi <jhi@iki.fi>
	Tue, 1 Jan 2002 03:35:01 +0000 (03:35 +0000)
regexec.c		patch | blob | history
t/op/pat.t		patch | blob | history
utf8.c		patch | blob | history