The Malformed UTF-8 Heisenbug seen by Merijn and NickC
author Jarkko Hietaniemi <jhi@iki.fi>
Fri, 1 Feb 2002 05:17:59 +0000 (05:17 +0000)
committer Jarkko Hietaniemi <jhi@iki.fi>
Fri, 1 Feb 2002 05:17:59 +0000 (05:17 +0000)
I hit it on Tru64 + ithreads, but only without -g. It took
some debugging by printf (which was no fun either, since
adding some debug printfs hid the error).

p4raw-id: //depot/perl@14511

regexec.c
utf8.c

index 6512986..70d401d 100644 (file)
--- a/regexec.c
+++ b/regexec.c
@@ -1043,7 +1043,7 @@ S_find_byclass(pTHX_ regexp * prog, regnode *c, char *s, char *strend, char *sta
                             if ( f != c
                                  && (f == c1 || f == c2)
                                  && (ln == foldlen ||
-                                     !ibcmp_utf8((char *)foldbuf,
+                                     !ibcmp_utf8((char *) foldbuf,
                                                  (char **)0, foldlen, do_utf8,
                                                  m,
                                                  (char **)0, ln,      UTF))
diff --git a/utf8.c b/utf8.c
index cf3f48d..60933cd 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -1285,7 +1285,7 @@ to the hash is by Perl_to_utf8_case().
  */
 
 UV
-Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal, char *special)
+Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp, char *normal, char *special)
 {
     UV uv;
 
@@ -1305,6 +1305,7 @@ Perl_to_utf8_case(pTHX_ U8 *p, U8* ustrp, STRLEN *lenp, SV **swashp,char *normal
              SV *val = HeVAL(he);
              char *s = SvPV(val, *lenp);
              U8 c = *(U8*)s;
+
              if (*lenp > 1 || UNI_IS_INVARIANT(c))
                   Copy(s, ustrp, *lenp, U8);
              else {
@@ -1807,6 +1808,9 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const
      if ((e1 == 0 && f1 == 0) || (e2 == 0 && f2 == 0) || (f1 == 0 && f2 == 0))
          return 1; /* mismatch; possible infinite loop or false positive */
 
+     if (!u1 || !u2)
+         natbuf[1] = 0; /* Need to terminate the buffer. */
+
      while ((e1 == 0 || p1 < e1) &&
            (f1 == 0 || p1 < f1) &&
            (e2 == 0 || p2 < e2) &&