From 809e8e66a971d59a948ca995e08b228927d82c66 Mon Sep 17 00:00:00 2001 From: Dave Mitchell Date: Fri, 8 Jul 2005 01:43:24 +0000 Subject: [PATCH] [perl #36207] UTF8/Latin 1/i regexp "Malformed character" warning $utf8 =~ /latin/i didn't match. Also added TODO for $latin =~ /utf8/i which also fails p4raw-id: //depot/perl@25095 --- t/op/pat.t | 14 ++++++++++++-- utf8.c | 4 ++-- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/t/op/pat.t b/t/op/pat.t index ce5d7a2..795177b 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -6,7 +6,7 @@ $| = 1; -print "1..1178\n"; +print "1..1180\n"; BEGIN { chdir 't' if -d 't'; @@ -3364,4 +3364,14 @@ ok(("foba ba$s" =~ qr/(foo|BaSS|bar)/i) my $psycho=join "|",@normal,map chr $_,255..20000; ok(('these'=~/($psycho)/) && $1 eq 'these','Pyscho'); } -# last test 1178 + +# [perl #36207] mixed utf8 / latin-1 and case folding + +{ + my $u = "\xe9\x{100}"; + chop $u; + ok($u =~ /\xe9/i, "utf8/latin"); + ok("\xe9" =~ /$u/i, "# TODO latin/utf8"); +} + +# last test 1180 diff --git a/utf8.c b/utf8.c index b26d5a6..cdf24ce 100644 --- a/utf8.c +++ b/utf8.c @@ -2037,7 +2037,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const if (u1) to_utf8_fold(p1, foldbuf1, &foldlen1); else { - natbuf[0] = *p1; + uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p1))); to_utf8_fold(natbuf, foldbuf1, &foldlen1); } q1 = foldbuf1; @@ -2047,7 +2047,7 @@ Perl_ibcmp_utf8(pTHX_ const char *s1, char **pe1, register UV l1, bool u1, const if (u2) to_utf8_fold(p2, foldbuf2, &foldlen2); else { - natbuf[0] = *p2; + uvuni_to_utf8(natbuf, (UV) NATIVE_TO_UNI(((UV)*p2))); to_utf8_fold(natbuf, foldbuf2, &foldlen2); } q2 = foldbuf2; -- 2.7.4