From: Father Chrysostomos Date: Sun, 29 May 2011 21:21:06 +0000 (-0700) Subject: [perl #91834] utf8::decode does not respect copy-on-write X-Git-Tag: accepted/trunk/20130322.191538~4007 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=40f11004fb3b5fa1cd207a20090df837d721b736;p=platform%2Fupstream%2Fperl.git [perl #91834] utf8::decode does not respect copy-on-write utf8::decode was not respecting copy-on-write, but simply modify- ing the PV (the scalar’s string buffer) in place, causing problems with hashes: my $name = "\x{c3}\x{b3}"; my ($k1) = keys %{ { $name=>undef } }; my $k2 = $name; utf8::decode($k1); utf8::decode($k2); print "k1 eq k2 = '", $k1 eq $k2, "'\n"; my $h = { $k1 => 1, $k2 => 2 }; print "{k1} '", $h->{$k1}, "'\n"; print "{k2} '", $h->{$k2}, "'\n"; This example (from the RT ticket) shows that there were two hash ele- ments with the same key. As of this commit, the hash only has one element. --- diff --git a/lib/utf8.t b/lib/utf8.t index ae81ccd..b13bb53 100644 --- a/lib/utf8.t +++ b/lib/utf8.t @@ -427,6 +427,18 @@ SKIP: { } { + # Make sure utf8::decode respects copy-on-write [perl #91834]. + # Hash keys are the easiest way to test this. + my $name = "\x{c3}\x{b3}"; + my ($k1) = keys %{ { $name=>undef } }; + my $k2 = $name; + utf8::decode($k1); + utf8::decode($k2); + my $h = { $k1 => 1, $k2 => 2 }; + is join('', keys $h), $k2, 'utf8::decode respects copy-on-write'; +} + +{ my $a = "456\xb6"; utf8::upgrade($a); diff --git a/universal.c b/universal.c index 145d860..d012c0f 100644 --- a/universal.c +++ b/universal.c @@ -695,6 +695,7 @@ XS(XS_utf8_decode) croak_xs_usage(cv, "sv"); else { SV * const sv = ST(0); + if (SvIsCOW(sv)) sv_force_normal(sv); const bool RETVAL = sv_utf8_decode(sv); ST(0) = boolSV(RETVAL); sv_2mortal(ST(0));