From: Father Chrysostomos <sprout@cpan.org>
Date: Sun, 29 May 2011 21:21:06 +0000 (-0700)
Subject: [perl #91834] utf8::decode does not respect copy-on-write
X-Git-Tag: accepted/trunk/20130322.191538~4007
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=40f11004fb3b5fa1cd207a20090df837d721b736;p=platform%2Fupstream%2Fperl.git

[perl #91834] utf8::decode does not respect copy-on-write

utf8::decode was not respecting copy-on-write, but simply modify-
ing the PV (the scalar’s string buffer) in place, causing problems
with hashes:

my $name = "\x{c3}\x{b3}";
my ($k1) = keys %{ { $name=>undef } };
my $k2 = $name;
utf8::decode($k1);
utf8::decode($k2);
print "k1 eq k2 = '", $k1 eq $k2, "'\n";
my $h = { $k1 => 1, $k2 => 2 };
print "{k1} '", $h->{$k1}, "'\n";
print "{k2} '", $h->{$k2}, "'\n";

This example (from the RT ticket) shows that there were two hash ele-
ments with the same key.

As of this commit, the hash only has one element.
---

diff --git a/lib/utf8.t b/lib/utf8.t
index ae81ccd..b13bb53 100644
--- a/lib/utf8.t
+++ b/lib/utf8.t
@@ -427,6 +427,18 @@ SKIP: {
 }
 
 {
+    # Make sure utf8::decode respects copy-on-write [perl #91834].
+    # Hash keys are the easiest way to test this.
+    my $name = "\x{c3}\x{b3}";
+    my ($k1) = keys %{ { $name=>undef } };
+    my $k2 = $name;
+    utf8::decode($k1);
+    utf8::decode($k2);
+    my $h = { $k1 => 1, $k2 => 2 };
+    is join('', keys $h), $k2, 'utf8::decode respects copy-on-write';
+}
+
+{
     my $a = "456\xb6";
     utf8::upgrade($a);
 
diff --git a/universal.c b/universal.c
index 145d860..d012c0f 100644
--- a/universal.c
+++ b/universal.c
@@ -695,6 +695,7 @@ XS(XS_utf8_decode)
 	croak_xs_usage(cv, "sv");
     else {
 	SV * const sv = ST(0);
+	if (SvIsCOW(sv)) sv_force_normal(sv);
 	const bool RETVAL = sv_utf8_decode(sv);
 	ST(0) = boolSV(RETVAL);
 	sv_2mortal(ST(0));