From fc9668ae737de7993bbd27aa5c3eaddf41c5c885 Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Fri, 2 May 2014 13:51:00 +0100 Subject: [PATCH] vec(): downgrade before accessing string buffer This code: #!perl -l $x = substr "\x{100}\xff\xfe", 1; print vec($x, 0, 8); print vec($x, 0, 8); In 5.18 and earlier prints 255 255 With blead it prints: 195 255 This is due to the fact that it does SvPV() first to get the string buffer, then calls sv_utf8_downgrade(). With COW, the PVX of the SV may no longer equal the value earlier returned by SvPV(), but vec() continues to use the old pointer. This bug has always been present, but COW made it more noticeable. The fix is to just redo the SvPV() after a downgrade. --- doop.c | 14 +++++++++----- t/op/vec.t | 18 +++++++++++++++++- 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/doop.c b/doop.c index 5031af8..96185bd 100644 --- a/doop.c +++ b/doop.c @@ -761,13 +761,14 @@ Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) { dVAR; STRLEN srclen, len, uoffset, bitoffs = 0; - const unsigned char *s = (const unsigned char *) SvPV_flags_const(sv, srclen, - SV_GMAGIC | ((PL_op->op_flags & OPf_MOD || LVRET) - ? SV_UNDEF_RETURNS_NULL : 0)); + const I32 svpv_flags = ((PL_op->op_flags & OPf_MOD || LVRET) + ? SV_UNDEF_RETURNS_NULL : 0); + unsigned char *s = (unsigned char *) + SvPV_flags(sv, srclen, (svpv_flags|SV_GMAGIC)); UV retnum = 0; if (!s) { - s = (const unsigned char *)""; + s = (unsigned char *)""; } PERL_ARGS_ASSERT_DO_VECGET; @@ -777,8 +778,11 @@ Perl_do_vecget(pTHX_ SV *sv, SSize_t offset, int size) if (size < 1 || (size & (size-1))) /* size < 1 or not a power of two */ Perl_croak(aTHX_ "Illegal number of bits in vec"); - if (SvUTF8(sv)) + if (SvUTF8(sv)) { (void) Perl_sv_utf8_downgrade(aTHX_ sv, TRUE); + /* PVX may have changed */ + s = (unsigned char *) SvPV_flags(sv, srclen, svpv_flags); + } if (size < 8) { bitoffs = ((offset%8)*size)%8; diff --git a/t/op/vec.t b/t/op/vec.t index b4afcf9..30badb0 100644 --- a/t/op/vec.t +++ b/t/op/vec.t @@ -6,7 +6,7 @@ BEGIN { } require "test.pl"; -plan( tests => 33 ); +plan( tests => 35 ); my $Is_EBCDIC = (ord('A') == 193) ? 1 : 0; @@ -112,3 +112,19 @@ use constant roref => \1; eval { for (roref) { vec($_,0,1) = 1 } }; like($@, qr/^Modification of a read-only value attempted at /, 'err msg when modifying read-only refs'); + + +{ + # downgradeable utf8 strings should be downgraded before accessing + # the byte string. + # See the p5p thread with Message-ID: + # + + + my $x = substr "\x{100}\xff\xfe", 1; # a utf8 string with all ords < 256 + my $v; + $v = vec($x, 0, 8); + is($v, 255, "downgraded utf8 try 1"); + $v = vec($x, 0, 8); + is($v, 255, "downgraded utf8 try 2"); +} -- 2.7.4