From: David Mitchell Date: Tue, 24 May 2011 16:08:51 +0000 (+0100) Subject: RT #91032: formline: bugs with non-string formats X-Git-Tag: accepted/trunk/20130322.191538~4028 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3808a68376b13a13745f22f7454ecf5e673ad24f;p=platform%2Fupstream%2Fperl.git RT #91032: formline: bugs with non-string formats When the format SV used by formline isn't a simple POK (such as ties, overloads, or stringified refs), many many things go wrong, and SEGVs ensue. Originally, pp_formline forced the SV to a PV, and then assumed it could rely on the resulting SvPVX value. Recent commits fixed this to skip the force (good), but then broke things such as: * in the absence of POK or pPOK, $^A was grown by 0 bytes rather than the length of the format, so the buffer overran; * the compiled format stored indexes into the original format string to refer to chunks of content text and the like. If there's no real SvPVX around, that's bad. * Stuff like tie and overload could return different format strings on each get, but the format would not be re-compiled (but would index into the new string anyway) Also, the format compiler would convert strings like '~~' into blanks in the original format SV. The easiest way to fix all these is to save a copy of the original string at the time it is compiled. This can conveniently be stored in the mg_obj slot of the fm magic (the compiled format already goes in mg_ptr). This way we're always guaranteed to have an unadulterated copy of the string to mess with. Also, the ~~ self-modification now happens to the copy rather than the original. Now each time formline is called, we also compare the current value of the SV with the stored copy, and if it's changed (e.g. tie with a FETCH that returns different values each time), then we recompile. Note that the recompile test is currently defeated by the ~~ modification, so re-compiles unnecessarily (but safely) in that case. A fix for that is coming next. --- diff --git a/pp_ctl.c b/pp_ctl.c index 9ce16c1..e136955 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -523,6 +523,7 @@ PP(pp_formline) { dVAR; dSP; dMARK; dORIGMARK; register SV * const tmpForm = *++MARK; + SV *formsv; register U32 *fpc; register char *t; const char *f; @@ -538,35 +539,30 @@ PP(pp_formline) NV value; bool gotsome = FALSE; STRLEN len; - const STRLEN fudge = SvPOKp(tmpForm) - ? (SvCUR(tmpForm) * (IN_BYTES ? 1 : 3) + 1) : 0; + STRLEN fudge; bool item_is_utf8 = FALSE; bool targ_is_utf8 = FALSE; SV * nsv = NULL; const char *fmt; MAGIC *mg = NULL; - if (SvTYPE(tmpForm) >= SVt_PVMG) { - /* This might, of course, still return NULL. */ - mg = mg_find(tmpForm, PERL_MAGIC_fm); - } else { - sv_upgrade(tmpForm, SVt_PVMG); - } + mg = doparseform(tmpForm); - if(!mg) { - mg = doparseform(tmpForm); - assert(mg); - } fpc = (U32*)mg->mg_ptr; + /* the actual string the format was compiled from. + * with overload etc, this may not match tmpForm */ + formsv = mg->mg_obj; + SvPV_force(PL_formtarget, len); - if (SvTAINTED(tmpForm)) + if (SvTAINTED(tmpForm) || SvTAINTED(formsv)) SvTAINTED_on(PL_formtarget); if (DO_UTF8(PL_formtarget)) targ_is_utf8 = TRUE; + fudge = (SvCUR(formsv) * (IN_BYTES ? 1 : 3) + 1); t = SvGROW(PL_formtarget, len + fudge + 1); /* XXX SvCUR bad */ t += len; - f = SvPV_const(tmpForm, len); + f = SvPV_const(formsv, len); for (;;) { DEBUG_f( { @@ -607,7 +603,7 @@ PP(pp_formline) case FF_LITERAL: arg = *fpc++; - if (targ_is_utf8 && !SvUTF8(tmpForm)) { + if (targ_is_utf8 && !SvUTF8(formsv)) { SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget)); *t = '\0'; sv_catpvn_utf8_upgrade(PL_formtarget, f, arg, nsv); @@ -615,7 +611,7 @@ PP(pp_formline) f += arg; break; } - if (!targ_is_utf8 && DO_UTF8(tmpForm)) { + if (!targ_is_utf8 && DO_UTF8(formsv)) { SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget)); *t = '\0'; sv_utf8_upgrade_flags_grow(PL_formtarget, SV_GMAGIC, fudge + 1); @@ -4921,7 +4917,7 @@ S_doparseform(pTHX_ SV *sv) { STRLEN len; register char *s = SvPV(sv, len); - register char * const send = s + len; + register char *send; register char *base = NULL; register I32 skipspaces = 0; bool noblank = FALSE; @@ -4934,13 +4930,43 @@ S_doparseform(pTHX_ SV *sv) bool ischop; bool unchopnum = FALSE; int maxops = 12; /* FF_LINEMARK + FF_END + 10 (\0 without preceding \n) */ - MAGIC *mg; + MAGIC *mg = NULL; + SV *sv_copy; PERL_ARGS_ASSERT_DOPARSEFORM; if (len == 0) Perl_croak(aTHX_ "Null picture in formline"); + if (SvTYPE(sv) >= SVt_PVMG) { + /* This might, of course, still return NULL. */ + mg = mg_find(sv, PERL_MAGIC_fm); + } else { + sv_upgrade(sv, SVt_PVMG); + } + + if (mg) { + /* still the same as previously-compiled string? */ + SV *old = mg->mg_obj; + if ( !(!!SvUTF8(old) ^ !!SvUTF8(sv)) + && len == SvCUR(old) + && strnEQ(SvPVX(old), SvPVX(sv), len) + ) + return mg; + + Safefree(mg->mg_ptr); + mg->mg_ptr = NULL; + SvREFCNT_dec(old); + mg->mg_obj = NULL; + } + else + mg = sv_magicext(sv, NULL, PERL_MAGIC_fm, &PL_vtbl_fm, NULL, 0); + + sv_copy = newSVpvn_utf8(s, len, SvUTF8(sv)); + s = SvPV(sv_copy, len); /* work on the copy, not the original */ + send = s + len; + + /* estimate the buffer size needed */ for (base = s; s <= send; s++) { if (*s == '\n' || *s == '@' || *s == '^') @@ -5121,16 +5147,10 @@ S_doparseform(pTHX_ SV *sv) assert (fpc <= fops + maxops); /* ensure our buffer estimate was valid */ arg = fpc - fops; - /* If we pass the length in to sv_magicext() it will copy the buffer for us. - We don't need that, so by setting the length on return we "donate" the - buffer to the magic, avoiding an allocation. We could realloc() the - buffer to the exact size used, but that feels like it's not worth it - (particularly if the rumours are true and some realloc() implementations - don't shrink blocks). However, set the true length used in mg_len so that - mg_dup only allocates and copies what's actually needed. */ - mg = sv_magicext(sv, NULL, PERL_MAGIC_fm, &PL_vtbl_fm, - (const char *const) fops, 0); + mg->mg_ptr = (char *) fops; mg->mg_len = arg * sizeof(U32); + mg->mg_obj = sv_copy; + mg->mg_flags |= MGf_REFCOUNTED; if (unchopnum && repeat) Perl_die(aTHX_ "Repeated format line will never terminate (~~ and @#)"); diff --git a/t/op/write.t b/t/op/write.t index 36cb2ad..d436730 100644 --- a/t/op/write.t +++ b/t/op/write.t @@ -61,7 +61,7 @@ for my $tref ( @NumTests ){ my $bas_tests = 20; # number of tests in section 3 -my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3 + 2 + 2 + 1 + 1; +my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3 + 2 + 6 + 2 + 1 + 1; # number of tests in section 4 my $hmb_tests = 35; @@ -610,6 +610,53 @@ close STDOUT_DUP; *CmT = *{$::{Comment}}{FORMAT}; ok defined *{$::{CmT}}{FORMAT}, "glob assign"; + +# RT #91032: Check that "non-real" strings like tie and overload work, +# especially that they re-compile the pattern on each FETCH, and that +# they don't overrun the buffer + + +{ + package RT91032; + + sub TIESCALAR { bless [] } + my $i = 0; + sub FETCH { $i++; "A$i @> Z\n" } + + use overload '""' => \&FETCH; + + tie my $f, 'RT91032'; + + formline $f, "a"; + formline $f, "bc"; + ::is $^A, "A1 a Z\nA2 bc Z\n", "RT 91032: tied"; + $^A = ''; + + my $g = bless []; # has overloaded stringify + formline $g, "de"; + formline $g, "f"; + ::is $^A, "A3 de Z\nA4 f Z\n", "RT 91032: overloaded"; + $^A = ''; + + my $h = []; + formline $h, "junk1"; + formline $h, "junk2"; + ::is ref($h), 'ARRAY', "RT 91032: array ref still a ref"; + ::like "$h", qr/^ARRAY\(0x[0-9a-f]+\)$/, "RT 91032: array stringifies ok"; + ::is $^A, "$h$h","RT 91032: stringified array"; + $^A = ''; + + # used to overwrite the ~~ in the *original SV with spaces. Naughty! + + my $orig = my $format = "^<<<<< ~~\n"; + my $abc = "abc"; + formline $format, $abc; + $^A =''; + ::is $format, $orig, "RT91032: don't overwrite orig format string"; + +} + + SKIP: { skip_if_miniperl('miniperl does not support scalario'); my $buf = "";