From b57b17349edad3eb77b8bbcdf1aee88b481e183f Mon Sep 17 00:00:00 2001 From: David Mitchell Date: Thu, 26 May 2011 08:57:07 +0100 Subject: [PATCH] stop ~ in format modifying format string Currently, the format parser converts ~ or ~~ in a format string into blank spaces. Since the previous-but-one commit, it only does it in a copy rather than the original string, but this still defeats the "if the string is the same don't recompile" mechanism. Fix this by leaving the ~ alone in the format string, but instead cause FF_LITERAL to convert '~' to ' ' when appending to the target. Also, in S_doparseform(), improve the processing of '~~': previously it only skipped one '~', and processed the second '~' on the next loop; this happened to work, but it's less unexpected to process both chars at once. I've also added some tests, but these don't actually test whether the format gets re-compiled: I couldn't think of a way to do that short of checking the output of perl -Df. Instead the tests I added were based around making sure I didn't break anything related to ~~ formatting. I also improved the description string for some of the existing tests. --- pp_ctl.c | 35 ++++++++++++++++++++++++++++------- t/op/write.t | 35 +++++++++++++++++++++++++++++++---- 2 files changed, 59 insertions(+), 11 deletions(-) diff --git a/pp_ctl.c b/pp_ctl.c index 8591328..79b7d32 100644 --- a/pp_ctl.c +++ b/pp_ctl.c @@ -604,9 +604,23 @@ PP(pp_formline) case FF_LITERAL: arg = *fpc++; if (targ_is_utf8 && !SvUTF8(formsv)) { + char *s; SvCUR_set(PL_formtarget, t - SvPVX_const(PL_formtarget)); *t = '\0'; - sv_catpvn_utf8_upgrade(PL_formtarget, f, arg, nsv); + + /* this is an unrolled sv_catpvn_utf8_upgrade(), + * but with the addition of s/~/ /g */ + if (!(nsv)) + nsv = newSVpvn_flags(f, arg, SVs_TEMP); + else + sv_setpvn(nsv, f, arg); + SvUTF8_off(nsv); + for (s = SvPVX(nsv); s <= SvEND(nsv); s++) + if (*s == '~') + *s = ' '; + sv_utf8_upgrade(nsv); + sv_catsv(PL_formtarget, nsv); + t = SvEND(PL_formtarget); f += arg; break; @@ -618,8 +632,10 @@ PP(pp_formline) t = SvEND(PL_formtarget); targ_is_utf8 = TRUE; } - while (arg--) - *t++ = *f++; + while (arg--) { + *t++ = (*f == '~') ? ' ' : *f; + f++; + } break; case FF_SKIP: @@ -4951,16 +4967,21 @@ S_doparseform(pTHX_ SV *sv) if ( !(!!SvUTF8(old) ^ !!SvUTF8(sv)) && len == SvCUR(old) && strnEQ(SvPVX(old), SvPVX(sv), len) - ) + ) { + DEBUG_f(PerlIO_printf(Perl_debug_log,"Re-using compiled format\n")); return mg; + } + DEBUG_f(PerlIO_printf(Perl_debug_log, "Re-compiling format\n")); Safefree(mg->mg_ptr); mg->mg_ptr = NULL; SvREFCNT_dec(old); mg->mg_obj = NULL; } - else + else { + DEBUG_f(PerlIO_printf(Perl_debug_log, "Compiling format\n")); mg = sv_magicext(sv, NULL, PERL_MAGIC_fm, &PL_vtbl_fm, NULL, 0); + } sv_copy = newSVpvn_utf8(s, len, SvUTF8(sv)); s = SvPV(sv_copy, len); /* work on the copy, not the original */ @@ -4994,10 +5015,10 @@ S_doparseform(pTHX_ SV *sv) case '~': if (*s == '~') { repeat = TRUE; - *s = ' '; + skipspaces++; + s++; } noblank = TRUE; - s[-1] = ' '; /* FALL THROUGH */ case ' ': case '\t': skipspaces++; diff --git a/t/op/write.t b/t/op/write.t index d436730..646143d 100644 --- a/t/op/write.t +++ b/t/op/write.t @@ -61,7 +61,7 @@ for my $tref ( @NumTests ){ my $bas_tests = 20; # number of tests in section 3 -my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3 + 2 + 6 + 2 + 1 + 1; +my $bug_tests = 4 + 3 * 3 * 5 * 2 * 3 + 2 + 66 + 2 + 1 + 1; # number of tests in section 4 my $hmb_tests = 35; @@ -542,9 +542,13 @@ for my $tref ( @NumTests ){ "$base\nMoo!\n",) { foreach (['^*', qr/(.+)/], ['@*', qr/(.*?)$/s]) { my ($format, $re) = @$_; + $format = "1^*2 3${format}4"; foreach my $class ('', 'Count') { - my $name = "$first, $second $format $class"; + my $name = qq{swrite("$format", "$first", "$second") class="$class"}; $name =~ s/\n/\\n/g; + $name =~ s{(.)}{ + ord($1) > 126 ? sprintf("\\x{%x}",ord($1)) : $1 + }ge; $first =~ /(.+)/ or die $first; my $expect = "1${1}2"; @@ -555,12 +559,12 @@ for my $tref ( @NumTests ){ my $copy1 = $first; my $copy2; tie $copy2, $class, $second; - is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name; + is swrite("$format", $copy1, $copy2), $expect, $name; my $obj = tied $copy2; is $obj->[1], 1, 'value read exactly once'; } else { my ($copy1, $copy2) = ($first, $second); - is swrite("1^*2 3${format}4", $copy1, $copy2), $expect, $name; + is swrite("$format", $copy1, $copy2), $expect, $name; } } } @@ -654,6 +658,29 @@ ok defined *{$::{CmT}}{FORMAT}, "glob assign"; $^A =''; ::is $format, $orig, "RT91032: don't overwrite orig format string"; + # check that ~ and ~~ are displayed correctly as whitespace, + # under the influence of various different types of border + + for my $n (1,2) { + for my $lhs (' ', 'Y', '^<<<', '^|||', '^>>>') { + for my $rhs ('', ' ', 'Z', '^<<<', '^|||', '^>>>') { + my $fmt = "^