From bb933b9ba3a34a454c83adeaf265bd1d4eb466a1 Mon Sep 17 00:00:00 2001 From: Father Chrysostomos Date: Thu, 11 Oct 2012 14:38:31 -0700 Subject: [PATCH] =?utf8?q?[perl=20#49190]=20Don=E2=80=99t=20prematurely=20?= =?utf8?q?optimise=20s/foo/bar$baz/?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit $baz could be aliased to a package variable, so we do need to recon- catenate for every iteration of s///g. For s/// without /g, only one more op will be executed, so the speed difference is negligible. The only cases we can optimise in terms of skipping the evaluation of the ops on the rhs (by eliminating the substconst op) are s//constant/ and s//$single_variable/. Anything more complicated causes bugs. A recent commit made s/foo/$bar/g re-stringify $bar for each iteration (though without having to reevaluate the ops that return $bar). So we no longer have to special-case match vars at compile time. This means that s/foo/bar$baz/g will be slower (and less buggy), but s/foo/$1/g will be faster. This also caused an existing taint but in pp_subst to surface. If get-magic turns off taint on a replacement string, it should not be considered tainted. So the taint check on the replacement should come *after* the stringification. This applies to the constant replacement optimisation. pp_substcont was already doing this correctly. --- ext/B/t/optree_misc.t | 22 +++++++++---------- ext/B/t/walkoptree.t | 2 +- op.c | 61 +++++++++++++++------------------------------------ pp_hot.c | 10 ++++----- t/re/subst.t | 9 +++++++- 5 files changed, 42 insertions(+), 62 deletions(-) diff --git a/ext/B/t/optree_misc.t b/ext/B/t/optree_misc.t index 7a52cc3..5b623f5 100644 --- a/ext/B/t/optree_misc.t +++ b/ext/B/t/optree_misc.t @@ -66,25 +66,23 @@ checkOptree ( name => 'PMOP children', code => sub { $foo =~ s/(a)/$1/ }, strip_open_hints => 1, expect => <<'EOT_EOT', expect_nt => <<'EONT_EONT'); -# 6 <1> leavesub[1 ref] K/REFC,1 ->(end) -# - <@> lineseq KP ->6 +# 5 <1> leavesub[1 ref] K/REFC,1 ->(end) +# - <@> lineseq KP ->5 # 1 <;> nextstate(main 1 -e:1) v:>,<,%,{ ->2 -# 3 subst(/"(a)"/ replstart->4) KS ->6 +# 4 subst(/"(a)"/) KS ->5 # - <1> ex-rv2sv sKRM/1 ->3 # 2 <#> gvsv[*foo] s ->3 -# 5 <|> substcont(other->3) sK/1 ->(end) -# - <1> ex-rv2sv sK/1 ->5 -# 4 <#> gvsv[*1] s ->5 +# - <1> ex-rv2sv sK/1 ->4 +# 3 <#> gvsv[*1] s ->4 EOT_EOT -# 6 <1> leavesub[1 ref] K/REFC,1 ->(end) -# - <@> lineseq KP ->6 +# 5 <1> leavesub[1 ref] K/REFC,1 ->(end) +# - <@> lineseq KP ->5 # 1 <;> nextstate(main 1 -e:1) v:>,<,%,{ ->2 -# 3 subst(/"(a)"/ replstart->4) KS ->6 +# 4 subst(/"(a)"/) KS ->5 # - <1> ex-rv2sv sKRM/1 ->3 # 2 <$> gvsv(*foo) s ->3 -# 5 <|> substcont(other->3) sK/1 ->(end) -# - <1> ex-rv2sv sK/1 ->5 -# 4 <$> gvsv(*1) s ->5 +# - <1> ex-rv2sv sK/1 ->4 +# 3 <$> gvsv(*1) s ->4 EONT_EONT } #skip diff --git a/ext/B/t/walkoptree.t b/ext/B/t/walkoptree.t index fbdc50f..3648835 100644 --- a/ext/B/t/walkoptree.t +++ b/ext/B/t/walkoptree.t @@ -33,7 +33,7 @@ sub B::OP::walkoptree_debug { my $victim = sub { # This gives us a substcont, which gets to the second recursive call # point (in the if statement in the XS code) - $_[0] =~ s/(a)/$1/; + $_[0] =~ s/(a)/ $1/; # PMOP_pmreplroot(cPMOPo) is NULL for this $_[0] =~ s/(b)//; # This gives an OP_PUSHRE diff --git a/op.c b/op.c index 0240349..7d6b3fc 100644 --- a/op.c +++ b/op.c @@ -4752,54 +4752,29 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor) } if (repl) { - OP *curop; + bool konst; if (pm->op_pmflags & PMf_EVAL) { - curop = NULL; + konst = FALSE; if (CopLINE(PL_curcop) < (line_t)PL_parser->multi_end) CopLINE_set(PL_curcop, (line_t)PL_parser->multi_end); } else if (repl->op_type == OP_CONST) - curop = repl; - else { - OP *lastop = NULL; - for (curop = LINKLIST(repl); curop!=repl; curop = LINKLIST(curop)) { - if (curop->op_type == OP_SCOPE - || curop->op_type == OP_LEAVE - || (PL_opargs[curop->op_type] & OA_DANGEROUS)) { - if (curop->op_type == OP_GV) { - GV * const gv = cGVOPx_gv(curop); - repl_has_vars = 1; - if (strchr("&`'123456789+-\016\022", *GvENAME(gv))) - break; - } - else if (curop->op_type == OP_RV2CV) - break; - else if (curop->op_type == OP_RV2SV || - curop->op_type == OP_RV2AV || - curop->op_type == OP_RV2HV || - curop->op_type == OP_RV2GV) { - if (lastop && lastop->op_type != OP_GV) /*funny deref?*/ - break; - } - else if (curop->op_type == OP_PADSV || - curop->op_type == OP_PADAV || - curop->op_type == OP_PADHV || - curop->op_type == OP_PADANY) - { - repl_has_vars = 1; - } - else if (curop->op_type == OP_PUSHRE) - NOOP; /* Okay here, dangerous in newASSIGNOP */ - else - break; - } - else if ((PL_opargs[curop->op_type] & OA_CLASS_MASK) - == OA_LOGOP) - break; - lastop = curop; - } - } - if (curop == repl + konst = TRUE; + else if (( (repl->op_type == OP_RV2SV || + repl->op_type == OP_RV2AV || + repl->op_type == OP_RV2HV || + repl->op_type == OP_RV2GV) + && cUNOPx(repl)->op_first + && cUNOPx(repl)->op_first->op_type == OP_GV ) + || repl->op_type == OP_PADSV + || repl->op_type == OP_PADAV + || repl->op_type == OP_PADHV + || repl->op_type == OP_PADANY) { + repl_has_vars = 1; + konst = TRUE; + } + else konst = FALSE; + if (konst && !(repl_has_vars && (!PM_GETRE(pm) || RX_EXTFLAGS(PM_GETRE(pm)) & RXf_EVAL_SEEN))) diff --git a/pp_hot.c b/pp_hot.c index ec0b3b8..7994992 100644 --- a/pp_hot.c +++ b/pp_hot.c @@ -2172,9 +2172,6 @@ PP(pp_subst) /* known replacement string? */ if (dstr) { - if (SvTAINTED(dstr)) - rxtainted |= SUBST_TAINT_REPL; - /* replacement needing upgrading? */ if (DO_UTF8(TARG) && !doutf8) { nsv = sv_newmortal(); @@ -2190,6 +2187,9 @@ PP(pp_subst) c = SvPV_const(dstr, clen); doutf8 = DO_UTF8(dstr); } + + if (SvTAINTED(dstr)) + rxtainted |= SUBST_TAINT_REPL; } else { c = NULL; @@ -2347,8 +2347,6 @@ PP(pp_subst) first = FALSE; } else { - if (SvTAINTED(dstr)) - rxtainted |= SUBST_TAINT_REPL; if (PL_encoding) { if (!nsv) nsv = sv_newmortal(); sv_copypv(nsv, repl); @@ -2356,6 +2354,8 @@ PP(pp_subst) sv_catsv(dstr, nsv); } else sv_catsv(dstr, repl); + if (SvTAINTED(repl)) + rxtainted |= SUBST_TAINT_REPL; } if (once) break; diff --git a/t/re/subst.t b/t/re/subst.t index 6474b46..8aa2266 100644 --- a/t/re/subst.t +++ b/t/re/subst.t @@ -7,7 +7,7 @@ BEGIN { require './test.pl'; } -plan( tests => 203 ); +plan( tests => 204 ); $_ = 'david'; $a = s/david/rules/r; @@ -857,6 +857,13 @@ $_ = "hello"; s/(.)\1/$a/g; } is $_, 'helo', 's/pat/$alias_to_match_var/'; +"g" =~ /(.)/; +$_ = "hello"; +{ + local *a = *1; + s/e(.)\1/a$a/g; +} +is $_, 'halo', 's/pat/$alias_to_match_var/'; $_ = "\xc4\x80"; -- 2.7.4