[perl #49190] Don’t prematurely optimise s/foo/bar$baz/
authorFather Chrysostomos <sprout@cpan.org>
Thu, 11 Oct 2012 21:38:31 +0000 (14:38 -0700)
committerFather Chrysostomos <sprout@cpan.org>
Fri, 12 Oct 2012 06:07:36 +0000 (23:07 -0700)
$baz could be aliased to a package variable, so we do need to recon-
catenate for every iteration of s///g.  For s/// without /g, only one
more op will be executed, so the speed difference is negligible.

The only cases we can optimise in terms of skipping the evaluation of
the ops on the rhs (by eliminating the substconst op) are s//constant/
and s//$single_variable/.  Anything more complicated causes bugs.

A recent commit made s/foo/$bar/g re-stringify $bar for each iteration
(though without having to reevaluate the ops that return $bar).  So we
no longer have to special-case match vars at compile time.

This means that s/foo/bar$baz/g will be slower (and less buggy), but
s/foo/$1/g will be faster.

This also caused an existing taint but in pp_subst to surface.  If
get-magic turns off taint on a replacement string, it should not be
considered tainted.  So the taint check on the replacement should come
*after* the stringification.  This applies to the constant replacement
optimisation.  pp_substcont was already doing this correctly.

ext/B/t/optree_misc.t
ext/B/t/walkoptree.t
op.c
pp_hot.c
t/re/subst.t

index 7a52cc3..5b623f5 100644 (file)
@@ -66,25 +66,23 @@ checkOptree ( name  => 'PMOP children',
              code      => sub { $foo =~ s/(a)/$1/ },
              strip_open_hints => 1,
              expect    => <<'EOT_EOT', expect_nt => <<'EONT_EONT');
-# 6  <1> leavesub[1 ref] K/REFC,1 ->(end)
-# -     <@> lineseq KP ->6
+# 5  <1> leavesub[1 ref] K/REFC,1 ->(end)
+# -     <@> lineseq KP ->5
 # 1        <;> nextstate(main 1 -e:1) v:>,<,%,{ ->2
-# 3        </> subst(/"(a)"/ replstart->4) KS ->6
+# 4        </> subst(/"(a)"/) KS ->5
 # -           <1> ex-rv2sv sKRM/1 ->3
 # 2              <#> gvsv[*foo] s ->3
-# 5           <|> substcont(other->3) sK/1 ->(end)
-# -              <1> ex-rv2sv sK/1 ->5
-# 4                 <#> gvsv[*1] s ->5
+# -           <1> ex-rv2sv sK/1 ->4
+# 3              <#> gvsv[*1] s ->4
 EOT_EOT
-# 6  <1> leavesub[1 ref] K/REFC,1 ->(end)
-# -     <@> lineseq KP ->6
+# 5  <1> leavesub[1 ref] K/REFC,1 ->(end)
+# -     <@> lineseq KP ->5
 # 1        <;> nextstate(main 1 -e:1) v:>,<,%,{ ->2
-# 3        </> subst(/"(a)"/ replstart->4) KS ->6
+# 4        </> subst(/"(a)"/) KS ->5
 # -           <1> ex-rv2sv sKRM/1 ->3
 # 2              <$> gvsv(*foo) s ->3
-# 5           <|> substcont(other->3) sK/1 ->(end)
-# -              <1> ex-rv2sv sK/1 ->5
-# 4                 <$> gvsv(*1) s ->5
+# -           <1> ex-rv2sv sK/1 ->4
+# 3              <$> gvsv(*1) s ->4
 EONT_EONT
 
 } #skip
index fbdc50f..3648835 100644 (file)
@@ -33,7 +33,7 @@ sub B::OP::walkoptree_debug {
 my $victim = sub {
     # This gives us a substcont, which gets to the second recursive call
     # point (in the if statement in the XS code)
-    $_[0] =~ s/(a)/$1/;
+    $_[0] =~ s/(a)/ $1/;
     # PMOP_pmreplroot(cPMOPo) is NULL for this
     $_[0] =~ s/(b)//;
     # This gives an OP_PUSHRE
diff --git a/op.c b/op.c
index 0240349..7d6b3fc 100644 (file)
--- a/op.c
+++ b/op.c
@@ -4752,54 +4752,29 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
     }
 
     if (repl) {
-       OP *curop;
+       bool konst;
        if (pm->op_pmflags & PMf_EVAL) {
-           curop = NULL;
+           konst = FALSE;
            if (CopLINE(PL_curcop) < (line_t)PL_parser->multi_end)
                CopLINE_set(PL_curcop, (line_t)PL_parser->multi_end);
        }
        else if (repl->op_type == OP_CONST)
-           curop = repl;
-       else {
-           OP *lastop = NULL;
-           for (curop = LINKLIST(repl); curop!=repl; curop = LINKLIST(curop)) {
-               if (curop->op_type == OP_SCOPE
-                       || curop->op_type == OP_LEAVE
-                       || (PL_opargs[curop->op_type] & OA_DANGEROUS)) {
-                   if (curop->op_type == OP_GV) {
-                       GV * const gv = cGVOPx_gv(curop);
-                       repl_has_vars = 1;
-                       if (strchr("&`'123456789+-\016\022", *GvENAME(gv)))
-                           break;
-                   }
-                   else if (curop->op_type == OP_RV2CV)
-                       break;
-                   else if (curop->op_type == OP_RV2SV ||
-                            curop->op_type == OP_RV2AV ||
-                            curop->op_type == OP_RV2HV ||
-                            curop->op_type == OP_RV2GV) {
-                       if (lastop && lastop->op_type != OP_GV) /*funny deref?*/
-                           break;
-                   }
-                   else if (curop->op_type == OP_PADSV ||
-                            curop->op_type == OP_PADAV ||
-                            curop->op_type == OP_PADHV ||
-                            curop->op_type == OP_PADANY)
-                   {
-                       repl_has_vars = 1;
-                   }
-                   else if (curop->op_type == OP_PUSHRE)
-                       NOOP; /* Okay here, dangerous in newASSIGNOP */
-                   else
-                       break;
-               }
-               else if ((PL_opargs[curop->op_type] & OA_CLASS_MASK)
-                          == OA_LOGOP)
-                   break;
-               lastop = curop;
-           }
-       }
-       if (curop == repl
+           konst = TRUE;
+       else if (( (repl->op_type == OP_RV2SV ||
+                   repl->op_type == OP_RV2AV ||
+                   repl->op_type == OP_RV2HV ||
+                   repl->op_type == OP_RV2GV)
+                && cUNOPx(repl)->op_first
+                && cUNOPx(repl)->op_first->op_type == OP_GV )
+             || repl->op_type == OP_PADSV
+             || repl->op_type == OP_PADAV
+             || repl->op_type == OP_PADHV
+             || repl->op_type == OP_PADANY) {
+           repl_has_vars = 1;
+           konst = TRUE;
+       }
+       else konst = FALSE;
+       if (konst
            && !(repl_has_vars
                 && (!PM_GETRE(pm)
                     || RX_EXTFLAGS(PM_GETRE(pm)) & RXf_EVAL_SEEN)))
index ec0b3b8..7994992 100644 (file)
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2172,9 +2172,6 @@ PP(pp_subst)
 
     /* known replacement string? */
     if (dstr) {
-       if (SvTAINTED(dstr))
-           rxtainted |= SUBST_TAINT_REPL;
-
        /* replacement needing upgrading? */
        if (DO_UTF8(TARG) && !doutf8) {
             nsv = sv_newmortal();
@@ -2190,6 +2187,9 @@ PP(pp_subst)
            c = SvPV_const(dstr, clen);
            doutf8 = DO_UTF8(dstr);
        }
+
+       if (SvTAINTED(dstr))
+           rxtainted |= SUBST_TAINT_REPL;
     }
     else {
        c = NULL;
@@ -2347,8 +2347,6 @@ PP(pp_subst)
              first = FALSE;
            }
            else {
-               if (SvTAINTED(dstr))
-                   rxtainted |= SUBST_TAINT_REPL;
                if (PL_encoding) {
                    if (!nsv) nsv = sv_newmortal();
                    sv_copypv(nsv, repl);
@@ -2356,6 +2354,8 @@ PP(pp_subst)
                    sv_catsv(dstr, nsv);
                }
                else sv_catsv(dstr, repl);
+               if (SvTAINTED(repl))
+                   rxtainted |= SUBST_TAINT_REPL;
            }
            if (once)
                break;
index 6474b46..8aa2266 100644 (file)
@@ -7,7 +7,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan( tests => 203 );
+plan( tests => 204 );
 
 $_ = 'david';
 $a = s/david/rules/r;
@@ -857,6 +857,13 @@ $_ = "hello";
     s/(.)\1/$a/g;
 }
 is $_, 'helo', 's/pat/$alias_to_match_var/';
+"g" =~ /(.)/;
+$_ = "hello";
+{
+    local *a = *1;
+    s/e(.)\1/a$a/g;
+}
+is $_, 'halo', 's/pat/$alias_to_match_var/';
 
 
 $_ = "\xc4\x80";