[perl #49190] Don’t prematurely optimise s/foo/bar$baz/
authorFather Chrysostomos <sprout@cpan.org>
Thu, 11 Oct 2012 21:38:31 +0000 (14:38 -0700)
committerFather Chrysostomos <sprout@cpan.org>
Fri, 12 Oct 2012 06:07:36 +0000 (23:07 -0700)
$baz could be aliased to a package variable, so we do need to recon-
catenate for every iteration of s///g.  For s/// without /g, only one
more op will be executed, so the speed difference is negligible.

The only cases we can optimise in terms of skipping the evaluation of
the ops on the rhs (by eliminating the substconst op) are s//constant/
and s//$single_variable/.  Anything more complicated causes bugs.

A recent commit made s/foo/$bar/g re-stringify $bar for each iteration
(though without having to reevaluate the ops that return $bar).  So we
no longer have to special-case match vars at compile time.

This means that s/foo/bar$baz/g will be slower (and less buggy), but
s/foo/$1/g will be faster.

This also caused an existing taint but in pp_subst to surface.  If
get-magic turns off taint on a replacement string, it should not be
considered tainted.  So the taint check on the replacement should come
*after* the stringification.  This applies to the constant replacement
optimisation.  pp_substcont was already doing this correctly.

ext/B/t/optree_misc.t
ext/B/t/walkoptree.t
op.c
pp_hot.c
t/re/subst.t

index 7a52cc384481a21e7f6e504b20d80b9e597eb047..5b623f597869e82f5eacde7f4ad306a538a15b3f 100644 (file)
@@ -66,25 +66,23 @@ checkOptree ( name  => 'PMOP children',
              code      => sub { $foo =~ s/(a)/$1/ },
              strip_open_hints => 1,
              expect    => <<'EOT_EOT', expect_nt => <<'EONT_EONT');
-# 6  <1> leavesub[1 ref] K/REFC,1 ->(end)
-# -     <@> lineseq KP ->6
+# 5  <1> leavesub[1 ref] K/REFC,1 ->(end)
+# -     <@> lineseq KP ->5
 # 1        <;> nextstate(main 1 -e:1) v:>,<,%,{ ->2
-# 3        </> subst(/"(a)"/ replstart->4) KS ->6
+# 4        </> subst(/"(a)"/) KS ->5
 # -           <1> ex-rv2sv sKRM/1 ->3
 # 2              <#> gvsv[*foo] s ->3
-# 5           <|> substcont(other->3) sK/1 ->(end)
-# -              <1> ex-rv2sv sK/1 ->5
-# 4                 <#> gvsv[*1] s ->5
+# -           <1> ex-rv2sv sK/1 ->4
+# 3              <#> gvsv[*1] s ->4
 EOT_EOT
-# 6  <1> leavesub[1 ref] K/REFC,1 ->(end)
-# -     <@> lineseq KP ->6
+# 5  <1> leavesub[1 ref] K/REFC,1 ->(end)
+# -     <@> lineseq KP ->5
 # 1        <;> nextstate(main 1 -e:1) v:>,<,%,{ ->2
-# 3        </> subst(/"(a)"/ replstart->4) KS ->6
+# 4        </> subst(/"(a)"/) KS ->5
 # -           <1> ex-rv2sv sKRM/1 ->3
 # 2              <$> gvsv(*foo) s ->3
-# 5           <|> substcont(other->3) sK/1 ->(end)
-# -              <1> ex-rv2sv sK/1 ->5
-# 4                 <$> gvsv(*1) s ->5
+# -           <1> ex-rv2sv sK/1 ->4
+# 3              <$> gvsv(*1) s ->4
 EONT_EONT
 
 } #skip
index fbdc50fcb8f33fd14988db26979db760ab8f387a..3648835b7f6fb85160a1c5dd51b23f5b1fc32111 100644 (file)
@@ -33,7 +33,7 @@ sub B::OP::walkoptree_debug {
 my $victim = sub {
     # This gives us a substcont, which gets to the second recursive call
     # point (in the if statement in the XS code)
-    $_[0] =~ s/(a)/$1/;
+    $_[0] =~ s/(a)/ $1/;
     # PMOP_pmreplroot(cPMOPo) is NULL for this
     $_[0] =~ s/(b)//;
     # This gives an OP_PUSHRE
diff --git a/op.c b/op.c
index 0240349517f71742fe27689b46ec3810a8bcc4ba..7d6b3fcc653549872cf36758dae56bda69001890 100644 (file)
--- a/op.c
+++ b/op.c
@@ -4752,54 +4752,29 @@ Perl_pmruntime(pTHX_ OP *o, OP *expr, bool isreg, I32 floor)
     }
 
     if (repl) {
-       OP *curop;
+       bool konst;
        if (pm->op_pmflags & PMf_EVAL) {
-           curop = NULL;
+           konst = FALSE;
            if (CopLINE(PL_curcop) < (line_t)PL_parser->multi_end)
                CopLINE_set(PL_curcop, (line_t)PL_parser->multi_end);
        }
        else if (repl->op_type == OP_CONST)
-           curop = repl;
-       else {
-           OP *lastop = NULL;
-           for (curop = LINKLIST(repl); curop!=repl; curop = LINKLIST(curop)) {
-               if (curop->op_type == OP_SCOPE
-                       || curop->op_type == OP_LEAVE
-                       || (PL_opargs[curop->op_type] & OA_DANGEROUS)) {
-                   if (curop->op_type == OP_GV) {
-                       GV * const gv = cGVOPx_gv(curop);
-                       repl_has_vars = 1;
-                       if (strchr("&`'123456789+-\016\022", *GvENAME(gv)))
-                           break;
-                   }
-                   else if (curop->op_type == OP_RV2CV)
-                       break;
-                   else if (curop->op_type == OP_RV2SV ||
-                            curop->op_type == OP_RV2AV ||
-                            curop->op_type == OP_RV2HV ||
-                            curop->op_type == OP_RV2GV) {
-                       if (lastop && lastop->op_type != OP_GV) /*funny deref?*/
-                           break;
-                   }
-                   else if (curop->op_type == OP_PADSV ||
-                            curop->op_type == OP_PADAV ||
-                            curop->op_type == OP_PADHV ||
-                            curop->op_type == OP_PADANY)
-                   {
-                       repl_has_vars = 1;
-                   }
-                   else if (curop->op_type == OP_PUSHRE)
-                       NOOP; /* Okay here, dangerous in newASSIGNOP */
-                   else
-                       break;
-               }
-               else if ((PL_opargs[curop->op_type] & OA_CLASS_MASK)
-                          == OA_LOGOP)
-                   break;
-               lastop = curop;
-           }
-       }
-       if (curop == repl
+           konst = TRUE;
+       else if (( (repl->op_type == OP_RV2SV ||
+                   repl->op_type == OP_RV2AV ||
+                   repl->op_type == OP_RV2HV ||
+                   repl->op_type == OP_RV2GV)
+                && cUNOPx(repl)->op_first
+                && cUNOPx(repl)->op_first->op_type == OP_GV )
+             || repl->op_type == OP_PADSV
+             || repl->op_type == OP_PADAV
+             || repl->op_type == OP_PADHV
+             || repl->op_type == OP_PADANY) {
+           repl_has_vars = 1;
+           konst = TRUE;
+       }
+       else konst = FALSE;
+       if (konst
            && !(repl_has_vars
                 && (!PM_GETRE(pm)
                     || RX_EXTFLAGS(PM_GETRE(pm)) & RXf_EVAL_SEEN)))
index ec0b3b82328752ccfa8fea47958fdaa165f0bd5f..799499242354d343ce812f1a1ea731563edbb744 100644 (file)
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2172,9 +2172,6 @@ PP(pp_subst)
 
     /* known replacement string? */
     if (dstr) {
-       if (SvTAINTED(dstr))
-           rxtainted |= SUBST_TAINT_REPL;
-
        /* replacement needing upgrading? */
        if (DO_UTF8(TARG) && !doutf8) {
             nsv = sv_newmortal();
@@ -2190,6 +2187,9 @@ PP(pp_subst)
            c = SvPV_const(dstr, clen);
            doutf8 = DO_UTF8(dstr);
        }
+
+       if (SvTAINTED(dstr))
+           rxtainted |= SUBST_TAINT_REPL;
     }
     else {
        c = NULL;
@@ -2347,8 +2347,6 @@ PP(pp_subst)
              first = FALSE;
            }
            else {
-               if (SvTAINTED(dstr))
-                   rxtainted |= SUBST_TAINT_REPL;
                if (PL_encoding) {
                    if (!nsv) nsv = sv_newmortal();
                    sv_copypv(nsv, repl);
@@ -2356,6 +2354,8 @@ PP(pp_subst)
                    sv_catsv(dstr, nsv);
                }
                else sv_catsv(dstr, repl);
+               if (SvTAINTED(repl))
+                   rxtainted |= SUBST_TAINT_REPL;
            }
            if (once)
                break;
index 6474b464fed3c264e20f1aa6e314cd4f028d324d..8aa2266494077811651d9f5517897792c37719ec 100644 (file)
@@ -7,7 +7,7 @@ BEGIN {
     require './test.pl';
 }
 
-plan( tests => 203 );
+plan( tests => 204 );
 
 $_ = 'david';
 $a = s/david/rules/r;
@@ -857,6 +857,13 @@ $_ = "hello";
     s/(.)\1/$a/g;
 }
 is $_, 'helo', 's/pat/$alias_to_match_var/';
+"g" =~ /(.)/;
+$_ = "hello";
+{
+    local *a = *1;
+    s/e(.)\1/a$a/g;
+}
+is $_, 'halo', 's/pat/$alias_to_match_var/';
 
 
 $_ = "\xc4\x80";