From 4f4d7508b0c2c114e5f52420e0e87a853c5f642a Mon Sep 17 00:00:00 2001
From: David Caldwell
Date: Mon, 23 Nov 2009 17:24:25 -0800
Subject: [PATCH] Add s///r (non-destructive substitution).
This changes s/// so that it doesn't act destructively on its target.
Instead it returns the result of the substitution (or the original string if
there was no match).
In addition this patch:
* Adds a new warning when s///r happens in void context.
* Adds a error when you try to use s///r with !~
* Makes it so constant strings can be bound to s///r with =~
* Adds documentation.
* Adds some tests.
* Updates various debug code so it knows about the /r flag.
* Adds some new 'r' words to B::Deparse.
---
dist/B-Deparse/Deparse.pm | 7 ++--
dump.c | 2 ++
ext/B/t/concise-xs.t | 3 +-
op.c | 14 +++++++-
op.h | 2 ++
pod/perlop.pod | 32 +++++++++++++++----
pod/perlrequick.pod | 15 +++++++++
pod/perlreref.pod | 3 +-
pod/perlretut.pod | 25 +++++++++++++++
pp_ctl.c | 5 ++-
pp_hot.c | 30 +++++++++++++++---
regexp.h | 4 ++-
t/re/subst.t | 81 ++++++++++++++++++++++++++++++++++++++++++++++-
toke.c | 9 +++---
14 files changed, 209 insertions(+), 23 deletions(-)
diff --git a/dist/B-Deparse/Deparse.pm b/dist/B-Deparse/Deparse.pm
index fc0125d..16b5642 100644
--- a/dist/B-Deparse/Deparse.pm
+++ b/dist/B-Deparse/Deparse.pm
@@ -19,7 +19,7 @@ use B qw(class main_root main_start main_cv svref_2object opnumber perlstring
OPpREVERSE_INPLACE
SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR SVf_FAKE SVs_RMG SVs_SMG
CVf_METHOD CVf_LVALUE
- PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE
+ PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE PMf_NONDESTRUCT
PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED),
($] < 5.009 ? 'PMf_SKIPWHITE' : 'RXf_SKIPWHITE'),
($] < 5.011 ? 'CVf_LOCKED' : ());
@@ -4310,7 +4310,9 @@ my %substwords;
map($substwords{join "", sort split //, $_} = $_, 'ego', 'egoism', 'em',
'es', 'ex', 'exes', 'gee', 'go', 'goes', 'ie', 'ism', 'iso', 'me',
'meese', 'meso', 'mig', 'mix', 'os', 'ox', 'oxime', 'see', 'seem',
- 'seg', 'sex', 'sig', 'six', 'smog', 'sog', 'some', 'xi');
+ 'seg', 'sex', 'sig', 'six', 'smog', 'sog', 'some', 'xi',
+ 'sir', 'rise', 'smore', 'more', 'seer', 'rome', 'gore', 'grim', 'grime',
+ 'or', 'rose', 'rosie');
sub pp_subst {
my $self = shift;
@@ -4351,6 +4353,7 @@ sub pp_subst {
($re) = $self->regcomp($kid, 1, $extended);
}
$flags .= "e" if $op->pmflags & PMf_EVAL;
+ $flags .= "r" if $op->pmflags & PMf_NONDESTRUCT;
$flags .= "g" if $op->pmflags & PMf_GLOBAL;
$flags .= "i" if $op->pmflags & PMf_FOLD;
$flags .= "m" if $op->pmflags & PMf_MULTILINE;
diff --git a/dump.c b/dump.c
index 6bfe5f4..631f37c 100644
--- a/dump.c
+++ b/dump.c
@@ -645,6 +645,8 @@ S_pm_description(pTHX_ const PMOP *pm)
sv_catpv(desc, ",RETAINT");
if (pmflags & PMf_EVAL)
sv_catpv(desc, ",EVAL");
+ if (pmflags & PMf_NONDESTRUCT)
+ sv_catpv(desc, ",NONDESTRUCT");
return desc;
}
diff --git a/ext/B/t/concise-xs.t b/ext/B/t/concise-xs.t
index 20ecb55..d4c25b4 100644
--- a/ext/B/t/concise-xs.t
+++ b/ext/B/t/concise-xs.t
@@ -164,7 +164,8 @@ my $testpkgs = {
OPpSORT_REVERSE OPpREVERSE_INPLACE OPpTARGET_MY
OPpTRANS_COMPLEMENT OPpTRANS_DELETE OPpTRANS_SQUASH
PMf_CONTINUE PMf_EVAL PMf_EXTENDED PMf_FOLD PMf_GLOBAL
- PMf_KEEP PMf_MULTILINE PMf_ONCE PMf_SINGLELINE
+ PMf_KEEP PMf_NONDESTRUCT
+ PMf_MULTILINE PMf_ONCE PMf_SINGLELINE
POSTFIX SVf_FAKE SVf_IOK SVf_NOK SVf_POK SVf_ROK
SVpad_OUR SVs_RMG SVs_SMG SWAP_CHILDREN OPpPAD_STATE
/, $] > 5.009 ? ('RXf_SKIPWHITE') : ('PMf_SKIPWHITE'),
diff --git a/op.c b/op.c
index 47f8300..40ef4bc 100644
--- a/op.c
+++ b/op.c
@@ -1110,6 +1110,11 @@ Perl_scalarvoid(pTHX_ OP *o)
useless = "negative pattern binding (!~)";
break;
+ case OP_SUBST:
+ if (cPMOPo->op_pmflags & PMf_NONDESTRUCT)
+ useless = "Non-destructive substitution (s///r)";
+ break;
+
case OP_RV2GV:
case OP_RV2SV:
case OP_RV2AV:
@@ -2225,6 +2230,11 @@ Perl_bind_match(pTHX_ I32 type, OP *left, OP *right)
no_bareword_allowed(right);
}
+ /* !~ doesn't make sense with s///r, so error on it for now */
+ if (rtype == OP_SUBST && (cPMOPx(right)->op_pmflags & PMf_NONDESTRUCT) &&
+ type == OP_NOT)
+ yyerror("Using !~ with s///r doesn't make sense");
+
ismatchop = rtype == OP_MATCH ||
rtype == OP_SUBST ||
rtype == OP_TRANS;
@@ -2238,7 +2248,9 @@ Perl_bind_match(pTHX_ I32 type, OP *left, OP *right)
right->op_flags |= OPf_STACKED;
if (rtype != OP_MATCH &&
! (rtype == OP_TRANS &&
- right->op_private & OPpTRANS_IDENTICAL))
+ right->op_private & OPpTRANS_IDENTICAL) &&
+ ! (rtype == OP_SUBST &&
+ (cPMOPx(right)->op_pmflags & PMf_NONDESTRUCT)))
newleft = mod(left, rtype);
else
newleft = left;
diff --git a/op.h b/op.h
index 2109891..b9327bb 100644
--- a/op.h
+++ b/op.h
@@ -376,6 +376,8 @@ struct pmop {
#define PMf_GLOBAL 0x00002000 /* pattern had a g modifier */
#define PMf_CONTINUE 0x00004000 /* don't reset pos() if //g fails */
#define PMf_EVAL 0x00008000 /* evaluating replacement as expr */
+#define PMf_NONDESTRUCT 0x00010000 /* Return substituted string instead
+ of modifying it. */
/* The following flags have exact equivalents in regcomp.h with the prefix RXf_
* which are stored in the regexp->extflags member. If you change them here,
diff --git a/pod/perlop.pod b/pod/perlop.pod
index 58c0660..0acf7b9 100644
--- a/pod/perlop.pod
+++ b/pod/perlop.pod
@@ -235,9 +235,11 @@ of operation work on some other string. The right argument is a search
pattern, substitution, or transliteration. The left argument is what is
supposed to be searched, substituted, or transliterated instead of the default
$_. When used in scalar context, the return value generally indicates the
-success of the operation. Behavior in list context depends on the particular
-operator. See L"Regexp Quote-Like Operators"> for details and
-L for examples using these operators.
+success of the operation. Not always though: the non-destructive substitution
+option (C) causes the return value to be the result of the substition, for
+example. Behavior in list context depends on the particular operator. See
+L"Regexp Quote-Like Operators"> for details and L for examples
+using these operators.
If the right argument is an expression rather than a search pattern,
substitution, or transliteration, it is interpreted as a search pattern at run
@@ -251,6 +253,8 @@ pattern C<\>, which it will consider a syntax error.
Binary "!~" is just like "=~" except the return value is negated in
the logical sense.
+Binary "!~" is not permitted to bind to a non-destructive substitute (s///r).
+
=head2 Multiplicative Operators
X
@@ -1428,14 +1432,20 @@ This usage is vaguely deprecated, which means it just might possibly
be removed in some distant future version of Perl, perhaps somewhere
around the year 2168.
-=item s/PATTERN/REPLACEMENT/msixpogce
+=item s/PATTERN/REPLACEMENT/msixpogcer
X X X X
-X X X X X X
X X X X
+X X X X X X X X X X X
Searches a string for a pattern, and if found, replaces that pattern
with the replacement text and returns the number of substitutions
made. Otherwise it returns false (specifically, the empty string).
+If the C (non-destructive) option is used then it will perform the
+substitution on a copy of the string and return the copy whether or not a
+substitution occurred. The original string will always remain unchanged in
+this case. The copy will always be a plain string, even If the input is an
+object or a tied variable.
+
If no string is specified via the C<=~> or C operator, the C<$_>
variable is searched and modified. (The string specified with C<=~> must
be scalar variable, an array element, a hash element, or an assignment
@@ -1456,7 +1466,8 @@ Options are as with m// with the addition of the following replacement
specific options:
e Evaluate the right side as an expression.
- ee Evaluate the right side as a string then eval the result
+ ee Evaluate the right side as a string then eval the result.
+ r Return substitution and leave the original string untouched.
Any non-whitespace delimiter may replace the slashes. Add space after
the C when using a character allowed in identifiers. If single quotes
@@ -1480,6 +1491,11 @@ Examples:
s/Login: $foo/Login: $bar/; # run-time pattern
($foo = $bar) =~ s/this/that/; # copy first, then change
+ ($foo = "$bar") =~ s/this/that/; # convert to string, copy, then change
+ $foo = $bar =~ s/this/that/r; # Same as above using /r
+ $foo = $bar =~ s/this/that/r
+ =~ s/that/the other/r; # Chained substitutes using /r
+ @foo = map { s/this/that/r } @bar # /r is very useful in maps
$count = ($paragraph =~ s/Mister\b/Mr./g); # get change-count
@@ -1492,6 +1508,10 @@ Examples:
s/%(.)/$percent{$1} || $&/ge; # expr now, so /e
s/^=(\w+)/pod($1)/ge; # use function call
+ $_ = 'abc123xyz';
+ $a = s/abc/def/r; # $a is 'def123xyz' and
+ # $_ remains 'abc123xyz'.
+
# expand variables in $_, but dynamics only, using
# symbolic dereferencing
s/\$(\w+)/${$1}/g;
diff --git a/pod/perlrequick.pod b/pod/perlrequick.pod
index 4b5e19a..ded1e6c 100644
--- a/pod/perlrequick.pod
+++ b/pod/perlrequick.pod
@@ -440,6 +440,21 @@ of the regex in the string:
$x = "I batted 4 for 4";
$x =~ s/4/four/g; # $x contains "I batted four for four"
+The non-destructive modifier C causes the result of the substitution
+to be returned instead of modifying C<$_> (or whatever variable the
+substitute was bound to with C<=~>):
+
+ $x = "I like dogs.";
+ $y = $x =~ s/dogs/cats/r;
+ print "$x $y\n"; # prints "I like dogs. I like cats."
+
+ $x = "Cats are great.";
+ print $x =~ s/Cats/Dogs/r =~ s/Dogs/Frogs/r =~ s/Frogs/Hedgehogs/r, "\n";
+ # prints "Hedgehogs are great."
+
+ @foo = map { s/[a-z]/X/r } qw(a b c 1 2 3);
+ # @foo is now qw(X X X 1 2 3)
+
The evaluation modifier C wraps an C around the
replacement string and the evaluated result is substituted for the
matched substring. Some examples:
diff --git a/pod/perlreref.pod b/pod/perlreref.pod
index 817b740..5ddacc5 100644
--- a/pod/perlreref.pod
+++ b/pod/perlreref.pod
@@ -45,9 +45,10 @@ within the regex.
C substitutes matches of
'pattern' with 'replacement'. Modifiers as for C,
-with one addition:
+with two additions:
e Evaluate 'replacement' as an expression
+ r Return substitution and leave the original string untouched.
'e' may be specified multiple times. 'replacement' is interpreted
as a double quoted string unless a single-quote (C<'>) is the delimiter.
diff --git a/pod/perlretut.pod b/pod/perlretut.pod
index 0ff7438..a9a3372 100644
--- a/pod/perlretut.pod
+++ b/pod/perlretut.pod
@@ -1714,6 +1714,31 @@ occurrences of the regexp on each line and the C modifier to
compile the regexp only once. As with C, both the
C and the C use C<$_> implicitly.
+If you don't want C to change your original variable you can use
+the non-destructive substitute modifier, C. This changes the
+behavior so that C returns the final substituted string:
+
+ $x = "I like dogs.";
+ $y = $x =~ s/dogs/cats/r;
+ print "$x $y\n";
+
+That example will print "I like dogs. I like cats". Notice the original
+C<$x> variable has not been affected by the substitute. The overall
+result of the substitution is instead stored in C<$y>. If the
+substitution doesn't affect anything then the original string is
+returned:
+
+ $x = "I like dogs.";
+ $y = $x =~ s/elephants/cougars/r;
+ print "$x $y\n"; # prints "I like dogs. I like dogs."
+
+One other interesting thing that the C flag allows is chaining
+substitutions:
+
+ $x = "Cats are great.";
+ print $x =~ s/Cats/Dogs/r =~ s/Dogs/Frogs/r =~ s/Frogs/Hedgehogs/r, "\n";
+ # prints "Hedgehogs are great."
+
A modifier available specifically to search and replace is the
C evaluation modifier. C wraps an C around
the replacement string and the evaluated result is substituted for the
diff --git a/pp_ctl.c b/pp_ctl.c
index 2408a7b..7b94587 100644
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -325,7 +325,10 @@ PP(pp_substcont)
SvPV_set(dstr, NULL);
TAINT_IF(cx->sb_rxtainted & 1);
- mPUSHi(saviters - 1);
+ if (pm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(targ);
+ else
+ mPUSHi(saviters - 1);
(void)SvPOK_only_UTF8(targ);
TAINT_IF(cx->sb_rxtainted);
diff --git a/pp_hot.c b/pp_hot.c
index edc4854..ea24062 100644
--- a/pp_hot.c
+++ b/pp_hot.c
@@ -2110,6 +2110,11 @@ PP(pp_subst)
EXTEND(SP,1);
}
+ /* In non-destructive replacement mode, duplicate target scalar so it
+ * remains unchanged. */
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ TARG = newSVsv(TARG);
+
#ifdef PERL_OLD_COPY_ON_WRITE
/* Awooga. Awooga. "bool" types that are actually char are dangerous,
because they make integers such as 256 "false". */
@@ -2233,7 +2238,10 @@ PP(pp_subst)
if (!matched)
{
SPAGAIN;
- PUSHs(&PL_sv_no);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ PUSHs(&PL_sv_no);
LEAVE_SCOPE(oldsave);
RETURN;
}
@@ -2287,7 +2295,10 @@ PP(pp_subst)
}
TAINT_IF(rxtainted & 1);
SPAGAIN;
- PUSHs(&PL_sv_yes);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ PUSHs(&PL_sv_yes);
}
else {
do {
@@ -2316,7 +2327,10 @@ PP(pp_subst)
}
TAINT_IF(rxtainted & 1);
SPAGAIN;
- mPUSHi((I32)iters);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ mPUSHi((I32)iters);
}
(void)SvPOK_only_UTF8(TARG);
TAINT_IF(rxtainted);
@@ -2402,7 +2416,10 @@ PP(pp_subst)
TAINT_IF(rxtainted & 1);
SPAGAIN;
- mPUSHi((I32)iters);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ mPUSHi((I32)iters);
(void)SvPOK_only(TARG);
if (doutf8)
@@ -2418,7 +2435,10 @@ PP(pp_subst)
nope:
ret_no:
SPAGAIN;
- PUSHs(&PL_sv_no);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ PUSHs(&PL_sv_no);
LEAVE_SCOPE(oldsave);
RETURN;
}
diff --git a/regexp.h b/regexp.h
index 502259f..758bdbe 100644
--- a/regexp.h
+++ b/regexp.h
@@ -263,11 +263,13 @@ and check for NULL.
#define SINGLE_PAT_MOD 's'
#define IGNORE_PAT_MOD 'i'
#define XTENDED_PAT_MOD 'x'
+#define NONDESTRUCT_PAT_MOD 'r'
#define ONCE_PAT_MODS "o"
#define KEEPCOPY_PAT_MODS "p"
#define EXEC_PAT_MODS "e"
#define LOOP_PAT_MODS "gc"
+#define NONDESTRUCT_PAT_MODS "r"
#define STD_PAT_MODS "msix"
@@ -276,7 +278,7 @@ and check for NULL.
#define EXT_PAT_MODS ONCE_PAT_MODS KEEPCOPY_PAT_MODS
#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS
#define M_PAT_MODS QR_PAT_MODS LOOP_PAT_MODS
-#define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS
+#define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS NONDESTRUCT_PAT_MODS
/*
* NOTE: if you modify any RXf flags you should run regen.pl or regcomp.pl
diff --git a/t/re/subst.t b/t/re/subst.t
index 82c4a6f..73c7ac0 100644
--- a/t/re/subst.t
+++ b/t/re/subst.t
@@ -7,7 +7,86 @@ BEGIN {
}
require './test.pl';
-plan( tests => 149 );
+plan( tests => 167 );
+
+# Stolen from re/ReTest.pl. Can't just use the file since it doesn't support
+# like() and it conflicts with test.pl
+sub must_warn {
+ my ($code, $pattern, $name) = @_;
+ my $w;
+ local $SIG {__WARN__} = sub {$w .= join "" => @_};
+ use warnings 'all';
+ ref $code ? &$code : eval $code;
+ my $r = $w && $w =~ /$pattern/;
+ $w //= "UNDEF";
+ ok( $r, $name // "Got warning /$pattern/", $r ? undef :
+ "# expected: /$pattern/\n" .
+ "# result: $w" );
+}
+
+$_ = 'david';
+$a = s/david/rules/r;
+ok( $_ eq 'david' && $a eq 'rules', 'non-destructive substitute' );
+
+$a = "david" =~ s/david/rules/r;
+ok( $a eq 'rules', 's///r with constant' );
+
+$a = "david" =~ s/david/"is"."great"/er;
+ok( $a eq 'isgreat', 's///er' );
+
+$a = "daviddavid" =~ s/david/cool/gr;
+ok( $a eq 'coolcool', 's///gr' );
+
+$a = 'david';
+$b = $a =~ s/david/sucks/r =~ s/sucks/rules/r;
+ok( $a eq 'david' && $b eq 'rules', 'chained s///r' );
+
+$a = 'david';
+$b = $a =~ s/xxx/sucks/r;
+ok( $a eq 'david' && $b eq 'david', 'non matching s///r' );
+
+$a = 'david';
+for (0..2) {
+ ok( 'david' =~ s/$a/rules/ro eq 'rules', 's///ro '.$_ );
+}
+
+$a = 'david';
+eval '$b = $a !~ s/david/is great/r';
+like( $@, qr{Using !~ with s///r doesn't make sense}, 's///r !~ operator gives error' );
+
+{
+ no warnings 'uninitialized';
+ $a = undef;
+ $b = $a =~ s/left/right/r;
+ ok ( !defined $a && !defined $b, 's///r with undef input' );
+
+ use warnings;
+ must_warn sub { $b = $a =~ s/left/right/r }, '^Use of uninitialized value', 's///r Uninitialized warning';
+
+ $a = 'david';
+ must_warn 's/david/sucks/r; 1', '^Useless use of Non-destructive substitution', 's///r void context warning';
+}
+
+$a = '';
+$b = $a =~ s/david/rules/r;
+ok( $a eq '' && $b eq '', 's///r on empty string' );
+
+$_ = 'david';
+@b = s/david/rules/r;
+ok( $_ eq 'david' && $b[0] eq 'rules', 's///r in list context' );
+
+# Magic value and s///r
+require Tie::Scalar;
+tie $m, 'Tie::StdScalar'; # makes $a magical
+$m = "david";
+$b = $m =~ s/david/rules/r;
+ok( $m eq 'david' && $b eq 'rules', 's///r with magic input' );
+
+$m = $b =~ s/rules/david/r;
+ok( defined tied($m), 's///r magic isn\'t lost' );
+
+$b = $m =~ s/xxx/yyy/r;
+ok( ! defined tied($b), 's///r magic isn\'t contagious' );
$x = 'foo';
$_ = "x";
diff --git a/toke.c b/toke.c
index 6cb33c1..f142ada 100644
--- a/toke.c
+++ b/toke.c
@@ -11826,10 +11826,11 @@ static U32
S_pmflag(U32 pmfl, const char ch) {
switch (ch) {
CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
- case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
- case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
- case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
- case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
+ case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
+ case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
+ case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
+ case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
+ case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
}
return pmfl;
}
--
2.7.4