This changes s/// so that it doesn't act destructively on its target.
Instead it returns the result of the substitution (or the original string if
there was no match).
In addition this patch:
* Adds a new warning when s///r happens in void context.
* Adds a error when you try to use s///r with !~
* Makes it so constant strings can be bound to s///r with =~
* Adds documentation.
* Adds some tests.
* Updates various debug code so it knows about the /r flag.
* Adds some new 'r' words to B::Deparse.
OPpREVERSE_INPLACE
SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR SVf_FAKE SVs_RMG SVs_SMG
CVf_METHOD CVf_LVALUE
- PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE
+ PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE PMf_NONDESTRUCT
PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED),
($] < 5.009 ? 'PMf_SKIPWHITE' : 'RXf_SKIPWHITE'),
($] < 5.011 ? 'CVf_LOCKED' : ());
map($substwords{join "", sort split //, $_} = $_, 'ego', 'egoism', 'em',
'es', 'ex', 'exes', 'gee', 'go', 'goes', 'ie', 'ism', 'iso', 'me',
'meese', 'meso', 'mig', 'mix', 'os', 'ox', 'oxime', 'see', 'seem',
- 'seg', 'sex', 'sig', 'six', 'smog', 'sog', 'some', 'xi');
+ 'seg', 'sex', 'sig', 'six', 'smog', 'sog', 'some', 'xi',
+ 'sir', 'rise', 'smore', 'more', 'seer', 'rome', 'gore', 'grim', 'grime',
+ 'or', 'rose', 'rosie');
sub pp_subst {
my $self = shift;
($re) = $self->regcomp($kid, 1, $extended);
}
$flags .= "e" if $op->pmflags & PMf_EVAL;
+ $flags .= "r" if $op->pmflags & PMf_NONDESTRUCT;
$flags .= "g" if $op->pmflags & PMf_GLOBAL;
$flags .= "i" if $op->pmflags & PMf_FOLD;
$flags .= "m" if $op->pmflags & PMf_MULTILINE;
sv_catpv(desc, ",RETAINT");
if (pmflags & PMf_EVAL)
sv_catpv(desc, ",EVAL");
+ if (pmflags & PMf_NONDESTRUCT)
+ sv_catpv(desc, ",NONDESTRUCT");
return desc;
}
OPpSORT_REVERSE OPpREVERSE_INPLACE OPpTARGET_MY
OPpTRANS_COMPLEMENT OPpTRANS_DELETE OPpTRANS_SQUASH
PMf_CONTINUE PMf_EVAL PMf_EXTENDED PMf_FOLD PMf_GLOBAL
- PMf_KEEP PMf_MULTILINE PMf_ONCE PMf_SINGLELINE
+ PMf_KEEP PMf_NONDESTRUCT
+ PMf_MULTILINE PMf_ONCE PMf_SINGLELINE
POSTFIX SVf_FAKE SVf_IOK SVf_NOK SVf_POK SVf_ROK
SVpad_OUR SVs_RMG SVs_SMG SWAP_CHILDREN OPpPAD_STATE
/, $] > 5.009 ? ('RXf_SKIPWHITE') : ('PMf_SKIPWHITE'),
useless = "negative pattern binding (!~)";
break;
+ case OP_SUBST:
+ if (cPMOPo->op_pmflags & PMf_NONDESTRUCT)
+ useless = "Non-destructive substitution (s///r)";
+ break;
+
case OP_RV2GV:
case OP_RV2SV:
case OP_RV2AV:
no_bareword_allowed(right);
}
+ /* !~ doesn't make sense with s///r, so error on it for now */
+ if (rtype == OP_SUBST && (cPMOPx(right)->op_pmflags & PMf_NONDESTRUCT) &&
+ type == OP_NOT)
+ yyerror("Using !~ with s///r doesn't make sense");
+
ismatchop = rtype == OP_MATCH ||
rtype == OP_SUBST ||
rtype == OP_TRANS;
right->op_flags |= OPf_STACKED;
if (rtype != OP_MATCH &&
! (rtype == OP_TRANS &&
- right->op_private & OPpTRANS_IDENTICAL))
+ right->op_private & OPpTRANS_IDENTICAL) &&
+ ! (rtype == OP_SUBST &&
+ (cPMOPx(right)->op_pmflags & PMf_NONDESTRUCT)))
newleft = mod(left, rtype);
else
newleft = left;
#define PMf_GLOBAL 0x00002000 /* pattern had a g modifier */
#define PMf_CONTINUE 0x00004000 /* don't reset pos() if //g fails */
#define PMf_EVAL 0x00008000 /* evaluating replacement as expr */
+#define PMf_NONDESTRUCT 0x00010000 /* Return substituted string instead
+ of modifying it. */
/* The following flags have exact equivalents in regcomp.h with the prefix RXf_
* which are stored in the regexp->extflags member. If you change them here,
pattern, substitution, or transliteration. The left argument is what is
supposed to be searched, substituted, or transliterated instead of the default
$_. When used in scalar context, the return value generally indicates the
-success of the operation. Behavior in list context depends on the particular
-operator. See L</"Regexp Quote-Like Operators"> for details and
-L<perlretut> for examples using these operators.
+success of the operation. Not always though: the non-destructive substitution
+option (C</r>) causes the return value to be the result of the substition, for
+example. Behavior in list context depends on the particular operator. See
+L</"Regexp Quote-Like Operators"> for details and L<perlretut> for examples
+using these operators.
If the right argument is an expression rather than a search pattern,
substitution, or transliteration, it is interpreted as a search pattern at run
Binary "!~" is just like "=~" except the return value is negated in
the logical sense.
+Binary "!~" is not permitted to bind to a non-destructive substitute (s///r).
+
=head2 Multiplicative Operators
X<operator, multiplicative>
be removed in some distant future version of Perl, perhaps somewhere
around the year 2168.
-=item s/PATTERN/REPLACEMENT/msixpogce
+=item s/PATTERN/REPLACEMENT/msixpogcer
X<substitute> X<substitution> X<replace> X<regexp, replace>
-X<regexp, substitute> X</m> X</s> X</i> X</x> X</p> X</o> X</g> X</c> X</e>
+X<regexp, substitute> X</m> X</s> X</i> X</x> X</p> X</o> X</g> X</c> X</e> X</r>
Searches a string for a pattern, and if found, replaces that pattern
with the replacement text and returns the number of substitutions
made. Otherwise it returns false (specifically, the empty string).
+If the C</r> (non-destructive) option is used then it will perform the
+substitution on a copy of the string and return the copy whether or not a
+substitution occurred. The original string will always remain unchanged in
+this case. The copy will always be a plain string, even If the input is an
+object or a tied variable.
+
If no string is specified via the C<=~> or C<!~> operator, the C<$_>
variable is searched and modified. (The string specified with C<=~> must
be scalar variable, an array element, a hash element, or an assignment
specific options:
e Evaluate the right side as an expression.
- ee Evaluate the right side as a string then eval the result
+ ee Evaluate the right side as a string then eval the result.
+ r Return substitution and leave the original string untouched.
Any non-whitespace delimiter may replace the slashes. Add space after
the C<s> when using a character allowed in identifiers. If single quotes
s/Login: $foo/Login: $bar/; # run-time pattern
($foo = $bar) =~ s/this/that/; # copy first, then change
+ ($foo = "$bar") =~ s/this/that/; # convert to string, copy, then change
+ $foo = $bar =~ s/this/that/r; # Same as above using /r
+ $foo = $bar =~ s/this/that/r
+ =~ s/that/the other/r; # Chained substitutes using /r
+ @foo = map { s/this/that/r } @bar # /r is very useful in maps
$count = ($paragraph =~ s/Mister\b/Mr./g); # get change-count
s/%(.)/$percent{$1} || $&/ge; # expr now, so /e
s/^=(\w+)/pod($1)/ge; # use function call
+ $_ = 'abc123xyz';
+ $a = s/abc/def/r; # $a is 'def123xyz' and
+ # $_ remains 'abc123xyz'.
+
# expand variables in $_, but dynamics only, using
# symbolic dereferencing
s/\$(\w+)/${$1}/g;
$x = "I batted 4 for 4";
$x =~ s/4/four/g; # $x contains "I batted four for four"
+The non-destructive modifier C<s///r> causes the result of the substitution
+to be returned instead of modifying C<$_> (or whatever variable the
+substitute was bound to with C<=~>):
+
+ $x = "I like dogs.";
+ $y = $x =~ s/dogs/cats/r;
+ print "$x $y\n"; # prints "I like dogs. I like cats."
+
+ $x = "Cats are great.";
+ print $x =~ s/Cats/Dogs/r =~ s/Dogs/Frogs/r =~ s/Frogs/Hedgehogs/r, "\n";
+ # prints "Hedgehogs are great."
+
+ @foo = map { s/[a-z]/X/r } qw(a b c 1 2 3);
+ # @foo is now qw(X X X 1 2 3)
+
The evaluation modifier C<s///e> wraps an C<eval{...}> around the
replacement string and the evaluated result is substituted for the
matched substring. Some examples:
C<s/pattern/replacement/msixpogce> substitutes matches of
'pattern' with 'replacement'. Modifiers as for C<m//>,
-with one addition:
+with two additions:
e Evaluate 'replacement' as an expression
+ r Return substitution and leave the original string untouched.
'e' may be specified multiple times. 'replacement' is interpreted
as a double quoted string unless a single-quote (C<'>) is the delimiter.
compile the regexp only once. As with C<simple_grep>, both the
C<print> and the C<s/$regexp/$replacement/go> use C<$_> implicitly.
+If you don't want C<s///> to change your original variable you can use
+the non-destructive substitute modifier, C<s///r>. This changes the
+behavior so that C<s///r> returns the final substituted string:
+
+ $x = "I like dogs.";
+ $y = $x =~ s/dogs/cats/r;
+ print "$x $y\n";
+
+That example will print "I like dogs. I like cats". Notice the original
+C<$x> variable has not been affected by the substitute. The overall
+result of the substitution is instead stored in C<$y>. If the
+substitution doesn't affect anything then the original string is
+returned:
+
+ $x = "I like dogs.";
+ $y = $x =~ s/elephants/cougars/r;
+ print "$x $y\n"; # prints "I like dogs. I like dogs."
+
+One other interesting thing that the C<s///r> flag allows is chaining
+substitutions:
+
+ $x = "Cats are great.";
+ print $x =~ s/Cats/Dogs/r =~ s/Dogs/Frogs/r =~ s/Frogs/Hedgehogs/r, "\n";
+ # prints "Hedgehogs are great."
+
A modifier available specifically to search and replace is the
C<s///e> evaluation modifier. C<s///e> wraps an C<eval{...}> around
the replacement string and the evaluated result is substituted for the
SvPV_set(dstr, NULL);
TAINT_IF(cx->sb_rxtainted & 1);
- mPUSHi(saviters - 1);
+ if (pm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(targ);
+ else
+ mPUSHi(saviters - 1);
(void)SvPOK_only_UTF8(targ);
TAINT_IF(cx->sb_rxtainted);
EXTEND(SP,1);
}
+ /* In non-destructive replacement mode, duplicate target scalar so it
+ * remains unchanged. */
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ TARG = newSVsv(TARG);
+
#ifdef PERL_OLD_COPY_ON_WRITE
/* Awooga. Awooga. "bool" types that are actually char are dangerous,
because they make integers such as 256 "false". */
if (!matched)
{
SPAGAIN;
- PUSHs(&PL_sv_no);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ PUSHs(&PL_sv_no);
LEAVE_SCOPE(oldsave);
RETURN;
}
}
TAINT_IF(rxtainted & 1);
SPAGAIN;
- PUSHs(&PL_sv_yes);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ PUSHs(&PL_sv_yes);
}
else {
do {
}
TAINT_IF(rxtainted & 1);
SPAGAIN;
- mPUSHi((I32)iters);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ mPUSHi((I32)iters);
}
(void)SvPOK_only_UTF8(TARG);
TAINT_IF(rxtainted);
TAINT_IF(rxtainted & 1);
SPAGAIN;
- mPUSHi((I32)iters);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ mPUSHi((I32)iters);
(void)SvPOK_only(TARG);
if (doutf8)
nope:
ret_no:
SPAGAIN;
- PUSHs(&PL_sv_no);
+ if (rpm->op_pmflags & PMf_NONDESTRUCT)
+ PUSHs(TARG);
+ else
+ PUSHs(&PL_sv_no);
LEAVE_SCOPE(oldsave);
RETURN;
}
#define SINGLE_PAT_MOD 's'
#define IGNORE_PAT_MOD 'i'
#define XTENDED_PAT_MOD 'x'
+#define NONDESTRUCT_PAT_MOD 'r'
#define ONCE_PAT_MODS "o"
#define KEEPCOPY_PAT_MODS "p"
#define EXEC_PAT_MODS "e"
#define LOOP_PAT_MODS "gc"
+#define NONDESTRUCT_PAT_MODS "r"
#define STD_PAT_MODS "msix"
#define EXT_PAT_MODS ONCE_PAT_MODS KEEPCOPY_PAT_MODS
#define QR_PAT_MODS STD_PAT_MODS EXT_PAT_MODS
#define M_PAT_MODS QR_PAT_MODS LOOP_PAT_MODS
-#define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS
+#define S_PAT_MODS M_PAT_MODS EXEC_PAT_MODS NONDESTRUCT_PAT_MODS
/*
* NOTE: if you modify any RXf flags you should run regen.pl or regcomp.pl
}
require './test.pl';
-plan( tests => 149 );
+plan( tests => 167 );
+
+# Stolen from re/ReTest.pl. Can't just use the file since it doesn't support
+# like() and it conflicts with test.pl
+sub must_warn {
+ my ($code, $pattern, $name) = @_;
+ my $w;
+ local $SIG {__WARN__} = sub {$w .= join "" => @_};
+ use warnings 'all';
+ ref $code ? &$code : eval $code;
+ my $r = $w && $w =~ /$pattern/;
+ $w //= "UNDEF";
+ ok( $r, $name // "Got warning /$pattern/", $r ? undef :
+ "# expected: /$pattern/\n" .
+ "# result: $w" );
+}
+
+$_ = 'david';
+$a = s/david/rules/r;
+ok( $_ eq 'david' && $a eq 'rules', 'non-destructive substitute' );
+
+$a = "david" =~ s/david/rules/r;
+ok( $a eq 'rules', 's///r with constant' );
+
+$a = "david" =~ s/david/"is"."great"/er;
+ok( $a eq 'isgreat', 's///er' );
+
+$a = "daviddavid" =~ s/david/cool/gr;
+ok( $a eq 'coolcool', 's///gr' );
+
+$a = 'david';
+$b = $a =~ s/david/sucks/r =~ s/sucks/rules/r;
+ok( $a eq 'david' && $b eq 'rules', 'chained s///r' );
+
+$a = 'david';
+$b = $a =~ s/xxx/sucks/r;
+ok( $a eq 'david' && $b eq 'david', 'non matching s///r' );
+
+$a = 'david';
+for (0..2) {
+ ok( 'david' =~ s/$a/rules/ro eq 'rules', 's///ro '.$_ );
+}
+
+$a = 'david';
+eval '$b = $a !~ s/david/is great/r';
+like( $@, qr{Using !~ with s///r doesn't make sense}, 's///r !~ operator gives error' );
+
+{
+ no warnings 'uninitialized';
+ $a = undef;
+ $b = $a =~ s/left/right/r;
+ ok ( !defined $a && !defined $b, 's///r with undef input' );
+
+ use warnings;
+ must_warn sub { $b = $a =~ s/left/right/r }, '^Use of uninitialized value', 's///r Uninitialized warning';
+
+ $a = 'david';
+ must_warn 's/david/sucks/r; 1', '^Useless use of Non-destructive substitution', 's///r void context warning';
+}
+
+$a = '';
+$b = $a =~ s/david/rules/r;
+ok( $a eq '' && $b eq '', 's///r on empty string' );
+
+$_ = 'david';
+@b = s/david/rules/r;
+ok( $_ eq 'david' && $b[0] eq 'rules', 's///r in list context' );
+
+# Magic value and s///r
+require Tie::Scalar;
+tie $m, 'Tie::StdScalar'; # makes $a magical
+$m = "david";
+$b = $m =~ s/david/rules/r;
+ok( $m eq 'david' && $b eq 'rules', 's///r with magic input' );
+
+$m = $b =~ s/rules/david/r;
+ok( defined tied($m), 's///r magic isn\'t lost' );
+
+$b = $m =~ s/xxx/yyy/r;
+ok( ! defined tied($b), 's///r magic isn\'t contagious' );
$x = 'foo';
$_ = "x";
S_pmflag(U32 pmfl, const char ch) {
switch (ch) {
CASE_STD_PMMOD_FLAGS_PARSE_SET(&pmfl);
- case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
- case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
- case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
- case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
+ case GLOBAL_PAT_MOD: pmfl |= PMf_GLOBAL; break;
+ case CONTINUE_PAT_MOD: pmfl |= PMf_CONTINUE; break;
+ case ONCE_PAT_MOD: pmfl |= PMf_KEEP; break;
+ case KEEPCOPY_PAT_MOD: pmfl |= PMf_KEEPCOPY; break;
+ case NONDESTRUCT_PAT_MOD: pmfl |= PMf_NONDESTRUCT; break;
}
return pmfl;
}