From 1749ea0d81e275f5160a584ab9e554a4acc871e8 Mon Sep 17 00:00:00 2001 From: SADAHIRO Tomoyuki Date: Tue, 25 Jul 2006 09:15:50 +0900 Subject: [PATCH] interpolation of @- (and @+) in patterns ([perl #27940] comes back) Message-Id: <20060725001517.3C5D.BQW10602@nifty.com> p4raw-id: //depot/perl@28620 --- pod/perlop.pod | 10 +++++----- t/op/pat.t | 30 ++++++++++++++++++++++++++++-- t/op/subst.t | 12 +++++++++++- t/op/tr.t | 12 +++++++++++- toke.c | 11 ++++++++--- 5 files changed, 63 insertions(+), 12 deletions(-) diff --git a/pod/perlop.pod b/pod/perlop.pod index 8e57c39..f69b8bb 100644 --- a/pod/perlop.pod +++ b/pod/perlop.pod @@ -1983,7 +1983,7 @@ C<(>, C<)>, C<->, C, C, C, C, C, C, C, C, C, digits (C<0> to C<9>), C<[>, C<{>, C<]>, C<}>, whitespaces (SPACE, TAB, LF, CR, FF, and VT in addition), and C<#>. As C<\c> is skipped at this step, C<@> of C<\c@> in RE is possibly -treated as an array symbol (for example one of C<@foo> or C<@->), +treated as an array symbol (for example C<@foo>), even though the same text in C gives interpolation of C<\c@>. Note that C<\N{name}> is interpolated at this step. @@ -1992,10 +1992,10 @@ a C<#>-comment in a C-regular expression, no processing is performed whatsoever. This is the first step at which the presence of the C modifier is relevant. -Interpolation has several quirks: C<$|>, C<$(>, and C<$)> are not -interpolated, and constructs C<$var[SOMETHING]> are voted (by several -different estimators) to be either an array element or C<$var> -followed by an RE alternative. This is where the notation +Interpolation in patterns has several quirks: C<$|>, C<$(>, C<$)>, C<@+> +and C<@-> are not interpolated, and constructs C<$var[SOMETHING]> are +voted (by several different estimators) to be either an array element +or C<$var> followed by an RE alternative. This is where the notation C<${arr[$bar]}> comes handy: C is interpreted as array element C<-9>, not as a regular expression from the variable C<$arr> followed by a digit, which would be the interpretation of diff --git a/t/op/pat.t b/t/op/pat.t index 0de38e1..f0f1b2b 100755 --- a/t/op/pat.t +++ b/t/op/pat.t @@ -7,7 +7,7 @@ $| = 1; # please update note at bottom of file when you change this -print "1..1212\n"; +print "1..1231\n"; BEGIN { chdir 't' if -d 't'; @@ -3546,9 +3546,35 @@ if ($ordA == 193) { ok(defined($res) && length($res)==$size,"\$1 is correct size"); } +{ # related to [perl #27940] + ok("\0-A" =~ /\c@-A/, '@- should not be interpolated in a pattern'); + ok("\0\0A" =~ /\c@+A/, '@+ should not be interpolated in a pattern'); + ok("X\@-A" =~ /X@-A/, '@- should not be interpolated in a pattern'); + ok("X\@\@A" =~ /X@+A/, '@+ should not be interpolated in a pattern'); + + ok("X\0A" =~ /X\c@?A/, '\c@?'); + ok("X\0A" =~ /X\c@*A/, '\c@*'); + ok("X\0A" =~ /X\c@(A)/, '\c@('); + ok("X\0A" =~ /X(\c@)A/, '\c@)'); + ok("X\0A" =~ /X\c@|ZA/, '\c@|'); + + ok("X\@A" =~ /X@?A/, '@?'); + ok("X\@A" =~ /X@*A/, '@*'); + ok("X\@A" =~ /X@(A)/, '@('); + ok("X\@A" =~ /X(@)A/, '@)'); + ok("X\@A" =~ /X@|ZA/, '@|'); + + local $" = ','; # non-whitespace and non-RE-specific + ok('abc' =~ /(.)(.)(.)/, 'the last successful match is bogus'); + ok("A@+B" =~ /A@{+}B/, 'interpolation of @+ in /@{+}/'); + ok("A@-B" =~ /A@{-}B/, 'interpolation of @- in /@{-}/'); + ok("A@+B" =~ /A@{+}B/x, 'interpolation of @+ in /@{+}/x'); + ok("A@-B" =~ /A@{-}B/x, 'interpolation of @- in /@{-}/x'); +} + # Keep the following test last -- it may crash perl ok(("a" x (2**15 - 10)) =~ /^()(a|bb)*$/, "Recursive stack cracker: #24274") or print "# Unexpected outcome: should pass or crash perl\n"; -# last test 1211 +# last test 1231 diff --git a/t/op/subst.t b/t/op/subst.t index bd481e4..0b02ff9 100755 --- a/t/op/subst.t +++ b/t/op/subst.t @@ -7,7 +7,7 @@ BEGIN { } require './test.pl'; -plan( tests => 131 ); +plan( tests => 133 ); $x = 'foo'; $_ = "x"; @@ -553,3 +553,13 @@ is($name, "cis", q[#22351 bug with 'e' substitution modifier]); } +{ # [perl #27940] perlbug: [\x00-\x1f] works, [\c@-\c_] does not + my $c; + + ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ s/[\c@-\c_]//g; + is($c, "\x20\x30\x40\x50\x60", "s/[\\c\@-\\c_]//g"); + + ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ s/[\x00-\x1f]//g; + is($c, "\x20\x30\x40\x50\x60", "s/[\\x00-\\x1f]//g"); +} + diff --git a/t/op/tr.t b/t/op/tr.t index 796f96a..c38b208 100755 --- a/t/op/tr.t +++ b/t/op/tr.t @@ -6,7 +6,7 @@ BEGIN { require './test.pl'; } -plan tests => 116; +plan tests => 118; my $Is_EBCDIC = (ord('i') == 0x89 & ord('J') == 0xd1); @@ -455,3 +455,13 @@ is($s, "AxBC", "utf8, DELETE"); } # non-characters end +{ # related to [perl #27940] + my $c; + + ($c = "\x20\c@\x30\cA\x40\cZ\x50\c_\x60") =~ tr/\c@-\c_//d; + is($c, "\x20\x30\x40\x50\x60", "tr/\\c\@-\\c_//d"); + + ($c = "\x20\x00\x30\x01\x40\x1A\x50\x1F\x60") =~ tr/\x00-\x1f//d; + is($c, "\x20\x30\x40\x50\x60", "tr/\\x00-\\x1f//d"); +} + diff --git a/toke.c b/toke.c index 7bed30b..b3688bb 100644 --- a/toke.c +++ b/toke.c @@ -1994,9 +1994,14 @@ S_scan_const(pTHX_ char *start) /* check for embedded arrays (@foo, @::foo, @'foo, @{foo}, @$foo, @+, @-) */ - else if (*s == '@' && s[1] - && (isALNUM_lazy_if(s+1,UTF) || strchr(":'{$+-", s[1]))) - break; + else if (*s == '@' && s[1]) { + if (isALNUM_lazy_if(s+1,UTF)) + break; + if (strchr(":'{$", s[1])) + break; + if (!PL_lex_inpat && (s[1] == '+' || s[1] == '-')) + break; /* in regexp, neither @+ nor @- are interpolated */ + } /* check for embedded scalars. only stop if we're sure it's a variable. -- 2.7.4