From a74fb2cdc8f2121774cc6d2b5e9ddd01a96db467 Mon Sep 17 00:00:00 2001 From: Father Chrysostomos Date: Fri, 25 Nov 2011 16:22:01 -0800 Subject: [PATCH] =?utf8?q?Don=E2=80=99t=20coerce=20$x=20immediately=20in?= =?utf8?q?=20foo(substr=20$x...)?= MIME-Version: 1.0 Content-Type: text/plain; charset=utf8 Content-Transfer-Encoding: 8bit This program: #!perl -l sub myprint { print @_ } print substr *foo, 1; myprint substr *foo, 1; produces: main::foo Can't coerce GLOB to string in substr at - line 4. Ouch! I would expect \substr simply to give me a scalar that peeks into the original string, but without modifying the original until the return value of \substr is actually assigned to. But it turns out that it coerces the original into a string immedi- ately, unless it’s GMAGICAL. I find the exception for magical varia- ble rather befuddling. I can only imagine it was for efficency (since the stringified form will be overwritten when magic_setsubstr calls SvGETMAGIC), but that doesn’t make sense as the original variable can itself be modified between the return of the special lvalue and the assignment to that lvalue. Since magic_setsubstr itself coerces the variable into a string upon assignment to the lvalue, we can just remove the coercion code from pp_substr. But that causes double uninitialized warnings in cases like substr($undef, 0,0) = "lrep". That happens because pp_substr is still stringifying the variable (but without modifying it). It has to do that, as it looks at the length of the original string and accordingly adjusts the offsets stored in the lvalue if they are negative or if they extend beyond the end of the string. So this commit takes the simple route of avoiding the warning in pp_substr by only stringifying a variable that is SvOK if called in lvalue context. Hence, assignment to substr($tied...) will continue to call FETCH twice, but that is not a new bug. The ideal solution would be for the offsets to be translated in mg.c, rather than in pp_substr. But that would be a more involved change (including most of this commit, which is therefore not wasted) with potential backward-compatibility issue with negative numbers. A side effect it that the ‘Attempt to use reference as lvalue in substr’ warning now occurs during the assignment to the substr lvalue, rather that substr itself. This means it occurs even for tied varia- bles, so things are now more consistent. The example at the beginning could still croak if the glob were replaced with a null string, so this commit only partially allevi- ates the pain. --- mg.c | 11 ++++++++--- pp.c | 23 +++++++---------------- t/lib/warnings/9uninit | 4 ++-- t/op/substr.t | 11 ++++++++++- 4 files changed, 27 insertions(+), 22 deletions(-) diff --git a/mg.c b/mg.c index 5c2628b..fa4b446 100644 --- a/mg.c +++ b/mg.c @@ -2192,10 +2192,15 @@ Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg) PERL_ARGS_ASSERT_MAGIC_SETSUBSTR; PERL_UNUSED_ARG(mg); + SvGETMAGIC(lsv); + if (SvROK(lsv)) + Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), + "Attempt to use reference as lvalue in substr" + ); if (DO_UTF8(sv)) { sv_utf8_upgrade(lsv); lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN); - sv_insert(lsv, lvoff, lvlen, tmps, len); + sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0); LvTARGLEN(sv) = sv_len_utf8(sv); SvUTF8_on(lsv); } @@ -2204,11 +2209,11 @@ Perl_magic_setsubstr(pTHX_ SV *sv, MAGIC *mg) lvoff = sv_pos_u2b_flags(lsv, lvoff, &lvlen, SV_CONST_RETURN); LvTARGLEN(sv) = len; utf8 = (char*)bytes_to_utf8((U8*)tmps, &len); - sv_insert(lsv, lvoff, lvlen, utf8, len); + sv_insert_flags(lsv, lvoff, lvlen, utf8, len, 0); Safefree(utf8); } else { - sv_insert(lsv, lvoff, lvlen, tmps, len); + sv_insert_flags(lsv, lvoff, lvlen, tmps, len, 0); LvTARGLEN(sv) = len; } diff --git a/pp.c b/pp.c index 16794ad..329ed17 100644 --- a/pp.c +++ b/pp.c @@ -3005,7 +3005,13 @@ PP(pp_substr) else if (DO_UTF8(sv)) repl_need_utf8_upgrade = TRUE; } - tmps = SvPV_const(sv, curlen); + if (lvalue && !repl) { + tmps = NULL; /* unused */ + SvGETMAGIC(sv); + if (SvOK(sv)) (void)SvPV_nomg_const(sv, curlen); + else curlen = 0; + } + else tmps = SvPV_const(sv, curlen); if (DO_UTF8(sv)) { utf8_curlen = sv_len_utf8(sv); if (utf8_curlen == curlen) @@ -3071,21 +3077,6 @@ PP(pp_substr) if (lvalue && !repl) { SV * ret; - - if (!SvGMAGICAL(sv)) { - if (SvROK(sv)) { - SvPV_force_nolen(sv); - Perl_ck_warner(aTHX_ packWARN(WARN_SUBSTR), - "Attempt to use reference as lvalue in substr"); - } - if (isGV_with_GP(sv)) - SvPV_force_nolen(sv); - else if (SvOK(sv)) /* is it defined ? */ - (void)SvPOK_only_UTF8(sv); - else - sv_setpvs(sv, ""); /* avoid lexical reincarnation */ - } - ret = sv_2mortal(newSV_type(SVt_PVLV)); /* Not TARG RT#67838 */ sv_magic(ret, NULL, PERL_MAGIC_substr, NULL, 0); LvTYPE(ret) = 'x'; diff --git a/t/lib/warnings/9uninit b/t/lib/warnings/9uninit index 12c1f84..b76c2ef 100644 --- a/t/lib/warnings/9uninit +++ b/t/lib/warnings/9uninit @@ -1024,12 +1024,12 @@ Use of uninitialized value $m2 in substr at - line 7. Use of uninitialized value $g1 in substr at - line 7. Use of uninitialized value $m1 in substr at - line 7. Use of uninitialized value $g1 in substr at - line 8. -Use of uninitialized value $m1 in substr at - line 8. Use of uninitialized value in scalar assignment at - line 8. +Use of uninitialized value $m1 in scalar assignment at - line 8. Use of uninitialized value $m2 in substr at - line 9. Use of uninitialized value $g1 in substr at - line 9. -Use of uninitialized value $m1 in substr at - line 9. Use of uninitialized value in scalar assignment at - line 9. +Use of uninitialized value $m1 in scalar assignment at - line 9. Use of uninitialized value $m2 in vec at - line 11. Use of uninitialized value $g1 in vec at - line 11. Use of uninitialized value $m1 in vec at - line 11. diff --git a/t/op/substr.t b/t/op/substr.t index e9ea126..8005a28 100644 --- a/t/op/substr.t +++ b/t/op/substr.t @@ -24,7 +24,7 @@ $SIG{__WARN__} = sub { BEGIN { require './test.pl'; } -plan(358); +plan(360); run_tests() unless caller; @@ -762,3 +762,12 @@ ok eval { substr $t, 0, 9, *ザ::ワルド; is($t, "*ザ::ワルド!", "substr works on a UTF-8 glob + stash"); } + +{ + my $x = *foo; + my $y = \substr *foo, 0, 0; + is ref \$x, 'GLOB', '\substr does not coerce its glob arg just yet'; + $x = \"foo"; + $y = \substr *foo, 0, 0; + is ref \$x, 'REF', '\substr does not coerce its ref arg just yet'; +} -- 2.7.4