Reinstate UTF8f
authorFather Chrysostomos <sprout@cpan.org>
Wed, 26 Jun 2013 03:31:54 +0000 (20:31 -0700)
committerFather Chrysostomos <sprout@cpan.org>
Fri, 28 Jun 2013 07:07:36 +0000 (00:07 -0700)
This format string allows char*s to be interpolated with the
utf8ness and length specified as well, avoiding the need to create
extra SVs:

Perl_croak(aTHX_ "Couldn't twiggle the twoggle in \"%"UTF8f"\"",
                  UTF8fARG(is_utf8, len, s));

This is the second attempt.

I screwed up in commits 1c8b67b38f0a5 and b3e714770ee1 because
I didn’t really understand how varargs functions receive their
arguments.

They are like structs, in that different members can be different
sizes.  So therefore both ends--the caller and the called--*must* get
the casts right, or the data will be corrupted.

The main mistake I made was to use %u in the format for the first
argument and then retrieve it as UV (a simple typo, I meant unsigned
int or U32--I don’t remember).

To be on the safe side, I added a UTF8fARG macro (after SVfARG), which
(unlike SVfARG) takes three arguments and casts them explicitly, mak-
ing it much harder to get this wrong at call sites.

gv.c
op.c
perl.h
pp.c
pp_ctl.c
sv.c
t/porting/diag.t
toke.c

diff --git a/gv.c b/gv.c
index 9658362..8449047 100644 (file)
--- a/gv.c
+++ b/gv.c
@@ -1026,10 +1026,9 @@ Perl_gv_fetchmethod_pvn_flags(pTHX_ HV *stash, const char *name, const STRLEN le
                        return gv;
                }
                Perl_croak(aTHX_
-                          "Can't locate object method \"%"SVf
+                          "Can't locate object method \"%"UTF8f
                           "\" via package \"%"HEKf"\"",
-                                   SVfARG(newSVpvn_flags(name, nend - name,
-                                           SVs_TEMP | is_utf8)),
+                                   UTF8fARG(is_utf8, nend - name, name),
                                     HEKfARG(HvNAME_HEK(stash)));
            }
            else {
@@ -1043,10 +1042,10 @@ Perl_gv_fetchmethod_pvn_flags(pTHX_ HV *stash, const char *name, const STRLEN le
                }
 
                Perl_croak(aTHX_
-                          "Can't locate object method \"%"SVf"\" via package \"%"SVf"\""
+                          "Can't locate object method \"%"UTF8f
+                          "\" via package \"%"SVf"\""
                           " (perhaps you forgot to load \"%"SVf"\"?)",
-                          SVfARG(newSVpvn_flags(name, nend - name,
-                                SVs_TEMP | is_utf8)),
+                          UTF8fARG(is_utf8, nend - name, name),
                            SVfARG(packnamesv), SVfARG(packnamesv));
            }
        }
@@ -1138,9 +1137,10 @@ Perl_gv_autoload_pvn(pTHX_ HV *stash, const char *name, STRLEN len, U32 flags)
      && (GvCVGEN(gv) || GvSTASH(gv) != stash)
     )
        Perl_ck_warner_d(aTHX_ packWARN(WARN_DEPRECATED),
-                        "Use of inherited AUTOLOAD for non-method %"SVf"::%"SVf"() is deprecated",
+                        "Use of inherited AUTOLOAD for non-method %"SVf
+                        "::%"UTF8f"() is deprecated",
                         SVfARG(packname),
-                         SVfARG(newSVpvn_flags(name, len, SVs_TEMP | is_utf8)));
+                         UTF8fARG(is_utf8, len, name));
 
     if (CvISXSUB(cv)) {
         /* Instead of forcing the XSUB do another lookup for $AUTOLOAD
@@ -1410,7 +1410,7 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
     const char *name = nambeg;
     GV *gv = NULL;
     GV**gvp;
-    I32 len;
+    STRLEN len;
     const char *name_cursor;
     HV *stash = NULL;
     const I32 no_init = flags & (GV_NOADD_NOINIT | GV_NOINIT);
@@ -1569,18 +1569,18 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
                             (sv_type == SVt_PVAV && !GvIMPORTED_AV(*gvp)) ||
                             (sv_type == SVt_PVHV && !GvIMPORTED_HV(*gvp)) )
                    {
-                        SV* namesv = newSVpvn_flags(name, len, SVs_TEMP | is_utf8);
                        /* diag_listed_as: Variable "%s" is not imported%s */
                        Perl_ck_warner_d(
                            aTHX_ packWARN(WARN_MISC),
-                           "Variable \"%c%"SVf"\" is not imported",
+                           "Variable \"%c%"UTF8f"\" is not imported",
                            sv_type == SVt_PVAV ? '@' :
                            sv_type == SVt_PVHV ? '%' : '$',
-                           SVfARG(namesv));
+                           UTF8fARG(is_utf8, len, name));
                        if (GvCVu(*gvp))
                            Perl_ck_warner_d(
                                aTHX_ packWARN(WARN_MISC),
-                               "\t(Did you mean &%"SVf" instead?)\n", SVfARG(namesv)
+                               "\t(Did you mean &%"UTF8f" instead?)\n",
+                               UTF8fARG(is_utf8, len, name)
                            );
                        stash = NULL;
                    }
@@ -1597,15 +1597,14 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
 
     if (!stash) {
        if (add && !PL_in_clean_all) {
-           SV * const namesv = newSVpvn_flags(name, len, is_utf8);
            SV * const err = Perl_mess(aTHX_
-                "Global symbol \"%s%"SVf"\" requires explicit package name",
+                "Global symbol \"%s%"UTF8f
+                "\" requires explicit package name",
                 (sv_type == SVt_PV ? "$"
                  : sv_type == SVt_PVAV ? "@"
                  : sv_type == SVt_PVHV ? "%"
-                 : ""), SVfARG(namesv));
+                 : ""), UTF8fARG(is_utf8, len, name));
            GV *gv;
-           SvREFCNT_dec_NN(namesv);
            if (is_utf8)
                SvUTF8_on(err);
            qerror(err);
@@ -1700,8 +1699,9 @@ Perl_gv_fetchpvn_flags(pTHX_ const char *nambeg, STRLEN full_len, I32 flags,
     faking_it = SvOK(gv);
 
     if (add & GV_ADDWARN)
-       Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL), "Had to create %"SVf" unexpectedly",
-                SVfARG(newSVpvn_flags(nambeg, name_end-nambeg, SVs_TEMP | is_utf8 )));
+       Perl_ck_warner_d(aTHX_ packWARN(WARN_INTERNAL),
+               "Had to create %"UTF8f" unexpectedly",
+                UTF8fARG(is_utf8, name_end-nambeg, nambeg));
     gv_init_pvn(gv, stash, name, len, (add & GV_ADDMULTI)|is_utf8);
 
     if ( isIDFIRST_lazy_if(name, is_utf8)
@@ -2124,10 +2124,10 @@ Perl_newGVgen_flags(pTHX_ const char *pack, U32 flags)
 {
     dVAR;
     PERL_ARGS_ASSERT_NEWGVGEN_FLAGS;
+    assert(!(flags & ~SVf_UTF8));
 
-    return gv_fetchpv(Perl_form(aTHX_ "%"SVf"::_GEN_%ld",
-                                    SVfARG(newSVpvn_flags(pack, strlen(pack),
-                                            SVs_TEMP | flags)),
+    return gv_fetchpv(Perl_form(aTHX_ "%"UTF8f"::_GEN_%ld",
+                                UTF8fARG(flags, strlen(pack), pack),
                                 (long)PL_gensym++),
                       GV_ADD, SVt_PVGV);
 }
diff --git a/op.c b/op.c
index 2416951..7b11d1e 100644 (file)
--- a/op.c
+++ b/op.c
@@ -6854,14 +6854,14 @@ Perl_cv_ckproto_len_flags(pTHX_ const CV *cv, const GV *gv, const char *p,
     if (name)
        Perl_sv_catpvf(aTHX_ msg, " sub %"SVf, SVfARG(name));
     if (cvp)
-       Perl_sv_catpvf(aTHX_ msg, " (%"SVf")",
-           SVfARG(newSVpvn_flags(cvp,clen, SvUTF8(cv)|SVs_TEMP))
+       Perl_sv_catpvf(aTHX_ msg, " (%"UTF8f")", 
+           UTF8fARG(SvUTF8(cv),clen,cvp)
        );
     else
        sv_catpvs(msg, ": none");
     sv_catpvs(msg, " vs ");
     if (p)
-       Perl_sv_catpvf(aTHX_ msg, "(%"SVf")", SVfARG(newSVpvn_flags(p, len, flags | SVs_TEMP)));
+       Perl_sv_catpvf(aTHX_ msg, "(%"UTF8f")", UTF8fARG(flags & SVf_UTF8,len,p));
     else
        sv_catpvs(msg, "none");
     Perl_warner(aTHX_ packWARN(WARN_PROTOTYPE), "%"SVf, SVfARG(msg));
diff --git a/perl.h b/perl.h
index 613fd3c..0593461 100644 (file)
--- a/perl.h
+++ b/perl.h
@@ -3038,6 +3038,12 @@ typedef pthread_key_t    perl_key;
 
 #define HEKfARG(p) ((void*)(p))
 
+/* Takes three arguments: is_utf8, length, str */
+#ifndef UTF8f
+#  define UTF8f "d%"UVuf"%4p"
+#endif
+#define UTF8fARG(u,l,p) (int)cBOOL(u), (UV)(l), (void*)(p)
+
 #ifdef PERL_CORE
 /* not used; but needed for backward compatibility with XS code? - RMB */
 #  undef VDf
diff --git a/pp.c b/pp.c
index 5efd87f..f6c20d0 100644 (file)
--- a/pp.c
+++ b/pp.c
@@ -492,11 +492,8 @@ PP(pp_prototype)
        if (strnEQ(s, "CORE::", 6)) {
            const int code = keyword(s + 6, SvCUR(TOPs) - 6, 1);
            if (!code || code == -KEY_CORE)
-               DIE(aTHX_ "Can't find an opnumber for \"%"SVf"\"",
-                   SVfARG(newSVpvn_flags(
-                       s+6, SvCUR(TOPs)-6,
-                       (SvFLAGS(TOPs) & SVf_UTF8)|SVs_TEMP
-                   )));
+               DIE(aTHX_ "Can't find an opnumber for \"%"UTF8f"\"",
+                  UTF8fARG(SvFLAGS(TOPs) & SVf_UTF8, SvCUR(TOPs)-6, s+6));
            {
                SV * const sv = core_prototype(NULL, s + 6, code, NULL);
                if (sv) ret = sv;
index f68336a..d8f63b7 100644 (file)
--- a/pp_ctl.c
+++ b/pp_ctl.c
@@ -3070,9 +3070,8 @@ PP(pp_goto)
            PL_lastgotoprobe = gotoprobe;
        }
        if (!retop)
-           DIE(aTHX_ "Can't find label %"SVf,
-                            SVfARG(newSVpvn_flags(label, label_len,
-                                        SVs_TEMP | label_flags)));
+           DIE(aTHX_ "Can't find label %"UTF8f, 
+                      UTF8fARG(label_flags, label_len, label));
 
        /* if we're leaving an eval, check before we pop any frames
            that we're not going to punt, otherwise the error
diff --git a/sv.c b/sv.c
index 35bb1a8..6306707 100644 (file)
--- a/sv.c
+++ b/sv.c
@@ -10467,7 +10467,8 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
                %-<num>p        include an SV with precision <num>      
                %2p             include a HEK
                %3p             include a HEK with precision of 256
-               %<num>p         (where num != 2 or 3) reserved for future
+               %4p             char* preceded by utf8 flag and length
+               %<num>p         (where num is 1 or > 4) reserved for future
                                extensions
 
        Robin Barker 2005-07-14 (but modified since)
@@ -10479,6 +10480,15 @@ Perl_sv_vcatpvfn_flags(pTHX_ SV *const sv, const char *const pat, const STRLEN p
            STRLEN n = 0;
            if (*q == '-')
                sv = *q++;
+           else if (strnEQ(q, UTF8f, sizeof(UTF8f)-1)) { /* UTF8f */
+               /* The argument has already gone through cBOOL, so the cast
+                  is safe. */
+               is_utf8 = (bool)va_arg(*args, int);
+               elen = va_arg(*args, UV);
+               eptr = va_arg(*args, char *);
+               q += sizeof(UTF8f)-1;
+               goto string;
+           }
            n = expect_number(&q);
            if (*q++ == 'p') {
                if (sv) {                       /* SVf */
index 729abaf..69d9363 100644 (file)
@@ -162,6 +162,7 @@ my %specialformats = (IVdf => 'd',
                      NVgf => 'f',
                      HEKf256=>'s',
                      HEKf => 's',
+                     UTF8f=> 's',
                      SVf256=>'s',
                      SVf32=> 's',
                      SVf  => 's');
diff --git a/toke.c b/toke.c
index 8f6eb44..e4a79a9 100644 (file)
--- a/toke.c
+++ b/toke.c
@@ -553,16 +553,14 @@ S_no_op(pTHX_ const char *const what, char *s)
                NOOP;
            if (t < PL_bufptr && isSPACE(*t))
                Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                       "\t(Do you need to predeclare %"SVf"?)\n",
-                   SVfARG(newSVpvn_flags(PL_oldoldbufptr, (STRLEN)(t - PL_oldoldbufptr),
-                                   SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                       "\t(Do you need to predeclare %"UTF8f"?)\n",
+                     UTF8fARG(UTF, t - PL_oldoldbufptr, PL_oldoldbufptr));
        }
        else {
            assert(s >= oldbp);
            Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                   "\t(Missing operator before %"SVf"?)\n",
-                    SVfARG(newSVpvn_flags(oldbp, (STRLEN)(s - oldbp),
-                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                   "\t(Missing operator before %"UTF8f"?)\n",
+                    UTF8fARG(UTF, s - oldbp, oldbp));
        }
     }
     PL_bufptr = oldbp;
@@ -6506,9 +6504,8 @@ Perl_yylex(pTHX)
                                if (*t == ';'
                                        && get_cvn_flags(tmpbuf, len, UTF ? SVf_UTF8 : 0))
                                    Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                                               "You need to quote \"%"SVf"\"",
-                                                 SVfARG(newSVpvn_flags(tmpbuf, len, 
-                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                                       "You need to quote \"%"UTF8f"\"",
+                                        UTF8fARG(UTF, len, tmpbuf));
                            }
                        }
                }
@@ -6593,11 +6590,9 @@ Perl_yylex(pTHX)
                        PL_bufptr = PEEKSPACE(PL_bufptr); /* XXX can realloc */
        /* diag_listed_as: Scalar value @%s[%s] better written as $%s[%s] */
                        Perl_warner(aTHX_ packWARN(WARN_SYNTAX),
-                           "Scalar value %"SVf" better written as $%"SVf,
-                           SVfARG(newSVpvn_flags(PL_bufptr, (STRLEN)(t-PL_bufptr),
-                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))),
-                            SVfARG(newSVpvn_flags(PL_bufptr+1, (STRLEN)(t-PL_bufptr-1),
-                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0 ))));
+                        "Scalar value %"UTF8f" better written as $%"UTF8f,
+                         UTF8fARG(UTF, t-PL_bufptr, PL_bufptr),
+                         UTF8fARG(UTF, t-PL_bufptr-1, PL_bufptr+1));
                    }
                }
            }
@@ -7040,9 +7035,8 @@ Perl_yylex(pTHX)
                    s = scan_word(s, PL_tokenbuf + len, sizeof PL_tokenbuf - len,
                                  TRUE, &morelen);
                    if (!morelen)
-                       Perl_croak(aTHX_ "Bad name after %"SVf"%s",
-                                        SVfARG(newSVpvn_flags(PL_tokenbuf, len,
-                                            (UTF ? SVf_UTF8 : 0) | SVs_TEMP )),
+                       Perl_croak(aTHX_ "Bad name after %"UTF8f"%s",
+                               UTF8fARG(UTF, len, PL_tokenbuf),
                                *s == '\'' ? "'" : "::");
                    len += morelen;
                    pkgname = 1;
@@ -7069,9 +7063,8 @@ Perl_yylex(pTHX)
                    if (ckWARN(WARN_BAREWORD)
                        && ! gv_fetchpvn_flags(PL_tokenbuf, len, UTF ? SVf_UTF8 : 0, SVt_PVHV))
                        Perl_warner(aTHX_ packWARN(WARN_BAREWORD),
-                           "Bareword \"%"SVf"\" refers to nonexistent package",
-                            SVfARG(newSVpvn_flags(PL_tokenbuf, len,
-                                        (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+                         "Bareword \"%"UTF8f"\" refers to nonexistent package",
+                          UTF8fARG(UTF, len, PL_tokenbuf));
                    len -= 2;
                    PL_tokenbuf[len] = '\0';
                    gv = NULL;
@@ -7261,10 +7254,11 @@ Perl_yylex(pTHX)
 
                if (cv) {
                    if (lastchar == '-' && penultchar != '-') {
-                        const SV *tmpsv = newSVpvn_flags( PL_tokenbuf, len ? len : strlen(PL_tokenbuf), (UTF ? SVf_UTF8 : 0) | SVs_TEMP );
+                       const STRLEN l = len ? len : strlen(PL_tokenbuf);
                        Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                               "Ambiguous use of -%"SVf" resolved as -&%"SVf"()",
-                               SVfARG(tmpsv), SVfARG(tmpsv));
+                           "Ambiguous use of -%"UTF8f" resolved as -&%"UTF8f"()",
+                            UTF8fARG(UTF, l, PL_tokenbuf),
+                            UTF8fARG(UTF, l, PL_tokenbuf));
                     }
                    /* Check for a constant sub */
                    if ((sv = cv_const_sv(cv))) {
@@ -7441,10 +7435,10 @@ Perl_yylex(pTHX)
                if ((lastchar == '*' || lastchar == '%' || lastchar == '&')
                 && saw_infix_sigil) {
                    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
-                                    "Operator or semicolon missing before %c%"SVf,
-                                    lastchar, SVfARG(newSVpvn_flags(PL_tokenbuf,
-                                                    strlen(PL_tokenbuf),
-                                                    SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+                                    "Operator or semicolon missing before %c%"UTF8f,
+                                    lastchar,
+                                    UTF8fARG(UTF, strlen(PL_tokenbuf),
+                                             PL_tokenbuf));
                    Perl_ck_warner_d(aTHX_ packWARN(WARN_AMBIGUOUS),
                                     "Ambiguous use of %c resolved as operator %c",
                                     lastchar, lastchar);
@@ -7604,9 +7598,8 @@ Perl_yylex(pTHX)
                    goto just_a_word;
                }
                if (!tmp)
-                   Perl_croak(aTHX_ "CORE::%"SVf" is not a keyword",
-                                    SVfARG(newSVpvn_flags(PL_tokenbuf, len,
-                                                (UTF ? SVf_UTF8 : 0) | SVs_TEMP)));
+                   Perl_croak(aTHX_ "CORE::%"UTF8f" is not a keyword",
+                                     UTF8fARG(UTF, len, PL_tokenbuf));
                if (tmp < 0)
                    tmp = -tmp;
                else if (tmp == KEY_require || tmp == KEY_do
@@ -8162,11 +8155,9 @@ Perl_yylex(pTHX)
                    && !(t[0] == ':' && t[1] == ':')
                    && !keyword(s, d-s, 0)
                ) {
-                   SV *tmpsv = newSVpvn_flags(s, (STRLEN)(d-s),
-                                                SVs_TEMP | (UTF ? SVf_UTF8 : 0));
                    Perl_warner(aTHX_ packWARN(WARN_PRECEDENCE),
-                          "Precedence problem: open %"SVf" should be open(%"SVf")",
-                           SVfARG(tmpsv), SVfARG(tmpsv));
+                      "Precedence problem: open %"UTF8f" should be open(%"UTF8f")",
+                       UTF8fARG(UTF, d-s, s), UTF8fARG(UTF, d-s, s));
                }
            }
            LOP(OP_OPEN,XTERM);
@@ -9018,9 +9009,9 @@ S_pending_ident(pTHX)
         {
             /* Downgraded from fatal to warning 20000522 mjd */
             Perl_warner(aTHX_ packWARN(WARN_AMBIGUOUS),
-                       "Possible unintended interpolation of %"SVf" in string",
-                       SVfARG(newSVpvn_flags(PL_tokenbuf, tokenbuf_len,
-                                        SVs_TEMP | ( UTF ? SVf_UTF8 : 0 ))));
+                       "Possible unintended interpolation of %"UTF8f
+                       " in string",
+                       UTF8fARG(UTF, tokenbuf_len, PL_tokenbuf));
         }
     }
 
@@ -11411,9 +11402,8 @@ Perl_yyerror_pvn(pTHX_ const char *const s, STRLEN len, U32 flags)
     Perl_sv_catpvf(aTHX_ msg, " at %s line %"IVdf", ",
         OutCopFILE(PL_curcop), (IV)CopLINE(PL_curcop));
     if (context)
-       Perl_sv_catpvf(aTHX_ msg, "near \"%"SVf"\"\n",
-                            SVfARG(newSVpvn_flags(context, contlen,
-                                        SVs_TEMP | (UTF ? SVf_UTF8 : 0))));
+       Perl_sv_catpvf(aTHX_ msg, "near \"%"UTF8f"\"\n",
+                            UTF8fARG(UTF, contlen, context));
     else
        Perl_sv_catpvf(aTHX_ msg, "%"SVf"\n", SVfARG(where_sv));
     if (PL_multi_start < PL_multi_end && (U32)(CopLINE(PL_curcop) - PL_multi_end) <= 1) {