New UTF-8 warning: Variable length character upgraded in print.

author SADAHIRO Tomoyuki <BQW10602@nifty.com>

Sun, 22 Oct 2006 14:32:34 +0000 (23:32 +0900)

committer Rafael Garcia-Suarez <rgarciasuarez@gmail.com>

Tue, 24 Oct 2006 12:53:14 +0000 (12:53 +0000)
author SADAHIRO Tomoyuki <BQW10602@nifty.com>
Sun, 22 Oct 2006 14:32:34 +0000 (23:32 +0900)
committer Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
Tue, 24 Oct 2006 12:53:14 +0000 (12:53 +0000)
diff --git a/doio.c b/doio.c

index e0a1fc5..88afb1d 100644 (file)
--- a/doio.c
+++ b/doio.c
@@ -1225,9 +1225,14 @@ Perl_do_print(pTHX_ register SV *sv, PerlIO *fp)
         tmps = SvPV_const(sv, len);
         if (PerlIO_isutf8(fp)) {
             if (!SvUTF8(sv)) {
+               const STRLEN origlen = len;
                 /* We don't modify the original scalar.  */
                 tmpbuf = bytes_to_utf8((const U8*) tmps, &len);
                 tmps = (char *) tmpbuf;
+               if (ckWARN(WARN_UTF8) && len != origlen) {
+                   Perl_warner(aTHX_ packWARN(WARN_UTF8),
+                               "Variable length character upgraded in print");
+               }
             }
         }
         else if (DO_UTF8(sv)) {
diff --git a/pod/perldiag.pod b/pod/perldiag.pod

index 59a9748..68df69f 100644 (file)
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -4710,6 +4710,18 @@ front of your variable.
  known at compile time. The <-- HERE shows in the regular expression about
  where the problem was discovered. See L<perlre>.
  
+=item Variable length character upgraded in print
+
+(W utf8) Perl met a variable-length character that is not marked as
+Unicode in the output, but the output layer (like the C<:utf8> layer)
+expects strings marked as Unicode. (A variable-length character is one
+whose memory representation differs between the native encoding (ISO-8859-1
+or single-byte EBCDIC) and perl's Unicode encoding (UTF-8 or UTF-EBCDIC).)
+Perl assumes that any string not marked as Unicode is encoded in the
+native encoding, and implicitly converts (upgrades) it into perl's
+Unicode encoding on print. If you had intended such strings to be treated
+as Unicode strings, you may have failed to handle them properly.
+
  =item "%s" variable %s masks earlier declaration in same %s
  
  (W misc) A "my" or "our" variable has been redeclared in the current
author	SADAHIRO Tomoyuki <BQW10602@nifty.com>
	Sun, 22 Oct 2006 14:32:34 +0000 (23:32 +0900)
committer	Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
	Tue, 24 Oct 2006 12:53:14 +0000 (12:53 +0000)
doio.c		patch \| blob \| history
pod/perldiag.pod		patch \| blob \| history