Subclass utf8 warnings so can turn off individually

author Karl Williamson <public@khwilliamson.com>

Fri, 18 Feb 2011 05:42:03 +0000 (22:42 -0700)

committer Karl Williamson <public@khwilliamson.com>

Fri, 18 Feb 2011 05:56:25 +0000 (22:56 -0700)
author Karl Williamson <public@khwilliamson.com>
Fri, 18 Feb 2011 05:42:03 +0000 (22:42 -0700)
committer Karl Williamson <public@khwilliamson.com>
Fri, 18 Feb 2011 05:56:25 +0000 (22:56 -0700)
diff --git a/lib/warnings.pm b/lib/warnings.pm

index 8cd24d4..90a9d0a 100644 (file)
--- a/lib/warnings.pm
+++ b/lib/warnings.pm
@@ -218,113 +218,125 @@ our %Offsets = (
  
      'imprecision'      => 92,
      'illegalproto'     => 94,
+
+    # Warnings Categories added in Perl 5.013
+
+    'non_unicode'      => 96,
+    'nonchar'          => 98,
+    'surrogate'                => 100,
    );
  
  our %Bits = (
-    'all'              => "\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55", # [0..47]
-    'ambiguous'                => "\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00", # [29]
-    'bareword'         => "\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00", # [30]
-    'closed'           => "\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [6]
-    'closure'          => "\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [1]
-    'debugging'                => "\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00", # [22]
-    'deprecated'       => "\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [2]
-    'digit'            => "\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00", # [31]
-    'exec'             => "\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7]
-    'exiting'          => "\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3]
-    'glob'             => "\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [4]
-    'illegalproto'     => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40", # [47]
-    'imprecision'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10", # [46]
-    'inplace'          => "\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00", # [23]
-    'internal'         => "\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00", # [24]
-    'io'               => "\x00\x54\x55\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [5..11]
-    'layer'            => "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [8]
-    'malloc'           => "\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00", # [25]
-    'misc'             => "\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00", # [12]
-    'newline'          => "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [9]
-    'numeric'          => "\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00", # [13]
-    'once'             => "\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00", # [14]
-    'overflow'         => "\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00", # [15]
-    'pack'             => "\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00", # [16]
-    'parenthesis'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00", # [32]
-    'pipe'             => "\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [10]
-    'portable'         => "\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00", # [17]
-    'precedence'       => "\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00", # [33]
-    'printf'           => "\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00", # [34]
-    'prototype'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00", # [35]
-    'qw'               => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00", # [36]
-    'recursion'                => "\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00", # [18]
-    'redefine'         => "\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00", # [19]
-    'regexp'           => "\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00", # [20]
-    'reserved'         => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00", # [37]
-    'semicolon'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00", # [38]
-    'severe'           => "\x00\x00\x00\x00\x00\x54\x05\x00\x00\x00\x00\x00", # [21..25]
-    'signal'           => "\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00", # [26]
-    'substr'           => "\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00", # [27]
-    'syntax'           => "\x00\x00\x00\x00\x00\x00\x00\x55\x55\x15\x00\x40", # [28..38,47]
-    'taint'            => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00", # [39]
-    'threads'          => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00", # [40]
-    'uninitialized'    => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00", # [41]
-    'unopened'         => "\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [11]
-    'unpack'           => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00", # [42]
-    'untie'            => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00", # [43]
-    'utf8'             => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", # [44]
-    'void'             => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04", # [45]
+    'all'              => "\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x55\x15", # [0..50]
+    'ambiguous'                => "\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00", # [29]
+    'bareword'         => "\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00", # [30]
+    'closed'           => "\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [6]
+    'closure'          => "\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [1]
+    'debugging'                => "\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00", # [22]
+    'deprecated'       => "\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [2]
+    'digit'            => "\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00", # [31]
+    'exec'             => "\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7]
+    'exiting'          => "\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3]
+    'glob'             => "\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [4]
+    'illegalproto'     => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00", # [47]
+    'imprecision'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00", # [46]
+    'inplace'          => "\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00", # [23]
+    'internal'         => "\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00", # [24]
+    'io'               => "\x00\x54\x55\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [5..11]
+    'layer'            => "\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [8]
+    'malloc'           => "\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00", # [25]
+    'misc'             => "\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [12]
+    'newline'          => "\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [9]
+    'non_unicode'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01", # [48]
+    'nonchar'          => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04", # [49]
+    'numeric'          => "\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [13]
+    'once'             => "\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [14]
+    'overflow'         => "\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [15]
+    'pack'             => "\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00", # [16]
+    'parenthesis'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00", # [32]
+    'pipe'             => "\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [10]
+    'portable'         => "\x00\x00\x00\x00\x04\x00\x00\x00\x00\x00\x00\x00\x00", # [17]
+    'precedence'       => "\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x00", # [33]
+    'printf'           => "\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00", # [34]
+    'prototype'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00", # [35]
+    'qw'               => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00", # [36]
+    'recursion'                => "\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00\x00\x00", # [18]
+    'redefine'         => "\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00", # [19]
+    'regexp'           => "\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00", # [20]
+    'reserved'         => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00", # [37]
+    'semicolon'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00", # [38]
+    'severe'           => "\x00\x00\x00\x00\x00\x54\x05\x00\x00\x00\x00\x00\x00", # [21..25]
+    'signal'           => "\x00\x00\x00\x00\x00\x00\x10\x00\x00\x00\x00\x00\x00", # [26]
+    'substr'           => "\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00\x00\x00\x00", # [27]
+    'surrogate'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10", # [50]
+    'syntax'           => "\x00\x00\x00\x00\x00\x00\x00\x55\x55\x15\x00\x40\x00", # [28..38,47]
+    'taint'            => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00\x00", # [39]
+    'threads'          => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00", # [40]
+    'uninitialized'    => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00\x00", # [41]
+    'unopened'         => "\x00\x00\x40\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [11]
+    'unpack'           => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x10\x00\x00", # [42]
+    'untie'            => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x40\x00\x00", # [43]
+    'utf8'             => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x15", # [44,48..50]
+    'void'             => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00", # [45]
    );
  
  our %DeadBits = (
-    'all'              => "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa", # [0..47]
-    'ambiguous'                => "\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00", # [29]
-    'bareword'         => "\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00", # [30]
-    'closed'           => "\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [6]
-    'closure'          => "\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [1]
-    'debugging'                => "\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00", # [22]
-    'deprecated'       => "\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [2]
-    'digit'            => "\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00", # [31]
-    'exec'             => "\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7]
-    'exiting'          => "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3]
-    'glob'             => "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [4]
-    'illegalproto'     => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80", # [47]
-    'imprecision'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20", # [46]
-    'inplace'          => "\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00", # [23]
-    'internal'         => "\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00", # [24]
-    'io'               => "\x00\xa8\xaa\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [5..11]
-    'layer'            => "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [8]
-    'malloc'           => "\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00", # [25]
-    'misc'             => "\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00", # [12]
-    'newline'          => "\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [9]
-    'numeric'          => "\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00", # [13]
-    'once'             => "\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00", # [14]
-    'overflow'         => "\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00", # [15]
-    'pack'             => "\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00", # [16]
-    'parenthesis'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00", # [32]
-    'pipe'             => "\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [10]
-    'portable'         => "\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00", # [17]
-    'precedence'       => "\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00", # [33]
-    'printf'           => "\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00", # [34]
-    'prototype'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00", # [35]
-    'qw'               => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00", # [36]
-    'recursion'                => "\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00", # [18]
-    'redefine'         => "\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00", # [19]
-    'regexp'           => "\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00", # [20]
-    'reserved'         => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00", # [37]
-    'semicolon'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00", # [38]
-    'severe'           => "\x00\x00\x00\x00\x00\xa8\x0a\x00\x00\x00\x00\x00", # [21..25]
-    'signal'           => "\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00", # [26]
-    'substr'           => "\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00", # [27]
-    'syntax'           => "\x00\x00\x00\x00\x00\x00\x00\xaa\xaa\x2a\x00\x80", # [28..38,47]
-    'taint'            => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00", # [39]
-    'threads'          => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00", # [40]
-    'uninitialized'    => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00", # [41]
-    'unopened'         => "\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [11]
-    'unpack'           => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00", # [42]
-    'untie'            => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00", # [43]
-    'utf8'             => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02", # [44]
-    'void'             => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08", # [45]
+    'all'              => "\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\xaa\x2a", # [0..50]
+    'ambiguous'                => "\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00", # [29]
+    'bareword'         => "\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00", # [30]
+    'closed'           => "\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [6]
+    'closure'          => "\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [1]
+    'debugging'                => "\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00", # [22]
+    'deprecated'       => "\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [2]
+    'digit'            => "\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00", # [31]
+    'exec'             => "\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [7]
+    'exiting'          => "\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [3]
+    'glob'             => "\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [4]
+    'illegalproto'     => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00", # [47]
+    'imprecision'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00", # [46]
+    'inplace'          => "\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00", # [23]
+    'internal'         => "\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00", # [24]
+    'io'               => "\x00\xa8\xaa\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [5..11]
+    'layer'            => "\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [8]
+    'malloc'           => "\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00", # [25]
+    'misc'             => "\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [12]
+    'newline'          => "\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [9]
+    'non_unicode'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02", # [48]
+    'nonchar'          => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08", # [49]
+    'numeric'          => "\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [13]
+    'once'             => "\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [14]
+    'overflow'         => "\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [15]
+    'pack'             => "\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00", # [16]
+    'parenthesis'      => "\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00", # [32]
+    'pipe'             => "\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [10]
+    'portable'         => "\x00\x00\x00\x00\x08\x00\x00\x00\x00\x00\x00\x00\x00", # [17]
+    'precedence'       => "\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00\x00", # [33]
+    'printf'           => "\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00", # [34]
+    'prototype'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00", # [35]
+    'qw'               => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00", # [36]
+    'recursion'                => "\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00\x00\x00", # [18]
+    'redefine'         => "\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00", # [19]
+    'regexp'           => "\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00", # [20]
+    'reserved'         => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00\x00", # [37]
+    'semicolon'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00", # [38]
+    'severe'           => "\x00\x00\x00\x00\x00\xa8\x0a\x00\x00\x00\x00\x00\x00", # [21..25]
+    'signal'           => "\x00\x00\x00\x00\x00\x00\x20\x00\x00\x00\x00\x00\x00", # [26]
+    'substr'           => "\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00\x00\x00\x00", # [27]
+    'surrogate'                => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20", # [50]
+    'syntax'           => "\x00\x00\x00\x00\x00\x00\x00\xaa\xaa\x2a\x00\x80\x00", # [28..38,47]
+    'taint'            => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00\x00", # [39]
+    'threads'          => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00", # [40]
+    'uninitialized'    => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00\x00", # [41]
+    'unopened'         => "\x00\x00\x80\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00", # [11]
+    'unpack'           => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x20\x00\x00", # [42]
+    'untie'            => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x80\x00\x00", # [43]
+    'utf8'             => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x2a", # [44,48..50]
+    'void'             => "\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x08\x00", # [45]
    );
  
-$NONE     = "\0\0\0\0\0\0\0\0\0\0\0\0";
-$LAST_BIT = 96 ;
-$BYTES    = 12 ;
+$NONE     = "\0\0\0\0\0\0\0\0\0\0\0\0\0";
+$LAST_BIT = 102 ;
+$BYTES    = 13 ;
  
  $All = "" ; vec($All, $Offsets{'all'}, 2) = 3 ;
  
diff --git a/pod/perldelta.pod b/pod/perldelta.pod

index cbae795..7523500 100644 (file)
--- a/pod/perldelta.pod
+++ b/pod/perldelta.pod
@@ -45,6 +45,16 @@ will match; it won't under C</aa>.  Note that like C</a>, C</aa>
  in 5.14 will not actually be able to be used as a suffix at the end of a
  regular expression.
  
+=head2 New warnings categories for problematic (non-)Unicode code points.
+
+Three new warnings subcategories of WARN_UTF8 have been added.  These
+allow you to turn off warnings for their covered events, while allowing
+the other UTF-8 warnings to remain on.  The three categories are:
+C<surrogate> when UTF-16 surrogates are encountered;
+C<nonchar> when Unicode non-character code points are encountered;
+and C<non_unicode> when code points that are above the legal Unicode
+maximum of 0x10FFFF are encountered.
+
  =head1 Security
  
  XXX Any security-related notices go here.  In particular, any security
diff --git a/pod/perldiag.pod b/pod/perldiag.pod

index df57e50..b3fdd5e 100644 (file)
--- a/pod/perldiag.pod
+++ b/pod/perldiag.pod
@@ -1362,7 +1362,7 @@ template code following the slash. See L<perlfunc/pack>.
  
  =item Code point 0x%X is not Unicode, no properties match it; all inverse properties do
  
-(W utf8) You had a code point above the Unicode maximum of U+10FFFF.
+(W utf8, non_unicode) You had a code point above the Unicode maximum of U+10FFFF.
  
  Perl allows strings to contain a superset of Unicode code
  points, up to the limit of what is storable in an unsigned integer on
@@ -3203,7 +3203,8 @@ the C<fallback> overloading key is specified to be true.  See L<overload>.
  
  =item Operation "%s" returns its argument for non-Unicode code point 0x%X
  
-(W) You performed an operation requiring Unicode semantics on a code
+(W utf8, non_unicode) You performed an operation requiring Unicode
+semantics on a code
  point that is not in Unicode, so what it should do is not defined.  Perl
  has chosen to have it do nothing, and warn you.
  
@@ -3211,11 +3212,12 @@ If the operation shown is "ToFold", it means that case-insensitive
  matching in a regular expression was done on the code point.
  
  If you know what you are doing you can turn off this warning by
-C<no warnings 'utf8';>.
+C<no warnings 'non_unicode';>.
  
  =item Operation "%s" returns its argument for UTF-16 surrogate U+%X
  
-(W) You performed an operation requiring Unicode semantics on a Unicode
+(W utf8, surrogate) You performed an operation requiring Unicode
+semantics on a Unicode
  surrogate.  Unicode frowns upon the use of surrogates for anything but
  storing strings in UTF-16, but semantics are (reluctantly) defined for
  the surrogates, and they are to do nothing for this operation.  Because
@@ -3225,7 +3227,7 @@ If the operation shown is "ToFold", it means that case-insensitive
  matching in a regular expression was done on the code point.
  
  If you know what you are doing you can turn off this warning by
-C<no warnings 'utf8';>.
+C<no warnings 'surrogate';>.
  
  =item Operator or semicolon missing before %s
  
@@ -4686,22 +4688,23 @@ representative, who probably put it there in the first place.
  
  =item Unicode non-character U+%X is illegal for open interchange
  
-(W utf8) Certain codepoints, such as U+FFFE and U+FFFF, are defined by the
+(W utf8, nonchar) Certain codepoints, such as U+FFFE and U+FFFF, are
+defined by the
  Unicode standard to be non-characters. Those are legal codepoints, but are
  reserved for internal use; so, applications shouldn't attempt to exchange
  them.  If you know what you are doing you can turn
-off this warning by C<no warnings 'utf8';>.
+off this warning by C<no warnings 'nonchar';>.
  
  =item Unicode surrogate U+%X is illegal in UTF-8
  
-(W utf8) You had a UTF-16 surrogate in a context where they are
+(W utf8, surrogate) You had a UTF-16 surrogate in a context where they are
  not considered acceptable.  These code points, between U+D800 and
  U+DFFF (inclusive), are used by Unicode only for UTF-16.  However, Perl
  internally allows all unsigned integer code points (up to the size limit
  available on your platform), including surrogates.  But these can cause
  problems when being input or output, which is likely where this message
  came from.  If you really really know what you are doing you can turn
-off this warning by C<no warnings 'utf8';>.
+off this warning by C<no warnings 'surrogate';>.
  
  =item Unknown BYTEORDER
  
@@ -5277,14 +5280,14 @@ providing improved functionality is being prepared.
  
  =item UTF-16 surrogate U+%X
  
-(W utf8) You had a UTF-16 surrogate in a context where they are
+(W utf8, surrogate) You had a UTF-16 surrogate in a context where they are
  not considered acceptable.  These code points, between U+D800 and
  U+DFFF (inclusive), are used by Unicode only for UTF-16.  However, Perl
  internally allows all unsigned integer code points (up to the size limit
  available on your platform), including surrogates.  But these can cause
  problems when being input or output, which is likely where this message
  came from.  If you really really know what you are doing you can turn
-off this warning by C<no warnings 'utf8';>.
+off this warning by C<no warnings 'surrogate';>.
  
  =item Value of %s can be "0"; test with defined()
  
diff --git a/pod/perllexwarn.pod b/pod/perllexwarn.pod

index ab71729..accbfdf 100644 (file)
--- a/pod/perllexwarn.pod
+++ b/pod/perllexwarn.pod
@@ -304,7 +304,13 @@ The current hierarchy is:
         |
         +- untie
         |
-       +- utf8
+       +- utf8----------+
+       |                |
+       |                +- surrogate
+       |                |
+       |                +- non_unicode
+       |                |
+       |                +- nonchar
         |
         +- void
  
diff --git a/regen/warnings.pl b/regen/warnings.pl

index 454035f..fafde5b 100644 (file)
--- a/regen/warnings.pl
+++ b/regen/warnings.pl
@@ -8,6 +8,9 @@
  # from information hardcoded into this script (the $tree hash), plus the
  # template for warnings.pm in the DATA section.
  #
+# When changing the number of warnings, the $BYTES in t/op/caller.t should
+# change to correspond with the same symbol's value in lib/warnings.pm
+#
  # With an argument of 'tree', just dump the contents of $tree and exits.
  # Also accepts the standard regen_lib -q and -v args.
  #
@@ -71,7 +74,11 @@ my $tree = {
         'closure'       => [ 5.008, DEFAULT_OFF],
         'overflow'      => [ 5.008, DEFAULT_OFF],
         'portable'      => [ 5.008, DEFAULT_OFF],
-       'utf8'          => [ 5.008, DEFAULT_OFF],
+       'utf8'          => [ 5.008, {
+                                'surrogate' => [ 5.013, DEFAULT_OFF],
+                                'nonchar' => [ 5.013, DEFAULT_OFF],
+                                'non_unicode' => [ 5.013, DEFAULT_OFF],
+                        }],
                 'exiting'       => [ 5.008, DEFAULT_OFF],
                 'pack'          => [ 5.008, DEFAULT_OFF],
                 'unpack'        => [ 5.008, DEFAULT_OFF],
diff --git a/t/lib/warnings/utf8 b/t/lib/warnings/utf8

index 6514175..7ed458d 100644 (file)
--- a/t/lib/warnings/utf8
+++ b/t/lib/warnings/utf8
@@ -67,6 +67,31 @@ Operation "uc" returns its argument for UTF-16 surrogate U+DFFF at - line 4.
  Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 14.
  ########
  use warnings 'utf8';
+my $d800  = uc(chr(0xD800));
+my $nonUnicode =  uc(chr(0x110000));
+no warnings 'surrogate';
+my $d800  = uc(chr(0xD800));
+my $nonUnicode =  uc(chr(0x110000));
+EXPECT
+Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
+Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
+Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 6.
+########
+use warnings 'utf8';
+my $d800  = uc(chr(0xD800));
+my $nonUnicode =  uc(chr(0x110000));
+my $big_nonUnicode = uc(chr(0x8000_0000));
+no warnings 'non_unicode';
+my $d800  = uc(chr(0xD800));
+my $nonUnicode =  uc(chr(0x110000));
+my $big_nonUnicode = uc(chr(0x8000_0000));
+EXPECT
+Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 2.
+Operation "uc" returns its argument for non-Unicode code point 0x110000 at - line 3.
+Operation "uc" returns its argument for non-Unicode code point 0x80000000 at - line 4.
+Operation "uc" returns its argument for UTF-16 surrogate U+D800 at - line 6.
+########
+use warnings 'utf8';
  my $d7ff  = lc pack("U", 0xD7FF);
  my $d800  = lc pack("U", 0xD800);
  my $dfff  = lc pack("U", 0xDFFF);
@@ -163,6 +188,13 @@ chr(0x110000) =~ /\p{Any}/;
  EXPECT
  Code point 0x110000 is not Unicode, no properties match it; all inverse properties do at - line 14.
  ########
+use warnings 'utf8';
+chr(0x110000) =~ /\p{Any}/;
+no warnings 'non_unicode';
+chr(0x110000) =~ /\p{Any}/;
+EXPECT
+Code point 0x110000 is not Unicode, no properties match it; all inverse properties do at - line 2.
+########
  require "../test.pl";
  use warnings 'utf8';
  my $file = tempfile();
@@ -257,6 +289,58 @@ Unicode non-character U+10FFFF is illegal for open interchange at - line 50.
  Code point 0x110000 is not Unicode, may not be portable at - line 51.
  ########
  require "../test.pl";
+use warnings 'utf8';
+my $file = tempfile();
+open(my $fh, "+>:utf8", $file);
+print $fh "\x{D800}", "\n";
+print $fh "\x{FFFF}", "\n";
+print $fh "\x{110000}", "\n";
+close $fh;
+EXPECT
+Unicode surrogate U+D800 is illegal in UTF-8 at - line 5.
+Unicode non-character U+FFFF is illegal for open interchange at - line 6.
+Code point 0x110000 is not Unicode, may not be portable at - line 7.
+########
+require "../test.pl";
+use warnings 'utf8';
+no warnings 'surrogate';
+my $file = tempfile();
+open(my $fh, "+>:utf8", $file);
+print $fh "\x{D800}", "\n";
+print $fh "\x{FFFF}", "\n";
+print $fh "\x{110000}", "\n";
+close $fh;
+EXPECT
+Unicode non-character U+FFFF is illegal for open interchange at - line 7.
+Code point 0x110000 is not Unicode, may not be portable at - line 8.
+########
+require "../test.pl";
+use warnings 'utf8';
+no warnings 'nonchar';
+my $file = tempfile();
+open(my $fh, "+>:utf8", $file);
+print $fh "\x{D800}", "\n";
+print $fh "\x{FFFF}", "\n";
+print $fh "\x{110000}", "\n";
+close $fh;
+EXPECT
+Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
+Code point 0x110000 is not Unicode, may not be portable at - line 8.
+########
+require "../test.pl";
+use warnings 'utf8';
+no warnings 'non_unicode';
+my $file = tempfile();
+open(my $fh, "+>:utf8", $file);
+print $fh "\x{D800}", "\n";
+print $fh "\x{FFFF}", "\n";
+print $fh "\x{110000}", "\n";
+close $fh;
+EXPECT
+Unicode surrogate U+D800 is illegal in UTF-8 at - line 6.
+Unicode non-character U+FFFF is illegal for open interchange at - line 7.
+########
+require "../test.pl";
  no warnings 'utf8';
  my $file = tempfile();
  open(my $fh, "+>:utf8", $file);
diff --git a/t/op/caller.t b/t/op/caller.t

index d08e143..a92b3ea 100644 (file)
--- a/t/op/caller.t
+++ b/t/op/caller.t
@@ -111,8 +111,8 @@ sub testwarn {
  
      # The repetition number must be set to the value of $BYTES in
      # lib/warnings.pm
-    BEGIN { check_bits( ${^WARNING_BITS}, "\0" x 12, 'all bits off via "no warnings"' ) }
-    testwarn("\0" x 12, 'no bits');
+    BEGIN { check_bits( ${^WARNING_BITS}, "\0" x 13, 'all bits off via "no warnings"' ) }
+    testwarn("\0" x 13, 'no bits');
  
      use warnings;
      BEGIN { check_bits( ${^WARNING_BITS}, $default,
diff --git a/utf8.c b/utf8.c

index 808d9a8..768fc01 100644 (file)
--- a/utf8.c
+++ b/utf8.c
@@ -139,7 +139,7 @@ Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
      if (ckWARN_d(WARN_UTF8)) {
         if (UNICODE_IS_SURROGATE(uv)) {
             if (flags & UNICODE_WARN_SURROGATE) {
-               Perl_warner(aTHX_ packWARN(WARN_UTF8),
+               Perl_ck_warner_d(aTHX_ packWARN(WARN_SURROGATE),
                                             "UTF-16 surrogate U+%04"UVXf, uv);
             }
             if (flags & UNICODE_DISALLOW_SURROGATE) {
@@ -150,7 +150,7 @@ Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
             if (flags & UNICODE_WARN_SUPER
                 || (UNICODE_IS_FE_FF(uv) && (flags & UNICODE_WARN_FE_FF)))
             {
-               Perl_warner(aTHX_ packWARN(WARN_UTF8),
+               Perl_ck_warner_d(aTHX_ packWARN(WARN_NON_UNICODE),
                           "Code point 0x%04"UVXf" is not Unicode, may not be portable", uv);
             }
             if (flags & UNICODE_DISALLOW_SUPER
@@ -161,7 +161,7 @@ Perl_uvuni_to_utf8_flags(pTHX_ U8 *d, UV uv, UV flags)
         }
         else if (UNICODE_IS_NONCHAR(uv)) {
             if (flags & UNICODE_WARN_NONCHAR) {
-               Perl_warner(aTHX_ packWARN(WARN_UTF8),
+               Perl_ck_warner_d(aTHX_ packWARN(WARN_NONCHAR),
                  "Unicode non-character U+%04"UVXf" is illegal for open interchange",
                  uv);
             }
@@ -1829,16 +1829,20 @@ Perl_to_utf8_case(pTHX_ const U8 *p, U8* ustrp, STRLEN *lenp,
  
      /* Note that swash_fetch() doesn't output warnings for these because it
       * assumes we will */
-    if (uv1 >= UNICODE_SURROGATE_FIRST && ckWARN_d(WARN_UTF8)) {
+    if (uv1 >= UNICODE_SURROGATE_FIRST) {
         if (uv1 <= UNICODE_SURROGATE_LAST) {
-           const char* desc = (PL_op) ? OP_DESC(PL_op) : normal;
-           Perl_warner(aTHX_ packWARN(WARN_UTF8),
-               "Operation \"%s\" returns its argument for UTF-16 surrogate U+%04"UVXf"", desc, uv1);
+           if (ckWARN_d(WARN_SURROGATE)) {
+               const char* desc = (PL_op) ? OP_DESC(PL_op) : normal;
+               Perl_warner(aTHX_ packWARN(WARN_SURROGATE),
+                   "Operation \"%s\" returns its argument for UTF-16 surrogate U+%04"UVXf"", desc, uv1);
+           }
         }
         else if (UNICODE_IS_SUPER(uv1)) {
-           const char* desc = (PL_op) ? OP_DESC(PL_op) : normal;
-           Perl_warner(aTHX_ packWARN(WARN_UTF8),
-               "Operation \"%s\" returns its argument for non-Unicode code point 0x%04"UVXf"", desc, uv1);
+           if (ckWARN_d(WARN_NON_UNICODE)) {
+               const char* desc = (PL_op) ? OP_DESC(PL_op) : normal;
+               Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
+                   "Operation \"%s\" returns its argument for non-Unicode code point 0x%04"UVXf"", desc, uv1);
+           }
         }
  
         /* Note that non-characters are perfectly legal, so no warning should
@@ -2165,7 +2169,7 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
        /* If char is encoded then swatch is for the prefix */
         needents = (1 << UTF_ACCUMULATION_SHIFT);
         off      = NATIVE_TO_UTF(ptr[klen]) & UTF_CONTINUATION_MASK;
-       if (UTF8_IS_SUPER(ptr) && ckWARN_d(WARN_UTF8)) {
+       if (UTF8_IS_SUPER(ptr) && ckWARN_d(WARN_NON_UNICODE)) {
             const UV code_point = utf8n_to_uvuni(ptr, UTF8_MAXBYTES, 0, 0);
  
             /* This outputs warnings for binary properties only, assuming that
@@ -2173,7 +2177,7 @@ Perl_swash_fetch(pTHX_ SV *swash, const U8 *ptr, bool do_utf8)
              * for, as that would warn on things like /\p{Gc=Cs}/ */
             SV** const bitssvp = hv_fetchs(hv, "BITS", FALSE);
             if (SvUV(*bitssvp) == 1) {
-               Perl_warner(aTHX_ packWARN(WARN_UTF8),
+               Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
                     "Code point 0x%04"UVXf" is not Unicode, no properties match it; all inverse properties do", code_point);
             }
         }
@@ -2892,22 +2896,27 @@ Perl_check_utf8_print(pTHX_ register const U8* s, const STRLEN len)
         if (*s >= UTF8_FIRST_PROBLEMATIC_CODE_POINT_FIRST_BYTE) {
             STRLEN char_len;
             if (UTF8_IS_SUPER(s)) {
-               UV uv = utf8_to_uvchr(s, &char_len);
-               Perl_warner(aTHX_ packWARN(WARN_UTF8),
-                   "Code point 0x%04"UVXf" is not Unicode, may not be portable", uv);
-               ok = FALSE;
+               if (ckWARN_d(WARN_NON_UNICODE)) {
+                   UV uv = utf8_to_uvchr(s, &char_len);
+                   Perl_warner(aTHX_ packWARN(WARN_NON_UNICODE),
+                       "Code point 0x%04"UVXf" is not Unicode, may not be portable", uv);
+                   ok = FALSE;
+               }
             }
             else if (UTF8_IS_SURROGATE(s)) {
-               UV uv = utf8_to_uvchr(s, &char_len);
-               Perl_warner(aTHX_ packWARN(WARN_UTF8),
-                   "Unicode surrogate U+%04"UVXf" is illegal in UTF-8", uv);
-               ok = FALSE;
+               if (ckWARN_d(WARN_SURROGATE)) {
+                   UV uv = utf8_to_uvchr(s, &char_len);
+                   Perl_warner(aTHX_ packWARN(WARN_SURROGATE),
+                       "Unicode surrogate U+%04"UVXf" is illegal in UTF-8", uv);
+                   ok = FALSE;
+               }
             }
             else if
-               (UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s))
+               ((UTF8_IS_NONCHAR_GIVEN_THAT_NON_SUPER_AND_GE_PROBLEMATIC(s))
+                && (ckWARN_d(WARN_NONCHAR)))
             {
                 UV uv = utf8_to_uvchr(s, &char_len);
-               Perl_warner(aTHX_ packWARN(WARN_UTF8),
+               Perl_warner(aTHX_ packWARN(WARN_NONCHAR),
                     "Unicode non-character U+%04"UVXf" is illegal for open interchange", uv);
                 ok = FALSE;
             }
diff --git a/warnings.h b/warnings.h

index d8793d1..39ebe6a 100644 (file)
--- a/warnings.h
+++ b/warnings.h
@@ -81,9 +81,15 @@
  #define WARN_IMPRECISION       46
  #define WARN_ILLEGALPROTO      47
  
-#define WARNsize               12
-#define WARN_ALLstring         "\125\125\125\125\125\125\125\125\125\125\125\125"
-#define WARN_NONEstring                "\0\0\0\0\0\0\0\0\0\0\0\0"
+/* Warnings Categories added in Perl 5.013 */
+
+#define WARN_NON_UNICODE       48
+#define WARN_NONCHAR           49
+#define WARN_SURROGATE         50
+
+#define WARNsize               13
+#define WARN_ALLstring         "\125\125\125\125\125\125\125\125\125\125\125\125\125"
+#define WARN_NONEstring                "\0\0\0\0\0\0\0\0\0\0\0\0\0"
  
  #define isLEXWARN_on   (PL_curcop->cop_warnings != pWARN_STD)
  #define isLEXWARN_off  (PL_curcop->cop_warnings == pWARN_STD)
author	Karl Williamson <public@khwilliamson.com>
	Fri, 18 Feb 2011 05:42:03 +0000 (22:42 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Fri, 18 Feb 2011 05:56:25 +0000 (22:56 -0700)
lib/warnings.pm		patch \| blob \| history
pod/perldelta.pod		patch \| blob \| history
pod/perldiag.pod		patch \| blob \| history
pod/perllexwarn.pod		patch \| blob \| history
regen/warnings.pl		patch \| blob \| history
t/lib/warnings/utf8		patch \| blob \| history
t/op/caller.t		patch \| blob \| history
utf8.c		patch \| blob \| history
warnings.h		patch \| blob \| history