From 90249f0ae5df4271829a2e527b72534b8974ec80 Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Wed, 24 Oct 2012 10:02:54 -0600 Subject: [PATCH] Make \N{unknown char} a syntax error Previously, it was a warning with the REPLACEMENT CHARACTER substituted. Unicode recommends that it be a syntax error, and any code that used this had to be buggy since the REPLACEMENT CHARACTER has no other use in Unicode. --- lib/_charnames.pm | 16 ++--------- lib/charnames.pm | 8 ++---- lib/charnames.t | 39 ++++++++++++++++--------- pod/perldelta.pod | 9 ++++++ t/lib/charnames/alias | 80 ++++++++++++++++++++++++++++++++++----------------- toke.c | 9 +++++- 6 files changed, 103 insertions(+), 58 deletions(-) diff --git a/lib/_charnames.pm b/lib/_charnames.pm index 5431d0f..347ad27 100644 --- a/lib/_charnames.pm +++ b/lib/_charnames.pm @@ -7,7 +7,7 @@ package _charnames; use strict; use warnings; use File::Spec; -our $VERSION = '1.32'; +our $VERSION = '1.33'; use unicore::Name; # mktables-generated algorithmically-defined names use bytes (); # for $bytes::hint_bits @@ -454,18 +454,8 @@ sub lookup_name ($$$) { } my $case = $name_has_uppercase ? "CAPITAL" : "SMALL"; - if (! $scripts_trie - || $txt !~ - /\t (?: $scripts_trie ) \ (?:$case\ )? LETTER \ \U$lookup_name $/xm) - { - # Here we still don't have it, give up. - return if $runtime; - - # May have zapped input name, get it again. - $name = (defined $save_input) ? $save_input : $_[0]; - carp "Unknown charname '$name'"; - return ($wants_ord) ? 0xFFFD : pack("U", 0xFFFD); - } + return if (! $scripts_trie || $txt !~ + /\t (?: $scripts_trie ) \ (?:$case\ )? LETTER \ \U$lookup_name $/xm); # Here have found the input name in the table. 
@off = ($-[0] + 1, $+[0]); # The 1 is for the tab diff --git a/lib/charnames.pm b/lib/charnames.pm index bef3e1a..5c4ff3f 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -1,7 +1,7 @@ package charnames; use strict; use warnings; -our $VERSION = '1.32'; +our $VERSION = '1.33'; use unicore::Name; # mktables-generated algorithmically-defined names use _charnames (); # The submodule for this where most of the work gets done @@ -214,8 +214,7 @@ L)>. Since Unicode 6.0, it is deprecated to use C<BELL>. Instead use C<ALERT> (but C<BEL> will continue to work). -If the input name is unknown, C<\N{NAME}> raises a warning and -substitutes the Unicode REPLACEMENT CHARACTER (U+FFFD). +It is a syntax error to use C<\N{NAME}> where C<NAME> is unknown. For C<\N{NAME}>, it is a fatal error if C<use bytes> is in effect and the input name is that of a character that won't fit into a byte (i.e., whose @@ -338,8 +337,7 @@ L