From 5a7fb30a54f192f9dc958d7a74add600705b96bb Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Sat, 3 Jul 2010 10:12:33 -0600 Subject: [PATCH] charnames: check for use bytes in vianame; efficiency When vianame returns a chr, it now verifies that it is legal under 'use bytes'. Update .t An instance of taking of a substr of a huge string is needed only in an error leg. Move it to that leg for performance. And make the message a subroutine so will be identical whenever raised. --- lib/charnames.pm | 19 ++++++++++++++----- lib/charnames.t | 9 +++++++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/lib/charnames.pm b/lib/charnames.pm index da52abc..25a63d8 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -450,6 +450,11 @@ sub alias (@) } } # alias +sub not_legal_use_bytes_msg { + my ($name, $ord) = @_; + return sprintf("Character 0x%04x with name '$name' is above 0xFF with 'use bytes' in effect", $ord); +} + sub alias_file ($) { my ($arg, $file) = @_; @@ -549,9 +554,6 @@ sub lookup_name { return "\x{FFFD}"; } - # Get the official name in case need to output a message - $name = substr($txt, $off[0], $off[1] - $off[0]); - ## ## Now know where in the string the name starts. ## The code, in hex, is before that. @@ -577,7 +579,11 @@ sub lookup_name { # Here is compile time, "use bytes" is in effect, and the character # won't fit in a byte - croak sprintf("Character 0x%04x with name '$name' is above 0xFF", $ord); + + # Get the official name if have one for the message + $name = substr($txt, $off[0], $off[1] - $off[0]) if @off; + + croak not_legal_use_bytes_msg($name, $ord); } # lookup_name sub charnames { @@ -730,7 +736,10 @@ sub vianame # khw claims that this is bad. The function should return either a # an ord or a chr for all inputs; not be bipolar. Also, under 'use # bytes', can create a chr above 255. - return chr CORE::hex $1; + my $ord = CORE::hex $1; + return chr $ord if $ord <= 255 || ! ((caller 0)[8] & $bytes::hint_bits); + carp not_legal_use_bytes_msg($arg, $ord); + return; } if (! exists $vianame{$arg}) { diff --git a/lib/charnames.t b/lib/charnames.t index fa132e8..e8ce58e 100644 --- a/lib/charnames.t +++ b/lib/charnames.t @@ -159,7 +159,8 @@ sub to_bytes { } { - is(sprintf("%04X", charnames::vianame("GOTHIC LETTER AHSA")), "10330"); + cmp_ok(charnames::vianame("GOTHIC LETTER AHSA"), "==", 0x10330, "Verify vianame \\N{name} returns an ord"); + is(charnames::vianame("U+10330"), "\x{10330}", "Verify vianame \\N{U+hex} returns a chr"); use warnings; my $warning_count = @WARN; ok (! defined charnames::vianame("NONE SUCH")); @@ -167,6 +168,10 @@ sub to_bytes { use bytes; is(charnames::vianame("GOTHIC LETTER AHSA"), 0x10330, "Verify vianame \\N{name} is unaffected by 'use bytes'"); + is(charnames::vianame("U+FF"), chr(0xFF), "Verify vianame \\N{U+FF} is unaffected by 'use bytes'"); + cmp_ok($warning_count, '==', @WARN, "Verify vianame doesn't warn on legal inputs"); + is(charnames::vianame("U+100"), undef, "Verify vianame \\N{U+100} is undef under 'use bytes'"); + ok($warning_count == @WARN - 1 && $WARN[-1] =~ /above 0xFF/, "Verify vianame gives appropriate warning for previous test"); } { @@ -670,7 +675,7 @@ is($_, 'foobar'); my $names = do "unicore/Name.pl"; ok(defined $names); my $non_ascii = native_to_latin1($names) =~ tr/\0-\177//c; -ok(! $non_ascii, "Make sure all names are ASCII-only"); +ok(! $non_ascii, "Verify all official names are ASCII-only"); # Verify that charnames propagate to eval("") my $evaltry = eval q[ "Eval: \N{LEFT-POINTING DOUBLE ANGLE QUOTATION MARK}" ]; -- 2.7.4