From da9dec57e250ecec9d2000bc94f516e6b3ee468c Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Thu, 1 Jul 2010 16:25:08 -0600 Subject: [PATCH] Clean up charnames pod, including new changes This patch brings the charnames pod up-to-date, and rewords it to hopefully be more clear. --- lib/charnames.pm | 156 ++++++++++++++++++++++++++++++------------------------- 1 file changed, 86 insertions(+), 70 deletions(-) diff --git a/lib/charnames.pm b/lib/charnames.pm index 0b1b99d..da52abc 100644 --- a/lib/charnames.pm +++ b/lib/charnames.pm @@ -746,7 +746,7 @@ __END__ =head1 NAME -charnames - define character names for C<\N{named}> string literal escapes +charnames - access to Unicode character names and define character names for C<\N{named}> string literal escapes =head1 SYNOPSIS @@ -764,7 +764,7 @@ charnames - define character names for C<\N{named}> string literal escapes mychar => 0xE8000, # Private use area }; print "\N{e_ACUTE} is a small letter e with an acute.\n"; - print "\\N{mychar} allows me to name and use private use characters.\n"; + print "\\N{mychar} allows me to name private use characters.\n"; use charnames (); print charnames::viacode(0x1234); # prints "ETHIOPIC SYLLABLE SEE" @@ -773,47 +773,61 @@ charnames - define character names for C<\N{named}> string literal escapes =head1 DESCRIPTION -Pragma C<use charnames> enables the use of C<\N{CHARNAME}> sequences to -insert a Unicode character into a string based on its name. (However, -you don't need this pragma to use C<\N{U+...}> where the C<...> is a -hexadecimal ordinal number.) - -The pragma supports arguments C<:full>, C<:short>, script names and -customized aliases. If C<:full> is present, for expansion of -C<\N{CHARNAME}>, the string C<CHARNAME> is first looked up in the list of +Pragma C<use charnames> is used to gain access to the names of the +Unicode characters, and to allow you to define your own character names. 
+ +All forms of the pragma enable use of the +L</charnames::vianame(I<name>)> function for run-time lookup of a +character name to get its ordinal (code point), and the inverse +function, L</charnames::viacode(I<code>)>. + +Forms other than C<S<"use charnames ();">> enable the use of +C<\N{I<CHARNAME>}> sequences to compile a Unicode character into a +string based on its name. + +Note that C<\N{U+I<...>}>, where the I<...> is a hexadecimal number, +also inserts a character into a string, but doesn't require the use of +this pragma. The character it inserts is the one whose code point +(ordinal value) is equal to the number. For example, C<"\N{U+263a}"> is +the Unicode (white background, black foreground) smiley face; it doesn't +require this pragma, whereas the equivalent, C<"\N{WHITE SMILING FACE}"> +does. +Also, C<\N{I<...>}> can mean a regex quantifier instead of a character +name, when the I<...> is a number (or comma separated pair of numbers; +see L<perlreref/QUANTIFIERS>), and is not related to this pragma. + +The C<charnames> pragma supports arguments C<:full>, C<:short>, script +names and customized aliases. If C<:full> is present, for expansion of +C<\N{I<CHARNAME>}>, the string I<CHARNAME> is first looked up in the list of standard Unicode character names. If C<:short> is present, and -C has the form C, then C is looked up -as a letter in script C