From 3aa957f9c7dbe37b7f2fe946b886b63a07d35ac7 Mon Sep 17 00:00:00 2001 From: Jarkko Hietaniemi Date: Thu, 5 Jul 2001 13:38:36 +0000 Subject: [PATCH] Update Unicode::UCD on \p{In...}. p4raw-id: //depot/perl@11162 --- lib/Unicode/UCD.pm | 77 ++++++++++++++++-------------------------------------- 1 file changed, 23 insertions(+), 54 deletions(-) diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 6c20d40..cc7adae 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -161,11 +161,6 @@ charblock() returns the block the character belongs to, e.g. C<Basic Latin>. Note that not all the character positions within all blocks are defined. -The name is the same name that is used in the C<\p{In...}> construct, -for example C<\p{InBasicLatin}> (spaces and dashes ('-') are squished -away from the names for the C<\p{In...}>, for example C<LatinExtendedA> -instead of C<Latin Extended-A>. - =cut my @BLOCKS; @@ -196,9 +191,6 @@ sub charblock { charscript() returns the script the character belongs to, e.g. C<Latin>, C<Greek>, C<Han>. -Unfortunately, currently (Perl 5.8.0) there is no regular expression -notation for matching scripts as there is for blocks (C<\p{In...}>. - =cut my @SCRIPTS; @@ -226,56 +218,33 @@ sub charscript { The difference between a character block and a script is that scripts are closer to the linguistic notion of a set of characters required to present languages, while block is more of an artifact of the Unicode -character numbering. For example the Latin B