From 0721d74039598968722031f4192aa5133e1659c9 Mon Sep 17 00:00:00 2001 From: Father Chrysostomos Date: Sun, 31 Oct 2010 10:23:39 -0700 Subject: [PATCH] Revert "Add consistent synonyms for \p{PosxFOO}" This reverts commit d5944336d74c819152158dabfd806d49ad0ecb21. --- lib/unicore/mktables | 38 +++++++----------------------------- pod/perlrecharclass.pod | 52 +++++++++++++++++++++---------------------------- 2 files changed, 29 insertions(+), 61 deletions(-) diff --git a/lib/unicore/mktables b/lib/unicore/mktables index 8a5c89a..c432809 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -11130,8 +11130,7 @@ sub compile_perl() { # range, with their names prefaced by 'Posix', to signify that these match # what the Posix standard says they should match. A couple are # effectively this, but the name doesn't have 'Posix' in it because there - # just isn't any Posix equivalent. 'XPosix' are the Posix tables extended - # to the full Unicode range, by our guesses as to what is appropriate. + # just isn't any Posix equivalent. # 'Any' is all code points. 
As an error check, instead of just setting it # to be that, construct it to be the union of all the major categories @@ -11196,7 +11195,6 @@ sub compile_perl() { $Lower->set_equivalent_to($gc->table('Lowercase_Letter'), Related => 1); } - $Lower->add_alias('XPosixLower'); $perl->add_match_table("PosixLower", Description => "[a-z]", Initialize => $Lower & $ASCII, @@ -11211,7 +11209,6 @@ sub compile_perl() { $Upper->set_equivalent_to($gc->table('Uppercase_Letter'), Related => 1); } - $Upper->add_alias('XPosixUpper'); $perl->add_match_table("PosixUpper", Description => "[A-Z]", Initialize => $Upper & $ASCII, @@ -11306,7 +11303,6 @@ sub compile_perl() { $Alpha += $gc->table('Nl') if defined $gc->table('Nl'); $Alpha->add_description('Alphabetic'); } - $Alpha->add_alias('XPosixAlpha'); $perl->add_match_table("PosixAlpha", Description => "[A-Za-z]", Initialize => $Alpha & $ASCII, @@ -11316,7 +11312,6 @@ sub compile_perl() { Description => 'Alphabetic and (Decimal) Numeric', Initialize => $Alpha + $gc->table('Decimal_Number'), ); - $Alnum->add_alias('XPosixAlnum'); $perl->add_match_table("PosixAlnum", Description => "[A-Za-z0-9]", Initialize => $Alnum & $ASCII, @@ -11326,16 +11321,14 @@ sub compile_perl() { Description => '\w, including beyond ASCII', Initialize => $Alnum + $gc->table('Mark'), ); - $Word->add_alias('XPosixWord'); my $Pc = $gc->table('Connector_Punctuation'); # 'Pc' Not in release 1 $Word += $Pc if defined $Pc; # This is a Perl extension, so the name doesn't begin with Posix. - my $PerlWord = $perl->add_match_table('PerlWord', + $perl->add_match_table('PerlWord', Description => '\w, restricted to ASCII = [A-Za-z0-9_]', Initialize => $Word & $ASCII, ); - $PerlWord->add_alias('PosixWord'); my $Blank = $perl->add_match_table('Blank', Description => '\h, Horizontal white space', @@ -11348,7 +11341,6 @@ sub compile_perl() { - 0x200B, # ZWSP ); $Blank->add_alias('HorizSpace'); # Another name for it. 
- $Blank->add_alias('XPosixBlank'); $perl->add_match_table("PosixBlank", Description => "\\t and ' '", Initialize => $Blank & $ASCII, @@ -11370,28 +11362,24 @@ sub compile_perl() { Description => '\s including beyond ASCII plus vertical tab', Initialize => $Blank + $VertSpace, ); - $Space->add_alias('XPosixSpace'); $perl->add_match_table("PosixSpace", Description => "\\t, \\n, \\cK, \\f, \\r, and ' '. (\\cK is vertical tab)", Initialize => $Space & $ASCII, ); # Perl's traditional space doesn't include Vertical Tab - my $XPerlSpace = $perl->add_match_table('XPerlSpace', + my $SpacePerl = $perl->add_match_table('SpacePerl', Description => '\s, including beyond ASCII', Initialize => $Space - 0x000B, ); - $XPerlSpace->add_alias('SpacePerl'); # A pre-existing synonym - my $PerlSpace = $perl->add_match_table('PerlSpace', + $perl->add_match_table('PerlSpace', Description => '\s, restricted to ASCII', - Initialize => $XPerlSpace & $ASCII, + Initialize => $SpacePerl & $ASCII, ); - my $Cntrl = $perl->add_match_table('Cntrl', Description => 'Control characters'); $Cntrl->set_equivalent_to($gc->table('Cc'), Related => 1); - $Cntrl->add_alias('XPosixCntrl'); $perl->add_match_table("PosixCntrl", Description => "ASCII control characters: NUL, SOH, STX, ETX, EOT, ENQ, ACK, BEL, BS, HT, LF, VT, FF, CR, SO, SI, DLE, DC1, DC2, DC3, DC4, NAK, SYN, ETB, CAN, EOM, SUB, ESC, FS, GS, RS, US, and DEL", Initialize => $Cntrl & $ASCII, @@ -11408,7 +11396,6 @@ sub compile_perl() { Description => 'Characters that are graphical', Initialize => ~ ($Space + $controls), ); - $Graph->add_alias('XPosixGraph'); $perl->add_match_table("PosixGraph", Description => '[-!"#$%&\'()*+,./:;<>?@[\\\]^_`{|}~0-9A-Za-z]', @@ -11419,7 +11406,6 @@ sub compile_perl() { Description => 'Characters that are graphical plus space characters (but no controls)', Initialize => $Blank + $Graph - $gc->table('Control'), ); - $print->add_alias('XPosixPrint'); $perl->add_match_table("PosixPrint", Description => '[- 
0-9A-Za-z!"#$%&\'()*+,./:;<>?@[\\\]^_`{|}~]', @@ -11430,20 +11416,15 @@ sub compile_perl() { $Punct->set_equivalent_to($gc->table('Punctuation'), Related => 1); # \p{punct} doesn't include the symbols, which posix does - my $XPosixPunct = $perl->add_match_table('XPosixPunct', - Description => '\p{Punct} + ASCII-range \p{Symbol}', - Initialize => $gc->table('Punctuation') - + ($ASCII & $gc->table('Symbol')), - ); $perl->add_match_table('PosixPunct', Description => '[-!"#$%&\'()*+,./:;<>?@[\\\]^_`{|}~]', - Initialize => $ASCII & $XPosixPunct, + Initialize => $ASCII & ($gc->table('Punctuation') + + $gc->table('Symbol')), ); my $Digit = $perl->add_match_table('Digit', Description => '[0-9] + all other decimal digits'); $Digit->set_equivalent_to($gc->table('Decimal_Number'), Related => 1); - $Digit->add_alias('XPosixDigit'); my $PosixDigit = $perl->add_match_table("PosixDigit", Description => '[0-9]', Initialize => $Digit & $ASCII, @@ -11451,7 +11432,6 @@ sub compile_perl() { # Hex_Digit was not present in first release my $Xdigit = $perl->add_match_table('XDigit'); - $Xdigit->add_alias('XPosixXDigit'); my $Hex = property_ref('Hex_Digit'); if (defined $Hex && ! $Hex->is_empty) { $Xdigit->set_equivalent_to($Hex->table('Y'), Related => 1); @@ -11463,10 +11443,6 @@ sub compile_perl() { 0xFF10..0xFF19, 0xFF21..0xFF26, 0xFF41..0xFF46]); $Xdigit->add_description('[0-9A-Fa-f] and corresponding fullwidth versions, like U+FF10: FULLWIDTH DIGIT ZERO'); } - $perl->add_match_table('PosixXDigit', - Initialize => $ASCII & $Xdigit, - Description => '[0-9A-Fa-f]', - ); my $dt = property_ref('Decomposition_Type'); $dt->add_match_table('Non_Canon', Full_Name => 'Non_Canonical', diff --git a/pod/perlrecharclass.pod b/pod/perlrecharclass.pod index 7f96b4b..0b88cc4 100644 --- a/pod/perlrecharclass.pod +++ b/pod/perlrecharclass.pod @@ -522,8 +522,7 @@ The other counterpart, in the column labelled "Full-range Unicode", matches any appropriate characters in the full Unicode character set. 
For example, C<\p{Alpha}> will match not just the ASCII alphabetic characters, but any character in the entire Unicode character set that is considered to be -alphabetic. The backslash sequence column is a (short) synonym for -the Full-range Unicode form. +alphabetic. (Each of the counterparts has various synonyms as well. L<perluniprops/Properties accessible through \p{} and \P{}> lists all the @@ -549,25 +548,25 @@ EBCDIC code page is present, they will behave in accordance with those; if absent, the classes will match only their ASCII-range counterparts. If you disagree with this proposal, send email to C<perl5-porters@perl.org>. - [[:...:]] ASCII-range Full-range backslash Note - Unicode Unicode sequence + [[:...:]] ASCII-range Full-range backslash Note + Unicode Unicode sequence ----------------------------------------------------- - alpha \p{PosixAlpha} \p{XPosixAlpha} - alnum \p{PosixAlnum} \p{XPosixAlnum} + alpha \p{PosixAlpha} \p{Alpha} + alnum \p{PosixAlnum} \p{Alnum} ascii \p{ASCII} - blank \p{PosixBlank} \p{XPosixBlank} \h [1] - or \p{HorizSpace} [1] - cntrl \p{PosixCntrl} \p{XPosixCntrl} [2] - digit \p{PosixDigit} \p{XPosixDigit} \d - graph \p{PosixGraph} \p{XPosixGraph} [3] - lower \p{PosixLower} \p{XPosixLower} - print \p{PosixPrint} \p{XPosixPrint} [4] - punct \p{PosixPunct} \p{XPosixPunct} [5] - \p{PerlSpace} \p{XPerlSpace} \s [6] - space \p{PosixSpace} \p{XPosixSpace} [6] - upper \p{PosixUpper} \p{XPosixUpper} - word \p{PosixWord} \p{XPosixWord} \w - xdigit \p{ASCII_Hex_Digit} \p{XPosixXDigit} + blank \p{PosixBlank} \p{Blank} = [1] + \p{HorizSpace} \h [1] + cntrl \p{PosixCntrl} \p{Cntrl} [2] + digit \p{PosixDigit} \p{Digit} \d + graph \p{PosixGraph} \p{Graph} [3] + lower \p{PosixLower} \p{Lower} + print \p{PosixPrint} \p{Print} [4] + punct \p{PosixPunct} \p{Punct} [5] + \p{PerlSpace} \p{SpacePerl} \s [6] + space \p{PosixSpace} \p{Space} [6] + upper \p{PosixUpper} \p{Upper} + word \p{PerlWord} \p{Word} \w + xdigit \p{ASCII_Hex_Digit} \p{XDigit} =over 4 @@ -622,11 +621,6 @@ matches the vertical tab, C<\cK>. 
Same for the two ASCII-only range forms. =back -There are various other synonyms that can be used for these besides -C<\p{HorizSpace}> and \C<\p{XPosixBlank}>. For example -C<\p{PosixAlpha}> can be written as C<\p{Alpha}>. All are listed -in L<perluniprops/Properties accessible through \p{} and \P{}>. - =head4 Negation X<character class, negation> @@ -637,12 +631,10 @@ Some examples: POSIX ASCII-range Full-range backslash Unicode Unicode sequence ----------------------------------------------------- - [[:^digit:]] \P{PosixDigit} \P{XPosixDigit} \D - [[:^space:]] \P{PosixSpace} \P{XPosixSpace} - \P{PerlSpace} \P{XPerlSpace} \S - [[:^word:]] \P{PerlWord} \P{XPosixWord} \W - -Again, the backslash sequence means Full-range Unicode. + [[:^digit:]] \P{PosixDigit} \P{Digit} \D + [[:^space:]] \P{PosixSpace} \P{Space} + \P{PerlSpace} \P{SpacePerl} \S + [[:^word:]] \P{PerlWord} \P{Word} \W =head4 [= =] and [. .] -- 2.7.4