From 484741e121917d8979d45f886da5d146507d7f6d Mon Sep 17 00:00:00 2001 From: Karl Williamson Date: Fri, 19 Nov 2010 09:46:54 -0700 Subject: [PATCH] Add Unicode's minor 6.0 correction Unicode just released a minor correction to 6.0. This patch adds that. Unfortunately, a test thought the old behavior was correct, and so had to be modified as well. --- lib/unicore/mktables | 6 +++++- pod/perldelta.pod | 5 +++-- t/uni/class.t | 14 ++++++++------ 3 files changed, 16 insertions(+), 9 deletions(-) diff --git a/lib/unicore/mktables b/lib/unicore/mktables index a8209cf..042fb94 100644 --- a/lib/unicore/mktables +++ b/lib/unicore/mktables @@ -10153,12 +10153,16 @@ END # allow the BELL name for U+1F514, so that the old usage can be # deprecated for one cycle. - return if $_ !~ /^(?:0007|1F514);/; + return if $_ !~ /^(?:0007|1F514|070F);/; my ($code_point, @fields) = split /\s*;\s*/, $_, -1; if ($code_point eq '0007') { $fields[$CHARNAME] = "ALERT"; } + elsif ($code_point eq '070F') { # Unicode Corrigendum #8; see + # http://www.unicode.org/versions/corrigendum8.html + $fields[$BIDI] = "AL"; + } elsif ($^V lt v5.15.0) { # For 5.16 will convert to use Unicode's name $fields[$CHARNAME] = ""; } diff --git a/pod/perldelta.pod b/pod/perldelta.pod index 9b1b6f4..f3f5dbf 100644 --- a/pod/perldelta.pod +++ b/pod/perldelta.pod @@ -139,8 +139,9 @@ L. =head2 Unicode Version 6.0 is now supported (mostly) -Perl comes with the Unicode 6.0 data base, with one exception noted -below. +Perl comes with the Unicode 6.0 data base updated with +L<Corrigendum #8|http://www.unicode.org/versions/corrigendum8.html>, +with one exception noted below. See L<http://unicode.org/versions/Unicode6.0.0> for details on the new release. 
Perl does not support any Unicode provisional properties, including the new ones for this release, but their database files are diff --git a/t/uni/class.t b/t/uni/class.t index 107a202..fedec4c 100644 --- a/t/uni/class.t +++ b/t/uni/class.t @@ -65,12 +65,14 @@ is(($str =~ /(\p{Other::Class}+)/)[0], '@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_'); # make sure it finds class in other OTHER package is(($str =~ /(\p{A::B::Intersection}+)/)[0], '@ABCDEFGHIJKLMNO'); -# lib/unicore/Bc/AL.pl -$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}"; -is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{070F}"); -is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{070F}"); -is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{070F}"); -is(($str =~ /(\P{bc=AL}+)/)[0], "\x{070F}"); +# lib/unicore/lib/Bc/AL.pl. U+070E is unassigned, currently, but still has +# bidi class AL. The first one in the sequence that doesn't is 0711, which is +# BC=NSM. +$str = "\x{070D}\x{070E}\x{070F}\x{0710}\x{0711}\x{0712}"; +is(($str =~ /(\P{BidiClass: ArabicLetter}+)/)[0], "\x{0711}"); +is(($str =~ /(\P{BidiClass: AL}+)/)[0], "\x{0711}"); +is(($str =~ /(\P{BC :ArabicLetter}+)/)[0], "\x{0711}"); +is(($str =~ /(\P{bc=AL}+)/)[0], "\x{0711}"); # make sure InGreek works $str = "[\x{038B}\x{038C}\x{038D}]"; -- 2.7.4