cpan/Unicode-Collate/t/loc_zhpy.t Unicode::Collate
cpan/Unicode-Collate/t/loc_zhst.t Unicode::Collate
cpan/Unicode-Collate/t/loc_zh.t Unicode::Collate
+cpan/Unicode-Collate/t/nonchar.t Unicode::Collate
cpan/Unicode-Collate/t/normal.t Unicode::Collate
cpan/Unicode-Collate/t/overcjk0.t Unicode::Collate
cpan/Unicode-Collate/t/overcjk1.t Unicode::Collate
'Unicode::Collate' =>
{
'MAINTAINER' => 'sadahiro',
- 'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-0.66-withoutworldwriteables.tar.gz',
+ 'DISTRIBUTION' => 'SADAHIRO/Unicode-Collate-0.67-withoutworldwriteables.tar.gz',
'FILES' => q[cpan/Unicode-Collate],
# ignore experimental XS version
'EXCLUDED' => [ qr{X$},
Revision history for Perl module Unicode::Collate.
+0.67 Sun Nov 14 11:38:59 2010
+ - supported UCA_Version 22 for Unicode 6.0.0.
+ * 2B740..2B81D are new CJK unified ideographs.
+ * noncharacters (e.g. U+FFFF) should be overridable, not be ignored.
+ ! DUCET is NOT updated, as no maint perl supports Unicode 6.0.0.
+ Thus the default UCA_Version is still 20.
+ - added t/nonchar.t.
+ - improved discontiguous contractions of 3 or more characters.
+ (e.g. 0FB2 0F71 0F80 and 0FB3 0F71 0F80)
+ - auxiliary: now 'mklocale' also copes with Korean.pm according to DUCET.
+
0.66 Sun Nov 7 10:47:30 2010
- U::C::Locale newly supports locale: ko.
- added Unicode::Collate::CJK::Korean for ko.
- supported locales: cs, es, es__traditional, fr, nn, pl.
! added t/locale*.t that uses DUCET.
(locale_cs.t, locale_fr.t, locale_nn.t, locale_pl.t, locale_test.t)
+ - data/*.txt and mklocale for preparation of Locale/*.pl from DUCET.
0.54 Sun Jul 25 21:37:04 2010
- Now UCA Revision 20 (based on Unicode 5.2.0).
which *is required* to test this module.
! Please notice that allkeys.txt will be overwritten if you have had
other allkeys.txt already.
- - U+9FC4..U+9FCB and U+2A700..U+2B734 are new CJK Unified Ideographs.
+ - U+9FC4..U+9FCB and U+2A700..U+2B734 are new CJK unified ideographs.
- Many hangul jamo are assigned (affecting hangul_terminator).
! DUCET will be compiled when XS is used. Explicit saying
which is not required to test this module.
! Please notice that allkeys.txt will be overwritten if you have had
other allkeys.txt already.
- - U+9FBC..U+9FC3 are new CJK Unified Ideographs.
+ - U+9FBC..U+9FC3 are new CJK unified ideographs.
0.52 Thu Oct 13 21:51:09 2005
- The Unicode::Collate->new method does not destroy user's $_ any longer.
no warnings 'utf8';
-our $VERSION = '0.66';
+our $VERSION = '0.67';
our $PACKAGE = __PACKAGE__;
my @Path = qw(Unicode Collate);
use constant Hangul_LIni => 0x1100;
use constant Hangul_LFin => 0x1159;
use constant Hangul_LFill => 0x115F;
-use constant Hangul_LEnd => 0x115F; # Unicode 5.2.0
+use constant Hangul_LEnd => 0x115F; # Unicode 5.2
use constant Hangul_VBase => 0x1161;
use constant Hangul_VIni => 0x1160; # from Vowel Filler
use constant Hangul_VFin => 0x11A2;
-use constant Hangul_VEnd => 0x11A7; # Unicode 5.2.0
+use constant Hangul_VEnd => 0x11A7; # Unicode 5.2
use constant Hangul_TBase => 0x11A7; # from "no-final" codepoint
use constant Hangul_TIni => 0x11A8;
use constant Hangul_TFin => 0x11F9;
-use constant Hangul_TEnd => 0x11FF; # Unicode 5.2.0
-use constant HangulL2Ini => 0xA960; # Unicode 5.2.0
-use constant HangulL2Fin => 0xA97C; # Unicode 5.2.0
-use constant HangulV2Ini => 0xD7B0; # Unicode 5.2.0
-use constant HangulV2Fin => 0xD7C6; # Unicode 5.2.0
-use constant HangulT2Ini => 0xD7CB; # Unicode 5.2.0
-use constant HangulT2Fin => 0xD7FB; # Unicode 5.2.0
-
-use constant CJK_UidIni => 0x4E00;
-use constant CJK_UidFin => 0x9FA5;
-use constant CJK_UidF41 => 0x9FBB;
-use constant CJK_UidF51 => 0x9FC3;
-use constant CJK_UidF52 => 0x9FCB;
-use constant CJK_ExtAIni => 0x3400; # Unicode 3.0.0
-use constant CJK_ExtAFin => 0x4DB5; # Unicode 3.0.0
-use constant CJK_ExtBIni => 0x20000; # Unicode 3.1.0
-use constant CJK_ExtBFin => 0x2A6D6; # Unicode 3.1.0
-use constant CJK_ExtCIni => 0x2A700; # Unicode 5.2.0
-use constant CJK_ExtCFin => 0x2B734; # Unicode 5.2.0
+use constant Hangul_TEnd => 0x11FF; # Unicode 5.2
+use constant HangulL2Ini => 0xA960; # Unicode 5.2
+use constant HangulL2Fin => 0xA97C; # Unicode 5.2
+use constant HangulV2Ini => 0xD7B0; # Unicode 5.2
+use constant HangulV2Fin => 0xD7C6; # Unicode 5.2
+use constant HangulT2Ini => 0xD7CB; # Unicode 5.2
+use constant HangulT2Fin => 0xD7FB; # Unicode 5.2
+
+use constant CJK_UidIni => 0x4E00;
+use constant CJK_UidFin => 0x9FA5;
+use constant CJK_UidF41 => 0x9FBB;
+use constant CJK_UidF51 => 0x9FC3;
+use constant CJK_UidF52 => 0x9FCB;
+use constant CJK_ExtAIni => 0x3400; # Unicode 3.0
+use constant CJK_ExtAFin => 0x4DB5; # Unicode 3.0
+use constant CJK_ExtBIni => 0x20000; # Unicode 3.1
+use constant CJK_ExtBFin => 0x2A6D6; # Unicode 3.1
+use constant CJK_ExtCIni => 0x2A700; # Unicode 5.2
+use constant CJK_ExtCFin => 0x2B734; # Unicode 5.2
+use constant CJK_ExtDIni => 0x2B740; # Unicode 6.0
+use constant CJK_ExtDFin => 0x2B81D; # Unicode 6.0
my %CompatUI = map +($_ => 1), (
0xFA0E, 0xFA0F, 0xFA11, 0xFA13, 0xFA14, 0xFA1F,
/;
our @ChangeNG = qw/
- entry mapping table maxlength
+ entry mapping table maxlength contraction
ignoreChar ignoreName undefChar undefName variableTable
versionTable alternateTable backwardsTable forwardsTable rearrangeTable
derivCode normCode rearrangeHash backwardsFlag
16 => \&_derivCE_14, # 16 == 14
18 => \&_derivCE_18,
20 => \&_derivCE_20,
+ 22 => \&_derivCE_22,
);
sub checkCollator {
$self->{mapping}{$entry} = $is_L3_ignorable ? [] : \@key;
if (@uv > 1) {
- (!$self->{maxlength}{$uv[0]} || $self->{maxlength}{$uv[0]} < @uv)
- and $self->{maxlength}{$uv[0]} = @uv;
+ if (!$self->{maxlength}{$uv[0]} || $self->{maxlength}{$uv[0]} < @uv) {
+ $self->{maxlength}{$uv[0]} = @uv;
+ }
+ }
+ if (@uv > 2) {
+ while (@uv) {
+ pop @uv;
+ my $fake_entry = join(CODE_SEP, @uv); # in JCPS
+ $self->{contraction}{$fake_entry} = 1;
+ }
}
}
my $map = $self->{mapping};
my $max = $self->{maxlength};
my $reH = $self->{rearrangeHash};
- my $ver9 = $self->{UCA_Version} >= 9 && $self->{UCA_Version} <= 11;
+ my $vers = $self->{UCA_Version};
+ my $ver9 = $vers >= 9 && $vers <= 11;
my ($str, @buf);
# remove a code point marked as a completely ignorable.
for (my $i = 0; $i < @src; $i++) {
- $src[$i] = undef
- if _isIllegal($src[$i]) || ($ver9 &&
- $map->{ $src[$i] } && @{ $map->{ $src[$i] } } == 0);
+ if (_isIllegal($src[$i]) || $vers <= 20 && _isNonchar($src[$i])) {
+ $src[$i] = undef;
+ } elsif ($ver9) {
+ $src[$i] = undef if $map->{ $src[$i] } &&
+ @{ $map->{ $src[$i] } } == 0;
+ }
}
for (my $i = 0; $i < @src; $i++) {
}
}
- # not-contiguous contraction with Combining Char (cf. UTS#10, S2.1).
+ # discontiguous contraction with Combining Char (cf. UTS#10, S2.1).
# This process requires Unicode::Normalize.
# If "normalization" is undef, here should be skipped *always*
# (in spite of bool value of $CVgetCombinClass),
# since canonical ordering cannot be expected.
# Blocked combining character should not be contracted.
- if ($self->{normalization})
# $self->{normCode} is false in the case of "prenormalized".
- {
+ if ($self->{normalization}) {
+ my $cont = $self->{contraction};
my $preCC = 0;
- my $curCC = 0;
+ my $preCC_uc = 0;
+ my $jcps_uc = $jcps;
+ my(@out, @out_uc);
for (my $p = $i + 1; $p < @src; $p++) {
next if ! defined $src[$p];
- $curCC = $CVgetCombinClass->($src[$p]);
+ my $curCC = $CVgetCombinClass->($src[$p]);
last unless $curCC;
my $tail = CODE_SEP . $src[$p];
+
+ if ($preCC_uc != $curCC && ($map->{$jcps_uc.$tail} ||
+ $cont->{$jcps_uc.$tail})) {
+ $jcps_uc .= $tail;
+ push @out_uc, $p;
+ } else {
+ $preCC_uc = $curCC;
+ }
+
if ($preCC != $curCC && $map->{$jcps.$tail}) {
$jcps .= $tail;
- $src[$p] = undef;
+ push @out, $p;
} else {
$preCC = $curCC;
}
}
+
+ if ($map->{$jcps_uc}) {
+ $jcps = $jcps_uc;
+ $src[$_] = undef for @out_uc;
+ } else {
+ $src[$_] = undef for @out;
+ }
}
}
}
+sub _derivCE_22 { # derive a pair of collation elements for one code point; registered under key 22 in the derivCode dispatch table
+ my $u = shift; # the code point to derive from
+ my $base = (CJK_UidIni <= $u && $u <= CJK_UidF52 || $CompatUI{$u})
+ ? 0xFB40 : # CJK
+ (CJK_ExtAIni <= $u && $u <= CJK_ExtAFin ||
+ CJK_ExtBIni <= $u && $u <= CJK_ExtBFin ||
+ CJK_ExtCIni <= $u && $u <= CJK_ExtCFin ||
+ CJK_ExtDIni <= $u && $u <= CJK_ExtDFin)
+ ? 0xFB80 # CJK ext.
+ : 0xFBC0; # others
+ my $aaaa = $base + ($u >> 15); # base plus the bits of $u above the low 15
+ my $bbbb = ($u & 0x7FFF) | 0x8000; # low 15 bits of $u with 0x8000 set
+ return pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u), # first element: $aaaa with Min2Wt and Min3Wt
+ pack(VCE_TEMPLATE, NON_VAR, $bbbb, 0, 0, $u); # second element: $bbbb with zeros in the next two fields
+}
+
sub _derivCE_20 {
my $u = shift;
my $base = (CJK_UidIni <= $u && $u <= CJK_UidF52 || $CompatUI{$u})
($uca_vers >= 8 && CJK_ExtBIni <= $u && $u <= CJK_ExtBFin)
||
($uca_vers >= 20 && CJK_ExtCIni <= $u && $u <= CJK_ExtCFin)
+ ||
+ ($uca_vers >= 22 && CJK_ExtDIni <= $u && $u <= CJK_ExtDFin)
);
}
sub _isIllegal {
my $code = shift;
- return ! defined $code # removed
+ return((! defined $code) # removed
|| ($code < 0 || 0x10FFFF < $code) # out of range
- || (($code & 0xFFFE) == 0xFFFE) # ??FFF[EF] (cf. utf8.c)
+ );
+}
+
+sub _isNonchar {
+ my $code = shift;
+ return((($code & 0xFFFE) == 0xFFFE) # ??FFF[EF] (cf. utf8.c)
|| (0xD800 <= $code && $code <= 0xDFFF) # unpaired surrogates
|| (0xFDD0 <= $code && $code <= 0xFDEF) # other non-characters
- ;
+ );
}
# Hangul Syllable Type
If the tracking version number of UCA is given,
behavior of that tracking version is emulated on collating.
If omitted, the return value of C<UCA_Version()> is used.
-C<UCA_Version()> should return the latest tracking version supported.
-The supported tracking version: 8, 9, 11, 14, 16, 18 or 20.
+The following tracking versions are supported. The default is 20.
UCA Unicode Standard DUCET (@version)
- ---------------------------------------------------
+ -------------------------------------------------------
8 3.1 3.0.1 (3.0.1d9)
9 3.1 with Corrigendum 3 3.1.1 (3.1.1)
11 4.0 4.0.0 (4.0.0)
16 5.0 5.0.0 (5.0.0)
18 5.1.0 5.1.0 (5.1.0)
20 5.2.0 5.2.0 (5.2.0)
+ 22 6.0.0 6.0.0 (6.0.0)
Note: Recent UTS #10 renames "Tracking Version" to "Revision."
+* Noncharacters (e.g. U+FFFF) are not ignored, and can be overridden
+since C<UCA_Version> 22.
+
+* Fully ignorable characters were ignored, and would not interrupt
+contractions with C<UCA_Version> 9 and 11.
+
+* Treatment of ignorables after variables and some behaviors
+were changed at C<UCA_Version> 9.
+
+* Characters regarded as CJK unified ideographs (cf. C<overrideCJK>)
+depend on C<UCA_Version>.
+
+* Many hangul jamo are assigned at C<UCA_Version> 20, which affects
+C<hangul_terminator>.
+
=item alternate
-- see 3.2.2 Alternate Weighting, version 8 of UTS #10
In the case of C<(normalization =E<gt> "prenormalized")>,
any normalization is not performed, but
-non-contiguous contractions with combining characters are performed.
+discontiguous contractions with combining characters are performed.
Therefore
C<(normalization =E<gt> 'prenormalized', preprocess =E<gt> sub { NFD(shift) })>
B<is> equivalent to C<(normalization =E<gt> 'NFD')>.
order, but those in the CJK Unified Ideographs block are lesser than
those in the CJK Unified Ideographs Extension A etc.
- In CJK Unified Ideographs block:
- U+4E00..U+9FA5 if UCA_Version is 8 to 11;
- U+4E00..U+9FBB if UCA_Version is 14 to 16;
- U+4E00..U+9FC3 if UCA_Version is 18;
- U+4E00..U+9FCB if UCA_Version is 20.
+ In the CJK Unified Ideographs block:
+ U+4E00..U+9FA5 if UCA_Version is 8 to 11.
+ U+4E00..U+9FBB if UCA_Version is 14 to 16.
+ U+4E00..U+9FC3 if UCA_Version is 18.
+ U+4E00..U+9FCB if UCA_Version is 20 or greater.
- In CJK Unified Ideographs Extension blocks:
- Ext.A (U+3400..U+4DB5) and Ext.B (U+20000..U+2A6D6) in any UCA_Version;
- Ext.C (U+2A700..U+2B734) if UCA_Version is 20.
+ In the CJK Unified Ideographs Extension blocks:
+ Ext.A (U+3400..U+4DB5) and Ext.B (U+20000..U+2A6D6) in any UCA_Version.
+ Ext.C (U+2A700..U+2B734) if UCA_Version is 20 or greater.
+ Ext.D (U+2B740..U+2B81D) if UCA_Version is 22 or greater.
Through C<overrideCJK>, ordering of CJK unified ideographs (including
extensions) can be overrided.
=item C<UCA_Version()>
Returns the tracking version number of UTS #10 this module consults.
+C<UCA_Version()> should return the tracking version corresponding
+to the DUCET incorporated.
=item C<Base_Unicode_Version()>
use 5.006;
use strict;
-our $VERSION = '0.64';
+our $VERSION = '0.65';
my %u2p;
my $wt = 0x8000;
use Carp;
use base qw(Unicode::Collate);
-our $VERSION = '0.66';
+our $VERSION = '0.67';
use File::Spec;
+{
entry => <<'ENTRY', # for DUCET v5.2.0
-0587 ; [.1858.0020.0002.0584][.FFFF.0000.0000.0000] # ARMENIAN SMALL LIGATURE ECH YIWN
-0535 0582 ; [.1858.0020.0008.0554][.FFFF.0000.0000.0000] # <ARMENIAN CAPITAL LETTER ECH, ARMENIAN SMALL LETTER YIWN>
+0587 ; [.1858.0020.0002.0584][.FFF1.0000.0000.0000] # ARMENIAN SMALL LIGATURE ECH YIWN
+0535 0582 ; [.1858.0020.0008.0554][.FFF1.0000.0000.0000] # <ARMENIAN CAPITAL LETTER ECH, ARMENIAN SMALL LETTER YIWN>
ENTRY
};
-Unicode/Collate version 0.66
+Unicode/Collate version 0.67
===============================
NAME
from http://www.unicode.org/Public/UCA/latest/allkeys.txt
to <a place in @INC>/Unicode/Collate/allkeys.txt manually.
+HOW TO CHANGE DUCET (NOT WARRANTED)
+
+ 0. rewriting UCA_Version and Base_Unicode_Version in Collate.pm
+ and t/version.t is preferred.
+ 1. replace Collate/allkeys.txt with a new DUCET.
+ 2. run mklocale to generate new Locale/*.pl and Korean.pm.
+ 3. replace Collate/Locale/*.pl with the new Locale/*.pl,
+ and Collate/CJK/Korean.pm with the new Korean.pm.
+ 4. make test.
+ If it fails, more changes may be required, which may not be easy.
+
AUTHOR, COPYRIGHT AND LICENSE
The Unicode::Collate module for perl was written by SADAHIRO Tomoyuki,
}
use Test;
-BEGIN { plan tests => 83 };
+BEGIN { plan tests => 321 }; # 1 + 40 x @Versions
use strict;
use warnings;
normalization => undef,
);
-# U+9FC4..U+9FCB are CJK UI since Unicode 5.2.0.
-# U+9FBC..U+9FC3 are CJK UI since Unicode 5.1.0.
-# U+9FA6..U+9FBB are CJK UI since Unicode 4.1.0.
-# CJK UI Ext are greater than any CJK UI.
-# U+3400 ..U+4DB5 are CJK UI Ext.A since Unicode 3.0.0.
-# U+20000..U+2A6D6 are CJK UI Ext.B since Unicode 3.1.0.
-# U+2A700..U+2B734 are CJK UI Ext.C since Unicode 5.2.0.
-
-##### 2..13
-$Collator->change(UCA_Version => 8);
-ok($Collator->gt("\x{9FA5}", "\x{3400}")); # UI > ExtA
-ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FC3}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FC4}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
-ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < Unassigned(ExtB)
-ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < Unassigned(ExtB)
-ok($Collator->lt("\x{9FFF}","\x{20000}")); # Unassigned < Unassigned(ExtB)
-ok($Collator->lt("\x{9FFF}","\x{2A6D6}")); # Unassigned < Unassigned(ExtB)
-
-##### 14..25
-$Collator->change(UCA_Version => 9);
-ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
-ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FC3}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FC4}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned > Unassigned
-ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
-ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
-ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
-ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
-
-##### 26..37
-$Collator->change(UCA_Version => 11);
-ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
-ok($Collator->gt("\x{9FA6}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FBB}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FC3}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FC4}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # Unassigned > Unassigned
-ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
-ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
-ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
-ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
-
-
-##### 38..49
-$Collator->change(UCA_Version => 14);
-ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
-ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA
-ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA
-ok($Collator->gt("\x{9FBC}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FC3}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FC4}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
-ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
-ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
-ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
-ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
-
-##### 50..65
-$Collator->change(UCA_Version => 18);
-ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
-ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA
-ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA
-ok($Collator->lt("\x{9FBC}", "\x{3400}")); # new UI < ExtA
-ok($Collator->lt("\x{9FC3}", "\x{3400}")); # new UI < ExtA
-ok($Collator->gt("\x{9FC4}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FCB}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FCC}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
-ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
-ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
-ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
-ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
-ok($Collator->lt("\x{9FFF}","\x{2A700}")); # Unassigned < Unassigned(ExtC)
-ok($Collator->lt("\x{9FFF}","\x{2B734}")); # Unassigned < Unassigned(ExtC)
-
-##### 65..81
-$Collator->change(UCA_Version => 20);
-ok($Collator->lt("\x{9FA5}", "\x{3400}")); # UI < ExtA
-ok($Collator->lt("\x{9FA6}", "\x{3400}")); # new UI < ExtA
-ok($Collator->lt("\x{9FBB}", "\x{3400}")); # new UI < ExtA
-ok($Collator->lt("\x{9FBC}", "\x{3400}")); # new UI < ExtA
-ok($Collator->lt("\x{9FC3}", "\x{3400}")); # new UI < ExtA
-ok($Collator->lt("\x{9FC4}", "\x{3400}")); # new UI < ExtA
-ok($Collator->lt("\x{9FCB}", "\x{3400}")); # new UI < ExtA
-ok($Collator->gt("\x{9FCC}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->gt("\x{9FFF}", "\x{3400}")); # Unassigned > ExtA
-ok($Collator->lt("\x{9FA6}", "\x{9FBB}")); # new UI > new UI
-ok($Collator->lt("\x{3400}","\x{20000}")); # ExtA < ExtB
-ok($Collator->lt("\x{3400}","\x{2A6D6}")); # ExtA < ExtB
-ok($Collator->gt("\x{9FFF}","\x{20000}")); # Unassigned > ExtB
-ok($Collator->gt("\x{9FFF}","\x{2A6D6}")); # Unassigned > ExtB
-ok($Collator->gt("\x{9FFF}","\x{2A700}")); # Unassigned > ExtC
-ok($Collator->gt("\x{9FFF}","\x{2B734}")); # Unassigned > ExtC
-ok($Collator->lt("\x{9FFF}","\x{2B735}")); # Unassigned < Unassigned
-ok($Collator->lt("\x{9FFF}","\x{2B73F}")); # Unassigned < Unassigned
+# CJK UI Ext > CJK UI.
+# [ UCA_Version 8: Ext.A < UI and BMP < Ext.B (code point order) ]
+# 4E00..9FA5 are CJK UI.
+# 9FA6..9FBB are CJK UI since UCA_Version 14 (Unicode 4.1).
+# 9FBC..9FC3 are CJK UI since UCA_Version 18 (Unicode 5.1).
+# 9FC4..9FCB are CJK UI since UCA_Version 20 (Unicode 5.2).
+
+# 3400..4DB5 are CJK UI Ext.A since UCA_Version 8 (Unicode 3.0).
+# 20000..2A6D6 are CJK UI Ext.B since UCA_Version 8 (Unicode 3.1).
+# 2A700..2B734 are CJK UI Ext.C since UCA_Version 20 (Unicode 5.2).
+# 2B740..2B81D are CJK UI Ext.D since UCA_Version 22 (Unicode 6.0).
+
+my @Versions = (8, 9, 11, 14, 16, 18, 20, 22);
+
+for my $v (@Versions) {
+$Collator->change(UCA_Version => $v);
+
+# Ext.A > UI
+ok($Collator->cmp("\x{3400}", "\x{4E00}") == ($v >= 9 ? 1 : -1)); # UI
+ok($Collator->cmp("\x{3400}", "\x{9FA5}") == ($v >= 9 ? 1 : -1)); # UI
+ok($Collator->cmp("\x{3400}", "\x{9FA6}") == ($v >= 14 ? 1 : -1)); # new
+ok($Collator->cmp("\x{3400}", "\x{9FBB}") == ($v >= 14 ? 1 : -1)); # new
+ok($Collator->cmp("\x{3400}", "\x{9FBC}") == ($v >= 18 ? 1 : -1)); # new
+ok($Collator->cmp("\x{3400}", "\x{9FC3}") == ($v >= 18 ? 1 : -1)); # new
+ok($Collator->cmp("\x{3400}", "\x{9FC4}") == ($v >= 20 ? 1 : -1)); # new
+ok($Collator->cmp("\x{3400}", "\x{9FCB}") == ($v >= 20 ? 1 : -1)); # new
+ok($Collator->cmp("\x{3400}", "\x{9FCC}") == -1); # na
+ok($Collator->cmp("\x{3400}", "\x{9FFF}") == -1); # na
+
+# UI < UI
+ok($Collator->cmp("\x{4E00}", "\x{9FA5}") == -1); # UI < UI
+ok($Collator->cmp("\x{9FA5}", "\x{9FA6}") == -1); # UI < new
+ok($Collator->cmp("\x{9FA6}", "\x{9FBB}") == -1); # new < new
+ok($Collator->cmp("\x{9FBB}", "\x{9FBC}") == -1); # new < new
+ok($Collator->cmp("\x{9FBC}", "\x{9FC3}") == -1); # new < new
+ok($Collator->cmp("\x{9FC3}", "\x{9FC4}") == -1); # new < new
+ok($Collator->cmp("\x{9FC4}", "\x{9FCB}") == -1); # new < new
+ok($Collator->cmp("\x{9FCB}", "\x{9FCC}") == -1); # new < na
+ok($Collator->cmp("\x{9FCC}", "\x{9FFF}") == -1); # na < na
+
+# Ext.A < Ext.B
+ok($Collator->cmp("\x{3400}", "\x{20000}") == -1);
+
+# Ext.A
+ok($Collator->cmp("\x{3400}", "\x{4DB5}") == -1); # A < A
+ok($Collator->cmp("\x{2FFF}", "\x{3400}") == ($v >= 8 ? 1 : -1)); # na > A
+ok($Collator->cmp("\x{2FFF}", "\x{4DB5}") == ($v >= 8 ? 1 : -1)); # na > A
+ok($Collator->cmp("\x{2FFF}", "\x{4DB6}") == -1); # na < na
+ok($Collator->cmp("\x{2FFF}", "\x{4DBF}") == -1); # na < na
+
+# Ext.B
+ok($Collator->cmp("\x{20000}","\x{2A6D6}") == -1); # B < B
+ok($Collator->cmp("\x{2FFF}", "\x{20000}") == ($v >= 9 ? 1 : -1)); # na > B
+ok($Collator->cmp("\x{2FFF}", "\x{2A6D6}") == ($v >= 9 ? 1 : -1)); # na > B
+ok($Collator->cmp("\x{2FFF}", "\x{2A6D7}") == -1); # na < na
+ok($Collator->cmp("\x{2FFF}", "\x{2A6DF}") == -1); # na < na
+
+# Ext.C
+ok($Collator->cmp("\x{2A700}","\x{2B734}") == -1); # C < C
+ok($Collator->cmp("\x{2FFF}", "\x{2A700}") == ($v >= 20 ? 1 : -1)); # na > C
+ok($Collator->cmp("\x{2FFF}", "\x{2B734}") == ($v >= 20 ? 1 : -1)); # na > C
+ok($Collator->cmp("\x{2FFF}", "\x{2B735}") == -1); # na < na
+ok($Collator->cmp("\x{2FFF}", "\x{2B73F}") == -1); # na < na
+
+# Ext.D
+ok($Collator->cmp("\x{2B740}","\x{2B81D}") == -1); # D < D
+ok($Collator->cmp("\x{2FFF}", "\x{2B740}") == ($v >= 22 ? 1 : -1)); # na > D
+ok($Collator->cmp("\x{2FFF}", "\x{2B81D}") == ($v >= 22 ? 1 : -1)); # na > D
+ok($Collator->cmp("\x{2FFF}", "\x{2B81E}") == -1); # na < na
+ok($Collator->cmp("\x{2FFF}", "\x{2B81F}") == -1); # na < na
+}
}
use Test;
-BEGIN { plan tests => 491 };
+BEGIN { plan tests => 561 }; # 1 + 70 x @Versions
use strict;
use warnings;
ok(1);
-my @Versions = (8, 9, 11, 14, 16, 18, 20);
+my @Versions = (8, 9, 11, 14, 16, 18, 20, 22);
# 12 compatibility ideographs are treated as unified ideographs:
# FA0E, FA0F, FA11, FA13, FA14, FA1F, FA21, FA23, FA24, FA27, FA28, FA29.
+
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
}
use Test;
-BEGIN { plan tests => 44 };
+BEGIN { plan tests => 74 };
use strict;
use warnings;
ok($kjeNoN->eq("\x{43A}", "\x{43A}\x{334}\x{301}"));
ok($kjeNoN->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
+# 5
+
our %sortkeys;
$sortkeys{'KAac'} = $kjeNoN->viewSortKey("\x{43A}\x{301}");
table => undef,
entry => $kjeEntry,
);
+
ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{301}"));
ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{334}\x{301}"));
ok($kjeNFD->lt("\x{43A}", "\x{43A}\x{334}\x{301}"));
ok($kjeNFD->eq("\x{45C}", "\x{43A}\x{301}\x{334}"));
+# 9
my $aaNFD = Unicode::Collate->new(
level => 1,
ok($aaNFD->lt("Z", "A\x{30A}\x{327}"));
ok($aaNFD->lt("Z", "A\x{31A}\x{30A}"));
ok($aaNFD->lt("Z", "A\x{30A}\x{31A}"));
+# 17
my $aaPre = Unicode::Collate->new(
level => 1,
ok($aaPre->lt("Z", "A\x{30A}\x{327}"));
ok($aaPre->lt("Z", "A\x{31A}\x{30A}"));
ok($aaPre->lt("Z", "A\x{30A}\x{31A}"));
-}
-else {
- ok(1) for 1..20;
+# 25
+} else {
+ ok(1) for 1..20;
}
# again: loading Unicode::Normalize should not affect $kjeNoN.
ok($sortkeys{'KAta'}, $kjeNoN->viewSortKey("\x{43A}\x{334}\x{301}"));
ok($sortkeys{'KAat'}, $kjeNoN->viewSortKey("\x{43A}\x{301}\x{334}"));
+# 32
+
my $aaNoN = Unicode::Collate->new(
level => 1,
table => undef,
ok($aaNoN->eq("A", "A\x{31A}\x{30A}"));
ok($aaNoN->lt("Z", "A\x{30A}\x{31A}"));
+# 40
+
# suppress contractions
my $kjeSup = Unicode::Collate->new(
ok($kjeSup->eq("\x{41A}", "\x{41A}\x{301}"));
ok($kjeSup->gt("\x{40C}", "\x{41A}\x{301}"));
+# 44
+
+our $tibetanEntry = <<'ENTRIES';
+0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
+0F71 ; [.206D.0020.0002.0F71] # TIBETAN VOWEL SIGN AA
+0F72 ; [.206E.0020.0002.0F72] # TIBETAN VOWEL SIGN I
+0F73 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
+0F71 0F72 ; [.206F.0020.0002.0F73] # TIBETAN VOWEL SIGN II
+0F80 ; [.2070.0020.0002.0F80] # TIBETAN VOWEL SIGN REVERSED I
+0F81 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
+0F71 0F80 ; [.2071.0020.0002.0F81] # TIBETAN VOWEL SIGN REVERSED II
+0F74 ; [.2072.0020.0002.0F74] # TIBETAN VOWEL SIGN U
+0F75 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
+0F71 0F74 ; [.2073.0020.0002.0F75] # TIBETAN VOWEL SIGN UU
+0F76 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
+0FB2 0F80 ; [.2074.0020.0002.0F76] # TIBETAN VOWEL SIGN VOCALIC R
+0F77 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
+0FB2 0F81 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
+0FB2 0F71 0F80 ; [.2075.0020.0002.0F77] # TIBETAN VOWEL SIGN VOCALIC RR
+0F78 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
+0FB3 0F80 ; [.2076.0020.0002.0F78] # TIBETAN VOWEL SIGN VOCALIC L
+0F79 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
+0FB3 0F81 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
+0FB3 0F71 0F80 ; [.2077.0020.0002.0F79] # TIBETAN VOWEL SIGN VOCALIC LL
+ENTRIES
+
+# ccc(0F71) = 129
+# ccc(0F80) = 130
+# 0F76 = 0FB2 0F80
+# 0F78 = 0FB3 0F80
+# 0F81 = 0F71 0F80
+# 0F77 = <compat> 0FB2 0F81 = 0FB2 0F71 0F80 = 0F76 0F71
+# 0F79 = <compat> 0FB3 0F81 = 0FB3 0F71 0F80 = 0F78 0F71
+
+eval { require Unicode::Normalize };
+if (!$@) {
+ my $tibNFD = Unicode::Collate->new(
+ table => undef,
+ entry => $tibetanEntry,
+ );
+
+ # VOCALIC RR
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F81}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\x{334}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F81}\0\x{334}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{334}\x{F71}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\x{334}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{F76}\x{F71}\0\x{334}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F71}\x{F80}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{334}\x{F80}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\x{334}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F71}\x{F80}\0\x{334}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{334}\x{F80}\x{F71}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{334}\x{F71}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\x{334}"));
+ ok($tibNFD->eq("\x{F77}\0\x{334}", "\x{FB2}\x{F80}\x{F71}\0\x{334}"));
+# 58
+
+ # VOCALIC LL
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F81}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\x{334}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F81}\0\x{334}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{334}\x{F71}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\x{334}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{F78}\x{F71}\0\x{334}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F71}\x{F80}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{334}\x{F80}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\x{334}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F71}\x{F80}\0\x{334}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{334}\x{F80}\x{F71}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{334}\x{F71}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\x{334}"));
+ ok($tibNFD->eq("\x{F79}\0\x{334}", "\x{FB3}\x{F80}\x{F71}\0\x{334}"));
+# 72
+
+ my $discontNFD = Unicode::Collate->new(
+ table => undef,
+ entry => <<'ENTRIES',
+0000 ; [.0000.0000.0000.0000] # [0000] NULL (in 6429)
+0301 ; [.0000.0032.0002.0301] # COMBINING ACUTE ACCENT
+0300 ; [.0000.0035.0002.0300] # COMBINING GRAVE ACCENT
+0327 ; [.0000.0055.0002.0327] # COMBINING CEDILLA
+0334 ; [.0000.008B.0002.0334] # COMBINING TILDE OVERLAY
+0041 ; [.0101.0020.0008.0041] # LATIN CAPITAL LETTER A
+0041 0327 0301 ; [.0102.0020.0008.0041]
+0041 0300 ; [.0103.0020.0008.0041]
+ENTRIES
+ );
+
+ ok($discontNFD->eq("A\x{327}\x{301}\0\x{334}", "A\x{334}\x{327}\x{301}"));
+ ok($discontNFD->eq("A\x{300}\0\x{327}", "A\x{327}\x{300}"));
+} else {
+ ok(1) for 1..30;
+}
+# 74
}
}
-BEGIN { plan tests => 40 };
+BEGIN { plan tests => 65 };
ok(1);
# illegal code points should be always ingored
# (cf. UCA, 7.1.1 Illegal code points).
-my $illeg = Unicode::Collate->new(
- entry => <<'ENTRIES',
+my $entry = <<'ENTRIES';
0000 ; [.0020.0000.0000.0000] # [0000] NULL
0001 ; [.0021.0000.0000.0001] # [0001] START OF HEADING
FFFE ; [.0022.0000.0000.FFFE] # <noncharacter-FFFE> (invalid)
0041 0000 ; [.1100.0020.0008.0041] # latin A + NULL
0041 FFFF ; [.1200.0020.0008.0041] # latin A + FFFF (invalid)
ENTRIES
+
+##################
+
+my $illeg = Unicode::Collate->new(
+ entry => $entry,
level => 1,
table => undef,
normalization => undef,
+ UCA_Version => 20,
);
# 2..12
##################
-my($match, $str, $sub, $ret);
+my $nonch = Unicode::Collate->new(
+ entry => $entry,
+ level => 1,
+ table => undef,
+ normalization => undef,
+ UCA_Version => 22,
+);
+
+# 27..37
+ok($nonch->lt("", "\x00"));
+ok($nonch->lt("", "\x01"));
+ok($nonch->lt("", "\x{FFFE}"));
+ok($nonch->lt("", "\x{FFFF}"));
+ok($nonch->lt("", "\x{D800}"));
+ok($nonch->lt("", "\x{DFFF}"));
+ok($nonch->lt("", "\x{FDD0}"));
+ok($nonch->lt("", "\x{FDEF}"));
+ok($nonch->lt("", "\x02"));
+ok($nonch->lt("", "\x{10FFFF}"));
+ok($nonch->eq("", "\x{110000}"));
+
+# 38..47
+ok($nonch->lt("\x00", "\x01"));
+ok($nonch->lt("\x01", "\x{FFFE}"));
+ok($nonch->lt("\x{FFFE}", "\x{FFFF}"));
+ok($nonch->lt("\x{FFFF}", "\x{D800}"));
+ok($nonch->lt("\x{D800}", "\x{DFFF}"));
+ok($nonch->lt("\x{DFFF}", "\x{FDD0}"));
+ok($nonch->lt("\x{FDD0}", "\x{FDEF}"));
+ok($nonch->lt("\x{FDEF}", "\x02"));
+ok($nonch->lt("\x02", "\x{10FFFF}"));
+ok($nonch->gt("\x{10FFFF}", "\x{110000}"));
+
+# 48..51
+ok($nonch->lt("A", "A\x{FFFF}"));
+ok($nonch->lt("A\0", "A\x{FFFF}"));
+ok($nonch->lt("A", "A\0"));
+ok($nonch->lt("AA", "A\0"));
+
+##################
my $Collator = Unicode::Collate->new(
table => 'keys.txt',
level => 1,
normalization => undef,
+ UCA_Version => 8,
);
-$sub = "pe";
-
-
-$str = "Pe\x{300}\x{301}rl";
-$ret = "Pe\x{300}\x{301}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{300}\0\0\x{301}rl";
-$ret = "Pe\x{300}\0\0\x{301}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{DA00}\x{301}\x{DFFF}rl";
-$ret = "Pe\x{DA00}\x{301}\x{DFFF}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{FFFF}\x{301}rl";
-$ret = "Pe\x{FFFF}\x{301}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{110000}\x{301}rl";
-$ret = "Pe\x{110000}\x{301}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{300}\x{d801}\x{301}rl";
-$ret = "Pe\x{300}\x{d801}\x{301}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{300}\x{ffff}\x{301}rl";
-$ret = "Pe\x{300}\x{ffff}\x{301}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{300}\x{110000}\x{301}rl";
-$ret = "Pe\x{300}\x{110000}\x{301}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{D9ab}\x{DFFF}rl";
-$ret = "Pe\x{D9ab}\x{DFFF}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{FFFF}rl";
-$ret = "Pe\x{FFFF}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{110000}rl";
-$ret = "Pe\x{110000}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{300}\x{D800}\x{DFFF}rl";
-$ret = "Pe\x{300}\x{D800}\x{DFFF}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{300}\x{FFFF}rl";
-$ret = "Pe\x{300}\x{FFFF}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
-
-$str = "Pe\x{300}\x{110000}rl";
-$ret = "Pe\x{300}\x{110000}";
-($match) = $Collator->match($str, $sub);
-ok($match, $ret);
+my @ret = (
+ "Pe\x{300}\x{301}",
+ "Pe\x{300}\0\0\x{301}",
+ "Pe\x{DA00}\x{301}\x{DFFF}",
+ "Pe\x{FFFF}\x{301}",
+ "Pe\x{110000}\x{301}",
+ "Pe\x{300}\x{d801}\x{301}",
+ "Pe\x{300}\x{ffff}\x{301}",
+ "Pe\x{300}\x{110000}\x{301}",
+ "Pe\x{D9ab}\x{DFFF}",
+ "Pe\x{FFFF}",
+ "Pe\x{110000}",
+ "Pe\x{300}\x{D800}\x{DFFF}",
+ "Pe\x{300}\x{FFFF}",
+ "Pe\x{300}\x{110000}",
+);
+# 52..65
+for my $ret (@ret) {
+ my $str = $ret."rl";
+ my($match) = $Collator->match($str, "pe");
+ ok($match eq $ret);
+}
use Unicode::Collate::Locale;
use Test;
-plan tests => 7;
+plan tests => 13;
my $objHy = Unicode::Collate::Locale->
new(locale => 'HY', normalization => undef);
ok($objHy->lt("\x{584}", "\x{587}"));
ok($objHy->gt("\x{585}", "\x{587}"));
+ok($objHy->lt("\x{584}\x{4E00}", "\x{587}"));    # same lt check with U+4E00 appended to the left operand
+ok($objHy->lt("\x{584}\x{20000}", "\x{587}"));    # ... with U+20000 appended
+ok($objHy->lt("\x{584}\x{10FFFD}","\x{587}"));    # ... with U+10FFFD appended
+
+# 7
+
$objHy->change(level => 2);
ok($objHy->eq("\x{587}", "\x{535}\x{582}"));
ok($objHy->gt("\x{587}", "\x{535}\x{582}"));    # NOTE(review): eq above and gt here cannot both hold at one level; a level change between them is presumably elided from this excerpt - confirm
-# 7
+# 10
+
+$objHy->change(UCA_Version => 8);    # rerun the three appended-character checks under UCA_Version 8
+
+ok($objHy->lt("\x{584}\x{4E00}", "\x{587}"));
+ok($objHy->lt("\x{584}\x{20000}", "\x{587}"));
+ok($objHy->lt("\x{584}\x{10FFFD}","\x{587}"));
+
+# 13
--- /dev/null
+
+BEGIN {
+ unless ("A" eq pack('U', 0x41)) {    # bail out with an empty plan when pack U does not produce the letter A
+ print "1..0 # Unicode::Collate " .
+ "cannot stringify a Unicode code point\n";
+ exit 0;
+ }
+ if ($ENV{PERL_CORE}) {    # PERL_CORE set: run from t/ and load modules from the lib directory one level up
+ chdir('t') if -d 't';
+ @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
+ }
+}
+
+use Test;
+use strict;
+use warnings;
+
+BEGIN {
+ use Unicode::Collate;
+
+ unless (exists &Unicode::Collate::bootstrap or 5.008 <= $]) {    # continue only with the bootstrap sub present or perl 5.8.0+
+ print "1..0 # skipped: XSUB, or Perl 5.8.0 or later".
+ " needed for this test\n";
+ print $@;
+ exit;
+ }
+}
+
+BEGIN { plan tests => 27 };    # 1 + 15 + 7 + 4
+
+ok(1);
+
+#########################
+
+no warnings 'utf8';    # presumably silences warnings about the noncharacter literals used below - confirm
+
+# Unicode 6.0 Sorting
+#
+# Special Database Values. The data files for CLDR provide
+# special weights for two noncharacters:
+#
+# 1. A special noncharacter <HIGH> (U+FFFF) for specification of a range
+# in a database, allowing "Sch" <= X <= "Sch<HIGH>" to pick all strings
+# starting with "sch" plus those that sort equivalently.
+# 2. A special noncharacter <LOW> (U+FFFE) for merged database fields,
+# allowing "Disi\x{301}lva<LOW>John" to sort next to "Disilva<LOW>John".
+
+my $Collator = Unicode::Collate->new(
+ table => 'keys.txt',
+ level => 1,
+ normalization => undef,
+ UCA_Version => 22,
+ entry => <<'ENTRIES',
+FFFE ; [*0001.0020.0005.FFFE] # <noncharacter-FFFE>
+FFFF ; [.FFFE.0020.0005.FFFF] # <noncharacter-FFFF>
+ENTRIES
+);
+
+# 2..16: level-1 comparisons; code points ending in FFFD, FFFE or FFFF from several planes against U+FFFF
+
+ok($Collator->lt("\x{FFFD}", "\x{FFFF}"));
+ok($Collator->lt("\x{1FFFD}", "\x{1FFFF}"));
+ok($Collator->lt("\x{2FFFD}", "\x{2FFFF}"));
+ok($Collator->lt("\x{10FFFD}", "\x{10FFFF}"));
+
+ok($Collator->lt("perl\x{FFFD}", "perl\x{FFFF}"));
+ok($Collator->lt("perl\x{1FFFD}", "perl\x{FFFF}"));
+ok($Collator->lt("perl\x{1FFFE}", "perl\x{FFFF}"));
+ok($Collator->lt("perl\x{1FFFF}", "perl\x{FFFF}"));
+ok($Collator->lt("perl\x{2FFFD}", "perl\x{FFFF}"));
+ok($Collator->lt("perl\x{2FFFE}", "perl\x{FFFF}"));
+ok($Collator->lt("perl\x{2FFFF}", "perl\x{FFFF}"));
+ok($Collator->lt("perl\x{10FFFD}", "perl\x{FFFF}"));
+ok($Collator->lt("perl\x{10FFFE}", "perl\x{FFFF}"));
+ok($Collator->lt("perl\x{10FFFF}", "perl\x{FFFF}"));
+
+ok($Collator->gt("perl\x{FFFF}AB", "perl\x{FFFF}"));    # text after U+FFFF still sorts after the bare prefix
+
+$Collator->change(level => 4);    # the remaining tests compare at level 4
+
+# 17..23: U+FFFE as a field separator; the chain asserts every Fred string sorts before every John string
+
+my @dsf = (
+ "di Silva\x{FFFE}Fred",
+ "diSilva\x{FFFE}Fred",
+ "di Si\x{301}lva\x{FFFE}Fred",
+ "diSi\x{301}lva\x{FFFE}Fred",
+);
+my @dsj = (
+ "di Silva\x{FFFE}John",
+ "diSilva\x{FFFE}John",
+ "di Si\x{301}lva\x{FFFE}John",
+ "diSi\x{301}lva\x{FFFE}John",
+);
+
+ok($Collator->lt($dsf[0], $dsf[1]));
+ok($Collator->lt($dsf[1], $dsf[2]));
+ok($Collator->lt($dsf[2], $dsf[3]));
+
+ok($Collator->lt($dsf[3], $dsj[0]));    # last Fred entry before first John entry
+
+ok($Collator->lt($dsj[0], $dsj[1]));
+ok($Collator->lt($dsj[1], $dsj[2]));
+ok($Collator->lt($dsj[2], $dsj[3]));
+
+# 24..27: the same John names joined with a plain space instead of U+FFFE
+
+my @ds_j = (
+ "di Silva John",
+ "diSilva John",
+ "di Si\x{301}lva John",
+ "diSi\x{301}lva John",
+);
+
+ok($Collator->lt($ds_j[0], $ds_j[1]));
+ok($Collator->lt($ds_j[1], $ds_j[2]));
+ok($Collator->lt($ds_j[2], $ds_j[3]));
+
+ok($Collator->lt($dsj[0], $ds_j[0]));    # U+FFFE-joined form sorts before the space-joined form
+
+
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
}
use Test;
-BEGIN { plan tests => 66 };
+BEGIN { plan tests => 246 }; # 6 + 30 x @Versions
use strict;
use warnings;
ok($ignoreCJK->gt("\x{4DFF}", "\x{4E00}")); # U+4DFF is not CJK.
ok($ignoreCJK->lt("Pe\x{5B57}rl", "Perl")); # 'r' is unassigned.
-##### 7..20
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBC}", "")); # UI since Unicode 5.1.0
-ok($ignoreCJK->eq("\x{9FC3}", "")); # UI since Unicode 5.1.0
-ok($ignoreCJK->eq("\x{9FC4}", "")); # UI since Unicode 5.2.0
-ok($ignoreCJK->eq("\x{9FCB}", "")); # UI since Unicode 5.2.0
-ok($ignoreCJK->gt("\x{9FCC}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", "")); # ExtB since Unicode 3.1.0
-ok($ignoreCJK->eq("\x{2A6D6}", "")); # ExtB since Unicode 3.1.0
-ok($ignoreCJK->eq("\x{2A700}", "")); # ExtC since Unicode 5.2.0
-ok($ignoreCJK->eq("\x{2B734}", "")); # ExtC since Unicode 5.2.0
-
-##### 21..30
-$ignoreCJK->change(UCA_Version => 8);
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
-ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
-ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC3}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC4}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", "")); # ExtB since Unicode 3.1.0
-ok($ignoreCJK->eq("\x{2A6D6}", "")); # ExtB since Unicode 3.1.0
-
-##### 31..40
-$ignoreCJK->change(UCA_Version => 9);
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->gt("\x{9FA6}", "Perl"));
-ok($ignoreCJK->gt("\x{9FBB}", "Perl"));
-ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC3}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC4}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", "")); # ExtB since Unicode 3.1.0
-ok($ignoreCJK->eq("\x{2A6D6}", "")); # ExtB since Unicode 3.1.0
-
-##### 41..52
-$ignoreCJK->change(UCA_Version => 14);
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->gt("\x{9FBC}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC3}", "Perl"));
-ok($ignoreCJK->gt("\x{9FC4}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", "")); # ExtB since Unicode 3.1.0
-ok($ignoreCJK->eq("\x{2A6D6}", "")); # ExtB since Unicode 3.1.0
-ok($ignoreCJK->gt("\x{2A700}", "Perl"));
-ok($ignoreCJK->gt("\x{2B734}", "Perl"));
-
-##### 53..66
-$ignoreCJK->change(UCA_Version => 18);
-ok($ignoreCJK->eq("\x{3400}", ""));
-ok($ignoreCJK->eq("\x{4DB5}", ""));
-ok($ignoreCJK->eq("\x{9FA5}", ""));
-ok($ignoreCJK->eq("\x{9FA6}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBB}", "")); # UI since Unicode 4.1.0
-ok($ignoreCJK->eq("\x{9FBC}", "")); # UI since Unicode 5.1.0
-ok($ignoreCJK->eq("\x{9FC3}", "")); # UI since Unicode 5.1.0
-ok($ignoreCJK->gt("\x{9FC4}", "Perl"));
-ok($ignoreCJK->gt("\x{9FCB}", "Perl"));
-ok($ignoreCJK->gt("\x{9FCC}", "Perl"));
-ok($ignoreCJK->eq("\x{20000}", "")); # ExtB since Unicode 3.1.0
-ok($ignoreCJK->eq("\x{2A6D6}", "")); # ExtB since Unicode 3.1.0
-ok($ignoreCJK->gt("\x{2A700}", "Perl"));
-ok($ignoreCJK->gt("\x{2B734}", "Perl"));
+#####
+
+# 4E00..9FA5 are CJK UI.
+# 9FA6..9FBB are CJK UI since UCA_Version 14 (Unicode 4.1).
+# 9FBC..9FC3 are CJK UI since UCA_Version 18 (Unicode 5.1).
+# 9FC4..9FCB are CJK UI since UCA_Version 20 (Unicode 5.2).
+
+# 3400..4DB5 are CJK UI Ext.A since UCA_Version 8 (Unicode 3.0).
+# 20000..2A6D6 are CJK UI Ext.B since UCA_Version 8 (Unicode 3.1).
+# 2A700..2B734 are CJK UI Ext.C since UCA_Version 20 (Unicode 5.2).
+# 2B740..2B81D are CJK UI Ext.D since UCA_Version 22 (Unicode 6.0).
+
+my @Versions = (8, 9, 11, 14, 16, 18, 20, 22);    # each version runs the 30 checks in the loop below
+
+for my $v (@Versions) {
+$ignoreCJK->change(UCA_Version => $v);
+# UI: expect 0 against the empty string once the code point is a CJK UI for $v, else 1
+# (presumably $ignoreCJK, configured outside this excerpt, makes CJK UI ignorable - confirm)
+ok($ignoreCJK->cmp("\x{4E00}", "") == 0);
+ok($ignoreCJK->cmp("\x{9FA5}", "") == 0);
+ok($ignoreCJK->cmp("\x{9FA6}", "") == ($v >= 14 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FAF}", "") == ($v >= 14 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FB0}", "") == ($v >= 14 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FBB}", "") == ($v >= 14 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FBC}", "") == ($v >= 18 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FBF}", "") == ($v >= 18 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FC0}", "") == ($v >= 18 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FC3}", "") == ($v >= 18 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FC4}", "") == ($v >= 20 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FCB}", "") == ($v >= 20 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{9FCC}", "") == 1);    # past 9FCB: expected 1 for every tested version
+ok($ignoreCJK->cmp("\x{9FCF}", "") == 1);
+
+# Ext.A: 3400 and 4DB5 are the range ends; 4DB6 and 4DBF lie just past it
+ok($ignoreCJK->cmp("\x{3400}", "") == 0);
+ok($ignoreCJK->cmp("\x{4DB5}", "") == 0);
+ok($ignoreCJK->cmp("\x{4DB6}", "") == 1);
+ok($ignoreCJK->cmp("\x{4DBF}", "") == 1);
+
+# Ext.B: 20000 and 2A6D6 are the range ends; 2A6D7 and 2A6DF lie just past it
+ok($ignoreCJK->cmp("\x{20000}","") == 0);
+ok($ignoreCJK->cmp("\x{2A6D6}","") == 0);
+ok($ignoreCJK->cmp("\x{2A6D7}","") == 1);
+ok($ignoreCJK->cmp("\x{2A6DF}","") == 1);
+
+# Ext.C: range ends switch to 0 at version 20; 2B735 and 2B73F lie just past it
+ok($ignoreCJK->cmp("\x{2A700}","") == ($v >= 20 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{2B734}","") == ($v >= 20 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{2B735}","") == 1);
+ok($ignoreCJK->cmp("\x{2B73F}","") == 1);
+
+# Ext.D: range ends switch to 0 at version 22; 2B81E and 2B81F lie just past it
+ok($ignoreCJK->cmp("\x{2B740}","") == ($v >= 22 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{2B81D}","") == ($v >= 22 ? 0 : 1));
+ok($ignoreCJK->cmp("\x{2B81E}","") == 1);
+ok($ignoreCJK->cmp("\x{2B81F}","") == 1);
+}
+
BEGIN {
unless ("A" eq pack('U', 0x41)) {
print "1..0 # Unicode::Collate " .
}
use Test;
-BEGIN { plan tests => 57 };
+BEGIN { plan tests => 131 }; # 11 + 15 x @Versions
use strict;
use warnings;
ok(1);
-##### 2..6
+##### 2..11
my $overCJK = Unicode::Collate->new(
- table => undef,
+ table => 'keys.txt',
normalization => undef,
entry => <<'ENTRIES',
-0061 ; [.0101.0020.0002.0061] # latin a
-0041 ; [.0101.0020.0008.0041] # LATIN A
-4E00 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
+4E01 ; [.B1FC.0030.0004.4E00] # Ideograph; B1FC = FFFF - 4E03.
ENTRIES
overrideCJK => sub {
my $u = 0xFFFF - $_[0]; # reversed
},
);
+ok($overCJK->gt("B", "A")); # diff. at level 1.
ok($overCJK->lt("a", "A")); # diff. at level 3.
-ok($overCJK->lt( "\x{4E03}", "\x{4E00}")); # diff. at level 2.
-ok($overCJK->lt("A\x{4E03}", "A\x{4E00}"));
-ok($overCJK->lt("A\x{4E03}", "a\x{4E00}"));
-ok($overCJK->lt("a\x{4E03}", "A\x{4E00}"));
-
-##### 7..17
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FBB}", "A\x{9FBC}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBC}", "A\x{9FBF}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBF}", "A\x{9FC3}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FC3}", "A\x{9FC4}")); # UI since Unicode 5.2.0
-ok($overCJK->gt("a\x{9FC4}", "A\x{9FCB}")); # UI since Unicode 5.2.0
-ok($overCJK->lt("a\x{9FCB}", "A\x{9FCC}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));
+ok($overCJK->lt( "\x{4E03}", "\x{4E01}")); # diff. at level 2.
+ok($overCJK->gt( "\x{4E03}B", "\x{4E01}A"));    # trailing B vs A (level 1) outranks the level-2 difference of the ideographs
+ok($overCJK->lt( "\x{4E03}A", "\x{4E01}B"));
+ok($overCJK->gt("B\x{4E03}", "A\x{4E01}"));    # leading B vs A decides likewise
+ok($overCJK->lt("A\x{4E03}", "B\x{4E01}"));
+ok($overCJK->lt("A\x{4E03}", "A\x{4E01}"));    # equal letters: back to the level-2 difference
+ok($overCJK->lt("A\x{4E03}", "a\x{4E01}"));    # level-2 difference outranks the level-3 case difference
+ok($overCJK->lt("a\x{4E03}", "A\x{4E01}"));
-##### 18..26
-$overCJK->change(UCA_Version => 9);
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->lt("a\x{9FA5}", "A\x{9FA6}"));
-ok($overCJK->lt("a\x{9FA6}", "A\x{9FBB}"));
-ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
-ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
-ok($overCJK->lt("a\x{9FBF}", "A\x{9FC3}"));
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FC4}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));
+#####
-##### 27..35
-$overCJK->change(UCA_Version => 14);
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # UI since Unicode 4.1.0
-ok($overCJK->lt("a\x{9FBB}", "A\x{9FBC}"));
-ok($overCJK->lt("a\x{9FBC}", "A\x{9FBF}"));
-ok($overCJK->lt("a\x{9FBF}", "A\x{9FC3}"));
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FC4}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));
+# 9FA6..9FBB are CJK UI since UCA_Version 14 (Unicode 4.1).
+# 9FBC..9FC3 are CJK UI since UCA_Version 18 (Unicode 5.1).
+# 9FC4..9FCB are CJK UI since UCA_Version 20 (Unicode 5.2).
-##### 36..46
-$overCJK->change(UCA_Version => 18);
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FBB}", "A\x{9FBC}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBC}", "A\x{9FBF}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBF}", "A\x{9FC3}")); # UI since Unicode 5.1.0
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FC4}"));
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FCB}"));
-ok($overCJK->lt("a\x{9FC3}", "A\x{9FCC}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));
-
-##### 47..57
-$overCJK->change(UCA_Version => 20);
-ok($overCJK->gt("a\x{3400}", "A\x{4DB5}"));
-ok($overCJK->gt("a\x{4DB5}", "A\x{9FA5}"));
-ok($overCJK->gt("a\x{9FA5}", "A\x{9FA6}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FA6}", "A\x{9FBB}")); # UI since Unicode 4.1.0
-ok($overCJK->gt("a\x{9FBB}", "A\x{9FBC}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBC}", "A\x{9FBF}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FBF}", "A\x{9FC3}")); # UI since Unicode 5.1.0
-ok($overCJK->gt("a\x{9FC3}", "A\x{9FC4}")); # UI since Unicode 5.2.0
-ok($overCJK->gt("a\x{9FC4}", "A\x{9FCB}")); # UI since Unicode 5.2.0
-ok($overCJK->lt("a\x{9FCB}", "A\x{9FCC}"));
-ok($overCJK->lt("a\x{9FC4}", "A\x{9FCF}"));
+my @Versions = (8, 9, 11, 14, 16, 18, 20, 22);    # each version runs the 15 checks in the loop below
+for my $v (@Versions) {
+$overCJK->change(UCA_Version => $v);
+ok($overCJK->cmp("a\x{3400}", "A\x{4DB5}") == 1);    # expected 1 for every tested version
+ok($overCJK->cmp("a\x{4DB5}", "A\x{4E00}") == 1);
+ok($overCJK->cmp("a\x{4E00}", "A\x{9FA5}") == 1);
+ok($overCJK->cmp("a\x{9FA5}", "A\x{9FA6}") == ($v >= 14 ? 1 : -1));    # flips to 1 at version 14, when 9FA6..9FBB count as CJK UI
+ok($overCJK->cmp("a\x{9FA6}", "A\x{9FAF}") == ($v >= 14 ? 1 : -1));
+ok($overCJK->cmp("a\x{9FAF}", "A\x{9FB0}") == ($v >= 14 ? 1 : -1));
+ok($overCJK->cmp("a\x{9FB0}", "A\x{9FBB}") == ($v >= 14 ? 1 : -1));
+ok($overCJK->cmp("a\x{9FBB}", "A\x{9FBC}") == ($v >= 18 ? 1 : -1));    # flips to 1 at version 18, when 9FBC..9FC3 count as CJK UI
+ok($overCJK->cmp("a\x{9FBC}", "A\x{9FBF}") == ($v >= 18 ? 1 : -1));
+ok($overCJK->cmp("a\x{9FBF}", "A\x{9FC3}") == ($v >= 18 ? 1 : -1));
+ok($overCJK->cmp("a\x{9FC3}", "A\x{9FC4}") == ($v >= 20 ? 1 : -1));    # flips to 1 at version 20, when 9FC4..9FCB count as CJK UI
+ok($overCJK->cmp("a\x{9FC4}", "A\x{9FCA}") == ($v >= 20 ? 1 : -1));
+ok($overCJK->cmp("a\x{9FCA}", "A\x{9FCB}") == ($v >= 20 ? 1 : -1));
+ok($overCJK->cmp("a\x{9FCB}", "A\x{9FCC}") == -1);    # 9FCC and 9FCF: expected -1 for every tested version
+ok($overCJK->cmp("a\x{9FCC}", "A\x{9FCF}") == -1);
+}
=item *
The following modules were added by the C<Unicode::Collate>
-upgrade from 0.63 to 0.66. See below for details.
+upgrade from 0.63 to 0.67. See below for details.
C<Unicode::Collate::CJK::Big5>
=item *
-C<Unicode::Collate> has been upgraded from 0.63 to 0.66
+C<Unicode::Collate> has been upgraded from 0.63 to 0.67
This release newly adds locales C<ja>, C<ko> and C<zh> and their variants
( C<zh__big5han>, C<zh__gb2312han>, C<zh__pinyin>, C<zh__stroke> ).
+UCA_Version 22 for Unicode 6.0.0 is now supported.
+
The following modules have been added:
C<Unicode::Collate::CJK::Big5> for C<zh__big5han> which makes