Upgrade to Unicode::Normalize 0.21 and Unicode::Collate 0.24,

author Rafael Garcia-Suarez <rgarciasuarez@gmail.com>

Sat, 5 Apr 2003 11:28:22 +0000 (11:28 +0000)

committer Rafael Garcia-Suarez <rgarciasuarez@gmail.com>

Sat, 5 Apr 2003 11:28:22 +0000 (11:28 +0000)
author Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
Sat, 5 Apr 2003 11:28:22 +0000 (11:28 +0000)
committer Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
Sat, 5 Apr 2003 11:28:22 +0000 (11:28 +0000)
diff --git a/ext/Unicode/Normalize/Changes b/ext/Unicode/Normalize/Changes

index 30f5c4a..92b944e 100644 (file)
--- a/ext/Unicode/Normalize/Changes
+++ b/ext/Unicode/Normalize/Changes
@@ -1,5 +1,8 @@
  Revision history for Perl extension Unicode::Normalize.
  
+0.21  Thu Apr 02 23:12:54 2003
+       - internal tweak: for (?un)pack 'U'.
+
  0.20  Sun Mar 02 13:29:25 2003
         - decompose Hangul syllables in a decomposition mapping.
  
diff --git a/ext/Unicode/Normalize/Normalize.pm b/ext/Unicode/Normalize/Normalize.pm

index e0232d3..14c121a 100644 (file)
--- a/ext/Unicode/Normalize/Normalize.pm
+++ b/ext/Unicode/Normalize/Normalize.pm
@@ -1,8 +1,8 @@
  package Unicode::Normalize;
  
  BEGIN {
-    if (ord("A") == 193) {
-       die "Unicode::Normalize not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+       die "Unicode::Normalize cannot stringify a Unicode code point\n";
      }
  }
  
@@ -11,7 +11,7 @@ use strict;
  use warnings;
  use Carp;
  
-our $VERSION = '0.20';
+our $VERSION = '0.21';
  our $PACKAGE = __PACKAGE__;
  
  require Exporter;
@@ -35,6 +35,29 @@ our %EXPORT_TAGS = (
  
  bootstrap Unicode::Normalize $VERSION;
  
+use constant UNICODE_FOR_PACK => "A" eq pack('U', 0x41);
+use constant NATIVE_FOR_PACK  => "A" eq pack('U', ord("A"));
+
+use constant UNICODE_FOR_UNPACK => 0x41 == unpack('U', "A");
+use constant NATIVE_FOR_UNPACK  => ord("A") == unpack('U', "A");
+
+sub pack_U {
+    return UNICODE_FOR_PACK
+       ? pack('U*', @_)
+       : NATIVE_FOR_PACK
+           ? pack('U*', map utf8::unicode_to_native($_), @_)
+           : die "$PACKAGE, a Unicode code point cannot be stringified.\n";
+}
+
+sub unpack_U {
+    return UNICODE_FOR_UNPACK
+       ? unpack('U*', shift)
+       : NATIVE_FOR_UNPACK
+           ? map(utf8::native_to_unicode($_), unpack 'U*', shift)
+           : die "$PACKAGE, a code point returned from unpack U " .
+               "cannot be converted into Unicode.\n";
+}
+
  use constant COMPAT => 1;
  
  sub NFD  ($) { reorder(decompose($_[0])) }
@@ -136,7 +159,7 @@ As C<$form_name>, one of the following names must be given.
  
  =item C<$decomposed_string = decompose($string, $useCompatMapping)>
  
-Decompose the specified string and returns the result.
+Decomposes the specified string and returns the result.
  
  If the second parameter (a boolean) is omitted or false, decomposes it
  using the Canonical Decomposition Mapping.
@@ -150,7 +173,7 @@ Reordering may be required.
  
  =item C<$reordered_string  = reorder($string)>
  
-Reorder the combining characters and the like in the canonical ordering
+Reorders the combining characters and the like in the canonical ordering
  and returns the result.
  
  E.g., when you have a list of NFD/NFKD strings,
diff --git a/ext/Unicode/Normalize/README b/ext/Unicode/Normalize/README

index f1b1754..8447502 100644 (file)
--- a/ext/Unicode/Normalize/README
+++ b/ext/Unicode/Normalize/README
@@ -1,4 +1,4 @@
-Unicode/Normalize version 0.20
+Unicode/Normalize version 0.21
  ===================================
  
  Unicode::Normalize - Unicode Normalization Forms
diff --git a/ext/Unicode/Normalize/mkheader b/ext/Unicode/Normalize/mkheader

index 6cac390..e2c4f12 100644 (file)
--- a/ext/Unicode/Normalize/mkheader
+++ b/ext/Unicode/Normalize/mkheader
@@ -15,7 +15,11 @@ use warnings;
  use Carp;
  use File::Spec;
  
-our $IsEBCDIC = ord("A") != 0x41;
+BEGIN {
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+       die "Unicode::Normalize cannot stringify a Unicode code point\n";
+    }
+}
  
  our $PACKAGE = 'Unicode::Normalize, mkheader';
  
@@ -197,12 +201,17 @@ foreach my $key (keys %Compat) {
      $Compat{$key} = [ getCompatList($key) ];
  }
  
+sub _pack_U {
+    return "A" eq pack('U', 0x41)
+       ? pack('U*', @_)
+       : "A" eq pack('U', ord("A"))
+           ? pack('U*', map utf8::unicode_to_native($_), @_)
+           : die "$PACKAGE, a Unicode code point cannot be stringified.\n";
+}
+
  sub _U_stringify {
      sprintf '"%s"', join '',
-       map sprintf("\\x%02x", $_), unpack 'C*',
-           $IsEBCDIC
-               ? pack('U*', map utf8::unicode_to_native($_), @_)
-               : pack('U*', @_);
+       map sprintf("\\x%02x", $_), unpack 'C*', _pack_U(@_);
  }
  
  foreach my $hash (\%Canon, \%Compat) {
diff --git a/ext/Unicode/Normalize/t/func.t b/ext/Unicode/Normalize/t/func.t

index d540d99..81e092a 100644 (file)
--- a/ext/Unicode/Normalize/t/func.t
+++ b/ext/Unicode/Normalize/t/func.t
@@ -1,7 +1,8 @@
  
  BEGIN {
-    if (ord("A") == 193) {
-       print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+       print "1..0 # Unicode::Normalize " .
+           "cannot stringify a Unicode code point\n";
         exit 0;
      }
  }
@@ -9,7 +10,7 @@ BEGIN {
  BEGIN {
      if ($ENV{PERL_CORE}) {
          chdir('t') if -d 't';
-        @INC = qw(../lib);
+        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
      }
  }
  
@@ -22,19 +23,8 @@ BEGIN { plan tests => 13 };
  use Unicode::Normalize qw(:all);
  ok(1); # If we made it this far, we're ok.
  
-our $IsEBCDIC = ord("A") != 0x41;
-
-sub _pack_U {
-    return $IsEBCDIC
-       ? pack('U*', map utf8::unicode_to_native($_), @_)
-       : pack('U*', @_);
-}
-
-sub _unpack_U {
-    return $IsEBCDIC
-       ? map(utf8::native_to_unicode($_), unpack 'U*', shift)
-       : unpack('U*', shift);
-}
+sub _pack_U   { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
  
  #########################
  
@@ -50,7 +40,7 @@ print ! defined getCanon( 0)
     && getCanon(0x00EF) eq _pack_U(0x0069, 0x0308)
     && getCanon(0x304C) eq _pack_U(0x304B, 0x3099)
     && getCanon(0x1EA4) eq _pack_U(0x0041, 0x0302, 0x0301)
-   && getCanon(0x1F82) eq "\x{03B1}\x{0313}\x{0300}\x{0345}"
+   && getCanon(0x1F82) eq _pack_U(0x03B1, 0x0313, 0x0300, 0x0345)
     && getCanon(0x1FAF) eq _pack_U(0x03A9, 0x0314, 0x0342, 0x0345)
     && getCanon(0xAC00) eq _pack_U(0x1100, 0x1161)
     && getCanon(0xAE00) eq _pack_U(0x1100, 0x1173, 0x11AF)
diff --git a/ext/Unicode/Normalize/t/norm.t b/ext/Unicode/Normalize/t/norm.t

index 77ca218..76ee255 100644 (file)
--- a/ext/Unicode/Normalize/t/norm.t
+++ b/ext/Unicode/Normalize/t/norm.t
@@ -1,7 +1,8 @@
  
  BEGIN {
-    if (ord("A") == 193) {
-       print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+       print "1..0 # Unicode::Normalize " .
+           "cannot stringify a Unicode code point\n";
         exit 0;
      }
  }
@@ -9,7 +10,7 @@ BEGIN {
  BEGIN {
      if ($ENV{PERL_CORE}) {
          chdir('t') if -d 't';
-        @INC = qw(../lib);
+        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
      }
  }
  
@@ -22,19 +23,8 @@ BEGIN { plan tests => 18 };
  use Unicode::Normalize qw(normalize);
  ok(1); # If we made it this far, we're ok.
  
-our $IsEBCDIC = ord("A") != 0x41;
-
-sub _pack_U {
-    return $IsEBCDIC
-       ? pack('U*', map utf8::unicode_to_native($_), @_)
-       : pack('U*', @_);
-}
-
-sub _unpack_U {
-    return $IsEBCDIC
-       ? map(utf8::native_to_unicode($_), unpack 'U*', shift)
-       : unpack('U*', shift);
-}
+sub _pack_U   { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
  
  #########################
  
diff --git a/ext/Unicode/Normalize/t/test.t b/ext/Unicode/Normalize/t/test.t

index db1a536..b98a8b8 100644 (file)
--- a/ext/Unicode/Normalize/t/test.t
+++ b/ext/Unicode/Normalize/t/test.t
@@ -1,7 +1,8 @@
  
  BEGIN {
-    if (ord("A") == 193) {
-       print "1..0 # Unicode::Normalize not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+       print "1..0 # Unicode::Normalize " .
+           "cannot stringify a Unicode code point\n";
         exit 0;
      }
  }
@@ -9,7 +10,7 @@ BEGIN {
  BEGIN {
      if ($ENV{PERL_CORE}) {
          chdir('t') if -d 't';
-        @INC = qw(../lib);
+        @INC = $^O eq 'MacOS' ? qw(::lib) : qw(../lib);
      }
  }
  
@@ -22,19 +23,8 @@ BEGIN { plan tests => 20 };
  use Unicode::Normalize;
  ok(1); # If we made it this far, we're ok.
  
-our $IsEBCDIC = ord("A") != 0x41;
-
-sub _pack_U {
-    return $IsEBCDIC
-       ? pack('U*', map utf8::unicode_to_native($_), @_)
-       : pack('U*', @_);
-}
-
-sub _unpack_U {
-    return $IsEBCDIC
-       ? map(utf8::native_to_unicode($_), unpack 'U*', shift)
-       : unpack('U*', shift);
-}
+sub _pack_U   { Unicode::Normalize::pack_U(@_) }
+sub _unpack_U { Unicode::Normalize::unpack_U(@_) }
  
  #########################
  
diff --git a/lib/Unicode/Collate.pm b/lib/Unicode/Collate.pm

index 5193559..a753808 100644 (file)
--- a/lib/Unicode/Collate.pm
+++ b/lib/Unicode/Collate.pm
@@ -1,8 +1,8 @@
  package Unicode::Collate;
  
  BEGIN {
-    if (ord("A") == 193) {
-       die "Unicode::Collate not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+       die "Unicode::Collate cannot stringify a Unicode code point\n";
      }
  }
  
@@ -14,11 +14,7 @@ use File::Spec;
  
  require Exporter;
  
-# Supporting on EBCDIC platform is not tested.
-# Tester(s) welcome!
-our $IsEBCDIC = ord("A") != 0x41;
-
-our $VERSION = '0.23';
+our $VERSION = '0.24';
  our $PACKAGE = __PACKAGE__;
  
  our @ISA = qw(Exporter);
@@ -37,7 +33,7 @@ eval { require Unicode::UCD };
  unless ($@) {
      $UNICODE_VERSION = Unicode::UCD::UnicodeVersion();
  }
-else { # XXX, Perl 5.6.1
+else { # Perl 5.6.1
      my($f, $fh);
      foreach my $d (@INC) {
         $f = File::Spec->catfile($d, "unicode", "Unicode.301");
@@ -59,17 +55,21 @@ use constant NOMATCHPOS => -1;
  # This is also used as a HAS_UNICODE_NORMALIZE flag.
  our $getCombinClass;
  
+# Supported Levels
+use constant MinLevel => 1;
+use constant MaxLevel => 4;
+
  # Minimum weights at level 2 and 3, respectively
-use constant Min2   => 0x20;
-use constant Min3   => 0x02;
+use constant Min2Wt => 0x20;
+use constant Min3Wt => 0x02;
  
  # Shifted weight at 4th level
-use constant Shift4 => 0xFFFF;
+use constant Shift4Wt => 0xFFFF;
  
  # Variable weight at 1st level.
  # This is a negative value but should be regarded as zero on collation.
  # This is for distinction of variable chars from level 3 ignorable chars.
-use constant Var1 => -1;
+use constant Var1Wt => -1;
  
  
  # A boolean for Variable and 16-bit weights at 4 levels of Collation Element
@@ -79,10 +79,6 @@ use constant Var1 => -1;
  # other than "shift" (as well as "shift-trimmed") is unreliable.
  use constant VCE_TEMPLATE => 'Cn4';
  
-# Unicode encoding of strings to be collated
-# TODO: 'N*' for UTF-32BE, 'V*' for UTF-32LE.
-use constant UTF_TEMPLATE => 'U*';
-
  # A sort key: 16-bit weights
  # See also the PROBLEM on VCE_TEMPLATE above.
  use constant KEY_TEMPLATE => 'n*';
@@ -113,6 +109,33 @@ sub UCA_Version { "9" }
  
  sub Base_Unicode_Version { $UNICODE_VERSION || 'unknown' }
  
+######
+
+use constant UNICODE_FOR_PACK => ("A" eq pack('U', 0x41));
+use constant NATIVE_FOR_PACK  => ("A" eq pack('U', ord("A")));
+
+use constant UNICODE_FOR_UNPACK => (0x41 == unpack('U', "A"));
+use constant NATIVE_FOR_UNPACK  => (ord("A") == unpack('U', "A"));
+
+sub pack_U {
+    return UNICODE_FOR_PACK
+       ? pack('U*', @_)
+       : NATIVE_FOR_PACK
+           ? pack('U*', map utf8::unicode_to_native($_), @_)
+           : die "$PACKAGE, a Unicode code point cannot be stringified.\n";
+}
+
+sub unpack_U {
+    return UNICODE_FOR_UNPACK
+       ? unpack('U*', shift)
+       : NATIVE_FOR_UNPACK
+           ? map(utf8::native_to_unicode($_), unpack 'U*', shift)
+           : die "$PACKAGE, a code point returned from unpack U " .
+               "cannot be converted into Unicode.\n";
+}
+
+######
+
  my (%AlternateOK);
  @AlternateOK{ qw/
      blanked  non-ignorable  shifted  shift-trimmed
@@ -125,13 +148,15 @@ our @ChangeOK = qw/
    /;
  
  our @ChangeNG = qw/
-    entry entries table combining maxlength
+    entry entries table maxlength
      ignoreChar ignoreName undefChar undefName
      versionTable alternateTable backwardsTable forwardsTable rearrangeTable
      derivCode normCode rearrangeHash L3_ignorable
+    backwardsFlag
    /;
-# The hash key 'ignored' is deleted at VERSION 0.21.
-# The hash key 'isShift' are deleted at VERSION 0.23.
+# The hash key 'ignored' is deleted at v 0.21.
+# The hash key 'isShift' is deleted at v 0.23.
+# The hash key 'combining' is deleted at v 0.24.
  
  my (%ChangeOK, %ChangeNG);
  @ChangeOK{ @ChangeOK } = ();
@@ -155,12 +180,18 @@ sub change {
      return wantarray ? %old : $self;
  }
  
+sub _checkLevel {
+    my $level = shift;
+    my $key   = shift;
+    croak sprintf "Illegal level %d (in \$self->{%s}) lower than %d.",
+       $level, $key, MinLevel if MinLevel > $level;
+    croak sprintf "Unsupported level %d (in \$self->{%s}) higher than %d ",
+       $level, $key, MaxLevel if MaxLevel < $level;
+}
+
  sub checkCollator {
      my $self = shift;
-    croak "Illegal level lower than 1 (passed $self->{level})."
-       if $self->{level} < 1;
-    croak "A level higher than 4 (passed $self->{level}) is not supported."
-       if 4 < $self->{level};
+    _checkLevel($self->{level}, "level");
  
      $self->{derivCode} =
         $self->{UCA_Version} ==  8 ? \&_derivCE_8 :
@@ -171,10 +202,24 @@ sub checkCollator {
      croak "$PACKAGE unknown alternate tag name: $self->{alternate}"
         unless exists $AlternateOK{ $self->{alternate} };
  
-    $self->{backwards} = []
-       if ! defined $self->{backwards};
-    $self->{backwards} = [ $self->{backwards} ]
-       if ! ref $self->{backwards};
+    if (! defined $self->{backwards}) {
+       $self->{backwardsFlag} = 0;
+    }
+    elsif (! ref $self->{backwards}) {
+       _checkLevel($self->{backwards}, "backwards");
+       $self->{backwardsFlag} = 1 << $self->{backwards};
+    }
+    else {
+       my %level;
+       $self->{backwardsFlag} = 0;
+       for my $b (@{ $self->{backwards} }) {
+           _checkLevel($b, "backwards");
+           $level{$b} = 1;
+       }
+       for my $v (sort keys %level) {
+           $self->{backwardsFlag} += 1 << $v;
+       }
+    }
  
      $self->{rearrange} = []
         if ! defined $self->{rearrange};
@@ -223,7 +268,7 @@ sub new
         $self->parseEntry($_) foreach split /\n/, $self->{entry};
      }
  
-    $self->{level} ||= 4;
+    $self->{level} ||= MaxLevel;
      $self->{UCA_Version} ||= UCA_Version();
  
      $self->{overrideHangul} = ''
@@ -305,10 +350,7 @@ sub parseEntry
      $entry = join(CODE_SEP, @uv); # in JCPS
  
      if (defined $self->{undefChar} || defined $self->{ignoreChar}) {
-       # Do not use UTF_TEMPLATE; Perl' RE is only for utf8.
-       my $ele = $IsEBCDIC
-           ? pack('U*', map utf8::unicode_to_native($_), @uv)
-           : pack('U*', @uv);
+       my $ele = pack_U(@uv);
  
         # regarded as if it were not entried in the table
         return
@@ -323,15 +365,12 @@ sub parseEntry
      $k = '[.0000.0000.0000.0000]'
         if defined $self->{ignoreName} && $name =~ /$self->{ignoreName}/;
  
-    my $combining = TRUE; # primary = 0, secondary != 0;
      my $is_L3_ignorable;
  
      foreach my $arr ($k =~ /\[([^\[\]]+)\]/g) { # SPACEs allowed
         my $var = $arr =~ /\*/; # exactly /^\*/ but be lenient.
         my @wt = _getHexArray($arr);
         push @key, pack(VCE_TEMPLATE, $var, @wt);
-       $combining = FALSE
-           unless $wt[0] == 0 && $wt[1] != 0;
         $is_L3_ignorable = TRUE
             if $wt[0] + $wt[1] + $wt[2] == 0;
           # if $arr !~ /[1-9A-Fa-f]/; NG
@@ -340,11 +379,6 @@ sub parseEntry
  
      $self->{entries}{$entry} = \@key;
  
-    $self->{combining}{$entry} = TRUE
-       if $combining;
-
-    # The key is a string representing a numeral code point.
-
      $self->{L3_ignorable}{$uv[0]} = TRUE
         if @uv == 1 && $is_L3_ignorable;
  
@@ -353,8 +387,9 @@ sub parseEntry
         if @uv > 1;
  }
  
+
  ##
-## arrayref[weights] = altCE(bool variable?, list[num] weights)
+## arrayref[weights] = altCE(VCE)
  ##
  sub altCE
  {
@@ -362,26 +397,29 @@ sub altCE
      my($var, @wt) = unpack(VCE_TEMPLATE, shift);
  
      $self->{alternate} eq 'blanked' ?
-       $var ? [Var1, 0, 0, $wt[3]] : \@wt :
+       $var ? [Var1Wt, 0, 0, $wt[3]] : \@wt :
      $self->{alternate} eq 'non-ignorable' ?
         \@wt :
      $self->{alternate} eq 'shifted' ?
-       $var ? [Var1, 0, 0, $wt[0] ]
-            : [ @wt[0..2], $wt[0]+$wt[1]+$wt[2] ? Shift4 : 0 ] :
+       $var ? [Var1Wt, 0, 0, $wt[0] ]
+            : [ @wt[0..2], $wt[0]+$wt[1]+$wt[2] ? Shift4Wt : 0 ] :
      $self->{alternate} eq 'shift-trimmed' ?
-       $var ? [Var1, 0, 0, $wt[0] ] : [ @wt[0..2], 0 ] :
+       $var ? [Var1Wt, 0, 0, $wt[0] ] : [ @wt[0..2], 0 ] :
          croak "$PACKAGE unknown alternate name: $self->{alternate}";
  }
  
  sub viewSortKey
  {
      my $self = shift;
-    my $ver = $self->{UCA_Version};
+    $self->visualizeSortKey($self->getSortKey(@_));
+}
  
-    my $key  = $self->getSortKey(@_);
-    my $view = join " ", map sprintf("%04X", $_), unpack(KEY_TEMPLATE, $key);
+sub visualizeSortKey
+{
+    my $self = shift;
+    my $view = join " ", map sprintf("%04X", $_), unpack(KEY_TEMPLATE, shift);
  
-    if ($ver <= 8) {
+    if ($self->{UCA_Version} <= 8) {
         $view =~ s/ ?0000 ?/|/g;
      } else {
         $view =~ s/\b0000\b/|/g;
@@ -423,9 +461,7 @@ sub splitCE
      }
  
      # get array of Unicode code point of string.
-    my @src = $IsEBCDIC
-       ? map(utf8::native_to_unicode($_), unpack UTF_TEMPLATE, $str)
-       : unpack(UTF_TEMPLATE, $str);
+    my @src = unpack_U($str);
  
      # rearrangement:
      # Character positions are not kept if rearranged,
@@ -529,7 +565,7 @@ sub getWt
             $cjk
                 ? map(pack(VCE_TEMPLATE, NON_VAR, @$_), &$cjk($u))
                 : defined $cjk && $self->{UCA_Version} <= 8 && $u < 0x10000
-                   ? pack(VCE_TEMPLATE, NON_VAR, $u, Min2, Min3, $u)
+                   ? pack(VCE_TEMPLATE, NON_VAR, $u, Min2Wt, Min3Wt, $u)
                     : $der->($u);
      }
      else {
@@ -557,7 +593,7 @@ sub getSortKey
             if ($wt->[0] == 0) { # ignorable
                 next if $last_is_variable;
             } else {
-               $last_is_variable = ($wt->[0] == Var1);
+               $last_is_variable = ($wt->[0] == Var1Wt);
             }
         }
         push @buf, $wt;
@@ -571,10 +607,6 @@ sub getSortKey
                 if 0 < $b->[$v];
         }
      }
-    foreach (@{ $self->{backwards} }) {
-       my $v = $_ - 1;
-       @{ $ret[$v] } = reverse @{ $ret[$v] };
-    }
  
      # modification of tertiary weights
      if ($self->{upper_before_lower}) {
@@ -591,6 +623,15 @@ sub getSortKey
             elsif (0x0D <= $_ && $_ <= 0x0E) { $_ += 5 } # hiragana
         }
      }
+
+    if ($self->{backwardsFlag}) {
+       for (my $v = MinLevel; $v <= MaxLevel; $v++) {
+           if ($self->{backwardsFlag} & (1 << $v)) {
+               @{ $ret[$v-1] } = reverse @{ $ret[$v-1] };
+           }
+       }
+    }
+
      join LEVEL_SEP, map pack(KEY_TEMPLATE, @$_), @ret;
  }
  
@@ -630,8 +671,8 @@ sub _derivCE_9 {
      my $aaaa = $base + ($u >> 15);
      my $bbbb = ($u & 0x7FFF) | 0x8000;
      return
-       pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2, Min3, $u),
-       pack(VCE_TEMPLATE, NON_VAR, $bbbb,    0,    0, $u);
+       pack(VCE_TEMPLATE, NON_VAR, $aaaa, Min2Wt, Min3Wt, $u),
+       pack(VCE_TEMPLATE, NON_VAR, $bbbb,      0,      0, $u);
  }
  
  sub _derivCE_8 {
@@ -684,7 +725,7 @@ sub _nonIgnorAtLevel($$)
      my $wt = shift;
      return if ! defined $wt;
      my $lv = shift;
-    return grep($wt->[$_] != 0, 0..$lv-1) ? TRUE : FALSE;
+    return grep($wt->[$_-1] != 0, MinLevel..$lv) ? TRUE : FALSE;
  }
  
  ##
@@ -731,7 +772,6 @@ sub index
         $pos   = 0 if $pos < 0;
      my $grob  = shift;
  
-    my $comb  = $self->{combining};
      my $lev   = $self->{level};
      my $ver9  = $self->{UCA_Version} > 8;
      my $v2i   = $self->{alternate} ne 'non-ignorable';
@@ -760,14 +800,14 @@ sub index
             if ($wt->[0] == 0) {
                 $to_be_pushed = FALSE if $last_is_variable;
             } else {
-               $last_is_variable = ($wt->[0] == Var1);
+               $last_is_variable = ($wt->[0] == Var1Wt);
             }
         }
  
         if (@subWt && $wt->[0] == 0) {
             push @{ $subWt[-1] }, $wt if $to_be_pushed;
         } else {
-           $wt->[0] = 0 if $wt->[0] == Var1;
+           $wt->[0] = 0 if $wt->[0] == Var1Wt;
             push @subWt, [ $wt ];
         }
      }
@@ -789,7 +829,7 @@ sub index
                     if ($wt->[0] == 0) {
                         $to_be_pushed = FALSE if $last_is_variable;
                     } else {
-                       $last_is_variable = ($wt->[0] == Var1);
+                       $last_is_variable = ($wt->[0] == Var1Wt);
                     }
                 }
  
@@ -797,7 +837,7 @@ sub index
                     push @{ $strWt[-1] }, $wt if $to_be_pushed;
                     $finPos[-1] = $strCE->[$i][2];
                 } elsif ($to_be_pushed) {
-                   $wt->[0] = 0 if $wt->[0] == Var1;
+                   $wt->[0] = 0 if $wt->[0] == Var1Wt;
                     push @strWt,  [ $wt ];
                     push @iniPos, $found_base ? NOMATCHPOS : $strCE->[$i][1];
                     $finPos[-1] = NOMATCHPOS if $found_base;
@@ -1217,9 +1257,9 @@ If the tag is made true, this is reversed.
  
  B<NOTE>: These tags simplemindedly assume
  any lowercase/uppercase or hiragana/katakana distinctions
-should occur in level 3, and their weights at level 3
-should be same as those mentioned in 7.3.1, UTS #10.
-If you define your collation elements which violates this,
+must occur in level 3, and their weights at level 3
+must be same as those mentioned in 7.3.1, UTS #10.
+If you define your collation elements which violate this requirement,
  these tags don't work validly.
  
  =back
diff --git a/lib/Unicode/Collate/Changes b/lib/Unicode/Collate/Changes

index 3e60f0b..2f7b6e7 100644 (file)
--- a/lib/Unicode/Collate/Changes
+++ b/lib/Unicode/Collate/Changes
@@ -1,5 +1,8 @@
  Revision history for Perl extension Unicode::Collate.
  
+0.24  Thu Apr 02 23:12:54 2003
+    - internal tweak for (?un)pack 'U'.
+
  0.23  Wed Sep 04 19:25:20 2002
      - fix: scalar match() no longer returns an lvalue substr ref.
      - fix: "Ignorable after variable" should be made level 3 ignorable
diff --git a/lib/Unicode/Collate/README b/lib/Unicode/Collate/README

index d829c77..fc0f68f 100644 (file)
--- a/lib/Unicode/Collate/README
+++ b/lib/Unicode/Collate/README
@@ -1,4 +1,4 @@
-Unicode/Collate version 0.23
+Unicode/Collate version 0.24
  ===============================
  
  NAME
diff --git a/lib/Unicode/Collate/t/index.t b/lib/Unicode/Collate/t/index.t

index e759ef2..550cbe3 100644 (file)
--- a/lib/Unicode/Collate/t/index.t
+++ b/lib/Unicode/Collate/t/index.t
@@ -1,7 +1,8 @@
  
  BEGIN {
-    if (ord("A") == 193) {
-       print "1..0 # Unicode::Collate not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+       print "1..0 # Unicode::Collate " .
+           "cannot stringify a Unicode code point\n";
         exit 0;
      }
  }
diff --git a/lib/Unicode/Collate/t/test.t b/lib/Unicode/Collate/t/test.t

index 777e9fb..502e0b1 100644 (file)
--- a/lib/Unicode/Collate/t/test.t
+++ b/lib/Unicode/Collate/t/test.t
@@ -1,7 +1,8 @@
  
  BEGIN {
-    if (ord("A") == 193) {
-       print "1..0 # Unicode::Collate not ported to EBCDIC\n";
+    unless ("A" eq pack('U', 0x41) || "A" eq pack('U', ord("A"))) {
+       print "1..0 # Unicode::Collate " .
+           "cannot stringify a Unicode code point\n";
         exit 0;
      }
  }
@@ -53,10 +54,12 @@ ok($Collator->cmp("", "perl"), -1);
  
  ##############
  
-# Use pack('U'), not chr(), for Perl 5.6.1.
-my $A_acute = pack('U', $IsEBCDIC ? 0x65 : 0xC1);
-my $a_acute = pack('U', $IsEBCDIC ? 0x45 : 0xE1);
-my $acute   = pack('U', 0x0301);
+sub _pack_U   { Unicode::Collate::pack_U(@_) }
+sub _unpack_U { Unicode::Collate::unpack_U(@_) }
+
+my $A_acute = _pack_U(0xC1);
+my $a_acute = _pack_U(0xE1);
+my $acute   = _pack_U(0x0301);
  
  ok($Collator->cmp("A$acute", $A_acute), 0); # @version 3.1.1 (prev: -1)
  ok($Collator->cmp($a_acute, $A_acute), -1);
author	Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
	Sat, 5 Apr 2003 11:28:22 +0000 (11:28 +0000)
committer	Rafael Garcia-Suarez <rgarciasuarez@gmail.com>
	Sat, 5 Apr 2003 11:28:22 +0000 (11:28 +0000)
ext/Unicode/Normalize/Changes		patch \| blob \| history
ext/Unicode/Normalize/Normalize.pm		patch \| blob \| history
ext/Unicode/Normalize/README		patch \| blob \| history
ext/Unicode/Normalize/mkheader		patch \| blob \| history
ext/Unicode/Normalize/t/func.t		patch \| blob \| history
ext/Unicode/Normalize/t/norm.t		patch \| blob \| history
ext/Unicode/Normalize/t/test.t		patch \| blob \| history
lib/Unicode/Collate.pm		patch \| blob \| history
lib/Unicode/Collate/Changes		patch \| blob \| history
lib/Unicode/Collate/README		patch \| blob \| history
lib/Unicode/Collate/t/index.t		patch \| blob \| history
lib/Unicode/Collate/t/test.t		patch \| blob \| history