UCD.pm: Convert charscript to use mktables tables

author Karl Williamson <public@khwilliamson.com>

Tue, 1 Mar 2011 15:53:05 +0000 (08:53 -0700)

committer Karl Williamson <public@khwilliamson.com>

Tue, 1 Mar 2011 16:24:28 +0000 (09:24 -0700)
author Karl Williamson <public@khwilliamson.com>
Tue, 1 Mar 2011 15:53:05 +0000 (08:53 -0700)
committer Karl Williamson <public@khwilliamson.com>
Tue, 1 Mar 2011 16:24:28 +0000 (09:24 -0700)
diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm

index 90fda96..160511b 100644 (file)
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -91,7 +91,6 @@ unlimited): you may have more than 4 hexdigits.
  
  my $UNICODEFH;
  my $BLOCKSFH;
-my $SCRIPTSFH;
  my $VERSIONFH;
  my $COMPEXCLFH;
  my $CASEFOLDFH;
@@ -546,22 +545,9 @@ my @SCRIPTS;
  my %SCRIPTS;
  
  sub _charscripts {
-    unless (@SCRIPTS) {
-       if (openunicode(\$SCRIPTSFH, "Scripts.txt")) {
-           local $_;
-           while (<$SCRIPTSFH>) {
-               if (/^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+)/) {
-                   my ($lo, $hi) = (hex($1), $2 ? hex($2) : hex($1));
-                   my $script = lc($3);
-                   $script =~ s/\b(\w)/uc($1)/ge;
-                   my $subrange = [ $lo, $hi, $script ];
-                   push @SCRIPTS, $subrange;
-                   push @{$SCRIPTS{$script}}, $subrange;
-               }
-           }
-           close($SCRIPTSFH);
-           @SCRIPTS = sort { $a->[0] <=> $b->[0] } @SCRIPTS;
-       }
+    @SCRIPTS =_read_table("unicore/To/Sc.pl") unless @SCRIPTS;
+    foreach my $entry (@SCRIPTS) {
+        push @{$SCRIPTS{$entry->[2]}}, $entry;
      }
  }
  
@@ -573,14 +559,14 @@ sub charscript {
      my $code = _getcode($arg);
  
      if (defined $code) {
-       _search(\@SCRIPTS, 0, $#SCRIPTS, $code);
-    } else {
-       if (exists $SCRIPTS{$arg}) {
-           return dclone $SCRIPTS{$arg};
-       } else {
-           return;
-       }
+       my $result = _search(\@SCRIPTS, 0, $#SCRIPTS, $code);
+        return $result if defined $result;
+        #return $utf8::SwashInfo{'ToSc'}{'missing'};
+    } elsif (exists $SCRIPTS{$arg}) {
+        return dclone $SCRIPTS{$arg};
      }
+
+    return;
  }
  
  =head2 B<charblocks()>
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t

index 63d0aad..933fbbf 100644 (file)
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -266,8 +266,8 @@ is($charscript, 'Ethiopic');
  my $ranges;
  
  $ranges = charscript('Ogham');
-is($ranges->[1]->[0], hex('1681'), 'Ogham charscript');
-is($ranges->[1]->[1], hex('169a'));
+is($ranges->[0]->[0], hex('1680'), 'Ogham charscript');
+is($ranges->[0]->[1], hex('169C'));
  
  use Unicode::UCD qw(charinrange);
  
@@ -423,7 +423,7 @@ is(Unicode::UCD::_getcode('U+123x'),  undef, "_getcode(x123)");
  {
      my $r1 = charscript('Latin');
      my $n1 = @$r1;
-    is($n1, 45, "number of ranges in Latin script (Unicode 6.0.0)");
+    is($n1, 30, "number of ranges in Latin script (Unicode 6.0.0)");
      shift @$r1 while @$r1;
      my $r2 = charscript('Latin');
      is(@$r2, $n1, "modifying results should not mess up internal caches");
author	Karl Williamson <public@khwilliamson.com>
	Tue, 1 Mar 2011 15:53:05 +0000 (08:53 -0700)
committer	Karl Williamson <public@khwilliamson.com>
	Tue, 1 Mar 2011 16:24:28 +0000 (09:24 -0700)
lib/Unicode/UCD.pm		patch \| blob \| history
lib/Unicode/UCD.t		patch \| blob \| history