From: Karl Williamson <public@khwilliamson.com>
Date: Tue, 1 Mar 2011 15:53:05 +0000 (-0700)
Subject: UCD.pm: Convert charscript to use mktables tables
X-Git-Tag: accepted/trunk/20130322.191538~5231
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7bccef0b00916eda11696a7ea88cfd578df216cd;p=platform%2Fupstream%2Fperl.git

UCD.pm: Convert charscript to use mktables tables

This removes the need for Scripts.txt
---

diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm
index 90fda96..160511b 100644
--- a/lib/Unicode/UCD.pm
+++ b/lib/Unicode/UCD.pm
@@ -91,7 +91,6 @@ unlimited): you may have more than 4 hexdigits.
 
 my $UNICODEFH;
 my $BLOCKSFH;
-my $SCRIPTSFH;
 my $VERSIONFH;
 my $COMPEXCLFH;
 my $CASEFOLDFH;
@@ -546,22 +545,9 @@ my @SCRIPTS;
 my %SCRIPTS;
 
 sub _charscripts {
-    unless (@SCRIPTS) {
-	if (openunicode(\$SCRIPTSFH, "Scripts.txt")) {
-	    local $_;
-	    while (<$SCRIPTSFH>) {
-		if (/^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+)/) {
-		    my ($lo, $hi) = (hex($1), $2 ? hex($2) : hex($1));
-		    my $script = lc($3);
-		    $script =~ s/\b(\w)/uc($1)/ge;
-		    my $subrange = [ $lo, $hi, $script ];
-		    push @SCRIPTS, $subrange;
-		    push @{$SCRIPTS{$script}}, $subrange;
-		}
-	    }
-	    close($SCRIPTSFH);
-	    @SCRIPTS = sort { $a->[0] <=> $b->[0] } @SCRIPTS;
-	}
+    @SCRIPTS =_read_table("unicore/To/Sc.pl") unless @SCRIPTS;
+    foreach my $entry (@SCRIPTS) {
+        push @{$SCRIPTS{$entry->[2]}}, $entry;
     }
 }
 
@@ -573,14 +559,14 @@ sub charscript {
     my $code = _getcode($arg);
 
     if (defined $code) {
-	_search(\@SCRIPTS, 0, $#SCRIPTS, $code);
-    } else {
-	if (exists $SCRIPTS{$arg}) {
-	    return dclone $SCRIPTS{$arg};
-	} else {
-	    return;
-	}
+	my $result = _search(\@SCRIPTS, 0, $#SCRIPTS, $code);
+        return $result if defined $result;
+        #return $utf8::SwashInfo{'ToSc'}{'missing'};
+    } elsif (exists $SCRIPTS{$arg}) {
+        return dclone $SCRIPTS{$arg};
     }
+
+    return;
 }
 
 =head2 B<charblocks()>
diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t
index 63d0aad..933fbbf 100644
--- a/lib/Unicode/UCD.t
+++ b/lib/Unicode/UCD.t
@@ -266,8 +266,8 @@ is($charscript, 'Ethiopic');
 my $ranges;
 
 $ranges = charscript('Ogham');
-is($ranges->[1]->[0], hex('1681'), 'Ogham charscript');
-is($ranges->[1]->[1], hex('169a'));
+is($ranges->[0]->[0], hex('1680'), 'Ogham charscript');
+is($ranges->[0]->[1], hex('169C'));
 
 use Unicode::UCD qw(charinrange);
 
@@ -423,7 +423,7 @@ is(Unicode::UCD::_getcode('U+123x'),  undef, "_getcode(x123)");
 {
     my $r1 = charscript('Latin');
     my $n1 = @$r1;
-    is($n1, 45, "number of ranges in Latin script (Unicode 6.0.0)");
+    is($n1, 30, "number of ranges in Latin script (Unicode 6.0.0)");
     shift @$r1 while @$r1;
     my $r2 = charscript('Latin');
     is(@$r2, $n1, "modifying results should not mess up internal caches");