From: Karl Williamson Date: Tue, 1 Mar 2011 15:53:05 +0000 (-0700) Subject: UCD.pm: Convert charscript to use mktables tables X-Git-Tag: accepted/trunk/20130322.191538~5231 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7bccef0b00916eda11696a7ea88cfd578df216cd;p=platform%2Fupstream%2Fperl.git UCD.pm: Convert charscript to use mktables tables This removes the need for Scripts.txt --- diff --git a/lib/Unicode/UCD.pm b/lib/Unicode/UCD.pm index 90fda96..160511b 100644 --- a/lib/Unicode/UCD.pm +++ b/lib/Unicode/UCD.pm @@ -91,7 +91,6 @@ unlimited): you may have more than 4 hexdigits. my $UNICODEFH; my $BLOCKSFH; -my $SCRIPTSFH; my $VERSIONFH; my $COMPEXCLFH; my $CASEFOLDFH; @@ -546,22 +545,9 @@ my @SCRIPTS; my %SCRIPTS; sub _charscripts { - unless (@SCRIPTS) { - if (openunicode(\$SCRIPTSFH, "Scripts.txt")) { - local $_; - while (<$SCRIPTSFH>) { - if (/^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s+;\s+(\w+)/) { - my ($lo, $hi) = (hex($1), $2 ? hex($2) : hex($1)); - my $script = lc($3); - $script =~ s/\b(\w)/uc($1)/ge; - my $subrange = [ $lo, $hi, $script ]; - push @SCRIPTS, $subrange; - push @{$SCRIPTS{$script}}, $subrange; - } - } - close($SCRIPTSFH); - @SCRIPTS = sort { $a->[0] <=> $b->[0] } @SCRIPTS; - } + @SCRIPTS =_read_table("unicore/To/Sc.pl") unless @SCRIPTS; + foreach my $entry (@SCRIPTS) { + push @{$SCRIPTS{$entry->[2]}}, $entry; } } @@ -573,14 +559,14 @@ sub charscript { my $code = _getcode($arg); if (defined $code) { - _search(\@SCRIPTS, 0, $#SCRIPTS, $code); - } else { - if (exists $SCRIPTS{$arg}) { - return dclone $SCRIPTS{$arg}; - } else { - return; - } + my $result = _search(\@SCRIPTS, 0, $#SCRIPTS, $code); + return $result if defined $result; + #return $utf8::SwashInfo{'ToSc'}{'missing'}; + } elsif (exists $SCRIPTS{$arg}) { + return dclone $SCRIPTS{$arg}; } + + return; } =head2 B diff --git a/lib/Unicode/UCD.t b/lib/Unicode/UCD.t index 63d0aad..933fbbf 100644 --- a/lib/Unicode/UCD.t +++ b/lib/Unicode/UCD.t @@ -266,8 +266,8 @@ is($charscript, 'Ethiopic'); my $ranges; $ranges = charscript('Ogham'); -is($ranges->[1]->[0], hex('1681'), 'Ogham charscript'); -is($ranges->[1]->[1], hex('169a')); +is($ranges->[0]->[0], hex('1680'), 'Ogham charscript'); +is($ranges->[0]->[1], hex('169C')); use Unicode::UCD qw(charinrange); @@ -423,7 +423,7 @@ is(Unicode::UCD::_getcode('U+123x'), undef, "_getcode(x123)"); { my $r1 = charscript('Latin'); my $n1 = @$r1; - is($n1, 45, "number of ranges in Latin script (Unicode 6.0.0)"); + is($n1, 30, "number of ranges in Latin script (Unicode 6.0.0)"); shift @$r1 while @$r1; my $r2 = charscript('Latin'); is(@$r2, $n1, "modifying results should not mess up internal caches");