X-Git-Url: http://review.tizen.org/git/?a=blobdiff_plain;f=glib%2Fgen-unicode-tables.pl;h=ebcb4a44c6211c51adb5596d8627c86938a77e40;hb=30ed5f53e205e6bfc35126a9d3c62dac8a9c5dad;hp=0e3b26b4ffe2b65a3d89ab7afe442cd56b58f618;hpb=d85b722734a6fcfe94032f6113de9e5c190fd7c3;p=platform%2Fupstream%2Fglib.git diff --git a/glib/gen-unicode-tables.pl b/glib/gen-unicode-tables.pl index 0e3b26b..ebcb4a4 100755 --- a/glib/gen-unicode-tables.pl +++ b/glib/gen-unicode-tables.pl @@ -14,9 +14,7 @@ # GNU General Public License for more details. # You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA -# 02111-1307, USA. +# along with this program; if not, see . # Contributer(s): # Andrew Taylor @@ -118,6 +116,7 @@ $FOLDING_MAPPING = 2; 'BB' => "G_UNICODE_BREAK_BEFORE", 'BK' => "G_UNICODE_BREAK_MANDATORY", 'CB' => "G_UNICODE_BREAK_CONTINGENT", + 'CJ' => "G_UNICODE_BREAK_CONDITIONAL_JAPANESE_STARTER", 'CL' => "G_UNICODE_BREAK_CLOSE_PUNCTUATION", 'CM' => "G_UNICODE_BREAK_COMBINING_MARK", 'CP' => "G_UNICODE_BREAK_CLOSE_PARANTHESIS", @@ -126,6 +125,7 @@ $FOLDING_MAPPING = 2; 'GL' => "G_UNICODE_BREAK_NON_BREAKING_GLUE", 'H2' => "G_UNICODE_BREAK_HANGUL_LV_SYLLABLE", 'H3' => "G_UNICODE_BREAK_HANGUL_LVT_SYLLABLE", + 'HL' => "G_UNICODE_BREAK_HEBREW_LETTER", 'HY' => "G_UNICODE_BREAK_HYPHEN", 'ID' => "G_UNICODE_BREAK_IDEOGRAPHIC", 'IN' => "G_UNICODE_BREAK_INSEPARABLE", @@ -141,6 +141,7 @@ $FOLDING_MAPPING = 2; 'PO' => "G_UNICODE_BREAK_POSTFIX", 'PR' => "G_UNICODE_BREAK_PREFIX", 'QU' => "G_UNICODE_BREAK_QUOTATION", + 'RI' => "G_UNICODE_BREAK_REGIONAL_INDICATOR", 'SA' => "G_UNICODE_BREAK_COMPLEX_CONTEXT", 'SG' => "G_UNICODE_BREAK_SURROGATE", 'SP' => "G_UNICODE_BREAK_SPACE", @@ -160,8 +161,17 @@ my @special_cases; my @special_case_offsets; my $special_case_offset = 0; +# Scripts + +my @scripts; + +# East asian widths + +my @eawidths; + $do_decomp = 0; $do_props = 1; +$do_scripts = 1; if (@ARGV && $ARGV[0] eq '-decomp') { $do_decomp = 1; @@ -176,10 +186,11 @@ elsif (@ARGV && $ARGV[0] eq '-both') if (@ARGV != 2) { $0 =~ s@.*/@@; - die "\nUsage: $0 [-decomp | -both] UNICODE-VERSION DIRECTORY\n\n DIRECTORY should contain the following Unicode data files:\n UnicodeData.txt, LineBreak.txt, SpecialCasing.txt, CaseFolding.txt,\n CompositionExclusions.txt\n\n"; + die "\nUsage: $0 [-decomp | -both] UNICODE-VERSION DIRECTORY\n\n DIRECTORY should contain the following Unicode data files:\n UnicodeData.txt, LineBreak.txt, SpecialCasing.txt, CaseFolding.txt,\n CompositionExclusions.txt Scripts.txt extracted/DerivedEastAsianWidth.txt \n\n"; } -my ($unicodedatatxt, $linebreaktxt, $specialcasingtxt, $casefoldingtxt, $compositionexclusionstxt); +my ($unicodedatatxt, $linebreaktxt, $specialcasingtxt, $casefoldingtxt, $compositionexclusionstxt, + $scriptstxt, $derivedeastasianwidth); my $d = $ARGV[1]; opendir (my $dir, $d) or die "Cannot open Unicode data dir $d: $!\n"; @@ -190,6 +201,14 @@ for my $f (readdir ($dir)) $specialcasingtxt = "$d/$f" if ($f =~ /^SpecialCasing.*\.txt/); $casefoldingtxt = "$d/$f" if ($f =~ /^CaseFolding.*\.txt/); $compositionexclusionstxt = "$d/$f" if ($f =~ /^CompositionExclusions.*\.txt/); + $scriptstxt = "$d/$f" if ($f =~ /^Scripts.*\.txt/); +} + +my $extd = $ARGV[1] . "/extracted"; +opendir (my $extdir, $extd) or die "Cannot open Unicode/extracted data dir $extd: $!\n"; +for my $f (readdir ($extdir)) +{ + $derivedeastasianwidthtxt = "$extd/$f" if ($f =~ /^DerivedEastAsianWidth.*\.txt/); } defined $unicodedatatxt or die "Did not find UnicodeData file"; @@ -197,6 +216,8 @@ defined $linebreaktxt or die "Did not find LineBreak file"; defined $specialcasingtxt or die "Did not find SpecialCasing file"; defined $casefoldingtxt or die "Did not find CaseFolding file"; defined $compositionexclusionstxt or die "Did not find CompositionExclusions file"; +defined $scriptstxt or die "Did not find Scripts file"; +defined $derivedeastasianwidthtxt or die "Did not find DerivedEastAsianWidth file"; print "Creating decomp table\n" if ($do_decomp); print "Creating property table\n" if ($do_props); @@ -488,6 +509,51 @@ while () close INPUT; +print "Reading scripts\n"; + +open (INPUT, "< $scriptstxt") || exit 1; + +while () { + s/#.*//; + next if /^\s*$/; + if (!/^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)\s*$/) { + die "Cannot parse line: '$_'\n"; + } + + if (defined $2) { + push @scripts, [ hex $1, hex $2, uc $3 ]; + } else { + push @scripts, [ hex $1, hex $1, uc $3 ]; + } +} + +close INPUT; + +print "Reading derived east asian widths\n"; + +open (INPUT, "< $derivedeastasianwidthtxt") || exit 1; + +while () +{ + my ($start_code, $end_code); + + chop; + + s/#.*//; + next if /^\s*$/; + if (!/^([0-9A-F]+)(?:\.\.([0-9A-F]+))?\s*;\s*([A-Za-z_]+)\s*$/) { + die "Cannot parse line: '$_'\n"; + } + + if (defined $2) { + push @eawidths, [ hex $1, hex $2, $3 ]; + } else { + push @eawidths, [ hex $1, hex $1, $3 ]; + } +} + +close INPUT; + if ($do_props) { &print_tables ($last_code) } @@ -495,9 +561,12 @@ if ($do_decomp) { &print_decomp ($last_code); &output_composition_table; } - &print_line_break ($last_code); +if ($do_scripts) { + &print_scripts +} + exit 0; @@ -663,6 +732,11 @@ sub print_tables &output_special_case_table (\*OUT); &output_casefold_table (\*OUT); + # + # And the widths tables + # + &output_width_tables (\*OUT); + print OUT "#endif /* CHARTABLES_H */\n"; close (OUT); @@ -1249,12 +1323,9 @@ sub output_composition_table # Output second singletons - print OUT "static const guint16 compose_second_single[][2] = {\n"; + print OUT "static const gunichar compose_second_single[][2] = {\n"; $i = 0; for $record (@second_singletons) { - if ($record->[1] > 0xFFFF or $record->[2] > 0xFFFF) { - die "time to switch compose_second_single to gunichar"; - } print OUT ",\n" if $i++ > 0; printf OUT " { %#06x, %#06x }", $record->[1], $record->[2]; } @@ -1333,5 +1404,146 @@ EOT printf "Generated %d bytes for casefold table\n", $recordlen * @casefold; } - +sub output_one_width_table +{ + my ($out, $name, $wpe) = @_; + my $start; + my $end; + my $wp; + my $rex; + + print $out "static const struct Interval g_unicode_width_table_${name}[] = {\n"; + + $rex = qr/$wpe/; + + for (my $i = 0; $i <= $#eawidths; $i++) { + $start = $eawidths[$i]->[0]; + $end = $eawidths[$i]->[1]; + $wp = $eawidths[$i]->[2]; + + next if ($wp !~ $rex); + + while ($i <= $#eawidths - 1 && + $eawidths[$i + 1]->[0] == $end + 1 && + ($eawidths[$i + 1]->[2] =~ $rex)) { + $i++; + $end = $eawidths[$i]->[1]; + } + + printf $out "{0x%04X, 0x%04X},\n", $start, $end; + } + + printf $out "};\n\n"; +} + +sub output_width_tables +{ + my $out = shift; + + @eawidths = sort { $a->[0] <=> $b->[0] } @eawidths; + + print $out <gscripttable.h" or die "Cannot open gscripttable.h: $!\n"; + + print OUT<[0] <=> $b->[0] } @scripts; + + $easy_range = 0x2000; + + print OUT< $end) { + $start = $scripts[$i]->[0]; + $end = $scripts[$i]->[1]; + $script = $scripts[$i]->[2]; + $i++; + } + + if ($c < $start) { + printf OUT " G_UNICODE_SCRIPT_UNKNOWN,"; + } else { + printf OUT " G_UNICODE_SCRIPT_%s,", $script; + } + } + + if ($end >= $easy_range) { + $i--; + $scripts[$i]->[0] = $easy_range; + } + + print OUT<[0]; + $end = $scripts[$i]->[1]; + $script = $scripts[$i]->[2]; + + while ($i <= $#scripts - 1 && + $scripts[$i + 1]->[0] == $end + 1 && + $scripts[$i + 1]->[2] eq $script) { + $i++; + $end = $scripts[$i]->[1]; + } + printf OUT " { %#06x, %5d, G_UNICODE_SCRIPT_%s },\n", $start, $end - $start + 1, $script; + } + + printf OUT<