#! /usr/bin/perl use strict; my $USAGE = <<__EOF__; usage: mkutable [-n] [-f#] type... [--] [<] UnicodeData.txt -n = take non-matching types -f = zero-based type field (default 2) __EOF__ use vars qw( $opt_f $opt_n ); use Getopt::Std; my $type_field = 2; exit (main() ? 1 : 0); sub main { my $date = `date`; chomp $date; my $args = join ' ', @ARGV; my $header = "/* Generated by \"$0 $args\" on $date */\n"; die $USAGE if not getopts('f:n'); $type_field = $opt_f if $opt_f; my %types; my $arg; while ($arg = shift @ARGV) { last if $arg eq '--'; $types{$arg} = 1; } my %out = ( 'types' => \%types ); my $last_code = 0; print $header; while (<>) { chomp; s/#.*//; my @fields = split /;/; next if not @fields; my $code = hex $fields[0]; my $type = $fields[$type_field]; $type =~ s/\s//g; while (++$last_code < $code) { output(\%out, $last_code, '?'); } output(\%out, $code, $type); } output(\%out, $last_code+1, '?'); } sub output { my ($out, $code, $type) = @_; my $match = ${${$out}{types}}{$type}; my $type_change = (not $$out{start_type} or $type ne $$out{start_type}); $match = not $match if $opt_n; if ($match and (not $$out{in_run} or $type_change)) { end_run($out, $code-1); start_run($out, $code, $type); } elsif (not $match and $$out{in_run}) { end_run($out, $code-1); } } sub start_run { my ($out, $code, $type) = @_; $$out{start_code} = $code; $$out{start_type} = $type; $$out{in_run} = 1; } sub end_run { my ($out, $code) = @_; return if not $$out{in_run}; printf "\t{ 0x%04x, 0x%04x }, /* %s */\n", $$out{start_code}, $code, $$out{start_type}; $$out{in_run} = 0; }