skim/admin/intltool-extract.in

   1 #!@INTLTOOL_PERL@ -w
   2 # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
   3
   4 #
   5 #  The Intltool Message Extractor
   6 #
   7 #  Copyright (C) 2000-2001, 2003 Free Software Foundation.
   8 #
   9 #  Intltool is free software; you can redistribute it and/or
  10 #  modify it under the terms of the GNU General Public License as
  11 #  published by the Free Software Foundation; either version 2 of the
  12 #  License, or (at your option) any later version.
  13 #
  14 #  Intltool is distributed in the hope that it will be useful,
  15 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 #  General Public License for more details.
  18 #
  19 #  You should have received a copy of the GNU General Public License
  20 #  along with this program; if not, write to the Free Software
  21 #  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 #
  23 #  As a special exception to the GNU General Public License, if you
  24 #  distribute this file as part of a program that contains a
  25 #  configuration script generated by Autoconf, you may include it under
  26 #  the same distribution terms that you use for the rest of that program.
  27 #
  28 #  Authors: Kenneth Christiansen <kenneth@gnu.org>
  29 #           Darin Adler <darin@bentspoon.com>
  30 #
  31
  32 ## Release information
  33 my $PROGRAM      = "intltool-extract";
  34 my $PACKAGE      = "intltool";
  35 my $VERSION      = "0.30";
  36
  37 ## Loaded modules
  38 use strict;
  39 use File::Basename;
  40 use Getopt::Long;
  41
  42 ## Scalars used by the option stuff
  43 my $TYPE_ARG    = "0";
  44 my $LOCAL_ARG   = "0";
  45 my $HELP_ARG    = "0";
  46 my $VERSION_ARG = "0";
  47 my $UPDATE_ARG  = "0";
  48 my $QUIET_ARG   = "0";
  49 my $SRCDIR_ARG  = ".";
  50
  51 my $FILE;
  52 my $OUTFILE;
  53
  54 my $gettext_type = "";
  55 my $input;
  56 my %messages = ();
  57 my %loc = ();
  58 my %count = ();
  59 my %comments = ();
  60 my $strcount = 0;
  61
  62 ## Use this instead of \w for XML files to handle more possible characters.
  63 my $w = "[-A-Za-z0-9._:]";
  64
  65 ## Always print first
  66 $| = 1;
  67
  68 ## Handle options
  69 GetOptions (
  70             "type=s"     => \$TYPE_ARG,
  71             "local|l"    => \$LOCAL_ARG,
  72             "help|h"     => \$HELP_ARG,
  73             "version|v"  => \$VERSION_ARG,
  74             "update"     => \$UPDATE_ARG,
  75             "quiet|q"    => \$QUIET_ARG,
  76             "srcdir=s"   => \$SRCDIR_ARG,
  77             ) or &error;
  78
  79 &split_on_argument;
  80
  81
  82 ## Check for options.
  83 ## This section will check for the different options.
  84
  85 sub split_on_argument {
  86
  87     if ($VERSION_ARG) {
  88         &version;
  89
  90     } elsif ($HELP_ARG) {
  91         &help;
  92
  93     } elsif ($LOCAL_ARG) {
  94         &place_local;
  95         &extract;
  96
  97     } elsif ($UPDATE_ARG) {
  98         &place_normal;
  99         &extract;
 100
 101     } elsif (@ARGV > 0) {
 102         &place_normal;
 103         &message;
 104         &extract;
 105
 106     } else {
 107         &help;
 108
 109     }
 110 }
 111
 112 sub place_normal {
 113     $FILE        = $ARGV[0];
 114     $OUTFILE     = "$FILE.h";
 115 }
 116
 117 sub place_local {
 118     $OUTFILE     = fileparse($FILE, ());
 119     if (!-e "tmp/") {
 120         system("mkdir tmp/");
 121     }
 122     $OUTFILE     = "./tmp/$OUTFILE.h"
 123 }
 124
 125 sub determine_type {
 126    if ($TYPE_ARG =~ /^gettext\/(.*)/) {
 127         $gettext_type=$1
 128    }
 129 }
 130
 131 ## Sub for printing release information
 132 sub version{
 133     print <<_EOF_;
 134 ${PROGRAM} (${PACKAGE}) $VERSION
 135 Copyright (C) 2000, 2003 Free Software Foundation, Inc.
 136 Written by Kenneth Christiansen, 2000.
 137
 138 This is free software; see the source for copying conditions.  There is NO
 139 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 140 _EOF_
 141     exit;
 142 }
 143
 144 ## Sub for printing usage information
 145 sub help {
 146     print <<_EOF_;
 147 Usage: ${PROGRAM} [OPTION]... [FILENAME]
 148 Generates a header file from an XML source file.
 149
 150 It grabs all strings between <_translatable_node> and its end tag in
 151 XML files. Read manpage (man ${PROGRAM}) for more info.
 152
 153       --type=TYPE   Specify the file type of FILENAME. Currently supports:
 154                     "gettext/glade", "gettext/ini", "gettext/keys"
 155                     "gettext/rfc822deb", "gettext/schemas",
 156                     "gettext/scheme", "gettext/xml"
 157   -l, --local       Writes output into current working directory
 158                     (conflicts with --update)
 159       --update      Writes output into the same directory the source file
 160                     reside (conflicts with --local)
 161       --srcdir      Root of the source tree
 162   -v, --version     Output version information and exit
 163   -h, --help        Display this help and exit
 164   -q, --quiet       Quiet mode
 165
 166 Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
 167 or send email to <xml-i18n-tools\@gnome.org>.
 168 _EOF_
 169     exit;
 170 }
 171
 172 ## Sub for printing error messages
 173 sub error{
 174     print STDERR "Try `${PROGRAM} --help' for more information.\n";
 175     exit;
 176 }
 177
 178 sub message {
 179     print "Generating C format header file for translation.\n" unless $QUIET_ARG;
 180 }
 181
 182 sub extract {
 183     &determine_type;
 184
 185     &convert;
 186
 187     open OUT, ">$OUTFILE";
 188     &msg_write;
 189     close OUT;
 190
 191     print "Wrote $OUTFILE\n" unless $QUIET_ARG;
 192 }
 193
 194 sub convert {
 195
 196     ## Reading the file
 197     {
 198         local (*IN);
 199         local $/; #slurp mode
 200         open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!";
 201         $input = <IN>;
 202     }
 203
 204     &type_ini if $gettext_type eq "ini";
 205     &type_keys if $gettext_type eq "keys";
 206     &type_xml if $gettext_type eq "xml";
 207     &type_glade if $gettext_type eq "glade";
 208     &type_scheme if $gettext_type eq "scheme";
 209     &type_schemas  if $gettext_type eq "schemas";
 210     &type_rfc822deb  if $gettext_type eq "rfc822deb";
 211 }
 212
 213 sub entity_decode_minimal
 214 {
 215     local ($_) = @_;
 216
 217     s/&apos;/'/g; # '
 218     s/&quot;/"/g; # "
 219     s/&amp;/&/g;
 220
 221     return $_;
 222 }
 223
 224 sub entity_decode
 225 {
 226     local ($_) = @_;
 227
 228     s/&apos;/'/g; # '
 229     s/&quot;/"/g; # "
 230     s/&amp;/&/g;
 231     s/&lt;/</g;
 232     s/&gt;/>/g;
 233
 234     return $_;
 235 }
 236
 237 sub escape_char
 238 {
 239     return '\"' if $_ eq '"';
 240     return '\n' if $_ eq "\n";
 241     return '\\' if $_ eq '\\';
 242
 243     return $_;
 244 }
 245
 246 sub escape
 247 {
 248     my ($string) = @_;
 249     return join "", map &escape_char, split //, $string;
 250 }
 251
 252 sub type_ini {
 253     ### For generic translatable desktop files ###
 254     while ($input =~ /^_.*=(.*)$/mg) {
 255         $messages{$1} = [];
 256     }
 257 }
 258
 259 sub type_keys {
 260     ### For generic translatable mime/keys files ###
 261     while ($input =~ /^\s*_\w+=(.*)$/mg) {
 262         $messages{$1} = [];
 263     }
 264 }
 265
 266 sub type_xml {
 267     ### For generic translatable XML files ###
 268
 269     while ($input =~ /\s_$w+\s*=\s*\"([^"]+)\"/sg) { # "
 270         $messages{entity_decode_minimal($1)} = [];
 271     }
 272
 273     while ($input =~ /<_($w+)(?: xml:space="($w+)")?>(.+?)<\/_\1>/sg) {
 274         $_ = $3;
 275         if (!defined($2) || $2 ne "preserve") {
 276             s/\s+/ /g;
 277             s/^ //;
 278             s/ $//;
 279         }
 280         $messages{entity_decode_minimal($_)} = [];
 281     }
 282 }
 283
 284 sub type_schemas {
 285     ### For schemas XML files ###
 286
 287     # FIXME: We should handle escaped < (less than)
 288     while ($input =~ /
 289                       <locale\ name="C">\s*
 290                           (<default>\s*(.*?)\s*<\/default>\s*)?
 291                           (<short>\s*(.*?)\s*<\/short>\s*)?
 292                           (<long>\s*(.*?)\s*<\/long>\s*)?
 293                       <\/locale>
 294                      /sgx) {
 295         my @totranslate = ($2,$4,$6);
 296         foreach (@totranslate) {
 297             next if !$_;
 298         s/\s+/ /g;
 299         $messages{entity_decode_minimal($_)} = [];
 300         }
 301     }
 302 }
 303
 304 sub type_rfc822deb {
 305     ### For rfc822-style Debian configuration files ###
 306
 307     my $lineno = 1;
 308     my $type = '';
 309     while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg)
 310     {
 311         my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5);
 312         while ($pre =~ m/\n/g)
 313         {
 314             $lineno ++;
 315         }
 316         $lineno += length($newline);
 317         my @str_list = rfc822deb_split(length($underscore), $text);
 318         for my $str (@str_list)
 319         {
 320             $strcount++;
 321             $messages{$str} = [];
 322             $loc{$str} = $lineno;
 323             $count{$str} = $strcount;
 324             my $usercomment = '';
 325             while($pre =~ s/(^|\n)#([^\n]*)$//s)
 326             {
 327                 $usercomment = "\n" . $2 . $usercomment;
 328             }
 329             $comments{$str} = $tag . $usercomment;
 330         }
 331         $lineno += ($text =~ s/\n//g);
 332     }
 333 }
 334
 335 sub rfc822deb_split {
 336     # Debian defines a special way to deal with rfc822-style files:
 337     # when a value contain newlines, it consists of
 338     #   1.  a short form (first line)
 339     #   2.  a long description, all lines begin with a space,
 340     #       and paragraphs are separated by a single dot on a line
 341     # This routine returns an array of all paragraphs, and reformat
 342     # them.
 343     # When first argument is 2, the string is a comma separated list of
 344     # values.
 345     my $type = shift;
 346     my $text = shift;
 347     $text =~ s/^[ \t]//mg;
 348     return (split(/, */, $text, 0)) if $type ne 1;
 349     return ($text) if $text !~ /\n/;
 350
 351     $text =~ s/([^\n]*)\n//;
 352     my @list = ($1);
 353     my $str = '';
 354     for my $line (split (/\n/, $text))
 355     {
 356         chomp $line;
 357         if ($line =~ /^\.\s*$/)
 358         {
 359             #  New paragraph
 360             $str =~ s/\s*$//;
 361             push(@list, $str);
 362             $str = '';
 363         }
 364         elsif ($line =~ /^\s/)
 365         {
 366             #  Line which must not be reformatted
 367             $str .= "\n" if length ($str) && $str !~ /\n$/;
 368             $line =~ s/\s+$//;
 369             $str .= $line."\n";
 370         }
 371         else
 372         {
 373             #  Continuation line, remove newline
 374             $str .= " " if length ($str) && $str !~ /\n$/;
 375             $str .= $line;
 376         }
 377     }
 378     $str =~ s/\s*$//;
 379     push(@list, $str) if length ($str);
 380     return @list;
 381 }
 382
 383 sub type_glade {
 384     ### For translatable Glade XML files ###
 385
 386     my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
 387
 388     while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
 389         # Glade sometimes uses tags that normally mark translatable things for
 390         # little bits of non-translatable content. We work around this by not
 391         # translating strings that only includes something like label4 or window1.
 392         $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
 393     }
 394
 395     while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
 396         for my $item (split (/\n/, $1)) {
 397             $messages{entity_decode($item)} = [];
 398         }
 399     }
 400
 401     ## handle new glade files
 402     while ($input =~ /<(property|atkproperty)\s+[^>]*translatable\s*=\s*"yes"[^>]*>([^<]+)<\/\1>/sg) {
 403         $messages{entity_decode($2)} = [] unless $2 =~ /^(window|label)[0-9]+$/;
 404     }
 405     while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
 406         $messages{entity_decode_minimal($2)} = [];
 407     }
 408 }
 409
 410 sub type_scheme {
 411     while ($input =~ /_\w*\(?"((?:[^"\\]+|\\.)*)"\)?/sg) {
 412         $messages{$1} = [];
 413     }
 414 }
 415
 416 sub msg_write {
 417     my @msgids;
 418     if (%count)
 419     {
 420         @msgids = sort { $count{$a} <=> $count{$b} } keys %count;
 421     }
 422     else
 423     {
 424         @msgids = sort keys %messages;
 425     }
 426     for my $message (@msgids)
 427     {
 428         my $offsetlines = 1;
 429         $offsetlines++ if $message =~ /%/;
 430         if (defined ($comments{$message}))
 431         {
 432                 while ($comments{$message} =~ m/\n/g)
 433                 {
 434                     $offsetlines++;
 435                 }
 436         }
 437         print OUT "# ".($loc{$message} - $offsetlines).  " \"$FILE\"\n"
 438                 if defined $loc{$message};
 439         print OUT "/* ".$comments{$message}." */\n"
 440                 if defined $comments{$message};
 441         print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
 442
 443         my @lines = split (/\n/, $message, -1);
 444         for (my $n = 0; $n < @lines; $n++)
 445         {
 446             if ($n == 0)
 447             {
 448                 print OUT "char *s = N_(\"";
 449             }
 450             else
 451             {
 452                 print OUT "             \"";
 453             }
 454
 455             print OUT escape($lines[$n]);
 456
 457             if ($n < @lines - 1)
 458             {
 459                 print OUT "\\n\"\n";
 460             }
 461             else
 462             {
 463                 print OUT "\");\n";
 464             }
 465         }
 466     }
 467 }
 468