intltool-extract.in

   1 #!@INTLTOOL_PERL@ -w
   2 # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
   3
   4 #
   5 #  The Intltool Message Extractor
   6 #
   7 #  Copyright (C) 2000-2001, 2003 Free Software Foundation.
   8 #
   9 #  Intltool is free software; you can redistribute it and/or
  10 #  modify it under the terms of the GNU General Public License as
  11 #  published by the Free Software Foundation; either version 2 of the
  12 #  License, or (at your option) any later version.
  13 #
  14 #  Intltool is distributed in the hope that it will be useful,
  15 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 #  General Public License for more details.
  18 #
  19 #  You should have received a copy of the GNU General Public License
  20 #  along with this program; if not, write to the Free Software
  21 #  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 #
  23 #  As a special exception to the GNU General Public License, if you
  24 #  distribute this file as part of a program that contains a
  25 #  configuration script generated by Autoconf, you may include it under
  26 #  the same distribution terms that you use for the rest of that program.
  27 #
  28 #  Authors: Kenneth Christiansen <kenneth@gnu.org>
  29 #           Darin Adler <darin@bentspoon.com>
  30 #
  31
  32 ## Release information
  33 my $PROGRAM      = "intltool-extract";
  34 my $PACKAGE      = "@PACKAGE@";
  35 my $VERSION      = "@VERSION@";
  36
  37 ## Loaded modules
  38 use strict;
  39 use File::Basename;
  40 use Getopt::Long;
  41
  42 ## Scalars used by the option stuff
  43 my $TYPE_ARG    = "0";
  44 my $LOCAL_ARG   = "0";
  45 my $HELP_ARG    = "0";
  46 my $VERSION_ARG = "0";
  47 my $UPDATE_ARG  = "0";
  48 my $QUIET_ARG   = "0";
  49 my $SRCDIR_ARG  = ".";
  50 my $NOMSGCTXT_ARG = "0";
  51
  52 my $FILE;
  53 my $OUTFILE;
  54
  55 my $gettext_type = "";
  56 my $input;
  57 my %messages = ();
  58 my @messages_sorted = ();
  59 my %loc = ();
  60 my %count = ();
  61 my %comments = ();
  62 my $strcount = 0;
  63
  64 my $XMLCOMMENT = "";
  65
  66 ## Use this instead of \w for XML files to handle more possible characters.
  67 my $w = "[-A-Za-z0-9._:]";
  68
  69 ## Always print first
  70 $| = 1;
  71
  72 ## Handle options
  73 GetOptions (
  74             "type=s"     => \$TYPE_ARG,
  75             "local|l"    => \$LOCAL_ARG,
  76             "help|h"     => \$HELP_ARG,
  77             "version|v"  => \$VERSION_ARG,
  78             "update"     => \$UPDATE_ARG,
  79             "quiet|q"    => \$QUIET_ARG,
  80             "srcdir=s"   => \$SRCDIR_ARG,
  81             "nomsgctxt"  => \$NOMSGCTXT_ARG,
  82             ) or &error;
  83
  84 &split_on_argument;
  85
  86
  87 ## Check for options.
  88 ## This section will check for the different options.
  89
  90 sub split_on_argument {
  91
  92     if ($VERSION_ARG) {
  93         &version;
  94
  95     } elsif ($HELP_ARG) {
  96         &help;
  97
  98     } elsif ($LOCAL_ARG) {
  99         &place_local;
 100         &extract;
 101
 102     } elsif ($UPDATE_ARG) {
 103         &place_normal;
 104         &extract;
 105
 106     } elsif (@ARGV > 0) {
 107         &place_normal;
 108         &message;
 109         &extract;
 110
 111     } else {
 112         &help;
 113
 114     }
 115 }
 116
 117 sub place_normal {
 118     $FILE        = $ARGV[0];
 119     $OUTFILE     = "$FILE.h";
 120
 121     my $dirname = dirname ($OUTFILE);
 122     if (! -d "$dirname" && $dirname ne "") {
 123         system ("mkdir -p $dirname");
 124     }
 125 }
 126
 127 sub place_local {
 128     $FILE        = $ARGV[0];
 129     $OUTFILE     = fileparse($FILE, ());
 130     if (!-e "tmp/") {
 131         system("mkdir tmp/");
 132     }
 133     $OUTFILE     = "./tmp/$OUTFILE.h"
 134 }
 135
 136 sub determine_type {
 137    if ($TYPE_ARG =~ /^gettext\/(.*)/) {
 138         $gettext_type=$1
 139    }
 140 }
 141
 142 ## Sub for printing release information
 143 sub version{
 144     print <<_EOF_;
 145 ${PROGRAM} (${PACKAGE}) $VERSION
 146 Copyright (C) 2000, 2003 Free Software Foundation, Inc.
 147 Written by Kenneth Christiansen, 2000.
 148
 149 This is free software; see the source for copying conditions.  There is NO
 150 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 151 _EOF_
 152     exit;
 153 }
 154
 155 ## Sub for printing usage information
 156 sub help {
 157     print <<_EOF_;
 158 Usage: ${PROGRAM} [OPTION]... [FILENAME]
 159 Generates a header file from an XML source file.
 160
 161 It grabs all strings between <_translatable_node> and its end tag in
 162 XML files. Read manpage (man ${PROGRAM}) for more info.
 163
 164       --type=TYPE   Specify the file type of FILENAME. Currently supports:
 165                     "gettext/glade", "gettext/ini", "gettext/keys"
 166                     "gettext/rfc822deb", "gettext/schemas",
 167                     "gettext/gsettings", "gettext/xml", "gettext/quoted",
 168                     "gettext/quotedxml", "gettext/tlk", "gettext/qtdesigner"
 169   -l, --local       Writes output into current working directory
 170                     (conflicts with --update)
 171       --update      Writes output into the same directory the source file
 172                     reside (conflicts with --local)
 173       --srcdir      Root of the source tree
 174   -v, --version     Output version information and exit
 175   -h, --help        Display this help and exit
 176   -q, --quiet       Quiet mode
 177
 178 Report bugs to http://bugs.launchpad.net/intltool
 179 _EOF_
 180     exit;
 181 }
 182
 183 ## Sub for printing error messages
 184 sub error{
 185     print STDERR "Try `${PROGRAM} --help' for more information.\n";
 186     exit;
 187 }
 188
 189 sub message {
 190     print "Generating C format header file for translation.\n" unless $QUIET_ARG;
 191 }
 192
 193 sub extract {
 194     &determine_type;
 195
 196     &convert;
 197
 198     open OUT, ">$OUTFILE";
 199     binmode (OUT) if $^O eq 'MSWin32';
 200     &msg_write;
 201     close OUT;
 202
 203     print "Wrote $OUTFILE\n" unless $QUIET_ARG;
 204 }
 205
 206 sub convert {
 207
 208     ## Reading the file
 209     {
 210         local (*IN);
 211         local $/; #slurp mode
 212         open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!";
 213         binmode (IN);
 214         $input = <IN>;
 215         close IN;
 216     }
 217
 218     &type_ini if $gettext_type eq "ini";
 219     &type_keys if $gettext_type eq "keys";
 220     &type_xml if $gettext_type eq "xml";
 221     &type_glade if $gettext_type eq "glade";
 222     &type_gsettings  if $gettext_type eq "gsettings";
 223     &type_schemas  if $gettext_type eq "schemas";
 224     &type_rfc822deb  if $gettext_type eq "rfc822deb";
 225     &type_quoted if $gettext_type eq "quoted";
 226     &type_quotedxml if $gettext_type eq "quotedxml";
 227     &type_tlk if $gettext_type eq "tlk";
 228     &type_qtdesigner if $gettext_type eq "qtdesigner";
 229 }
 230
 231 sub entity_decode_minimal
 232 {
 233     local ($_) = @_;
 234
 235     s/&apos;/'/g; # '
 236     s/&quot;/"/g; # "
 237     s/&amp;/&/g;
 238
 239     return $_;
 240 }
 241
 242 sub entity_decode
 243 {
 244     local ($_) = @_;
 245
 246     s/&apos;/'/g; # '
 247     s/&quot;/"/g; # "
 248     s/&lt;/</g;
 249     s/&gt;/>/g;
 250     s/&amp;/&/g;
 251
 252     return $_;
 253 }
 254
 255 sub escape_char
 256 {
 257     return '\"' if $_ eq '"';
 258     return '\n' if $_ eq "\n";
 259     return '\\\\' if $_ eq '\\';
 260
 261     return $_;
 262 }
 263
 264 sub escape
 265 {
 266     my ($string) = @_;
 267     return join "", map &escape_char, split //, $string;
 268 }
 269
 270 sub add_message
 271 {
 272     my ($string) = @_;
 273     push @messages_sorted, $string if !defined $messages{$string};
 274     $messages{$string} = [];
 275 }
 276
 277 sub type_ini {
 278     ### For generic translatable desktop files ###
 279     while ($input =~ /^(#(.+)\n)?^_[A-Za-z0-9\-]+\s*=\s*(.*)$/mg) {
 280         if (defined($2))  {
 281             $comments{$3} = $2;
 282         }
 283         add_message($3);
 284     }
 285 }
 286
 287 sub type_keys {
 288     ### For generic translatable mime/keys files ###
 289     while ($input =~ /^\s*_\w+=(.*)$/mg) {
 290         add_message($1);
 291     }
 292 }
 293
 294 sub type_xml {
 295     ### For generic translatable XML files ###
 296     my $tree = readXml($input);
 297     parseTree(0, $tree);
 298 }
 299
 300 sub print_var {
 301     my $var = shift;
 302     my $vartype = ref $var;
 303
 304     if ($vartype =~ /ARRAY/) {
 305         my @arr = @{$var};
 306         print "[ ";
 307         foreach my $el (@arr) {
 308             print_var($el);
 309             print ", ";
 310         }
 311         print "] ";
 312     } elsif ($vartype =~ /HASH/) {
 313         my %hash = %{$var};
 314         print "{ ";
 315         foreach my $key (keys %hash) {
 316             print "$key => ";
 317             print_var($hash{$key});
 318             print ", ";
 319         }
 320         print "} ";
 321     } else {
 322         print $var;
 323     }
 324 }
 325
 326 # Same syntax as getAttributeString in intltool-merge.in.in, similar logic (look for ## differences comment)
 327 sub getAttributeString
 328 {
 329     my $sub = shift;
 330     my $do_translate = shift || 1;
 331     my $language = shift || "";
 332     my $translate = shift;
 333     my $result = "";
 334     foreach my $e (reverse(sort(keys %{ $sub }))) {
 335         my $key    = $e;
 336         my $string = $sub->{$e};
 337         my $quote = '"';
 338
 339         $string =~ s/^[\s]+//;
 340         $string =~ s/[\s]+$//;
 341
 342         if ($string =~ /^'.*'$/)
 343         {
 344             $quote = "'";
 345         }
 346         $string =~ s/^['"]//g;
 347         $string =~ s/['"]$//g;
 348
 349         ## differences from intltool-merge.in.in
 350         if ($key =~ /^_/) {
 351             $comments{entity_decode($string)} = $XMLCOMMENT if $XMLCOMMENT;
 352             add_message(entity_decode($string));
 353             $$translate = 2;
 354         }
 355         ## differences end here from intltool-merge.in.in
 356         $result .= " $key=$quote$string$quote";
 357     }
 358     return $result;
 359 }
 360
 361 # Verbatim copy from intltool-merge.in.in
 362 sub getXMLstring
 363 {
 364     my $ref = shift;
 365     my $spacepreserve = shift || 0;
 366     my @list = @{ $ref };
 367     my $result = "";
 368
 369     my $count = scalar(@list);
 370     my $attrs = $list[0];
 371     my $index = 1;
 372
 373     $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
 374     $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
 375
 376     while ($index < $count) {
 377         my $type = $list[$index];
 378         my $content = $list[$index+1];
 379         if (! $type ) {
 380             # We've got CDATA
 381             if ($content) {
 382                 # lets strip the whitespace here, and *ONLY* here
 383                 $content =~ s/\s+/ /gs if (!$spacepreserve);
 384                 $result .= $content;
 385             }
 386         } elsif ( "$type" ne "1" ) {
 387             # We've got another element
 388             $result .= "<$type";
 389             $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
 390             if ($content) {
 391                 my $subresult = getXMLstring($content, $spacepreserve);
 392                 if ($subresult) {
 393                     $result .= ">".$subresult . "</$type>";
 394                 } else {
 395                     $result .= "/>";
 396                 }
 397             } else {
 398                 $result .= "/>";
 399             }
 400         }
 401         $index += 2;
 402     }
 403     return $result;
 404 }
 405
 406 # Verbatim copy from intltool-merge.in.in, except for MULTIPLE_OUTPUT handling removed
 407 # Translate list of nodes if necessary
 408 sub translate_subnodes
 409 {
 410     my $fh = shift;
 411     my $content = shift;
 412     my $language = shift || "";
 413     my $singlelang = shift || 0;
 414     my $spacepreserve = shift || 0;
 415
 416     my @nodes = @{ $content };
 417
 418     my $count = scalar(@nodes);
 419     my $index = 0;
 420     while ($index < $count) {
 421         my $type = $nodes[$index];
 422         my $rest = $nodes[$index+1];
 423         traverse($fh, $type, $rest, $language, $spacepreserve);
 424         $index += 2;
 425     }
 426 }
 427
 428 # Based on traverse() in intltool-merge.in.in
 429 sub traverse
 430 {
 431     my $fh = shift; # unused, to allow us to sync code between -merge and -extract
 432     my $nodename = shift;
 433     my $content = shift;
 434     my $language = shift || "";
 435     my $spacepreserve = shift || 0;
 436
 437     if ($nodename && "$nodename" eq "1") {
 438         $XMLCOMMENT = $content;
 439     } elsif ($nodename) {
 440         # element
 441         my @all = @{ $content };
 442         my $attrs = shift @all;
 443         my $translate = 0;
 444         my $outattr = getAttributeString($attrs, 1, $language, \$translate);
 445
 446         if ($nodename =~ /^_/) {
 447             $translate = 1;
 448             $nodename =~ s/^_//;
 449         }
 450         my $lookup = '';
 451
 452         $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
 453         $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
 454
 455         if ($translate) {
 456             $lookup = getXMLstring($content, $spacepreserve);
 457             if (!$spacepreserve) {
 458                 $lookup =~ s/^\s+//s;
 459                 $lookup =~ s/\s+$//s;
 460             }
 461             if (exists $attrs->{"msgctxt"}) {
 462                 my $context = entity_decode ($attrs->{"msgctxt"});
 463                 $context =~ s/^["'](.*)["']/$1/;
 464                 $lookup = "$context\004$lookup";
 465             }
 466
 467             if ($lookup && $translate != 2) {
 468                 $comments{$lookup} = $XMLCOMMENT if $XMLCOMMENT;
 469                 add_message($lookup);
 470             } elsif ($translate == 2) {
 471                 translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
 472             }
 473         } else {
 474             $XMLCOMMENT = "";
 475             my $count = scalar(@all);
 476             if ($count > 0) {
 477                 my $index = 0;
 478                 while ($index < $count) {
 479                     my $type = $all[$index];
 480                     my $rest = $all[$index+1];
 481                     traverse($fh, $type, $rest, $language, $spacepreserve);
 482                     $index += 2;
 483                 }
 484             }
 485         }
 486         $XMLCOMMENT = "";
 487     }
 488 }
 489
 490
 491 # Verbatim copy from intltool-merge.in.in, $fh for compatibility
 492 sub parseTree
 493 {
 494     my $fh        = shift;
 495     my $ref       = shift;
 496     my $language  = shift || "";
 497
 498     my $name = shift @{ $ref };
 499     my $cont = shift @{ $ref };
 500
 501     while (!$name || "$name" eq "1") {
 502         $name = shift @{ $ref };
 503         $cont = shift @{ $ref };
 504     }
 505
 506     my $spacepreserve = 0;
 507     my $attrs = @{$cont}[0];
 508     $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
 509
 510     traverse($fh, $name, $cont, $language, $spacepreserve);
 511 }
 512
 513 # Verbatim copy from intltool-merge.in.in
 514 sub intltool_tree_comment
 515 {
 516     my $expat = shift;
 517     my $data  = $expat->original_string();
 518     my $clist = $expat->{Curlist};
 519     my $pos   = $#$clist;
 520
 521     $data =~ s/^<!--//s;
 522     $data =~ s/-->$//s;
 523     push @$clist, 1 => $data;
 524 }
 525
 526 # Verbatim copy from intltool-merge.in.in
 527 sub intltool_tree_cdatastart
 528 {
 529     my $expat    = shift;
 530     my $clist = $expat->{Curlist};
 531     my $pos   = $#$clist;
 532
 533     push @$clist, 0 => $expat->original_string();
 534 }
 535
 536 # Verbatim copy from intltool-merge.in.in
 537 sub intltool_tree_cdataend
 538 {
 539     my $expat    = shift;
 540     my $clist = $expat->{Curlist};
 541     my $pos   = $#$clist;
 542
 543     $clist->[$pos] .= $expat->original_string();
 544 }
 545
 546 # Verbatim copy from intltool-merge.in.in
 547 sub intltool_tree_char
 548 {
 549     my $expat = shift;
 550     my $text  = shift;
 551     my $clist = $expat->{Curlist};
 552     my $pos   = $#$clist;
 553
 554     # Use original_string so that we retain escaped entities
 555     # in CDATA sections.
 556     #
 557     if ($pos > 0 and $clist->[$pos - 1] eq '0') {
 558         $clist->[$pos] .= $expat->original_string();
 559     } else {
 560         push @$clist, 0 => $expat->original_string();
 561     }
 562 }
 563
 564 # Verbatim copy from intltool-merge.in.in
 565 sub intltool_tree_start
 566 {
 567     my $expat    = shift;
 568     my $tag      = shift;
 569     my @origlist = ();
 570
 571     # Use original_string so that we retain escaped entities
 572     # in attribute values.  We must convert the string to an
 573     # @origlist array to conform to the structure of the Tree
 574     # Style.
 575     #
 576     my @original_array = split /\x/, $expat->original_string();
 577     my $source         = $expat->original_string();
 578
 579     # Remove leading tag.
 580     #
 581     $source =~ s|^\s*<\s*(\S+)||s;
 582
 583     # Grab attribute key/value pairs and push onto @origlist array.
 584     #
 585     while ($source)
 586     {
 587        if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
 588        {
 589            $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
 590            push @origlist, $1;
 591            push @origlist, '"' . $2 . '"';
 592        }
 593        elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
 594        {
 595            $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
 596            push @origlist, $1;
 597            push @origlist, "'" . $2 . "'";
 598        }
 599        else
 600        {
 601            last;
 602        }
 603     }
 604
 605     my $ol = [ { @origlist } ];
 606
 607     push @{ $expat->{Lists} }, $expat->{Curlist};
 608     push @{ $expat->{Curlist} }, $tag => $ol;
 609     $expat->{Curlist} = $ol;
 610 }
 611
 612 # Copied from intltool-merge.in.in and added comment handler.
 613 sub readXml
 614 {
 615     my $xmldoc = shift || return;
 616     my $ret = eval 'require XML::Parser';
 617     if(!$ret) {
 618         die "You must have XML::Parser installed to run $0\n\n";
 619     }
 620     my $xp = new XML::Parser(Style => 'Tree');
 621     $xp->setHandlers(Char => \&intltool_tree_char);
 622     $xp->setHandlers(Start => \&intltool_tree_start);
 623     $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
 624     $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
 625
 626     ## differences from intltool-merge.in.in
 627     $xp->setHandlers(Comment => \&intltool_tree_comment);
 628     ## differences end here from intltool-merge.in.in
 629
 630     my $tree = $xp->parse($xmldoc);
 631
 632 # <foo><!-- comment --><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
 633 # would be:
 634 # [foo, [{}, 1, "comment", head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar,
 635 # [{}, 0, "Howdy",  ref, [{}]], 0, "do" ] ]
 636
 637     return $tree;
 638 }
 639
 640 sub type_schemas {
 641     ### For schemas XML files ###
 642
 643     # FIXME: We should handle escaped < (less than)
 644     while ($input =~ /
 645                       <locale\ name="C">\s*
 646                           (<default>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/default>\s*)?
 647                           (<short>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/short>\s*)?
 648                           (<long>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/long>\s*)?
 649                       <\/locale>
 650                      /sgx) {
 651         my @totranslate = ($3,$6,$9);
 652         my @eachcomment = ($2,$5,$8);
 653         foreach (@totranslate) {
 654             my $currentcomment = shift @eachcomment;
 655             next if !$_;
 656             s/\s+/ /g;
 657             add_message(entity_decode_minimal($_));
 658             $comments{entity_decode_minimal($_)} = $currentcomment if (defined($currentcomment));
 659         }
 660     }
 661 }
 662
 663 # Parse the tree as returned by readXml() for gschema.xml files.
 664 sub traverse_gsettings {
 665     sub cleanup {
 666         s/^\s+//;
 667         s/\s+$//;
 668         s/\s+/ /g;
 669         return $_;
 670     }
 671
 672     my $nodename = shift;
 673     my $content = shift;
 674     my $comment = shift || 0;
 675     my @list = @{ $content };
 676     my $attrs_ref = shift @list;
 677     my %attrs = %{ $attrs_ref };
 678     if (($nodename eq 'default' and $attrs{'l10n'}) or
 679         ($nodename eq 'summary') or ($nodename eq 'description')) {
 680         # preserve whitespace.  deal with it ourselves, below.
 681         my $message = getXMLstring($content, 1);
 682
 683         if ($nodename eq 'default') {
 684             # for <default> we strip leading and trailing whitespace but
 685             # preserve (possibly quoted) whitespace within
 686             $message =~ s/^\s+//;
 687             $message =~ s/\s+$//;
 688         } else {
 689             # for <summary> and <description>, we normalise all
 690             # whitespace while preserving paragraph boundaries
 691             $message = join "\n\n", map &cleanup, split/\n\s*\n+/, $message;
 692         }
 693
 694         my $context = $attrs{'context'};
 695         $context =~ s/^["'](.*)["']/$1/ if $context;
 696         $message = $context . "\004" . $message if $context;
 697         add_message($message);
 698         $comments{$message} = $comment if $comment;
 699     } else {
 700         my $index = 0;
 701         my $comment;
 702         while (scalar(@list) > 1) {
 703             my $type = shift @list;
 704             my $content = shift @list;
 705             if (!$type || "$type" eq "1") {
 706                 if ($type == 1) {
 707                     $comment = $content;
 708                 }
 709                 next;
 710             } else {
 711                 traverse_gsettings($type, $content, $comment);
 712                 $comment = 0;
 713             }
 714         }
 715     }
 716 }
 717
 718 sub type_gsettings {
 719     my $tree = readXml($input);
 720     my @tree_nodes = @{ $tree };
 721     my $node = shift @tree_nodes;
 722     while (!$node || "$node" eq "1") {
 723         shift @tree_nodes;
 724         $node = shift @tree_nodes;
 725     }
 726     my $content = shift @tree_nodes;
 727     traverse_gsettings($node, $content);
 728 }
 729
 730 sub type_rfc822deb {
 731     ### For rfc822-style Debian configuration files ###
 732
 733     my $lineno = 1;
 734     my $type = '';
 735     while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg)
 736     {
 737         my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5);
 738         while ($pre =~ m/\n/g)
 739         {
 740             $lineno ++;
 741         }
 742         $lineno += length($newline);
 743         my @str_list = rfc822deb_split(length($underscore), $text);
 744         for my $str (@str_list)
 745         {
 746             $strcount++;
 747             add_message($str);
 748             $loc{$str} = $lineno;
 749             $count{$str} = $strcount;
 750             my $usercomment = '';
 751             while($pre =~ s/(^|\n)#([^\n]*)$//s)
 752             {
 753                 $usercomment = "\n" . $2 . $usercomment;
 754             }
 755             $comments{$str} = $tag . $usercomment;
 756         }
 757         $lineno += ($text =~ s/\n//g);
 758     }
 759 }
 760
 761 sub rfc822deb_split {
 762     # Debian defines a special way to deal with rfc822-style files:
 763     # when a value contain newlines, it consists of
 764     #   1.  a short form (first line)
 765     #   2.  a long description, all lines begin with a space,
 766     #       and paragraphs are separated by a single dot on a line
 767     # This routine returns an array of all paragraphs, and reformat
 768     # them.
 769     # When first argument is 2, the string is a comma separated list of
 770     # values.
 771     my $type = shift;
 772     my $text = shift;
 773     $text =~ s/^[ \t]//mg;
 774     return (split(/, */, $text, 0)) if $type ne 1;
 775     return ($text) if $text !~ /\n/;
 776
 777     $text =~ s/([^\n]*)\n//;
 778     my @list = ($1);
 779     my $str = '';
 780     for my $line (split (/\n/, $text))
 781     {
 782         chomp $line;
 783         if ($line =~ /^\.\s*$/)
 784         {
 785             #  New paragraph
 786             $str =~ s/\s*$//;
 787             push(@list, $str);
 788             $str = '';
 789         }
 790         elsif ($line =~ /^\s/)
 791         {
 792             #  Line which must not be reformatted
 793             $str .= "\n" if length ($str) && $str !~ /\n$/;
 794             $line =~ s/\s+$//;
 795             $str .= $line."\n";
 796         }
 797         else
 798         {
 799             #  Continuation line, remove newline
 800             $str .= " " if length ($str) && $str !~ /\n$/;
 801             $str .= $line;
 802         }
 803     }
 804     $str =~ s/\s*$//;
 805     push(@list, $str) if length ($str);
 806     return @list;
 807 }
 808
 809 sub type_quoted {
 810     while ($input =~ /\"(([^\"]|\\\")*[^\\\"])\"/g) {
 811         my $message = $1;
 812         my $before = $`;
 813         $message =~ s/\\\"/\"/g;
 814         $before =~ s/[^\n]//g;
 815         add_message($message);
 816         $loc{$message} = length ($before) + 2;
 817     }
 818 }
 819
 820 sub type_quotedxml {
 821     while ($input =~ /\"(([^\"]|\\\")*[^\\\"])\"/g) {
 822         my $message = $1;
 823         my $before = $`;
 824         $message =~ s/\\\"/\"/g;
 825         $message = entity_decode($message);
 826         $before =~ s/[^\n]//g;
 827         add_message($message);
 828         $loc{$message} = length ($before) + 2;
 829     }
 830 }
 831
 832 # Parse the tree as returned by readXml() for Qt Designer .ui files.
 833 sub traverse_qtdesigner {
 834     my $nodename = shift;
 835     my $content = shift;
 836     my @list = @{ $content };
 837     my $attrs_ref = shift @list;
 838     my %attrs = %{ $attrs_ref };
 839     if ($nodename eq 'string' and !exists $attrs{"notr"}) {
 840         # Preserve whitespace.  Deal with it ourselves, below.
 841         my $message = getXMLstring($content, 1);
 842
 843         # We strip leading and trailing whitespace but
 844         # preserve whitespace within (e.g. newlines)
 845         $message =~ s/^\s+//;
 846         $message =~ s/\s+$//;
 847
 848         my $context = $attrs{'comment'};
 849         # Remove enclosing quotes from msgctxt
 850         $context =~ s/^["'](.*)["']/$1/ if $context;
 851         $message = $context . "\004" . $message if $context;
 852         add_message($message);
 853         my $comment = $attrs{'extracomment'};
 854         # Remove enclosing quotes from developer comments
 855         $comment =~ s/^["'](.*)["']/$1/ if $comment;
 856         $comments{$message} = $comment if $comment;
 857     } else {
 858         my $index = 0;
 859         while (scalar(@list) > 1) {
 860             my $type = shift @list;
 861             my $content = shift @list;
 862             if (!$type || "$type" eq "1") {
 863                 next;
 864             } else {
 865                 traverse_qtdesigner($type, $content);
 866             }
 867         }
 868     }
 869 }
 870
 871 sub type_qtdesigner {
 872     ### For translatable Qt Designer XML files ###
 873     #
 874     # Specs:
 875     #
 876     # - http://qt-project.org/doc/qt-5.0/qtlinguist/linguist-ts-file-format.html
 877     # - http://qt-project.org/doc/qt-5.0/qtdesigner/designer-ui-file-format.html
 878     #
 879     # <string> tag attributes:
 880     #
 881     # notr="true" means the string is not translatable
 882     # extracomment maps to a developer comment in gettext
 883     # comment corresponds to "disambiguation" in the Qt Linguist API, and maps
 884     # to msgctxt in gettext
 885     #
 886     # Example:
 887     #
 888     # <string comment="Button" extracomment="TRANSLATORS: refers to the
 889     # action of accepting something">Ok</string>
 890
 891     my $tree = readXml($input);
 892     my @tree_nodes = @{ $tree };
 893     my $node = shift @tree_nodes;
 894     while (!$node || "$node" eq "1") {
 895         shift @tree_nodes;
 896         $node = shift @tree_nodes;
 897     }
 898     my $content = shift @tree_nodes;
 899     traverse_qtdesigner($node, $content);
 900
 901 }
 902
 903 sub type_glade {
 904     ### For translatable Glade XML files ###
 905
 906     my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
 907
 908     while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
 909         # Glade sometimes uses tags that normally mark translatable things for
 910         # little bits of non-translatable content. We work around this by not
 911         # translating strings that only includes something like label4 or window1.
 912         add_message(entity_decode($2)) unless $2 =~ /^(window|label|dialog)[0-9]+$/;
 913     }
 914
 915     while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
 916         for my $item (split (/\n/, $1)) {
 917             add_message(entity_decode($item));
 918         }
 919     }
 920
 921     ## handle new glade files
 922     while ($input =~ /<(\w+)\s+[^>]*translatable\s*=\s*["']yes["'](?:\s+[^>]*context\s*=\s*["']([^"']*)["'])?(?:\s+[^>]*comments\s*=\s*["']([^"']*)["'])?[^>]*>([^<]+)<\/\1>/sg) {
 923         if (!($4 =~ /^(window|label)[0-9]+$/)) {
 924             my $message = entity_decode($4);
 925             if (defined($2)) {
 926                 $message = entity_decode($2) . "\004" . $message;
 927             }
 928             add_message($message);
 929             if (defined($3)) {
 930                 $comments{$message} = entity_decode($3) ;
 931             }
 932         }
 933     }
 934     while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
 935         add_message(entity_decode_minimal($2));
 936     }
 937 }
 938
 939 sub type_tlk {
 940     my ($ftype, $fvers, $langid, $strcount, $stroff);
 941     my $count = 0;
 942     my $pos = 0;
 943     my @inputa = split (//, $input, 21);
 944     my $foo;
 945     my $strdata;
 946
 947     $ftype = substr ($input, 0, 3);
 948     $fvers = substr ($input, 4, 7);
 949     $langid = unpack ("L", $inputa[8] . $inputa[9] .
 950                       $inputa[10] . $inputa[11]);
 951     $strcount = unpack ("L", $inputa[12] . $inputa[13] .
 952                         $inputa[14] . $inputa[15]);
 953     $stroff = unpack ("L", $inputa[16] . $inputa[17] .
 954                       $inputa[18] . $inputa[19]);
 955
 956     use bytes;
 957     $strdata = bytes::substr ($input, $stroff);
 958
 959     my $sinpos = 20;
 960
 961     $foo = $inputa[$sinpos];
 962     $sinpos = 40 * 2000;
 963     @inputa = split (//, $foo, $sinpos + 1);
 964
 965     $pos = 0;
 966     while ($count < $strcount) {
 967         my ($flags, $soundref, $volvar, $pitch, $offset, $strsize, $sndlen) = 0;
 968
 969         if ($count > 0 && $count % 2000 == 0) {
 970             $foo = $inputa[$sinpos];
 971             my $numleft = ($strcount - $count);
 972             if ($numleft > 2000) {
 973                 $sinpos = 40 * 2000;
 974             } else {
 975                 $sinpos = 40 * $numleft;
 976             }
 977             @inputa = split (//, $foo, $sinpos + 1);
 978             my $numbytes = @inputa;
 979             $pos = 0;
 980         }
 981
 982
 983         $flags = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
 984                          $inputa[$pos + 2] . $inputa[$pos + 3]);
 985         $pos += 4;
 986         if ($flags & 0x0002) {
 987             $soundref = join ('', @inputa[$pos..$pos + 15]);
 988             $soundref =~ s/\0//g;
 989         }
 990         $pos += 16;
 991 # According to the Bioware Aurora Talk Table Format documentation
 992 # the VolumeVariance and PitchVariance DWORDs are not used
 993 # We increment the pos counter, but do not read the data, here
 994 #        $volvar = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
 995 #                          $inputa[$pos + 2] . $inputa[$pos + 3]);
 996         $pos += 4;
 997 #        $pitch = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
 998 #                         $inputa[$pos + 2] . $inputa[$pos + 3]);
 999         $pos += 4;
1000         $offset = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
1001                           $inputa[$pos + 2] . $inputa[$pos + 3])
1002             if ($flags & 0x0001);
1003         $pos += 4;
1004         $strsize = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
1005                            $inputa[$pos + 2] . $inputa[$pos + 3])
1006             if ($flags & 0x0001);
1007         $pos += 4;
1008         $sndlen = unpack ("d", $inputa[$pos] . $inputa[$pos + 1] .
1009                           $inputa[$pos + 2] . $inputa[$pos + 3])
1010             if ($flags & 0x0004);
1011         $pos += 4;
1012
1013         if (defined $strsize && $strsize > 0) {
1014             my $message = substr ($strdata, $offset, $strsize);
1015             if (defined $message) {
1016                 use Encode;
1017                 Encode::from_to ($message, "iso-8859-1", "UTF-8");
1018                 add_message($message);
1019                 if ($message =~ /^Bad Strref$/ ) {
1020                     $comments{$message} = "DO NOT Translate this Entry.";
1021                     $comments{$message} .= "\nTLK:position=$count";
1022                } else {
1023                     $comments{$message} = "TLK:position=$count";
1024                     $comments{$message} .= "; TLK:sndresref=$soundref"
1025                         if (defined $soundref && $soundref ne "");
1026                     $comments{$message} .= "; TLK:sndlen=$sndlen"
1027                         if (defined $sndlen && $sndlen != 0);
1028                 }
1029             } else {
1030                 print STDERR "Missing message? ID: $count\n";
1031             }
1032         }
1033         $count++;
1034     }
1035 }
1036
1037 sub msg_write {
1038     my @msgids;
1039     if (%count)
1040     {
1041         @msgids = sort { $count{$a} <=> $count{$b} } keys %count;
1042     }
1043     else
1044     {
1045         @msgids = @messages_sorted;
1046     }
1047     for my $message (@msgids)
1048     {
1049         my $offsetlines = 1;
1050         my $context = undef;
1051         $offsetlines++ if $message =~ /%/;
1052         if (defined ($comments{$message}))
1053         {
1054                 while ($comments{$message} =~ m/\n/g)
1055                 {
1056                     $offsetlines++;
1057                 }
1058         }
1059         print OUT "# ".($loc{$message} - $offsetlines).  " \"$FILE\"\n"
1060                 if defined $loc{$message};
1061         print OUT "/* ".$comments{$message}." */\n"
1062                 if defined $comments{$message};
1063         print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
1064
1065         if ($message =~ /(.*)\004(.*)/s) {
1066             $context = $1;
1067             $message = $2;
1068         }
1069         my @lines = split (/\n/, $message, -1);
1070         for (my $n = 0; $n < @lines; $n++)
1071         {
1072             if ($n == 0)
1073             {
1074                 if (defined $context)
1075                 {
1076                      if ($NOMSGCTXT_ARG)
1077                      {
1078                           print OUT "char *s = N_(\"", $context, "|";
1079                      }
1080                      else
1081                      {
1082                           print OUT "char *s = C_(\"", $context, "\", \"";
1083                      }
1084                 }
1085                 else
1086                 {
1087                      print OUT "char *s = N_(\"";
1088                 }
1089             }
1090             else
1091             {
1092                 print OUT "             \"";
1093             }
1094
1095             print OUT escape($lines[$n]);
1096
1097             if ($n < @lines - 1)
1098             {
1099                 print OUT "\\n\"\n";
1100             }
1101             else
1102             {
1103                 print OUT "\");\n";
1104             }
1105         }
1106     }
1107 }
1108