intltool-merge.in

   1 #!@INTLTOOL_PERL@ -w
   2 # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4  -*-
   3
   4 #
   5 #  The Intltool Message Merger
   6 #
   7 #  Copyright (C) 2000, 2003 Free Software Foundation.
   8 #  Copyright (C) 2000, 2001 Eazel, Inc
   9 #
  10 #  Intltool is free software; you can redistribute it and/or
  11 #  modify it under the terms of the GNU General Public License
  12 #  version 2 published by the Free Software Foundation.
  13 #
  14 #  Intltool is distributed in the hope that it will be useful,
  15 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17 #  General Public License for more details.
  18 #
  19 #  You should have received a copy of the GNU General Public License
  20 #  along with this program; if not, write to the Free Software
  21 #  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  22 #
  23 #  As a special exception to the GNU General Public License, if you
  24 #  distribute this file as part of a program that contains a
  25 #  configuration script generated by Autoconf, you may include it under
  26 #  the same distribution terms that you use for the rest of that program.
  27 #
  28 #  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
  29 #            Kenneth Christiansen <kenneth@gnu.org>
  30 #            Darin Adler <darin@bentspoon.com>
  31 #
  32 #  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
  33 #
  34
  35 ## Release information
  36 my $PROGRAM = "intltool-merge";
  37 my $PACKAGE = "intltool";
  38 my $VERSION = "0.33";
  39
  40 ## Loaded modules
  41 use strict;
  42 use Getopt::Long;
  43 use Text::Wrap;
  44 use File::Basename;
  45
  46 my $must_end_tag      = -1;
  47 my $last_depth        = -1;
  48 my $translation_depth = -1;
  49 my @tag_stack = ();
  50 my @entered_tag = ();
  51 my @translation_strings = ();
  52 my $leading_space = "";
  53
  54 ## Scalars used by the option stuff
  55 my $HELP_ARG = 0;
  56 my $VERSION_ARG = 0;
  57 my $BA_STYLE_ARG = 0;
  58 my $XML_STYLE_ARG = 0;
  59 my $KEYS_STYLE_ARG = 0;
  60 my $DESKTOP_STYLE_ARG = 0;
  61 my $SCHEMAS_STYLE_ARG = 0;
  62 my $RFC822DEB_STYLE_ARG = 0;
  63 my $QUIET_ARG = 0;
  64 my $PASS_THROUGH_ARG = 0;
  65 my $UTF8_ARG = 0;
  66 my $MULTIPLE_OUTPUT = 0;
  67 my $cache_file;
  68
  69 ## Handle options
  70 GetOptions
  71 (
  72  "help" => \$HELP_ARG,
  73  "version" => \$VERSION_ARG,
  74  "quiet|q" => \$QUIET_ARG,
  75  "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
  76  "ba-style|b" => \$BA_STYLE_ARG,
  77  "xml-style|x" => \$XML_STYLE_ARG,
  78  "keys-style|k" => \$KEYS_STYLE_ARG,
  79  "desktop-style|d" => \$DESKTOP_STYLE_ARG,
  80  "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
  81  "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
  82  "pass-through|p" => \$PASS_THROUGH_ARG,
  83  "utf8|u" => \$UTF8_ARG,
  84  "multiple-output|m" => \$MULTIPLE_OUTPUT,
  85  "cache|c=s" => \$cache_file
  86  ) or &error;
  87
  88 my $PO_DIR;
  89 my $FILE;
  90 my $OUTFILE;
  91
  92 my %po_files_by_lang = ();
  93 my %translations = ();
  94 my $iconv = $ENV{"ICONV"} || $ENV{"INTLTOOL_ICONV"} || "/usr/bin/iconv";
  95
  96 # Use this instead of \w for XML files to handle more possible characters.
  97 my $w = "[-A-Za-z0-9._:]";
  98
  99 # XML quoted string contents
 100 my $q = "[^\\\"]*";
 101
 102 ## Check for options.
 103
 104 if ($VERSION_ARG)
 105 {
 106         &print_version;
 107 }
 108 elsif ($HELP_ARG)
 109 {
 110         &print_help;
 111 }
 112 elsif ($BA_STYLE_ARG && @ARGV > 2)
 113 {
 114         &utf8_sanity_check;
 115         &preparation;
 116         &print_message;
 117         &ba_merge_translations;
 118         &finalize;
 119 }
 120 elsif ($XML_STYLE_ARG && @ARGV > 2)
 121 {
 122         &utf8_sanity_check;
 123         &preparation;
 124         &print_message;
 125         &xml_merge_output;
 126         &finalize;
 127 }
 128 elsif ($KEYS_STYLE_ARG && @ARGV > 2)
 129 {
 130         &utf8_sanity_check;
 131         &preparation;
 132         &print_message;
 133         &keys_merge_translations;
 134         &finalize;
 135 }
 136 elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
 137 {
 138         &utf8_sanity_check;
 139         &preparation;
 140         &print_message;
 141         &desktop_merge_translations;
 142         &finalize;
 143 }
 144 elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
 145 {
 146         &utf8_sanity_check;
 147         &preparation;
 148         &print_message;
 149         &schemas_merge_translations;
 150         &finalize;
 151 }
 152 elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
 153 {
 154         &preparation;
 155         &print_message;
 156         &rfc822deb_merge_translations;
 157         &finalize;
 158 }
 159 else
 160 {
 161         &print_help;
 162 }
 163
 164 exit;
 165
 166 ## Sub for printing release information
 167 sub print_version
 168 {
 169     print <<_EOF_;
 170 ${PROGRAM} (${PACKAGE}) ${VERSION}
 171 Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
 172
 173 Copyright (C) 2000-2003 Free Software Foundation, Inc.
 174 Copyright (C) 2000-2001 Eazel, Inc.
 175 This is free software; see the source for copying conditions.  There is NO
 176 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 177 _EOF_
 178     exit;
 179 }
 180
 181 ## Sub for printing usage information
 182 sub print_help
 183 {
 184     print <<_EOF_;
 185 Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
 186 Generates an output file that includes some localized attributes from an
 187 untranslated source file.
 188
 189 Mandatory options: (exactly one must be specified)
 190   -b, --ba-style         includes translations in the bonobo-activation style
 191   -d, --desktop-style    includes translations in the desktop style
 192   -k, --keys-style       includes translations in the keys style
 193   -s, --schemas-style    includes translations in the schemas style
 194   -r, --rfc822deb-style  includes translations in the RFC822 style
 195   -x, --xml-style        includes translations in the standard xml style
 196
 197 Other options:
 198   -u, --utf8             convert all strings to UTF-8 before merging
 199                          (default for everything except RFC822 style)
 200   -p, --pass-through     deprecated, does nothing and issues a warning
 201   -m, --multiple-output  output one localized file per locale, instead of
 202                          a single file containing all localized elements
 203   -c, --cache=FILE       specify cache file name
 204                          (usually \$top_builddir/po/.intltool-merge-cache)
 205   -q, --quiet            suppress most messages
 206       --help             display this help and exit
 207       --version          output version information and exit
 208
 209 Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
 210 or send email to <xml-i18n-tools\@gnome.org>.
 211 _EOF_
 212     exit;
 213 }
 214
 215
 216 ## Sub for printing error messages
 217 sub print_error
 218 {
 219     print STDERR "Try `${PROGRAM} --help' for more information.\n";
 220     exit;
 221 }
 222
 223
 224 sub print_message
 225 {
 226     print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
 227 }
 228
 229
 230 sub preparation
 231 {
 232     $PO_DIR = $ARGV[0];
 233     $FILE = $ARGV[1];
 234     $OUTFILE = $ARGV[2];
 235
 236     &gather_po_files;
 237     &get_translation_database;
 238 }
 239
 240 # General-purpose code for looking up translations in .po files
 241
 242 sub po_file2lang
 243 {
 244     my ($tmp) = @_;
 245     $tmp =~ s/^.*\/(.*)\.po$/$1/;
 246     return $tmp;
 247 }
 248
 249 sub gather_po_files
 250 {
 251     for my $po_file (glob "$PO_DIR/*.po") {
 252         $po_files_by_lang{po_file2lang($po_file)} = $po_file;
 253     }
 254 }
 255
 256 sub get_local_charset
 257 {
 258     my ($encoding) = @_;
 259     my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";
 260
 261     # seek character encoding aliases in charset.alias (glib)
 262
 263     if (open CHARSET_ALIAS, $alias_file)
 264     {
 265         while (<CHARSET_ALIAS>)
 266         {
 267             next if /^\#/;
 268             return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
 269         }
 270
 271         close CHARSET_ALIAS;
 272     }
 273
 274     # if not found, return input string
 275
 276     return $encoding;
 277 }
 278
 279 sub get_po_encoding
 280 {
 281     my ($in_po_file) = @_;
 282     my $encoding = "";
 283
 284     open IN_PO_FILE, $in_po_file or die;
 285     while (<IN_PO_FILE>)
 286     {
 287         ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
 288         if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
 289         {
 290             $encoding = $1;
 291             last;
 292         }
 293     }
 294     close IN_PO_FILE;
 295
 296     if (!$encoding)
 297     {
 298         print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
 299         $encoding = "ISO-8859-1";
 300     }
 301
 302     system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
 303     if ($?) {
 304         $encoding = get_local_charset($encoding);
 305     }
 306
 307     return $encoding
 308 }
 309
 310 sub utf8_sanity_check
 311 {
 312     print STDERR "Warning: option --pass-through has been removed.\n" if $PASS_THROUGH_ARG;
 313     $UTF8_ARG = 1;
 314 }
 315
 316 sub get_translation_database
 317 {
 318     if ($cache_file) {
 319         &get_cached_translation_database;
 320     } else {
 321         &create_translation_database;
 322     }
 323 }
 324
 325 sub get_newest_po_age
 326 {
 327     my $newest_age;
 328
 329     foreach my $file (values %po_files_by_lang)
 330     {
 331         my $file_age = -M $file;
 332         $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
 333     }
 334
 335     $newest_age = 0 if !$newest_age;
 336
 337     return $newest_age;
 338 }
 339
 340 sub create_cache
 341 {
 342     print "Generating and caching the translation database\n" unless $QUIET_ARG;
 343
 344     &create_translation_database;
 345
 346     open CACHE, ">$cache_file" || die;
 347     print CACHE join "\x01", %translations;
 348     close CACHE;
 349 }
 350
 351 sub load_cache
 352 {
 353     print "Found cached translation database\n" unless $QUIET_ARG;
 354
 355     my $contents;
 356     open CACHE, "<$cache_file" || die;
 357     {
 358         local $/;
 359         $contents = <CACHE>;
 360     }
 361     close CACHE;
 362     %translations = split "\x01", $contents;
 363 }
 364
 365 sub get_cached_translation_database
 366 {
 367     my $cache_file_age = -M $cache_file;
 368     if (defined $cache_file_age)
 369     {
 370         if ($cache_file_age <= &get_newest_po_age)
 371         {
 372             &load_cache;
 373             return;
 374         }
 375         print "Found too-old cached translation database\n" unless $QUIET_ARG;
 376     }
 377
 378     &create_cache;
 379 }
 380
 381 sub create_translation_database
 382 {
 383     for my $lang (keys %po_files_by_lang)
 384     {
 385         my $po_file = $po_files_by_lang{$lang};
 386
 387         if ($UTF8_ARG)
 388         {
 389             my $encoding = get_po_encoding ($po_file);
 390
 391             if (lc $encoding eq "utf-8")
 392             {
 393                 open PO_FILE, "<$po_file";
 394             }
 395             else
 396             {
 397                 print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
 398
 399                 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
 400             }
 401         }
 402         else
 403         {
 404             open PO_FILE, "<$po_file";
 405         }
 406
 407         my $nextfuzzy = 0;
 408         my $inmsgid = 0;
 409         my $inmsgstr = 0;
 410         my $msgid = "";
 411         my $msgstr = "";
 412
 413         while (<PO_FILE>)
 414         {
 415             $nextfuzzy = 1 if /^#, fuzzy/;
 416
 417             if (/^msgid "((\\.|[^\\])*)"/ )
 418             {
 419                 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
 420                 $msgid = "";
 421                 $msgstr = "";
 422
 423                 if ($nextfuzzy) {
 424                     $inmsgid = 0;
 425                 } else {
 426                     $msgid = unescape_po_string($1);
 427                     $inmsgid = 1;
 428                 }
 429                 $inmsgstr = 0;
 430                 $nextfuzzy = 0;
 431             }
 432
 433             if (/^msgstr "((\\.|[^\\])*)"/)
 434             {
 435                 $msgstr = unescape_po_string($1);
 436                 $inmsgstr = 1;
 437                 $inmsgid = 0;
 438             }
 439
 440             if (/^"((\\.|[^\\])*)"/)
 441             {
 442                 $msgid .= unescape_po_string($1) if $inmsgid;
 443                 $msgstr .= unescape_po_string($1) if $inmsgstr;
 444             }
 445         }
 446         $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
 447     }
 448 }
 449
 450 sub finalize
 451 {
 452 }
 453
 454 sub unescape_one_sequence
 455 {
 456     my ($sequence) = @_;
 457
 458     return "\\" if $sequence eq "\\\\";
 459     return "\"" if $sequence eq "\\\"";
 460     return "\n" if $sequence eq "\\n";
 461     return "\r" if $sequence eq "\\r";
 462     return "\t" if $sequence eq "\\t";
 463     return "\b" if $sequence eq "\\b";
 464     return "\f" if $sequence eq "\\f";
 465     return "\a" if $sequence eq "\\a";
 466     return chr(11) if $sequence eq "\\v"; # vertical tab, see ascii(7)
 467
 468     return chr(hex($1)) if ($sequence =~ /\\x([0-9a-fA-F]{2})/);
 469     return chr(oct($1)) if ($sequence =~ /\\([0-7]{3})/);
 470
 471     # FIXME: Is \0 supported as well? Kenneth and Rodney don't want it, see bug #48489
 472
 473     return $sequence;
 474 }
 475
 476 sub unescape_po_string
 477 {
 478     my ($string) = @_;
 479
 480     $string =~ s/(\\x[0-9a-fA-F]{2}|\\[0-7]{3}|\\.)/unescape_one_sequence($1)/eg;
 481
 482     return $string;
 483 }
 484
 485 ## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
 486 ## > in the entity. For further info please look at #84738.
 487 sub entity_decode
 488 {
 489     local ($_) = @_;
 490
 491     s/&apos;/'/g; # '
 492     s/&quot;/"/g; # "
 493     s/&amp;/&/g;
 494     s/&lt;/</g;
 495
 496     return $_;
 497 }
 498
 499 # entity_encode: (string)
 500 #
 501 # Encode the given string to XML format (encode '<' etc).
 502
 503 sub entity_encode
 504 {
 505     my ($pre_encoded) = @_;
 506
 507     my @list_of_chars = unpack ('C*', $pre_encoded);
 508
 509     # with UTF-8 we only encode minimalistic
 510     return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
 511 }
 512
 513 sub entity_encode_int_minimalist
 514 {
 515     return "&quot;" if $_ == 34;
 516     return "&amp;" if $_ == 38;
 517     return "&apos;" if $_ == 39;
 518     return "&lt;" if $_ == 60;
 519     return chr $_;
 520 }
 521
 522 sub entity_encoded_translation
 523 {
 524     my ($lang, $string) = @_;
 525
 526     my $translation = $translations{$lang, $string};
 527     return $string if !$translation;
 528     return entity_encode ($translation);
 529 }
 530
 531 ## XML (bonobo-activation specific) merge code
 532
 533 sub ba_merge_translations
 534 {
 535     my $source;
 536
 537     {
 538        local $/; # slurp mode
 539        open INPUT, "<$FILE" or die "can't open $FILE: $!";
 540        $source = <INPUT>;
 541        close INPUT;
 542     }
 543
 544     open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
 545
 546     while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
 547     {
 548         print OUTPUT $1;
 549
 550         my $node = $2 . "\n";
 551
 552         my @strings = ();
 553         $_ = $node;
 554         while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
 555              push @strings, entity_decode($3);
 556         }
 557         print OUTPUT;
 558
 559         my %langs;
 560         for my $string (@strings)
 561         {
 562             for my $lang (keys %po_files_by_lang)
 563             {
 564                 $langs{$lang} = 1 if $translations{$lang, $string};
 565             }
 566         }
 567
 568         for my $lang (sort keys %langs)
 569         {
 570             $_ = $node;
 571             s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
 572             s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
 573             print OUTPUT;
 574         }
 575     }
 576
 577     print OUTPUT $source;
 578
 579     close OUTPUT;
 580 }
 581
 582
 583 ## XML (non-bonobo-activation) merge code
 584
 585
 586 # Process tag attributes
 587 #   Only parameter is a HASH containing attributes -> values mapping
 588 sub getAttributeString
 589 {
 590     my $sub = shift;
 591     my $do_translate = shift || 0;
 592     my $language = shift || "";
 593     my $result = "";
 594     my $translate = shift;
 595     foreach my $e (reverse(sort(keys %{ $sub }))) {
 596         my $key    = $e;
 597         my $string = $sub->{$e};
 598         my $quote = '"';
 599
 600         $string =~ s/^[\s]+//;
 601         $string =~ s/[\s]+$//;
 602
 603         if ($string =~ /^'.*'$/)
 604         {
 605             $quote = "'";
 606         }
 607         $string =~ s/^['"]//g;
 608         $string =~ s/['"]$//g;
 609
 610         if ($do_translate && $key =~ /^_/) {
 611             $key =~ s|^_||g;
 612             if ($language) {
 613
 614                 # Handle translation
 615                 #
 616                 my $decode_string = entity_decode($string);
 617                 my $translation = $translations{$language, $decode_string};
 618                 if ($translation) {
 619                     $translation = entity_encode($translation);
 620                     $string = $translation;
 621                     $$translate = 2;
 622                 } else {
 623                     $$translate = 2; # we still want translations for deep nesting (FIXME: this will cause
 624                                      # problems since we might get untranslated duplicated entries, but with xml:lang set)
 625                     # Fix would be to set it here to eg. 3, and do a check in traverse() to see if any of the containing tags
 626                     # really need translation, and only emit "translation" if there is (this means parsing same data twice)
 627                 }
 628             } else {
 629                  $$translate = 2 if ($translate && (!$$translate)); # watch not to "overwrite" if $translate == 2
 630             }
 631         }
 632
 633         $result .= " $key=$quote$string$quote";
 634     }
 635     return $result;
 636 }
 637
 638 # Returns a translatable string from XML node, it works on contents of every node in XML::Parser tree
 639 #   doesn't support nesting of translatable tags (i.e. <_blah>this <_doh>doesn't</_doh> work</_blah> -- besides
 640 #   can you define the correct semantics for this?)
 641 #
 642
 643 sub getXMLstring
 644 {
 645     my $ref = shift;
 646     my @list = @{ $ref };
 647     my $result = "";
 648
 649     my $count = scalar(@list);
 650     my $attrs = $list[0];
 651     my $index = 1;
 652
 653     while ($index < $count) {
 654         my $type = $list[$index];
 655         my $content = $list[$index+1];
 656         if (! $type ) {
 657             # We've got CDATA
 658             if ($content) {
 659                 # lets strip the whitespace here, and *ONLY* here
 660                 $content =~ s/\s+/ /gs if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/)));
 661                 $result .= ($content);
 662             } else {
 663                 #print "no cdata content when expected it\n"; # is this possible, is this ok?
 664                 # what to do if this happens?
 665                 # Did I mention that I hate XML::Parser tree style?
 666             }
 667         } else {
 668             # We've got another element
 669             $result .= "<$type";
 670             $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
 671             if ($content) {
 672                 my $subresult = getXMLstring($content);
 673                 if ($subresult) {
 674                     $result .= ">".$subresult . "</$type>";
 675                 } else {
 676                     $result .= "/>";
 677                 }
 678             } else {
 679                 $result .= "/>";
 680             }
 681         }
 682         $index += 2;
 683     }
 684     return $result;
 685 }
 686
 687 # Translate list of nodes if necessary
 688 sub translate_subnodes
 689 {
 690     my $fh = shift;
 691     my $content = shift;
 692     my $language = shift || "";
 693     my $singlelang = shift || 0;
 694
 695     my @nodes = @{ $content };
 696
 697     my $count = scalar(@nodes);
 698     my $index = 0;
 699     while ($index < $count) {
 700         my $type = $nodes[$index];
 701         my $rest = $nodes[$index+1];
 702         if ($singlelang) {
 703             my $oldMO = $MULTIPLE_OUTPUT;
 704             $MULTIPLE_OUTPUT = 1;
 705             traverse($fh, $type, $rest, $language);
 706             $MULTIPLE_OUTPUT = $oldMO;
 707         } else {
 708             traverse($fh, $type, $rest, $language);
 709         }
 710         $index += 2;
 711     }
 712 }
 713
 714 sub traverse
 715 {
 716     my $fh = shift;
 717     my $nodename = shift;
 718     my $content = shift;
 719     my $language = shift || "";
 720
 721     if (!$nodename) {
 722         if ($content =~ /^[\s]*$/) {
 723             $leading_space .= $content;
 724         }
 725         print $fh $content;
 726     } else {
 727         # element
 728         my @all = @{ $content };
 729         my $attrs = shift @all;
 730         my $translate = 0;
 731         my $outattr = getAttributeString($attrs, 1, $language, \$translate);
 732
 733         if ($nodename =~ /^_/) {
 734             $translate = 1;
 735             $nodename =~ s/^_//;
 736         }
 737         my $lookup = '';
 738         print $fh "<$nodename", $outattr;
 739         if ($translate) {
 740             $lookup = getXMLstring($content);
 741             if (!((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/))) {
 742                 $lookup =~ s/^\s+//s;
 743                 $lookup =~ s/\s+$//s;
 744             }
 745
 746             if ($lookup || $translate == 2) {
 747                 my $translation = $translations{$language, $lookup};
 748                 if ($MULTIPLE_OUTPUT && ($translation || $translate == 2)) {
 749                     $translation = $lookup if (!$translation);
 750                     print $fh " xml:lang=\"", $language, "\"" if $language;
 751                     print $fh ">";
 752                     if ($translate == 2) {
 753                         translate_subnodes($fh, \@all, $language, 1);
 754                     } else {
 755                         print $fh $translation;
 756                     }
 757                     print $fh "</$nodename>";
 758
 759                     return; # this means there will be no same translation with xml:lang="$language"...
 760                             # if we want them both, just remove this "return"
 761                 } else {
 762                     print $fh ">";
 763                     if ($translate == 2) {
 764                         translate_subnodes($fh, \@all, $language, 1);
 765                     } else {
 766                         print $fh $lookup;
 767                     }
 768                     print $fh "</$nodename>";
 769                 }
 770             } else {
 771                 print $fh "/>";
 772             }
 773
 774             for my $lang (sort keys %po_files_by_lang) {
 775                     if ($MULTIPLE_OUTPUT && $lang ne "$language") {
 776                         next;
 777                     }
 778                     if ($lang) {
 779                         # Handle translation
 780                         #
 781                         my $translate = 0;
 782                         my $localattrs = getAttributeString($attrs, 1, $lang, \$translate);
 783                         my $translation = $translations{$lang, $lookup};
 784                         if ($translate && !$translation) {
 785                             $translation = $lookup;
 786                         }
 787
 788                         if ($translation || $translate) {
 789                             print $fh "\n";
 790                             $leading_space =~ s/.*\n//g;
 791                             print $fh $leading_space;
 792                             print $fh "<", $nodename, " xml:lang=\"", $lang, "\"", $localattrs, ">";
 793                             if ($translate == 2) {
 794                                translate_subnodes($fh, \@all, $lang, 1);
 795                             } else {
 796                                 print $fh $translation;
 797                             }
 798                             print $fh "</$nodename>";
 799                         }
 800                     }
 801             }
 802
 803         } else {
 804             my $count = scalar(@all);
 805             if ($count > 0) {
 806                 print $fh ">";
 807                 my $index = 0;
 808                 while ($index < $count) {
 809                     my $type = $all[$index];
 810                     my $rest = $all[$index+1];
 811                     traverse($fh, $type, $rest, $language);
 812                     $index += 2;
 813                 }
 814                 print $fh "</$nodename>";
 815             } else {
 816                 print $fh "/>";
 817             }
 818         }
 819     }
 820 }
 821
 822 sub intltool_tree_cdatastart
 823 {
 824     my $expat    = shift;
 825     my $clist = $expat->{Curlist};
 826     my $pos   = $#$clist;
 827
 828     push @$clist, 0 => $expat->original_string();
 829 }
 830
 831 sub intltool_tree_cdataend
 832 {
 833     my $expat    = shift;
 834     my $clist = $expat->{Curlist};
 835     my $pos   = $#$clist;
 836
 837     $clist->[$pos] .= $expat->original_string();
 838 }
 839
 840 sub intltool_tree_char
 841 {
 842     my $expat = shift;
 843     my $text  = shift;
 844     my $clist = $expat->{Curlist};
 845     my $pos   = $#$clist;
 846
 847     # Use original_string so that we retain escaped entities
 848     # in CDATA sections.
 849     #
 850     if ($pos > 0 and $clist->[$pos - 1] eq '0') {
 851         $clist->[$pos] .= $expat->original_string();
 852     } else {
 853         push @$clist, 0 => $expat->original_string();
 854     }
 855 }
 856
 857 sub intltool_tree_start
 858 {
 859     my $expat    = shift;
 860     my $tag      = shift;
 861     my @origlist = ();
 862
 863     # Use original_string so that we retain escaped entities
 864     # in attribute values.  We must convert the string to an
 865     # @origlist array to conform to the structure of the Tree
 866     # Style.
 867     #
 868     my @original_array = split /\x/, $expat->original_string();
 869     my $source         = $expat->original_string();
 870
 871     # Remove leading tag.
 872     #
 873     $source =~ s|^\s*<\s*(\S+)||s;
 874
 875     # Grab attribute key/value pairs and push onto @origlist array.
 876     #
 877     while ($source)
 878     {
 879        if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
 880        {
 881            $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
 882            push @origlist, $1;
 883            push @origlist, '"' . $2 . '"';
 884        }
 885        elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
 886        {
 887            $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
 888            push @origlist, $1;
 889            push @origlist, "'" . $2 . "'";
 890        }
 891        else
 892        {
 893            last;
 894        }
 895     }
 896
 897     my $ol = [ { @origlist } ];
 898
 899     push @{ $expat->{Lists} }, $expat->{Curlist};
 900     push @{ $expat->{Curlist} }, $tag => $ol;
 901     $expat->{Curlist} = $ol;
 902 }
 903
 904 sub readXml
 905 {
 906     my $filename = shift || return;
 907     if(!-f $filename) {
 908         die "ERROR Cannot find filename: $filename\n";
 909     }
 910
 911     my $ret = eval 'require XML::Parser';
 912     if(!$ret) {
 913         die "You must have XML::Parser installed to run $0\n\n";
 914     }
 915     my $xp = new XML::Parser(Style => 'Tree');
 916     $xp->setHandlers(Char => \&intltool_tree_char);
 917     $xp->setHandlers(Start => \&intltool_tree_start);
 918     $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
 919     $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
 920     my $tree = $xp->parsefile($filename);
 921
 922 # <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
 923 # would be:
 924 # [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
 925 # 0, "Howdy",  ref, [{}]], 0, "do" ] ]
 926
 927     return $tree;
 928 }
 929
 930 sub print_header
 931 {
 932     my $infile = shift;
 933     my $fh = shift;
 934     my $source;
 935
 936     if(!-f $infile) {
 937         die "ERROR Cannot find filename: $infile\n";
 938     }
 939
 940     print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
 941     {
 942         local $/;
 943         open DOCINPUT, "<${FILE}" or die;
 944         $source = <DOCINPUT>;
 945         close DOCINPUT;
 946     }
 947     if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
 948     {
 949         print $fh "$1\n";
 950     }
 951     elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
 952     {
 953         print $fh "$1\n";
 954     }
 955 }
 956
 957 sub parseTree
 958 {
 959     my $fh        = shift;
 960     my $ref       = shift;
 961     my $language  = shift || "";
 962
 963     my $name = shift @{ $ref };
 964     my $cont = shift @{ $ref };
 965     traverse($fh, $name, $cont, $language);
 966 }
 967
 968 sub xml_merge_output
 969 {
 970     my $source;
 971
 972     if ($MULTIPLE_OUTPUT) {
 973         for my $lang (sort keys %po_files_by_lang) {
 974             if ( ! -e $lang ) {
 975                 mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
 976             }
 977             open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
 978             my $tree = readXml($FILE);
 979             print_header($FILE, \*OUTPUT);
 980             parseTree(\*OUTPUT, $tree, $lang);
 981             close OUTPUT;
 982             print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
 983         }
 984     }
 985     open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
 986     my $tree = readXml($FILE);
 987     print_header($FILE, \*OUTPUT);
 988     parseTree(\*OUTPUT, $tree);
 989     close OUTPUT;
 990     print "CREATED $OUTFILE\n" unless $QUIET_ARG;
 991 }
 992
 993 sub keys_merge_translations
 994 {
 995     open INPUT, "<${FILE}" or die;
 996     open OUTPUT, ">${OUTFILE}" or die;
 997
 998     while (<INPUT>)
 999     {
1000         if (s/^(\s*)_(\w+=(.*))/$1$2/)
1001         {
1002             my $string = $3;
1003
1004             print OUTPUT;
1005
1006             my $non_translated_line = $_;
1007
1008             for my $lang (sort keys %po_files_by_lang)
1009             {
1010                 my $translation = $translations{$lang, $string};
1011                 next if !$translation;
1012
1013                 $_ = $non_translated_line;
1014                 s/(\w+)=.*/[$lang]$1=$translation/;
1015                 print OUTPUT;
1016             }
1017         }
1018         else
1019         {
1020             print OUTPUT;
1021         }
1022     }
1023
1024     close OUTPUT;
1025     close INPUT;
1026 }
1027
1028 sub desktop_merge_translations
1029 {
1030     open INPUT, "<${FILE}" or die;
1031     open OUTPUT, ">${OUTFILE}" or die;
1032
1033     while (<INPUT>)
1034     {
1035         if (s/^(\s*)_(\w+=(.*))/$1$2/)
1036         {
1037             my $string = $3;
1038
1039             print OUTPUT;
1040
1041             my $non_translated_line = $_;
1042
1043             for my $lang (sort keys %po_files_by_lang)
1044             {
1045                 my $translation = $translations{$lang, $string};
1046                 next if !$translation;
1047
1048                 $_ = $non_translated_line;
1049                 s/(\w+)=.*/${1}[$lang]=$translation/;
1050                 print OUTPUT;
1051             }
1052         }
1053         else
1054         {
1055             print OUTPUT;
1056         }
1057     }
1058
1059     close OUTPUT;
1060     close INPUT;
1061 }
1062
1063 sub schemas_merge_translations
1064 {
1065     my $source;
1066
1067     {
1068        local $/; # slurp mode
1069        open INPUT, "<$FILE" or die "can't open $FILE: $!";
1070        $source = <INPUT>;
1071        close INPUT;
1072     }
1073
1074     open OUTPUT, ">$OUTFILE" or die;
1075
1076     # FIXME: support attribute translations
1077
1078     # Empty nodes never need translation, so unmark all of them.
1079     # For example, <_foo/> is just replaced by <foo/>.
1080     $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1081
1082     while ($source =~ s/
1083                         (.*?)
1084                         (\s+)(<locale\ name="C">(\s*)
1085                             (<default>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/default>)?(\s*)
1086                             (<short>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/short>)?(\s*)
1087                             (<long>\s*(?:<!--[^>]*?-->\s*)?(.*?)\s*<\/long>)?(\s*)
1088                         <\/locale>)
1089                        //sx)
1090     {
1091         print OUTPUT $1;
1092
1093         my $locale_start_spaces = $2 ? $2 : '';
1094         my $default_spaces = $4 ? $4 : '';
1095         my $short_spaces = $7 ? $7 : '';
1096         my $long_spaces = $10 ? $10 : '';
1097         my $locale_end_spaces = $13 ? $13 : '';
1098         my $c_default_block = $3 ? $3 : '';
1099         my $default_string = $6 ? $6 : '';
1100         my $short_string = $9 ? $9 : '';
1101         my $long_string = $12 ? $12 : '';
1102
1103         print OUTPUT "$locale_start_spaces$c_default_block";
1104
1105         $default_string =~ s/\s+/ /g;
1106         $default_string = entity_decode($default_string);
1107         $short_string =~ s/\s+/ /g;
1108         $short_string = entity_decode($short_string);
1109         $long_string =~ s/\s+/ /g;
1110         $long_string = entity_decode($long_string);
1111
1112         for my $lang (sort keys %po_files_by_lang)
1113         {
1114             my $default_translation = $translations{$lang, $default_string};
1115             my $short_translation = $translations{$lang, $short_string};
1116             my $long_translation  = $translations{$lang, $long_string};
1117
1118             next if (!$default_translation && !$short_translation &&
1119                      !$long_translation);
1120
1121             print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1122
1123         print OUTPUT "$default_spaces";
1124
1125         if ($default_translation)
1126         {
1127             $default_translation = entity_encode($default_translation);
1128             print OUTPUT "<default>$default_translation</default>";
1129         }
1130
1131             print OUTPUT "$short_spaces";
1132
1133             if ($short_translation)
1134             {
1135                         $short_translation = entity_encode($short_translation);
1136                         print OUTPUT "<short>$short_translation</short>";
1137             }
1138
1139             print OUTPUT "$long_spaces";
1140
1141             if ($long_translation)
1142             {
1143                         $long_translation = entity_encode($long_translation);
1144                         print OUTPUT "<long>$long_translation</long>";
1145             }
1146
1147             print OUTPUT "$locale_end_spaces</locale>";
1148         }
1149     }
1150
1151     print OUTPUT $source;
1152
1153     close OUTPUT;
1154 }
1155
1156 sub rfc822deb_merge_translations
1157 {
1158     my %encodings = ();
1159     for my $lang (keys %po_files_by_lang) {
1160         $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1161     }
1162
1163     my $source;
1164
1165     $Text::Wrap::huge = 'overflow';
1166     $Text::Wrap::break = qr/\n|\s(?=\S)/;
1167
1168     {
1169        local $/; # slurp mode
1170        open INPUT, "<$FILE" or die "can't open $FILE: $!";
1171        $source = <INPUT>;
1172        close INPUT;
1173     }
1174
1175     open OUTPUT, ">${OUTFILE}" or die;
1176
1177     while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
1178     {
1179             my $sep = $1;
1180             my $non_translated_line = $3.$4;
1181             my $string = $5;
1182             my $underscore = length($2);
1183             next if $underscore eq 0 && $non_translated_line =~ /^#/;
1184             #  Remove [] dummy strings
1185             my $stripped = $string;
1186             $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1187             $stripped =~ s/\[\s[^\[\]]*\]$//;
1188             $non_translated_line .= $stripped;
1189
1190             print OUTPUT $sep.$non_translated_line;
1191
1192             if ($underscore)
1193             {
1194                 my @str_list = rfc822deb_split($underscore, $string);
1195
1196                 for my $lang (sort keys %po_files_by_lang)
1197                 {
1198                     my $is_translated = 1;
1199                     my $str_translated = '';
1200                     my $first = 1;
1201
1202                     for my $str (@str_list)
1203                     {
1204                         my $translation = $translations{$lang, $str};
1205
1206                         if (!$translation)
1207                         {
1208                             $is_translated = 0;
1209                             last;
1210                         }
1211
1212                         #  $translation may also contain [] dummy
1213                         #  strings, mostly to indicate an empty string
1214                         $translation =~ s/\[\s[^\[\]]*\]$//;
1215
1216                         if ($first)
1217                         {
1218                             if ($underscore eq 2)
1219                             {
1220                                 $str_translated .= $translation;
1221                             }
1222                             else
1223                             {
1224                                 $str_translated .=
1225                                     Text::Tabs::expand($translation) .
1226                                     "\n";
1227                             }
1228                         }
1229                         else
1230                         {
1231                             if ($underscore eq 2)
1232                             {
1233                                 $str_translated .= ', ' . $translation;
1234                             }
1235                             else
1236                             {
1237                                 $str_translated .= Text::Tabs::expand(
1238                                     Text::Wrap::wrap(' ', ' ', $translation)) .
1239                                     "\n .\n";
1240                             }
1241                         }
1242                         $first = 0;
1243
1244                         #  To fix some problems with Text::Wrap::wrap
1245                         $str_translated =~ s/(\n )+\n/\n .\n/g;
1246                     }
1247                     next unless $is_translated;
1248
1249                     $str_translated =~ s/\n \.\n$//;
1250                     $str_translated =~ s/\s+$//;
1251
1252                     $_ = $non_translated_line;
1253                     s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1254                     print OUTPUT;
1255                 }
1256             }
1257     }
1258     print OUTPUT "\n";
1259
1260     close OUTPUT;
1261     close INPUT;
1262 }
1263
1264 sub rfc822deb_split
1265 {
1266     # Debian defines a special way to deal with rfc822-style files:
1267     # when a value contain newlines, it consists of
1268     #   1.  a short form (first line)
1269     #   2.  a long description, all lines begin with a space,
1270     #       and paragraphs are separated by a single dot on a line
1271     # This routine returns an array of all paragraphs, and reformat
1272     # them.
1273     # When first argument is 2, the string is a comma separated list of
1274     # values.
1275     my $type = shift;
1276     my $text = shift;
1277     $text =~ s/^[ \t]//mg;
1278     return (split(/, */, $text, 0)) if $type ne 1;
1279     return ($text) if $text !~ /\n/;
1280
1281     $text =~ s/([^\n]*)\n//;
1282     my @list = ($1);
1283     my $str = '';
1284
1285     for my $line (split (/\n/, $text))
1286     {
1287         chomp $line;
1288         if ($line =~ /^\.\s*$/)
1289         {
1290             #  New paragraph
1291             $str =~ s/\s*$//;
1292             push(@list, $str);
1293             $str = '';
1294         }
1295         elsif ($line =~ /^\s/)
1296         {
1297             #  Line which must not be reformatted
1298             $str .= "\n" if length ($str) && $str !~ /\n$/;
1299             $line =~ s/\s+$//;
1300             $str .= $line."\n";
1301         }
1302         else
1303         {
1304             #  Continuation line, remove newline
1305             $str .= " " if length ($str) && $str !~ /\n$/;
1306             $str .= $line;
1307         }
1308     }
1309
1310     $str =~ s/\s*$//;
1311     push(@list, $str) if length ($str);
1312
1313     return @list;
1314 }
1315