skim/admin/intltool-merge.in

   1 #!@INTLTOOL_PERL@ -w
   2
   3 #
   4 #  The Intltool Message Merger
   5 #
   6 #  Copyright (C) 2000, 2003 Free Software Foundation.
   7 #  Copyright (C) 2000, 2001 Eazel, Inc
   8 #
   9 #  Intltool is free software; you can redistribute it and/or
  10 #  modify it under the terms of the GNU General Public License
  11 #  version 2 published by the Free Software Foundation.
  12 #
  13 #  Intltool is distributed in the hope that it will be useful,
  14 #  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 #  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  16 #  General Public License for more details.
  17 #
  18 #  You should have received a copy of the GNU General Public License
  19 #  along with this program; if not, write to the Free Software
  20 #  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
  21 #
  22 #  As a special exception to the GNU General Public License, if you
  23 #  distribute this file as part of a program that contains a
  24 #  configuration script generated by Autoconf, you may include it under
  25 #  the same distribution terms that you use for the rest of that program.
  26 #
  27 #  Authors:  Maciej Stachowiak <mjs@noisehavoc.org>
  28 #            Kenneth Christiansen <kenneth@gnu.org>
  29 #            Darin Adler <darin@bentspoon.com>
  30 #
  31 #  Proper XML UTF-8'ification written by Cyrille Chepelov <chepelov@calixo.net>
  32 #
  33
  34 ## Release information
  35 my $PROGRAM = "intltool-merge";
  36 my $PACKAGE = "intltool";
  37 my $VERSION = "0.30";
  38
  39 ## Loaded modules
  40 use strict;
  41 use Getopt::Long;
  42 use Text::Wrap;
  43 use File::Basename;
  44
  45 my $must_end_tag      = -1;
  46 my $last_depth        = -1;
  47 my $translation_depth = -1;
  48 my @tag_stack = ();
  49 my @entered_tag = ();
  50 my @translation_strings = ();
  51 my $leading_space = "";
  52
  53 ## Scalars used by the option stuff
  54 my $HELP_ARG = 0;
  55 my $VERSION_ARG = 0;
  56 my $BA_STYLE_ARG = 0;
  57 my $XML_STYLE_ARG = 0;
  58 my $KEYS_STYLE_ARG = 0;
  59 my $DESKTOP_STYLE_ARG = 0;
  60 my $SCHEMAS_STYLE_ARG = 0;
  61 my $RFC822DEB_STYLE_ARG = 0;
  62 my $QUIET_ARG = 0;
  63 my $PASS_THROUGH_ARG = 0;
  64 my $UTF8_ARG = 0;
  65 my $MULTIPLE_OUTPUT = 0;
  66 my $cache_file;
  67
  68 ## Handle options
  69 GetOptions
  70 (
  71  "help" => \$HELP_ARG,
  72  "version" => \$VERSION_ARG,
  73  "quiet|q" => \$QUIET_ARG,
  74  "oaf-style|o" => \$BA_STYLE_ARG, ## for compatibility
  75  "ba-style|b" => \$BA_STYLE_ARG,
  76  "xml-style|x" => \$XML_STYLE_ARG,
  77  "keys-style|k" => \$KEYS_STYLE_ARG,
  78  "desktop-style|d" => \$DESKTOP_STYLE_ARG,
  79  "schemas-style|s" => \$SCHEMAS_STYLE_ARG,
  80  "rfc822deb-style|r" => \$RFC822DEB_STYLE_ARG,
  81  "pass-through|p" => \$PASS_THROUGH_ARG,
  82  "utf8|u" => \$UTF8_ARG,
  83  "multiple-output|m" => \$MULTIPLE_OUTPUT,
  84  "cache|c=s" => \$cache_file
  85  ) or &error;
  86
  87 my $PO_DIR;
  88 my $FILE;
  89 my $OUTFILE;
  90
  91 my %po_files_by_lang = ();
  92 my %translations = ();
  93 my $iconv = $ENV{"INTLTOOL_ICONV"} || "iconv";
  94
  95 # Use this instead of \w for XML files to handle more possible characters.
  96 my $w = "[-A-Za-z0-9._:]";
  97
  98 # XML quoted string contents
  99 my $q = "[^\\\"]*";
 100
 101 ## Check for options.
 102
 103 if ($VERSION_ARG)
 104 {
 105         &print_version;
 106 }
 107 elsif ($HELP_ARG)
 108 {
 109         &print_help;
 110 }
 111 elsif ($BA_STYLE_ARG && @ARGV > 2)
 112 {
 113         &preparation;
 114         &print_message;
 115         &ba_merge_translations;
 116         &finalize;
 117 }
 118 elsif ($XML_STYLE_ARG && @ARGV > 2)
 119 {
 120         &utf8_sanity_check;
 121         &preparation;
 122         &print_message;
 123
 124     &xml_merge_output;
 125
 126         &finalize;
 127 }
 128 elsif ($KEYS_STYLE_ARG && @ARGV > 2)
 129 {
 130         &utf8_sanity_check;
 131         &preparation;
 132         &print_message;
 133         &keys_merge_translations;
 134         &finalize;
 135 }
 136 elsif ($DESKTOP_STYLE_ARG && @ARGV > 2)
 137 {
 138         &preparation;
 139         &print_message;
 140         &desktop_merge_translations;
 141         &finalize;
 142 }
 143 elsif ($SCHEMAS_STYLE_ARG && @ARGV > 2)
 144 {
 145         &preparation;
 146         &print_message;
 147         &schemas_merge_translations;
 148         &finalize;
 149 }
 150 elsif ($RFC822DEB_STYLE_ARG && @ARGV > 2)
 151 {
 152         &preparation;
 153         &print_message;
 154         &rfc822deb_merge_translations;
 155         &finalize;
 156 }
 157 else
 158 {
 159         &print_help;
 160 }
 161
 162 exit;
 163
 164 ## Sub for printing release information
 165 sub print_version
 166 {
 167     print <<_EOF_;
 168 ${PROGRAM} (${PACKAGE}) ${VERSION}
 169 Written by Maciej Stachowiak, Darin Adler and Kenneth Christiansen.
 170
 171 Copyright (C) 2000-2003 Free Software Foundation, Inc.
 172 Copyright (C) 2000-2001 Eazel, Inc.
 173 This is free software; see the source for copying conditions.  There is NO
 174 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
 175 _EOF_
 176     exit;
 177 }
 178
 179 ## Sub for printing usage information
 180 sub print_help
 181 {
 182     print <<_EOF_;
 183 Usage: ${PROGRAM} [OPTION]... PO_DIRECTORY FILENAME OUTPUT_FILE
 184 Generates an output file that includes some localized attributes from an
 185 untranslated source file.
 186
 187 Mandatory options: (exactly one must be specified)
 188   -b, --ba-style         includes translations in the bonobo-activation style
 189   -d, --desktop-style    includes translations in the desktop style
 190   -k, --keys-style       includes translations in the keys style
 191   -s, --schemas-style    includes translations in the schemas style
 192   -r, --rfc822deb-style  includes translations in the RFC822 style
 193   -x, --xml-style        includes translations in the standard xml style
 194
 195 Other options:
 196   -u, --utf8             convert all strings to UTF-8 before merging
 197   -p, --pass-through     use strings as found in .po files, without
 198                          conversion (STRONGLY unrecommended with -x)
 199   -m, --multiple-output  output one localized file per locale, instead of
 200                          a single file containing all localized elements
 201   -c, --cache=FILE       specify cache file name
 202                          (usually \$top_builddir/po/.intltool-merge-cache)
 203   -q, --quiet            suppress most messages
 204       --help             display this help and exit
 205       --version          output version information and exit
 206
 207 Report bugs to http://bugzilla.gnome.org/ (product name "$PACKAGE")
 208 or send email to <xml-i18n-tools\@gnome.org>.
 209 _EOF_
 210     exit;
 211 }
 212
 213
 214 ## Sub for printing error messages
 215 sub print_error
 216 {
 217     print STDERR "Try `${PROGRAM} --help' for more information.\n";
 218     exit;
 219 }
 220
 221
 222 sub print_message
 223 {
 224     print "Merging translations into $OUTFILE.\n" unless $QUIET_ARG;
 225 }
 226
 227
 228 sub preparation
 229 {
 230     $PO_DIR = $ARGV[0];
 231     $FILE = $ARGV[1];
 232     $OUTFILE = $ARGV[2];
 233
 234     &gather_po_files;
 235     &get_translation_database;
 236 }
 237
 238 # General-purpose code for looking up translations in .po files
 239
 240 sub po_file2lang
 241 {
 242     my ($tmp) = @_;
 243     $tmp =~ s/^.*\/(.*)\.po$/$1/;
 244     return $tmp;
 245 }
 246
 247 sub gather_po_files
 248 {
 249     for my $po_file (glob "$PO_DIR/*.po") {
 250         $po_files_by_lang{po_file2lang($po_file)} = $po_file;
 251     }
 252 }
 253
 254 sub get_local_charset
 255 {
 256     my ($encoding) = @_;
 257     my $alias_file = $ENV{"G_CHARSET_ALIAS"} || "/usr/lib/charset.alias";
 258
 259     # seek character encoding aliases in charset.alias (glib)
 260
 261     if (open CHARSET_ALIAS, $alias_file)
 262     {
 263         while (<CHARSET_ALIAS>)
 264         {
 265             next if /^\#/;
 266             return $1 if (/^\s*([-._a-zA-Z0-9]+)\s+$encoding\b/i)
 267         }
 268
 269         close CHARSET_ALIAS;
 270     }
 271
 272     # if not found, return input string
 273
 274     return $encoding;
 275 }
 276
 277 sub get_po_encoding
 278 {
 279     my ($in_po_file) = @_;
 280     my $encoding = "";
 281
 282     open IN_PO_FILE, $in_po_file or die;
 283     while (<IN_PO_FILE>)
 284     {
 285         ## example: "Content-Type: text/plain; charset=ISO-8859-1\n"
 286         if (/Content-Type\:.*charset=([-a-zA-Z0-9]+)\\n/)
 287         {
 288             $encoding = $1;
 289             last;
 290         }
 291     }
 292     close IN_PO_FILE;
 293
 294     if (!$encoding)
 295     {
 296         print STDERR "Warning: no encoding found in $in_po_file. Assuming ISO-8859-1\n" unless $QUIET_ARG;
 297         $encoding = "ISO-8859-1";
 298     }
 299
 300     system ("$iconv -f $encoding -t UTF-8 </dev/null 2>/dev/null");
 301     if ($?) {
 302         $encoding = get_local_charset($encoding);
 303     }
 304
 305     return $encoding
 306 }
 307
 308 sub utf8_sanity_check
 309 {
 310     if (!$UTF8_ARG)
 311     {
 312         if (!$PASS_THROUGH_ARG)
 313         {
 314             $PASS_THROUGH_ARG="1";
 315         }
 316     }
 317 }
 318
 319 sub get_translation_database
 320 {
 321     if ($cache_file) {
 322         &get_cached_translation_database;
 323     } else {
 324         &create_translation_database;
 325     }
 326 }
 327
 328 sub get_newest_po_age
 329 {
 330     my $newest_age;
 331
 332     foreach my $file (values %po_files_by_lang)
 333     {
 334         my $file_age = -M $file;
 335         $newest_age = $file_age if !$newest_age || $file_age < $newest_age;
 336     }
 337
 338     $newest_age = 0 if !$newest_age;
 339
 340     return $newest_age;
 341 }
 342
 343 sub create_cache
 344 {
 345     print "Generating and caching the translation database\n" unless $QUIET_ARG;
 346
 347     &create_translation_database;
 348
 349     open CACHE, ">$cache_file" || die;
 350     print CACHE join "\x01", %translations;
 351     close CACHE;
 352 }
 353
 354 sub load_cache
 355 {
 356     print "Found cached translation database\n" unless $QUIET_ARG;
 357
 358     my $contents;
 359     open CACHE, "<$cache_file" || die;
 360     {
 361         local $/;
 362         $contents = <CACHE>;
 363     }
 364     close CACHE;
 365     %translations = split "\x01", $contents;
 366 }
 367
 368 sub get_cached_translation_database
 369 {
 370     my $cache_file_age = -M $cache_file;
 371     if (defined $cache_file_age)
 372     {
 373         if ($cache_file_age <= &get_newest_po_age)
 374         {
 375             &load_cache;
 376             return;
 377         }
 378         print "Found too-old cached translation database\n" unless $QUIET_ARG;
 379     }
 380
 381     &create_cache;
 382 }
 383
 384 sub create_translation_database
 385 {
 386     for my $lang (keys %po_files_by_lang)
 387     {
 388         my $po_file = $po_files_by_lang{$lang};
 389
 390         if ($UTF8_ARG)
 391         {
 392             my $encoding = get_po_encoding ($po_file);
 393
 394             if (lc $encoding eq "utf-8")
 395             {
 396                 open PO_FILE, "<$po_file";
 397             }
 398             else
 399             {
 400                 print STDERR "WARNING: $po_file is not in UTF-8 but $encoding, converting...\n" unless $QUIET_ARG;;
 401
 402                 open PO_FILE, "$iconv -f $encoding -t UTF-8 $po_file|";
 403             }
 404         }
 405         else
 406         {
 407             open PO_FILE, "<$po_file";
 408         }
 409
 410         my $nextfuzzy = 0;
 411         my $inmsgid = 0;
 412         my $inmsgstr = 0;
 413         my $msgid = "";
 414         my $msgstr = "";
 415
 416         while (<PO_FILE>)
 417         {
 418             $nextfuzzy = 1 if /^#, fuzzy/;
 419
 420             if (/^msgid "((\\.|[^\\])*)"/ )
 421             {
 422                 $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
 423                 $msgid = "";
 424                 $msgstr = "";
 425
 426                 if ($nextfuzzy) {
 427                     $inmsgid = 0;
 428                 } else {
 429                     $msgid = unescape_po_string($1);
 430                     $inmsgid = 1;
 431                 }
 432                 $inmsgstr = 0;
 433                 $nextfuzzy = 0;
 434             }
 435
 436             if (/^msgstr "((\\.|[^\\])*)"/)
 437             {
 438                 $msgstr = unescape_po_string($1);
 439                 $inmsgstr = 1;
 440                 $inmsgid = 0;
 441             }
 442
 443             if (/^"((\\.|[^\\])*)"/)
 444             {
 445                 $msgid .= unescape_po_string($1) if $inmsgid;
 446                 $msgstr .= unescape_po_string($1) if $inmsgstr;
 447             }
 448         }
 449         $translations{$lang, $msgid} = $msgstr if $inmsgstr && $msgid && $msgstr;
 450     }
 451 }
 452
 453 sub finalize
 454 {
 455 }
 456
 457 sub unescape_one_sequence
 458 {
 459     my ($sequence) = @_;
 460
 461     return "\\" if $sequence eq "\\\\";
 462     return "\"" if $sequence eq "\\\"";
 463     return "\n" if $sequence eq "\\n";
 464
 465     # gettext also handles \n, \t, \b, \r, \f, \v, \a, \xxx (octal),
 466     # \xXX (hex) and has a comment saying they want to handle \u and \U.
 467
 468     return $sequence;
 469 }
 470
 471 sub unescape_po_string
 472 {
 473     my ($string) = @_;
 474
 475     $string =~ s/(\\.)/unescape_one_sequence($1)/eg;
 476
 477     return $string;
 478 }
 479
 480 ## NOTE: deal with < - &lt; but not > - &gt;  because it seems its ok to have
 481 ## > in the entity. For further info please look at #84738.
 482 sub entity_decode
 483 {
 484     local ($_) = @_;
 485
 486     s/&apos;/'/g; # '
 487     s/&quot;/"/g; # "
 488     s/&amp;/&/g;
 489     s/&lt;/</g;
 490
 491     return $_;
 492 }
 493
 494 # entity_encode: (string)
 495 #
 496 # Encode the given string to XML format (encode '<' etc). It also
 497 # encodes high bit if not in UTF-8 mode.
 498
 499 sub entity_encode
 500 {
 501     my ($pre_encoded) = @_;
 502
 503     my @list_of_chars = unpack ('C*', $pre_encoded);
 504
 505     if ($PASS_THROUGH_ARG)
 506     {
 507         return join ('', map (&entity_encode_int_even_high_bit, @list_of_chars));
 508     }
 509     else
 510     {
 511         # with UTF-8 we only encode minimalistic
 512         return join ('', map (&entity_encode_int_minimalist, @list_of_chars));
 513     }
 514 }
 515
 516 sub entity_encode_int_minimalist
 517 {
 518     return "&quot;" if $_ == 34;
 519     return "&amp;" if $_ == 38;
 520     return "&apos;" if $_ == 39;
 521     return "&lt;" if $_ == 60;
 522     return chr $_;
 523 }
 524
 525 sub entity_encode_int_even_high_bit
 526 {
 527     if ($_ > 127 || $_ == 34 || $_ == 38 || $_ == 39 || $_ == 60)
 528     {
 529         # the ($_ > 127) should probably be removed
 530         return "&#" . $_ . ";";
 531     }
 532     else
 533     {
 534         return chr $_;
 535     }
 536 }
 537
 538 sub entity_encoded_translation
 539 {
 540     my ($lang, $string) = @_;
 541
 542     my $translation = $translations{$lang, $string};
 543     return $string if !$translation;
 544     return entity_encode ($translation);
 545 }
 546
 547 ## XML (bonobo-activation specific) merge code
 548
 549 sub ba_merge_translations
 550 {
 551     my $source;
 552
 553     {
 554        local $/; # slurp mode
 555        open INPUT, "<$FILE" or die "can't open $FILE: $!";
 556        $source = <INPUT>;
 557        close INPUT;
 558     }
 559
 560     open OUTPUT, ">$OUTFILE" or die "can't open $OUTFILE: $!";
 561
 562     while ($source =~ s|^(.*?)([ \t]*<\s*$w+\s+($w+\s*=\s*"$q"\s*)+/?>)([ \t]*\n)?||s)
 563     {
 564         print OUTPUT $1;
 565
 566         my $node = $2 . "\n";
 567
 568         my @strings = ();
 569         $_ = $node;
 570         while (s/(\s)_($w+\s*=\s*"($q)")/$1$2/s) {
 571              push @strings, entity_decode($3);
 572         }
 573         print OUTPUT;
 574
 575         my %langs;
 576         for my $string (@strings)
 577         {
 578             for my $lang (keys %po_files_by_lang)
 579             {
 580                 $langs{$lang} = 1 if $translations{$lang, $string};
 581             }
 582         }
 583
 584         for my $lang (sort keys %langs)
 585         {
 586             $_ = $node;
 587             s/(\sname\s*=\s*)"($q)"/$1"$2-$lang"/s;
 588             s/(\s)_($w+\s*=\s*")($q)"/$1 . $2 . entity_encoded_translation($lang, $3) . '"'/seg;
 589             print OUTPUT;
 590         }
 591     }
 592
 593     print OUTPUT $source;
 594
 595     close OUTPUT;
 596 }
 597
 598
 599 ## XML (non-bonobo-activation) merge code
 600
 601 sub parseTree
 602 {
 603     my $fh        = shift;
 604     my $ref       = shift;
 605     my $depth     = shift || 0;
 606     my $language  = shift || "";
 607     my $not_cdata = 0;
 608     my $has_children;
 609
 610     $entered_tag[$depth + 1] = 0;
 611
 612     # Check to see if this is not a CDATA element.
 613     #
 614     foreach my $sub (@{ $ref }) {
 615         if (ref $sub eq 'ARRAY') {
 616             $not_cdata = 1;
 617         }
 618     }
 619
 620     foreach my $sub (@{ $ref }) {
 621
 622         # Handle empty nodes.
 623         #
 624         if (! $sub) {
 625             next;
 626         }
 627
 628         if (ref $sub eq 'ARRAY') {
 629
 630             # Process subnodes
 631             #
 632             $has_children = 0;
 633
 634             # Check to see if current tag has any elements that need to be translated.
 635             #
 636             if ($translation_depth == -1) {
 637                 foreach my $subsub (@{ $sub } ) {
 638                     if (ref $subsub eq 'HASH') {
 639                         foreach my $e (reverse(keys %{ $subsub })) {
 640                             if ($e =~ /^_/) {
 641                                 $translation_depth = $depth;
 642                             }
 643                         }
 644                     } elsif (ref $subsub eq 'ARRAY') {
 645                         $has_children = 1;
 646                     }
 647                 }
 648             }
 649
 650             my $current_tag = pop(@tag_stack);
 651             push @tag_stack, $current_tag;
 652             @translation_strings = ();
 653
 654             $must_end_tag = $depth;
 655             print $fh "<", $current_tag;
 656
 657             parseTree($fh, $sub, $depth + 1, $language);
 658
 659             # Close any open tags
 660             #
 661             if ($must_end_tag != -1) {
 662                 if ($must_end_tag < $depth) {
 663                     print $fh ">";
 664                 } else {
 665                     print $fh " />";
 666                         pop(@tag_stack);
 667                     if ($depth == $translation_depth) {
 668                         $translation_depth = -1;
 669                     }
 670                 }
 671                 $must_end_tag = -1;
 672             }
 673
 674             # Add ending tag(s), if needed
 675             #
 676             if ($entered_tag[$depth + 1] == 1) {
 677                 while ($last_depth > $depth) {
 678                     $last_depth--;
 679
 680                     print $fh "</", pop(@tag_stack), ">";
 681                 }
 682             }
 683             $last_depth = $depth;
 684
 685             # If beginning a translation block, then process for each language.
 686             #
 687             if ($translation_depth == $depth) {
 688                 my $do_language;
 689
 690                 # Skip languages that do not have translations.  Since we only
 691                 # do this check when $translation_depth == $depth, it will never
 692                 # happen for nodes inside a node with a translated element.
 693                 #
 694                 for my $lang (sort keys %po_files_by_lang) {
 695                     if ($has_children == 1) {
 696                         $do_language = 1;
 697                     } else {
 698                         # Skip this language if there is no translation
 699                         #
 700                         $do_language = 0;
 701                         foreach my $string (@translation_strings) {
 702                             my $decode_string = entity_decode($string);
 703                             my $translation = $translations{$lang, $decode_string};
 704                             if ($translation) {
 705                                 $do_language = 1;
 706                                 last;
 707                             }
 708                         }
 709                     }
 710
 711                     if ($do_language == 0) {
 712                        next;
 713                     }
 714
 715                     print $fh "\n";
 716                     $leading_space =~ s/.*\n//g;
 717                     print $fh $leading_space;
 718                     if ($MULTIPLE_OUTPUT && $lang ne "$language") {
 719                         next;
 720                     }
 721
 722                     print $fh "<", $current_tag, " xml:lang=\"", $lang, "\"";
 723                     $must_end_tag = $depth;
 724
 725                     parseTree($fh, $sub, $depth + 1, $lang);
 726
 727                     # Close any open tags
 728                     #
 729                     if ($must_end_tag != -1) {
 730                         if ($must_end_tag < $depth) {
 731                             print $fh ">";
 732                         } else {
 733                             print $fh " />";
 734                             pop(@tag_stack);
 735                             if ($depth == $translation_depth) {
 736                                 $translation_depth = -1;
 737                             }
 738                         }
 739                         $must_end_tag = -1;
 740                     }
 741
 742                     # Add ending tag(s), if needed
 743                     #
 744                     if ($entered_tag[$depth + 1] == 1) {
 745                         while ($last_depth > $depth + 1) {
 746                             $last_depth--;
 747
 748                             print $fh "</", pop(@tag_stack), ">";
 749                         }
 750                         print $fh "</", $current_tag, ">";
 751                     }
 752                 }
 753                 $translation_depth = -1;
 754                 $last_depth = $depth;
 755             }
 756             $leading_space = "";
 757
 758         } elsif (ref $sub eq 'HASH') {
 759
 760             # Process tag elements
 761             #
 762             foreach my $e (reverse(keys %{ $sub })) {
 763                 my $key    = $e;
 764                 my $string = $sub->{$e};
 765                 my $quote = '"';
 766
 767                 $string =~ s/^[\s]+//;
 768                 $string =~ s/[\s]+$//;
 769
 770                 if ($string =~ /^'.*'$/)
 771                 {
 772                    $quote = "'";
 773                 }
 774                 $string =~ s/^['"]//g;
 775                 $string =~ s/['"]$//g;
 776
 777                 if ($key =~ /^_/) {
 778                     $key =~ s|^_||g;
 779
 780                     if ($language) {
 781
 782                         # Handle translation
 783                         #
 784                         my $decode_string = entity_decode($string);
 785                         my $translation = $translations{$language, $decode_string};
 786                         if ($translation) {
 787                             $translation = entity_encode($translation);
 788                             $string = $translation;
 789                         }
 790                     }
 791                 }
 792
 793                 print $fh " $key=$quote$string$quote";
 794             }
 795
 796         } else {
 797
 798             # Handle tags and CDATA values
 799
 800             # Mark parent tag as having been entered.
 801             #
 802             $entered_tag[$depth] = 1;
 803
 804             # The last_depth flag allows us to determine if this tag should be
 805             # closed with "/>" or ">"
 806             #
 807             $last_depth = $depth;
 808
 809             # Close any open tags
 810             #
 811             if ($must_end_tag != -1) {
 812                 if ($must_end_tag < $depth) {
 813                     print $fh ">";
 814                 } else {
 815                     print $fh " />";
 816                     pop(@tag_stack);
 817                 }
 818                 $must_end_tag = -1;
 819             }
 820
 821             if ($sub =~ /^[\s]*$/) {
 822                 $leading_space .= $sub;
 823                 print $fh $sub;
 824             } elsif ($not_cdata) {
 825
 826                 # Handle tags
 827                 #
 828                 my $temp_tag = $sub;
 829
 830                 # Display key
 831                 #
 832                 if ($sub =~ /^_/) {
 833
 834                     $temp_tag =~ s|^_||g;
 835
 836                     if ($translation_depth == -1) {
 837                         $translation_depth = $depth;
 838                     }
 839                 }
 840
 841                 # Push the tag on the stack, it will get handled when the ARRAY
 842                 # for this tag is processed.
 843                 #
 844                 push(@tag_stack, $temp_tag);
 845
 846             } else {
 847
 848                 # Handle CDATA
 849                 #
 850                 my $string = $sub;
 851
 852                 $string =~ s/^[\s]+//;
 853                 $string =~ s/[\s]+$//;
 854
 855                 push(@translation_strings, $string);
 856
 857                 # Display CDATA
 858                 #
 859                 if ($language) {
 860
 861                     # Handle translation
 862                     #
 863                     my $decode_string = entity_decode($string);
 864                     my $translation = $translations{$language, $decode_string};
 865                     if ($translation) {
 866                         $translation = entity_encode($translation);
 867                         $string = $translation;
 868                     }
 869                 }
 870
 871                 print $fh $string;
 872             }
 873         }
 874     }
 875 }
 876
 877 sub intltool_tree_char
 878 {
 879     my $expat = shift;
 880     my $text  = shift;
 881     my $clist = $expat->{Curlist};
 882     my $pos   = $#$clist;
 883
 884     # Use original_string so that we retain escaped entities
 885     # in CDATA sections.
 886     #
 887     if ($pos > 0 and $clist->[$pos - 1] eq '0') {
 888         $clist->[$pos] .= $expat->original_string();
 889     } else {
 890         push @$clist, 0 => $expat->original_string();
 891     }
 892 }
 893
 894 sub intltool_tree_start
 895 {
 896     my $expat    = shift;
 897     my $tag      = shift;
 898     my @origlist = ();
 899
 900     # Use original_string so that we retain escaped entities
 901     # in attribute values.  We must convert the string to an
 902     # @origlist array to conform to the structure of the Tree
 903     # Style.
 904     #
 905     my @original_array = split /\x/, $expat->original_string();
 906     my $source         = $expat->original_string();
 907
 908     # Remove leading tag.
 909     #
 910     $source =~ s|^\s*<\s*(\S+)||s;
 911
 912     # Grab attribute key/value pairs and push onto @origlist array.
 913     #
 914     while ($source)
 915     {
 916        if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
 917        {
 918            $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
 919            push @origlist, $1;
 920            push @origlist, '"' . $2 . '"';
 921        }
 922        elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
 923        {
 924            $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
 925            push @origlist, $1;
 926            push @origlist, "'" . $2 . "'";
 927        }
 928        else
 929        {
 930            last;
 931        }
 932     }
 933
 934     my $ol = [ { @origlist } ];
 935
 936     push @{ $expat->{Lists} }, $expat->{Curlist};
 937     push @{ $expat->{Curlist} }, $tag => $ol;
 938     $expat->{Curlist} = $ol;
 939 }
 940
 941 sub readXml
 942 {
 943     my $filename = shift || return;
 944     if(!-f $filename) {
 945         die "ERROR Cannot find filename: $filename\n";
 946     }
 947
 948     my $ret = eval 'require XML::Parser';
 949     if(!$ret) {
 950         die "You must have XML::Parser installed to run $0\n\n";
 951     }
 952     my $xp = new XML::Parser(Style => 'Tree');
 953     $xp->setHandlers(Char => \&intltool_tree_char);
 954     $xp->setHandlers(Start => \&intltool_tree_start);
 955     my $tree = $xp->parsefile($filename);
 956
 957 # <foo><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
 958 # would be:
 959 # [foo, [{}, head, [{id => "a"}, 0, "Hello ",  em, [{}, 0, "there"]], bar, [{},
 960 # 0, "Howdy",  ref, [{}]], 0, "do" ] ]
 961
 962     return $tree;
 963 }
 964
 965 sub print_header
 966 {
 967     my $infile = shift;
 968     my $fh = shift;
 969     my $source;
 970
 971     if(!-f $infile) {
 972         die "ERROR Cannot find filename: $infile\n";
 973     }
 974
 975     print $fh qq{<?xml version="1.0" encoding="UTF-8"?>\n};
 976     {
 977         local $/;
 978         open DOCINPUT, "<${FILE}" or die;
 979         $source = <DOCINPUT>;
 980         close DOCINPUT;
 981     }
 982     if ($source =~ /(<!DOCTYPE.*\[.*\]\s*>)/s)
 983     {
 984         print $fh "$1\n";
 985     }
 986     elsif ($source =~ /(<!DOCTYPE[^>]*>)/s)
 987     {
 988         print $fh "$1\n";
 989     }
 990 }
 991
 992 sub xml_merge_output
 993 {
 994     my $source;
 995
 996     if ($MULTIPLE_OUTPUT) {
 997         for my $lang (sort keys %po_files_by_lang) {
 998             if ( ! -e $lang ) {
 999                 mkdir $lang or die "Cannot create subdirectory $lang: $!\n";
1000             }
1001             open OUTPUT, ">$lang/$OUTFILE" or die "Cannot open $lang/$OUTFILE: $!\n";
1002             my $tree = readXml($FILE);
1003             print_header($FILE, \*OUTPUT);
1004             parseTree(\*OUTPUT, $tree, 0, $lang);
1005             close OUTPUT;
1006             print "CREATED $lang/$OUTFILE\n" unless $QUIET_ARG;
1007         }
1008     }
1009     open OUTPUT, ">$OUTFILE" or die "Cannot open $OUTFILE: $!\n";
1010     my $tree = readXml($FILE);
1011     print_header($FILE, \*OUTPUT);
1012     parseTree(\*OUTPUT, $tree, 0);
1013     close OUTPUT;
1014     print "CREATED $OUTFILE\n" unless $QUIET_ARG;
1015 }
1016
1017 sub keys_merge_translations
1018 {
1019     open INPUT, "<${FILE}" or die;
1020     open OUTPUT, ">${OUTFILE}" or die;
1021
1022     while (<INPUT>)
1023     {
1024         if (s/^(\s*)_(\w+=(.*))/$1$2/)
1025         {
1026             my $string = $3;
1027
1028             print OUTPUT;
1029
1030             my $non_translated_line = $_;
1031
1032             for my $lang (sort keys %po_files_by_lang)
1033             {
1034                 my $translation = $translations{$lang, $string};
1035                 next if !$translation;
1036
1037                 $_ = $non_translated_line;
1038                 s/(\w+)=.*/[$lang]$1=$translation/;
1039                 print OUTPUT;
1040             }
1041         }
1042         else
1043         {
1044             print OUTPUT;
1045         }
1046     }
1047
1048     close OUTPUT;
1049     close INPUT;
1050 }
1051
1052 sub desktop_merge_translations
1053 {
1054     open INPUT, "<${FILE}" or die;
1055     open OUTPUT, ">${OUTFILE}" or die;
1056
1057     while (<INPUT>)
1058     {
1059         if (s/^(\s*)_(\w+=(.*))/$1$2/)
1060         {
1061             my $string = $3;
1062
1063             print OUTPUT;
1064
1065             my $non_translated_line = $_;
1066
1067             for my $lang (sort keys %po_files_by_lang)
1068             {
1069                 my $translation = $translations{$lang, $string};
1070                 next if !$translation;
1071
1072                 $_ = $non_translated_line;
1073                 s/(\w+)=.*/${1}[$lang]=$translation/;
1074                 print OUTPUT;
1075             }
1076         }
1077         else
1078         {
1079             print OUTPUT;
1080         }
1081     }
1082
1083     close OUTPUT;
1084     close INPUT;
1085 }
1086
1087 sub schemas_merge_translations
1088 {
1089     my $source;
1090
1091     {
1092        local $/; # slurp mode
1093        open INPUT, "<$FILE" or die "can't open $FILE: $!";
1094        $source = <INPUT>;
1095        close INPUT;
1096     }
1097
1098     open OUTPUT, ">$OUTFILE" or die;
1099
1100     # FIXME: support attribute translations
1101
1102     # Empty nodes never need translation, so unmark all of them.
1103     # For example, <_foo/> is just replaced by <foo/>.
1104     $source =~ s|<\s*_($w+)\s*/>|<$1/>|g;
1105
1106     while ($source =~ s/
1107                         (.*?)
1108                         (\s+)(<locale\ name="C">(\s*)
1109                             (<default>\s*(.*?)\s*<\/default>)?(\s*)
1110                             (<short>\s*(.*?)\s*<\/short>)?(\s*)
1111                             (<long>\s*(.*?)\s*<\/long>)?(\s*)
1112                         <\/locale>)
1113                        //sx)
1114     {
1115         print OUTPUT $1;
1116
1117         my $locale_start_spaces = $2 ? $2 : '';
1118         my $default_spaces = $4 ? $4 : '';
1119         my $short_spaces = $7 ? $7 : '';
1120         my $long_spaces = $10 ? $10 : '';
1121         my $locale_end_spaces = $13 ? $13 : '';
1122         my $c_default_block = $3 ? $3 : '';
1123         my $default_string = $6 ? $6 : '';
1124         my $short_string = $9 ? $9 : '';
1125         my $long_string = $12 ? $12 : '';
1126
1127         $c_default_block =~ s/default>\[.*?\]/default>/s;
1128
1129         print OUTPUT "$locale_start_spaces$c_default_block";
1130
1131         $default_string =~ s/\s+/ /g;
1132         $default_string = entity_decode($default_string);
1133         $short_string =~ s/\s+/ /g;
1134         $short_string = entity_decode($short_string);
1135         $long_string =~ s/\s+/ /g;
1136         $long_string = entity_decode($long_string);
1137
1138         for my $lang (sort keys %po_files_by_lang)
1139         {
1140             my $default_translation = $translations{$lang, $default_string};
1141             my $short_translation = $translations{$lang, $short_string};
1142             my $long_translation  = $translations{$lang, $long_string};
1143
1144             next if (!$default_translation && !$short_translation &&
1145                      !$long_translation);
1146
1147             print OUTPUT "\n$locale_start_spaces<locale name=\"$lang\">";
1148
1149         print OUTPUT "$default_spaces";
1150
1151         if ($default_translation)
1152         {
1153             $default_translation = entity_encode($default_translation);
1154             print OUTPUT "<default>$default_translation</default>";
1155         }
1156
1157             print OUTPUT "$short_spaces";
1158
1159             if ($short_translation)
1160             {
1161                         $short_translation = entity_encode($short_translation);
1162                         print OUTPUT "<short>$short_translation</short>";
1163             }
1164
1165             print OUTPUT "$long_spaces";
1166
1167             if ($long_translation)
1168             {
1169                         $long_translation = entity_encode($long_translation);
1170                         print OUTPUT "<long>$long_translation</long>";
1171             }
1172
1173             print OUTPUT "$locale_end_spaces</locale>";
1174         }
1175     }
1176
1177     print OUTPUT $source;
1178
1179     close OUTPUT;
1180 }
1181
1182 sub rfc822deb_merge_translations
1183 {
1184     my %encodings = ();
1185     for my $lang (keys %po_files_by_lang) {
1186         $encodings{$lang} = ($UTF8_ARG ? 'UTF-8' : get_po_encoding($po_files_by_lang{$lang}));
1187     }
1188
1189     my $source;
1190
1191     $Text::Wrap::huge = 'overflow';
1192     $Text::Wrap::break = qr/\n|\s(?=\S)/;
1193
1194     {
1195        local $/; # slurp mode
1196        open INPUT, "<$FILE" or die "can't open $FILE: $!";
1197        $source = <INPUT>;
1198        close INPUT;
1199     }
1200
1201     open OUTPUT, ">${OUTFILE}" or die;
1202
1203     while ($source =~ /(^|\n+)(_*)([^:\s]+)(:[ \t]*)(.*?)(?=\n[\S\n]|$)/sg)
1204     {
1205             my $sep = $1;
1206             my $non_translated_line = $3.$4;
1207             my $string = $5;
1208             my $underscore = length($2);
1209             next if $underscore eq 0 && $non_translated_line =~ /^#/;
1210             #  Remove [] dummy strings
1211             my $stripped = $string;
1212             $stripped =~ s/\[\s[^\[\]]*\],/,/g if $underscore eq 2;
1213             $stripped =~ s/\[\s[^\[\]]*\]$//;
1214             $non_translated_line .= $stripped;
1215
1216             print OUTPUT $sep.$non_translated_line;
1217
1218             if ($underscore)
1219             {
1220                 my @str_list = rfc822deb_split($underscore, $string);
1221
1222                 for my $lang (sort keys %po_files_by_lang)
1223                 {
1224                     my $is_translated = 1;
1225                     my $str_translated = '';
1226                     my $first = 1;
1227
1228                     for my $str (@str_list)
1229                     {
1230                         my $translation = $translations{$lang, $str};
1231
1232                         if (!$translation)
1233                         {
1234                             $is_translated = 0;
1235                             last;
1236                         }
1237
1238                         #  $translation may also contain [] dummy
1239                         #  strings, mostly to indicate an empty string
1240                         $translation =~ s/\[\s[^\[\]]*\]$//;
1241
1242                         if ($first)
1243                         {
1244                             if ($underscore eq 2)
1245                             {
1246                                 $str_translated .= $translation;
1247                             }
1248                             else
1249                             {
1250                                 $str_translated .=
1251                                     Text::Tabs::expand($translation) .
1252                                     "\n";
1253                             }
1254                         }
1255                         else
1256                         {
1257                             if ($underscore eq 2)
1258                             {
1259                                 $str_translated .= ', ' . $translation;
1260                             }
1261                             else
1262                             {
1263                                 $str_translated .= Text::Tabs::expand(
1264                                     Text::Wrap::wrap(' ', ' ', $translation)) .
1265                                     "\n .\n";
1266                             }
1267                         }
1268                         $first = 0;
1269
1270                         #  To fix some problems with Text::Wrap::wrap
1271                         $str_translated =~ s/(\n )+\n/\n .\n/g;
1272                     }
1273                     next unless $is_translated;
1274
1275                     $str_translated =~ s/\n \.\n$//;
1276                     $str_translated =~ s/\s+$//;
1277
1278                     $_ = $non_translated_line;
1279                     s/^(\w+):\s*.*/$sep${1}-$lang.$encodings{$lang}: $str_translated/s;
1280                     print OUTPUT;
1281                 }
1282             }
1283     }
1284     print OUTPUT "\n";
1285
1286     close OUTPUT;
1287     close INPUT;
1288 }
1289
1290 sub rfc822deb_split
1291 {
1292     # Debian defines a special way to deal with rfc822-style files:
1293     # when a value contain newlines, it consists of
1294     #   1.  a short form (first line)
1295     #   2.  a long description, all lines begin with a space,
1296     #       and paragraphs are separated by a single dot on a line
1297     # This routine returns an array of all paragraphs, and reformat
1298     # them.
1299     # When first argument is 2, the string is a comma separated list of
1300     # values.
1301     my $type = shift;
1302     my $text = shift;
1303     $text =~ s/^[ \t]//mg;
1304     return (split(/, */, $text, 0)) if $type ne 1;
1305     return ($text) if $text !~ /\n/;
1306
1307     $text =~ s/([^\n]*)\n//;
1308     my @list = ($1);
1309     my $str = '';
1310
1311     for my $line (split (/\n/, $text))
1312     {
1313         chomp $line;
1314         if ($line =~ /^\.\s*$/)
1315         {
1316             #  New paragraph
1317             $str =~ s/\s*$//;
1318             push(@list, $str);
1319             $str = '';
1320         }
1321         elsif ($line =~ /^\s/)
1322         {
1323             #  Line which must not be reformatted
1324             $str .= "\n" if length ($str) && $str !~ /\n$/;
1325             $line =~ s/\s+$//;
1326             $str .= $line."\n";
1327         }
1328         else
1329         {
1330             #  Continuation line, remove newline
1331             $str .= " " if length ($str) && $str !~ /\n$/;
1332             $str .= $line;
1333         }
1334     }
1335
1336     $str =~ s/\s*$//;
1337     push(@list, $str) if length ($str);
1338
1339     return @list;
1340 }
1341