2 # -*- Mode: perl; indent-tabs-mode: nil; c-basic-offset: 4 -*-
5 # The Intltool Message Extractor
7 # Copyright (C) 2000-2001, 2003 Free Software Foundation.
9 # Intltool is free software; you can redistribute it and/or
10 # modify it under the terms of the GNU General Public License as
11 # published by the Free Software Foundation; either version 2 of the
12 # License, or (at your option) any later version.
14 # Intltool is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 # General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with this program; if not, write to the Free Software
21 # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23 # As a special exception to the GNU General Public License, if you
24 # distribute this file as part of a program that contains a
25 # configuration script generated by Autoconf, you may include it under
26 # the same distribution terms that you use for the rest of that program.
28 # Authors: Kenneth Christiansen <kenneth@gnu.org>
29 # Darin Adler <darin@bentspoon.com>
32 ## Release information
33 my $PROGRAM = "intltool-extract";
34 my $PACKAGE = "@PACKAGE@";
35 my $VERSION = "@VERSION@";
42 ## Scalars used by the option stuff
46 my $VERSION_ARG = "0";
50 my $NOMSGCTXT_ARG = "0";
55 my $gettext_type = "";
58 my @messages_sorted = ();
66 ## Use this instead of \w for XML files to handle more possible characters.
67 my $w = "[-A-Za-z0-9._:]";
74 "type=s" => \$TYPE_ARG,
75 "local|l" => \$LOCAL_ARG,
76 "help|h" => \$HELP_ARG,
77 "version|v" => \$VERSION_ARG,
78 "update" => \$UPDATE_ARG,
79 "quiet|q" => \$QUIET_ARG,
80 "srcdir=s" => \$SRCDIR_ARG,
81 "nomsgctxt" => \$NOMSGCTXT_ARG,
88 ## This section will check for the different options.
90 sub split_on_argument {
98 } elsif ($LOCAL_ARG) {
102 } elsif ($UPDATE_ARG) {
106 } elsif (@ARGV > 0) {
119 $OUTFILE = "$FILE.h";
121 my $dirname = dirname ($OUTFILE);
122 if (! -d "$dirname" && $dirname ne "") {
123 system ("mkdir -p $dirname");
129 $OUTFILE = fileparse($FILE, ());
131 system("mkdir tmp/");
133 $OUTFILE = "./tmp/$OUTFILE.h"
137 if ($TYPE_ARG =~ /^gettext\/(.*)/) {
142 ## Sub for printing release information
145 ${PROGRAM} (${PACKAGE}) $VERSION
146 Copyright (C) 2000, 2003 Free Software Foundation, Inc.
147 Written by Kenneth Christiansen, 2000.
149 This is free software; see the source for copying conditions. There is NO
150 warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
155 ## Sub for printing usage information
158 Usage: ${PROGRAM} [OPTION]... [FILENAME]
159 Generates a header file from an XML source file.
161 It grabs all strings between <_translatable_node> and its end tag in
162 XML files. Read manpage (man ${PROGRAM}) for more info.
164 --type=TYPE Specify the file type of FILENAME. Currently supports:
165 "gettext/glade", "gettext/ini", "gettext/keys"
166 "gettext/rfc822deb", "gettext/schemas",
167 "gettext/gsettings", "gettext/xml", "gettext/quoted",
168 "gettext/quotedxml", "gettext/tlk"
169 -l, --local Writes output into current working directory
170 (conflicts with --update)
171 --update Writes output into the same directory the source file
172 reside (conflicts with --local)
173 --srcdir Root of the source tree
174 -v, --version Output version information and exit
175 -h, --help Display this help and exit
176 -q, --quiet Quiet mode
178 Report bugs to http://bugs.launchpad.net/intltool
183 ## Sub for printing error messages
185 print STDERR "Try `${PROGRAM} --help' for more information.\n";
190 print "Generating C format header file for translation.\n" unless $QUIET_ARG;
198 open OUT, ">$OUTFILE";
199 binmode (OUT) if $^O eq 'MSWin32';
203 print "Wrote $OUTFILE\n" unless $QUIET_ARG;
211 local $/; #slurp mode
212 open (IN, "<$SRCDIR_ARG/$FILE") || die "can't open $SRCDIR_ARG/$FILE: $!";
218 &type_ini if $gettext_type eq "ini";
219 &type_keys if $gettext_type eq "keys";
220 &type_xml if $gettext_type eq "xml";
221 &type_glade if $gettext_type eq "glade";
222 &type_gsettings if $gettext_type eq "gsettings";
223 &type_schemas if $gettext_type eq "schemas";
224 &type_rfc822deb if $gettext_type eq "rfc822deb";
225 &type_quoted if $gettext_type eq "quoted";
226 &type_quotedxml if $gettext_type eq "quotedxml";
227 &type_tlk if $gettext_type eq "tlk";
230 sub entity_decode_minimal
256 return '\"' if $_ eq '"';
257 return '\n' if $_ eq "\n";
258 return '\\\\' if $_ eq '\\';
266 return join "", map &escape_char, split //, $string;
272 push @messages_sorted, $string if !defined $messages{$string};
273 $messages{$string} = [];
277 ### For generic translatable desktop files ###
278 while ($input =~ /^(#(.+)\n)?^_.*=(.*)$/mg) {
287 ### For generic translatable mime/keys files ###
288 while ($input =~ /^\s*_\w+=(.*)$/mg) {
294 ### For generic translatable XML files ###
295 my $tree = readXml($input);
301 my $vartype = ref $var;
303 if ($vartype =~ /ARRAY/) {
306 foreach my $el (@arr) {
311 } elsif ($vartype =~ /HASH/) {
314 foreach my $key (keys %hash) {
316 print_var($hash{$key});
325 # Same syntax as getAttributeString in intltool-merge.in.in, similar logic (look for ## differences comment)
326 sub getAttributeString
329 my $do_translate = shift || 1;
330 my $language = shift || "";
331 my $translate = shift;
333 foreach my $e (reverse(sort(keys %{ $sub }))) {
335 my $string = $sub->{$e};
338 $string =~ s/^[\s]+//;
339 $string =~ s/[\s]+$//;
341 if ($string =~ /^'.*'$/)
345 $string =~ s/^['"]//g;
346 $string =~ s/['"]$//g;
348 ## differences from intltool-merge.in.in
350 $comments{entity_decode($string)} = $XMLCOMMENT if $XMLCOMMENT;
351 add_message(entity_decode($string));
354 ## differences end here from intltool-merge.in.in
355 $result .= " $key=$quote$string$quote";
360 # Verbatim copy from intltool-merge.in.in
364 my $spacepreserve = shift || 0;
365 my @list = @{ $ref };
368 my $count = scalar(@list);
369 my $attrs = $list[0];
372 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
373 $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
375 while ($index < $count) {
376 my $type = $list[$index];
377 my $content = $list[$index+1];
381 # let's strip the whitespace here, and *ONLY* here
382 $content =~ s/\s+/ /gs if (!$spacepreserve);
385 } elsif ( "$type" ne "1" ) {
386 # We've got another element
388 $result .= getAttributeString(@{$content}[0], 0); # no nested translatable elements
390 my $subresult = getXMLstring($content, $spacepreserve);
392 $result .= ">".$subresult . "</$type>";
405 # Verbatim copy from intltool-merge.in.in, except for MULTIPLE_OUTPUT handling removed
406 # Translate list of nodes if necessary
407 sub translate_subnodes
411 my $language = shift || "";
412 my $singlelang = shift || 0;
413 my $spacepreserve = shift || 0;
415 my @nodes = @{ $content };
417 my $count = scalar(@nodes);
419 while ($index < $count) {
420 my $type = $nodes[$index];
421 my $rest = $nodes[$index+1];
422 traverse($fh, $type, $rest, $language, $spacepreserve);
427 # Based on traverse() in intltool-merge.in.in
430 my $fh = shift; # unused, to allow us to sync code between -merge and -extract
431 my $nodename = shift;
433 my $language = shift || "";
434 my $spacepreserve = shift || 0;
436 if ($nodename && "$nodename" eq "1") {
437 $XMLCOMMENT = $content;
438 } elsif ($nodename) {
440 my @all = @{ $content };
441 my $attrs = shift @all;
443 my $outattr = getAttributeString($attrs, 1, $language, \$translate);
445 if ($nodename =~ /^_/) {
451 $spacepreserve = 0 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?default["']?$/));
452 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
455 $lookup = getXMLstring($content, $spacepreserve);
456 if (!$spacepreserve) {
457 $lookup =~ s/^\s+//s;
458 $lookup =~ s/\s+$//s;
460 if (exists $attrs->{"msgctxt"}) {
461 my $context = entity_decode ($attrs->{"msgctxt"});
462 $context =~ s/^["'](.*)["']/$1/;
463 $lookup = "$context\004$lookup";
466 if ($lookup && $translate != 2) {
467 $comments{$lookup} = $XMLCOMMENT if $XMLCOMMENT;
468 add_message($lookup);
469 } elsif ($translate == 2) {
470 translate_subnodes($fh, \@all, $language, 1, $spacepreserve);
474 my $count = scalar(@all);
477 while ($index < $count) {
478 my $type = $all[$index];
479 my $rest = $all[$index+1];
480 traverse($fh, $type, $rest, $language, $spacepreserve);
490 # Verbatim copy from intltool-merge.in.in, $fh for compatibility
495 my $language = shift || "";
497 my $name = shift @{ $ref };
498 my $cont = shift @{ $ref };
500 while (!$name || "$name" eq "1") {
501 $name = shift @{ $ref };
502 $cont = shift @{ $ref };
505 my $spacepreserve = 0;
506 my $attrs = @{$cont}[0];
507 $spacepreserve = 1 if ((exists $attrs->{"xml:space"}) && ($attrs->{"xml:space"} =~ /^["']?preserve["']?$/));
509 traverse($fh, $name, $cont, $language, $spacepreserve);
512 # Verbatim copy from intltool-merge.in.in
513 sub intltool_tree_comment
516 my $data = $expat->original_string();
517 my $clist = $expat->{Curlist};
522 push @$clist, 1 => $data;
525 # Verbatim copy from intltool-merge.in.in
526 sub intltool_tree_cdatastart
529 my $clist = $expat->{Curlist};
532 push @$clist, 0 => $expat->original_string();
535 # Verbatim copy from intltool-merge.in.in
536 sub intltool_tree_cdataend
539 my $clist = $expat->{Curlist};
542 $clist->[$pos] .= $expat->original_string();
545 # Verbatim copy from intltool-merge.in.in
546 sub intltool_tree_char
550 my $clist = $expat->{Curlist};
553 # Use original_string so that we retain escaped entities
556 if ($pos > 0 and $clist->[$pos - 1] eq '0') {
557 $clist->[$pos] .= $expat->original_string();
559 push @$clist, 0 => $expat->original_string();
563 # Verbatim copy from intltool-merge.in.in
564 sub intltool_tree_start
570 # Use original_string so that we retain escaped entities
571 # in attribute values. We must convert the string to an
572 # @origlist array to conform to the structure of the Tree
575 my @original_array = split /\x/, $expat->original_string();
576 my $source = $expat->original_string();
578 # Remove leading tag.
580 $source =~ s|^\s*<\s*(\S+)||s;
582 # Grab attribute key/value pairs and push onto @origlist array.
586 if ($source =~ /^\s*([\w:-]+)\s*[=]\s*["]/)
588 $source =~ s|^\s*([\w:-]+)\s*[=]\s*["]([^"]*)["]||s;
590 push @origlist, '"' . $2 . '"';
592 elsif ($source =~ /^\s*([\w:-]+)\s*[=]\s*[']/)
594 $source =~ s|^\s*([\w:-]+)\s*[=]\s*[']([^']*)[']||s;
596 push @origlist, "'" . $2 . "'";
604 my $ol = [ { @origlist } ];
606 push @{ $expat->{Lists} }, $expat->{Curlist};
607 push @{ $expat->{Curlist} }, $tag => $ol;
608 $expat->{Curlist} = $ol;
611 # Copied from intltool-merge.in.in and added comment handler.
614 my $xmldoc = shift || return;
615 my $ret = eval 'require XML::Parser';
617 die "You must have XML::Parser installed to run $0\n\n";
619 my $xp = new XML::Parser(Style => 'Tree');
620 $xp->setHandlers(Char => \&intltool_tree_char);
621 $xp->setHandlers(Start => \&intltool_tree_start);
622 $xp->setHandlers(CdataStart => \&intltool_tree_cdatastart);
623 $xp->setHandlers(CdataEnd => \&intltool_tree_cdataend);
625 ## differences from intltool-merge.in.in
626 $xp->setHandlers(Comment => \&intltool_tree_comment);
627 ## differences end here from intltool-merge.in.in
629 my $tree = $xp->parse($xmldoc);
631 # <foo><!-- comment --><head id="a">Hello <em>there</em></head><bar>Howdy<ref/></bar>do</foo>
633 # [foo, [{}, 1, "comment", head, [{id => "a"}, 0, "Hello ", em, [{}, 0, "there"]], bar,
634 # [{}, 0, "Howdy", ref, [{}]], 0, "do" ] ]
640 ### For schemas XML files ###
642 # FIXME: We should handle escaped < (less than)
644 <locale\ name="C">\s*
645 (<default>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/default>\s*)?
646 (<short>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/short>\s*)?
647 (<long>\s*(?:<!--([^>]*?)-->\s*)?(.*?)\s*<\/long>\s*)?
650 my @totranslate = ($3,$6,$9);
651 my @eachcomment = ($2,$5,$8);
652 foreach (@totranslate) {
653 my $currentcomment = shift @eachcomment;
656 add_message(entity_decode_minimal($_));
657 $comments{entity_decode_minimal($_)} = $currentcomment if (defined($currentcomment));
662 # Parse the tree as returned by readXml() for gschema.xml files.
663 sub traverse_gsettings {
671 my $nodename = shift;
673 my $comment = shift || 0;
674 my @list = @{ $content };
675 my $attrs_ref = shift @list;
676 my %attrs = %{ $attrs_ref };
677 if (($nodename eq 'default' and $attrs{'l10n'}) or
678 ($nodename eq 'summary') or ($nodename eq 'description')) {
679 # preserve whitespace. deal with it ourselves, below.
680 my $message = getXMLstring($content, 1);
682 if ($nodename eq 'default') {
683 # for <default> we strip leading and trailing whitespace but
684 # preserve (possibly quoted) whitespace within
685 $message =~ s/^\s+//;
686 $message =~ s/\s+$//;
688 # for <summary> and <description>, we normalise all
689 # whitespace while preserving paragraph boundaries
690 $message = join "\n\n", map &cleanup, split/\n\s*\n+/, $message;
693 my $context = $attrs{'context'};
694 $context =~ s/^["'](.*)["']/$1/ if $context;
695 $message = $context . "\004" . $message if $context;
696 add_message($message);
697 $comments{$message} = $comment if $comment;
701 while (scalar(@list) > 1) {
702 my $type = shift @list;
703 my $content = shift @list;
704 if (!$type || "$type" eq "1") {
710 traverse_gsettings($type, $content, $comment);
718 my $tree = readXml($input);
719 my @tree_nodes = @{ $tree };
720 my $node = shift @tree_nodes;
721 while (!$node || "$node" eq "1") {
723 $node = shift @tree_nodes;
725 my $content = shift @tree_nodes;
726 traverse_gsettings($node, $content);
730 ### For rfc822-style Debian configuration files ###
734 while ($input =~ /\G(.*?)(^|\n)(_+)([^:]+):[ \t]*(.*?)(?=\n\S|$)/sg)
736 my ($pre, $newline, $underscore, $tag, $text) = ($1, $2, $3, $4, $5);
737 while ($pre =~ m/\n/g)
741 $lineno += length($newline);
742 my @str_list = rfc822deb_split(length($underscore), $text);
743 for my $str (@str_list)
747 $loc{$str} = $lineno;
748 $count{$str} = $strcount;
749 my $usercomment = '';
750 while($pre =~ s/(^|\n)#([^\n]*)$//s)
752 $usercomment = "\n" . $2 . $usercomment;
754 $comments{$str} = $tag . $usercomment;
756 $lineno += ($text =~ s/\n//g);
760 sub rfc822deb_split {
761 # Debian defines a special way to deal with rfc822-style files:
762 # when a value contains newlines, it consists of
763 # 1. a short form (first line)
764 # 2. a long description, all lines begin with a space,
765 # and paragraphs are separated by a single dot on a line
766 # This routine returns an array of all paragraphs, and reformats
768 # When first argument is 2, the string is a comma separated list of
772 $text =~ s/^[ \t]//mg;
773 return (split(/, */, $text, 0)) if $type ne 1;
774 return ($text) if $text !~ /\n/;
776 $text =~ s/([^\n]*)\n//;
779 for my $line (split (/\n/, $text))
782 if ($line =~ /^\.\s*$/)
789 elsif ($line =~ /^\s/)
791 # Line which must not be reformatted
792 $str .= "\n" if length ($str) && $str !~ /\n$/;
798 # Continuation line, remove newline
799 $str .= " " if length ($str) && $str !~ /\n$/;
804 push(@list, $str) if length ($str);
809 while ($input =~ /\"(([^\"]|\\\")*[^\\\"])\"/g) {
812 $message =~ s/\\\"/\"/g;
813 $before =~ s/[^\n]//g;
814 add_message($message);
815 $loc{$message} = length ($before) + 2;
820 while ($input =~ /\"(([^\"]|\\\")*[^\\\"])\"/g) {
823 $message =~ s/\\\"/\"/g;
824 $message = entity_decode($message);
825 $before =~ s/[^\n]//g;
826 add_message($message);
827 $loc{$message} = length ($before) + 2;
832 ### For translatable Glade XML files ###
834 my $tags = "label|title|text|format|copyright|comments|preview_text|tooltip|message";
836 while ($input =~ /<($tags)>([^<]+)<\/($tags)>/sg) {
837 # Glade sometimes uses tags that normally mark translatable things for
838 # little bits of non-translatable content. We work around this by not
839 # translating strings that only include something like label4 or window1.
840 add_message(entity_decode($2)) unless $2 =~ /^(window|label|dialog)[0-9]+$/;
843 while ($input =~ /<items>(..[^<]*)<\/items>/sg) {
844 for my $item (split (/\n/, $1)) {
845 add_message(entity_decode($item));
849 ## handle new glade files
850 while ($input =~ /<(\w+)\s+[^>]*translatable\s*=\s*"yes"(?:\s+[^>]*context\s*=\s*"([^"]*)")?(?:\s+[^>]*comments\s*=\s*"([^"]*)")?[^>]*>([^<]+)<\/\1>/sg) {
851 if (!($4 =~ /^(window|label)[0-9]+$/)) {
852 my $message = entity_decode($4);
854 $message = entity_decode($2) . "\004" . $message;
856 add_message($message);
858 $comments{$message} = entity_decode($3) ;
862 while ($input =~ /<atkaction\s+action_name="([^>]*)"\s+description="([^>]+)"\/>/sg) {
863 add_message(entity_decode_minimal($2));
868 my ($ftype, $fvers, $langid, $strcount, $stroff);
871 my @inputa = split (//, $input, 21);
875 $ftype = substr ($input, 0, 3);
876 $fvers = substr ($input, 4, 7);
877 $langid = unpack ("L", $inputa[8] . $inputa[9] .
878 $inputa[10] . $inputa[11]);
879 $strcount = unpack ("L", $inputa[12] . $inputa[13] .
880 $inputa[14] . $inputa[15]);
881 $stroff = unpack ("L", $inputa[16] . $inputa[17] .
882 $inputa[18] . $inputa[19]);
885 $strdata = bytes::substr ($input, $stroff);
889 $foo = $inputa[$sinpos];
891 @inputa = split (//, $foo, $sinpos + 1);
894 while ($count < $strcount) {
895 my ($flags, $soundref, $volvar, $pitch, $offset, $strsize, $sndlen) = 0;
897 if ($count > 0 && $count % 2000 == 0) {
898 $foo = $inputa[$sinpos];
899 my $numleft = ($strcount - $count);
900 if ($numleft > 2000) {
903 $sinpos = 40 * $numleft;
905 @inputa = split (//, $foo, $sinpos + 1);
906 my $numbytes = @inputa;
911 $flags = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
912 $inputa[$pos + 2] . $inputa[$pos + 3]);
914 if ($flags & 0x0002) {
915 $soundref = join ('', @inputa[$pos..$pos + 15]);
916 $soundref =~ s/\0//g;
919 # According to the Bioware Aurora Talk Table Format documentation
920 # the VolumeVariance and PitchVariance DWORDs are not used
921 # We increment the pos counter, but do not read the data, here
922 # $volvar = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
923 # $inputa[$pos + 2] . $inputa[$pos + 3]);
925 # $pitch = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
926 # $inputa[$pos + 2] . $inputa[$pos + 3]);
928 $offset = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
929 $inputa[$pos + 2] . $inputa[$pos + 3])
930 if ($flags & 0x0001);
932 $strsize = unpack ("L", $inputa[$pos] . $inputa[$pos + 1] .
933 $inputa[$pos + 2] . $inputa[$pos + 3])
934 if ($flags & 0x0001);
936 $sndlen = unpack ("d", $inputa[$pos] . $inputa[$pos + 1] .
937 $inputa[$pos + 2] . $inputa[$pos + 3])
938 if ($flags & 0x0004);
941 if (defined $strsize && $strsize > 0) {
942 my $message = substr ($strdata, $offset, $strsize);
943 if (defined $message) {
945 Encode::from_to ($message, "iso-8859-1", "UTF-8");
946 add_message($message);
947 if ($message =~ /^Bad Strref$/ ) {
948 $comments{$message} = "DO NOT Translate this Entry.";
949 $comments{$message} .= "\nTLK:position=$count";
951 $comments{$message} = "TLK:position=$count";
952 $comments{$message} .= "; TLK:sndresref=$soundref"
953 if (defined $soundref && $soundref ne "");
954 $comments{$message} .= "; TLK:sndlen=$sndlen"
955 if (defined $sndlen && $sndlen != 0);
958 print STDERR "Missing message? ID: $count\n";
969 @msgids = sort { $count{$a} <=> $count{$b} } keys %count;
973 @msgids = @messages_sorted;
975 for my $message (@msgids)
979 $offsetlines++ if $message =~ /%/;
980 if (defined ($comments{$message}))
982 while ($comments{$message} =~ m/\n/g)
987 print OUT "# ".($loc{$message} - $offsetlines). " \"$FILE\"\n"
988 if defined $loc{$message};
989 print OUT "/* ".$comments{$message}." */\n"
990 if defined $comments{$message};
991 print OUT "/* xgettext:no-c-format */\n" if $message =~ /%/;
993 if ($message =~ /(.*)\004(.*)/s) {
997 my @lines = split (/\n/, $message, -1);
998 for (my $n = 0; $n < @lines; $n++)
1002 if (defined $context)
1006 print OUT "char *s = N_(\"", $context, "|";
1010 print OUT "char *s = C_(\"", $context, "\", \"";
1015 print OUT "char *s = N_(\"";
1023 print OUT escape($lines[$n]);
1025 if ($n < @lines - 1)
1027 print OUT "\\n\"\n";