1 /* Extract some translations of a translation catalog.
2 Copyright (C) 2001-2007, 2009-2010 Free Software Foundation, Inc.
3 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
34 #if defined _MSC_VER || defined __MINGW32__
43 #include "error-progname.h"
45 #include "relocatable.h"
48 #include "read-catalog.h"
50 #include "read-properties.h"
51 #include "read-stringtable.h"
52 #include "write-catalog.h"
54 #include "write-properties.h"
55 #include "write-stringtable.h"
58 #include "msgl-charset.h"
62 #include "propername.h"
65 #define _(str) gettext (str)
68 /* Force output of PO file even if empty. */
71 /* Output only non-matching messages. */
72 static bool invert_match = false;
74 /* Selected source files. */
/* Allocated in main and extended with string_list_append from option
   arguments; filename_list_match tests the file positions of each message
   against this list.  */
75 static string_list_ty *location_files;
77 /* Selected domain names. */
/* Allocated in main and extended with string_list_append from option
   arguments; process_message_list keeps every message of a domain that is
   a member of this list.  */
78 static string_list_ty *domain_names;
80 /* Task for each grep pass. */
/* The pass number selects which part of a message is searched:
   0 = msgctxt, 1 = msgid and msgid_plural, 2 = msgstr,
   3 = translator comments, 4 = extracted comments.  */
/* Whether the patterns of this pass are compiled ignoring case; the value
   is handed to the compile function of the matcher.  */
86 bool case_insensitive;
/* Result of the compile function of the matcher; passed back to its
   execute function for every string that is tested.  */
87 void *compiled_patterns;
/* One task per pass, indexed by the pass number listed above.  */
89 static struct grep_task grep_task[5];
/* Long option table for getopt_long.
   Entries whose third field is the address of a variable (line_comment,
   force_po) make getopt_long store the fourth field into that variable.
   All other entries have a NULL third field, so getopt_long returns the
   fourth field: either the letter of the equivalent short option or a
   CHAR_MAX + n code for options that have no short form.  */
92 static const struct option long_options[] =
94 { "add-location", no_argument, &line_comment, 1 },
95 { "color", optional_argument, NULL, CHAR_MAX + 9 },
96 { "comment", no_argument, NULL, 'C' },
97 { "directory", required_argument, NULL, 'D' },
98 { "domain", required_argument, NULL, 'M' },
99 { "escape", no_argument, NULL, CHAR_MAX + 1 },
100 { "extended-regexp", no_argument, NULL, 'E' },
101 { "extracted-comment", no_argument, NULL, 'X' },
102 { "file", required_argument, NULL, 'f' },
103 { "fixed-strings", no_argument, NULL, 'F' },
104 { "force-po", no_argument, &force_po, 1 },
105 { "help", no_argument, NULL, 'h' },
106 { "ignore-case", no_argument, NULL, 'i' },
107 { "indent", no_argument, NULL, CHAR_MAX + 2 },
108 { "invert-match", no_argument, NULL, 'v' },
109 { "location", required_argument, NULL, 'N' },
110 { "msgctxt", no_argument, NULL, 'J' },
111 { "msgid", no_argument, NULL, 'K' },
112 { "msgstr", no_argument, NULL, 'T' },
113 { "no-escape", no_argument, NULL, CHAR_MAX + 3 },
114 { "no-location", no_argument, &line_comment, 0 },
115 { "no-wrap", no_argument, NULL, CHAR_MAX + 6 },
116 { "output-file", required_argument, NULL, 'o' },
117 { "properties-input", no_argument, NULL, 'P' },
118 { "properties-output", no_argument, NULL, 'p' },
119 { "regexp", required_argument, NULL, 'e' },
120 { "sort-by-file", no_argument, NULL, CHAR_MAX + 4 },
121 { "sort-output", no_argument, NULL, CHAR_MAX + 5 },
122 { "strict", no_argument, NULL, 'S' },
123 { "stringtable-input", no_argument, NULL, CHAR_MAX + 7 },
124 { "stringtable-output", no_argument, NULL, CHAR_MAX + 8 },
125 { "style", required_argument, NULL, CHAR_MAX + 10 },
126 { "version", no_argument, NULL, 'V' },
127 { "width", required_argument, NULL, 'w' },
132 /* Forward declaration of local functions. */
/* no_pass and usage carry the noreturn attribute when the compiler is
   GCC 2.5 or newer; other compilers see plain declarations.  */
133 static void no_pass (int opt)
134 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
135 __attribute__ ((noreturn))
138 static void usage (int status)
139 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
140 __attribute__ ((noreturn))
/* Called by main on the catalog it has read; main continues with the
   returned list.  */
143 static msgdomain_list_ty *process_msgdomain_list (msgdomain_list_ty *mdlp);
/* Program entry point.
   Parses the command line with getopt_long and collects the patterns of
   each grep pass as newline-terminated text in grep_task[pass].patterns.
   Afterwards it compiles the patterns of every pass that has any, reads the
   input catalog with read_catalog_file, filters it with
   process_msgdomain_list, optionally sorts it by file position or by msgid,
   and writes it with msgdomain_list_print.
   Error reporting: fatal conditions call error () with EXIT_FAILURE.  The
   message about more than one input file is issued with EXIT_SUCCESS
   instead, and the usage (EXIT_FAILURE) call that follows it is what ends
   the program (GNU error () returns when its status argument is zero).
   NOTE(review): several lines of this function (case labels, braces, some
   declarations) are not visible in this view.  */
147 main (int argc, char **argv)
153 const char *input_file;
155 msgdomain_list_ty *result;
156 catalog_input_format_ty input_syntax = &input_format_po;
157 catalog_output_format_ty output_syntax = &output_format_po;
158 bool sort_by_filepos = false;
159 bool sort_by_msgid = false;
162 /* Set program name for messages. */
163 set_program_name (argv[0]);
164 error_print_progname = maybe_print_progname;
166 #ifdef HAVE_SETLOCALE
167 /* Set locale via LC_ALL. */
168 setlocale (LC_ALL, "");
171 /* Set the text message domain. */
172 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
173 bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
174 textdomain (PACKAGE);
176 /* Ensure that write errors on stdout are detected. */
177 atexit (close_stdout);
179 /* Set default values for variables. */
185 location_files = string_list_alloc ();
186 domain_names = string_list_alloc ();
/* Give each of the five passes its initial state: matcher_grep as the
   matcher, no patterns, case-sensitive matching.  */
188 for (i = 0; i < 5; i++)
190 struct grep_task *gt = &grep_task[i];
192 gt->matcher = &matcher_grep;
193 gt->pattern_count = 0;
195 gt->patterns_size = 0;
196 gt->case_insensitive = false;
199 while ((opt = getopt_long (argc, argv, "CD:e:Ef:FhiJKM:N:o:pPTvVw:X",
204 case '\0': /* Long option. */
212 dir_list_append (optarg);
219 struct grep_task *gt = &grep_task[grep_pass];
220 /* Append optarg and a newline to gt->patterns. */
221 size_t len = strlen (optarg);
223 (char *) xrealloc (gt->patterns, gt->patterns_size + len + 1);
224 memcpy (gt->patterns + gt->patterns_size, optarg, len);
225 gt->patterns_size += len;
226 *(gt->patterns + gt->patterns_size) = '\n';
227 gt->patterns_size += 1;
235 grep_task[grep_pass].matcher = &matcher_egrep;
242 struct grep_task *gt = &grep_task[grep_pass];
243 /* Append the contents of the specified file to gt->patterns. */
244 FILE *fp = fopen (optarg, "r");
247 error (EXIT_FAILURE, errno, _("\
248 error while opening \"%s\" for reading"), optarg);
253 size_t count = fread (buf, 1, sizeof buf, fp);
258 error (EXIT_FAILURE, errno, _("\
259 error while reading \"%s\""), optarg);
265 (char *) xrealloc (gt->patterns, gt->patterns_size + count);
266 memcpy (gt->patterns + gt->patterns_size, buf, count);
267 gt->patterns_size += count;
270 /* Append a final newline if file ended in a non-newline. */
271 if (gt->patterns_size > 0
272 && *(gt->patterns + gt->patterns_size - 1) != '\n')
275 (char *) xrealloc (gt->patterns, gt->patterns_size + 1);
276 *(gt->patterns + gt->patterns_size) = '\n';
277 gt->patterns_size += 1;
288 grep_task[grep_pass].matcher = &matcher_fgrep;
298 grep_task[grep_pass].case_insensitive = true;
310 string_list_append (domain_names, optarg);
314 string_list_append (location_files, optarg);
318 output_file = optarg;
322 output_syntax = &output_format_properties;
326 input_syntax = &input_format_properties;
330 message_print_style_uniforum ();
/* The option argument is parsed as a decimal number and becomes the
   output page width.  */
349 value = strtol (optarg, &endp, 10);
351 message_page_width_set (value);
360 message_print_style_escape (true);
364 message_print_style_indent ();
368 message_print_style_escape (false);
372 sort_by_filepos = true;
376 sort_by_msgid = true;
379 case CHAR_MAX + 6: /* --no-wrap */
380 message_page_width_ignore ();
383 case CHAR_MAX + 7: /* --stringtable-input */
384 input_syntax = &input_format_stringtable;
387 case CHAR_MAX + 8: /* --stringtable-output */
388 output_syntax = &output_format_stringtable;
391 case CHAR_MAX + 9: /* --color */
392 if (handle_color_option (optarg) || color_test_mode)
393 usage (EXIT_FAILURE);
396 case CHAR_MAX + 10: /* --style */
397 handle_style_option (optarg);
401 usage (EXIT_FAILURE);
405 /* Version information is requested. */
408 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
409 /* xgettext: no-wrap */
410 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
411 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
412 This is free software: you are free to change and redistribute it.\n\
413 There is NO WARRANTY, to the extent permitted by law.\n\
416 printf (_("Written by %s.\n"), proper_name ("Bruno Haible"));
420 /* Help is requested. */
422 usage (EXIT_SUCCESS);
424 /* Test whether we have an .po file name as argument. */
427 else if (optind + 1 == argc)
428 input_file = argv[optind];
431 error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
432 usage (EXIT_FAILURE);
435 /* Verify selected options. */
436 if (!line_comment && sort_by_filepos)
437 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
438 "--no-location", "--sort-by-file");
440 if (sort_by_msgid && sort_by_filepos)
441 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
442 "--sort-output", "--sort-by-file");
444 /* Compile the patterns. */
445 for (grep_pass = 0; grep_pass < 5; grep_pass++)
447 struct grep_task *gt = &grep_task[grep_pass];
449 if (gt->pattern_count > 0)
451 if (gt->patterns_size > 0)
453 /* Strip trailing newline. */
454 assert (gt->patterns[gt->patterns_size - 1] == '\n');
457 gt->compiled_patterns =
458 gt->matcher->compile (gt->patterns, gt->patterns_size,
459 gt->case_insensitive, false, false, '\n');
463 /* Read input file. */
464 result = read_catalog_file (input_file, input_syntax);
466 if (grep_task[0].pattern_count > 0
467 || grep_task[1].pattern_count > 0
468 || grep_task[2].pattern_count > 0
469 || grep_task[3].pattern_count > 0
470 || grep_task[4].pattern_count > 0)
472 /* Warn if the current locale is not suitable for this PO file. */
473 compare_po_locale_charsets (result);
476 /* Select the messages. */
477 result = process_msgdomain_list (result);
479 /* Sort the results. */
481 msgdomain_list_sort_by_filepos (result);
482 else if (sort_by_msgid)
483 msgdomain_list_sort_by_msgid (result);
485 /* Write the merged message list out. */
486 msgdomain_list_print (result, output_file, output_syntax, force_po, false);
/* Report that a pattern-related option was given before any of the options
   J, K, T, C or X had selected a grep pass.
   error () is called with EXIT_SUCCESS as status, so the message is printed
   without exiting there; the usage (EXIT_FAILURE) call that follows is the
   one that ends the program.  */
495 error (EXIT_SUCCESS, 0,
496 _("option '%c' cannot be used before 'J' or 'K' or 'T' or 'C' or 'X' has been specified"),
498 usage (EXIT_FAILURE);
502 /* Display usage information and exit. */
/* STATUS is the requested exit status.  When it differs from EXIT_SUCCESS a
   short hint pointing at the help option is written to stderr.
   NOTE(review): the branch structure around the long help text and the
   final exit call are not visible in this view -- presumably the help text
   is printed only for EXIT_SUCCESS; confirm against the full function.  */
506 if (status != EXIT_SUCCESS)
507 fprintf (stderr, _("Try `%s --help' for more information.\n"),
512 Usage: %s [OPTION] [INPUTFILE]\n\
515 /* xgettext: no-wrap */
517 Extracts all messages of a translation catalog that match a given pattern\n\
518 or belong to some given source files.\n\
522 Mandatory arguments to long options are mandatory for short options too.\n"));
525 Input file location:\n"));
527 INPUTFILE input PO file\n"));
529 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
531 If no input file is given or if it is -, standard input is read.\n"));
534 Output file location:\n"));
536 -o, --output-file=FILE write output to specified file\n"));
538 The results are written to standard output if no output file is specified\n\
541 /* xgettext: no-wrap */
543 Message selection:\n\
544 [-N SOURCEFILE]... [-M DOMAINNAME]...\n\
545 [-J MSGCTXT-PATTERN] [-K MSGID-PATTERN] [-T MSGSTR-PATTERN]\n\
546 [-C COMMENT-PATTERN] [-X EXTRACTED-COMMENT-PATTERN]\n\
547 A message is selected if it comes from one of the specified source files,\n\
548 or if it comes from one of the specified domains,\n\
549 or if -J is given and its context (msgctxt) matches MSGCTXT-PATTERN,\n\
550 or if -K is given and its key (msgid or msgid_plural) matches MSGID-PATTERN,\n\
551 or if -T is given and its translation (msgstr) matches MSGSTR-PATTERN,\n\
552 or if -C is given and the translator's comment matches COMMENT-PATTERN,\n\
553 or if -X is given and the extracted comment matches EXTRACTED-COMMENT-PATTERN.\n\
555 When more than one selection criterion is specified, the set of selected\n\
556 messages is the union of the selected messages of each criterion.\n\
558 MSGCTXT-PATTERN or MSGID-PATTERN or MSGSTR-PATTERN or COMMENT-PATTERN or\n\
559 EXTRACTED-COMMENT-PATTERN syntax:\n\
560 [-E | -F] [-e PATTERN | -f FILE]...\n\
561 PATTERNs are basic regular expressions by default, or extended regular\n\
562 expressions if -E is given, or fixed strings if -F is given.\n\
564 -N, --location=SOURCEFILE select messages extracted from SOURCEFILE\n\
565 -M, --domain=DOMAINNAME select messages belonging to domain DOMAINNAME\n\
566 -J, --msgctxt start of patterns for the msgctxt\n\
567 -K, --msgid start of patterns for the msgid\n\
568 -T, --msgstr start of patterns for the msgstr\n\
569 -C, --comment start of patterns for the translator's comment\n\
570 -X, --extracted-comment start of patterns for the extracted comment\n\
571 -E, --extended-regexp PATTERN is an extended regular expression\n\
572 -F, --fixed-strings PATTERN is a set of newline-separated strings\n\
573 -e, --regexp=PATTERN use PATTERN as a regular expression\n\
574 -f, --file=FILE obtain PATTERN from FILE\n\
575 -i, --ignore-case ignore case distinctions\n\
576 -v, --invert-match output only the messages that do not match any\n\
577 selection criterion\n\
581 Input file syntax:\n"));
583 -P, --properties-input input file is in Java .properties syntax\n"));
585 --stringtable-input input file is in NeXTstep/GNUstep .strings syntax\n"));
588 Output details:\n"));
590 --color use colors and other text attributes always\n\
591 --color=WHEN use colors and other text attributes if WHEN.\n\
592 WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
594 --style=STYLEFILE specify CSS style rule file for --color\n"));
596 --no-escape do not use C escapes in output (default)\n"));
598 --escape use C escapes in output, no extended chars\n"));
600 --force-po write PO file even if empty\n"));
602 --indent indented output style\n"));
604 --no-location suppress '#: filename:line' lines\n"));
606 --add-location preserve '#: filename:line' lines (default)\n"));
608 --strict strict Uniforum output style\n"));
610 -p, --properties-output write out a Java .properties file\n"));
612 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
614 -w, --width=NUMBER set output page width\n"));
616 --no-wrap do not break long message lines, longer than\n\
617 the output page width, into several lines\n"));
619 --sort-output generate sorted output\n"));
621 --sort-by-file sort output by file location\n"));
624 Informative output:\n"));
626 -h, --help display this help and exit\n"));
628 -V, --version output version information and exit\n"));
630 /* TRANSLATORS: The placeholder indicates the bug-reporting address
631 for this package. Please add _another line_ saying
632 "Report translation bugs to <...>\n" with the address for translation
633 bugs (typically your translation team's web or email address). */
634 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
/* Test FILENAME against every item of SLP, using each item as an fnmatch
   pattern with the FNM_PATHNAME flag; an fnmatch result of 0 means that the
   pattern matched.
   NOTE(review): the original comment below is cut off in this view and the
   return statements are not visible -- presumably 1 is returned on the
   first match and 0 when no pattern matches; confirm.  */
642 /* Return 1 if FILENAME is contained in a list of filename patterns,
645 filename_list_match (const string_list_ty *slp, const char *filename)
649 for (j = 0; j < slp->nitems; ++j)
650 if (fnmatch (slp->item[j], filename, FNM_PATHNAME) == 0)
658 /* EINTR handling for close().
659 These functions can return -1/EINTR even though we don't have any
660 signal handlers set up, namely when we get interrupted via SIGSTOP. */
663 nonintr_close (int fd)
/* Keep retrying for as long as the call failed and errno is EINTR.  */
669 while (retval < 0 && errno == EINTR);
/* After this point the name close expands to the wrapper defined above.  */
673 #define close nonintr_close
/* Run the LEN bytes at STR through the matcher of pass GREP_PASS.
   When the pass has at least one pattern, the execute function of its
   matcher is called with the compiled patterns, and the result is true
   unless the reported match offset equals (size_t) -1.
   NOTE(review): the original comment below is cut off in this view and the
   result for a pass without patterns is not visible -- confirm.  */
678 /* Process a string STR of size LEN bytes through grep, and return true
681 is_string_selected (int grep_pass, const char *str, size_t len)
683 const struct grep_task *gt = &grep_task[grep_pass];
685 if (gt->pattern_count > 0)
691 gt->matcher->execute (gt->compiled_patterns, str, len,
693 return (match_offset != (size_t) -1);
700 /* Return true if a message matches, considering only the positive selection
701 criteria and ignoring --invert-match. */
/* The criteria are tried in a fixed order: source file locations first,
   then grep pass 0 (msgctxt), pass 1 (msgid and msgid_plural), pass 2
   (msgstr), pass 3 (translator comments) and pass 4 (extracted comments).
   MP is only read, never modified.  */
703 is_message_selected_no_invert (const message_ty *mp)
710 /* Test whether one of mp->filepos[] is selected. */
711 for (i = 0; i < mp->filepos_count; i++)
712 if (filename_list_match (location_files, mp->filepos[i].file_name))
715 /* Test msgctxt using the --msgctxt arguments. */
716 if (mp->msgctxt != NULL
717 && is_string_selected (0, mp->msgctxt, strlen (mp->msgctxt)))
720 /* Test msgid and msgid_plural using the --msgid arguments. */
721 if (is_string_selected (1, mp->msgid, strlen (mp->msgid)))
723 if (mp->msgid_plural != NULL
724 && is_string_selected (1, mp->msgid_plural, strlen (mp->msgid_plural)))
727 /* Test msgstr using the --msgstr arguments. */
729 msgstr_len = mp->msgstr_len;
730 /* Process each NUL delimited substring separately. */
731 for (p = msgstr; p < msgstr + msgstr_len; )
733 size_t length = strlen (p);
735 if (is_string_selected (2, p, length))
741 /* Test translator comments using the --comment arguments. */
/* The concatenation below is skipped when pass 3 has no patterns or the
   message has no translator comment lines.  */
742 if (grep_task[3].pattern_count > 0
743 && mp->comment != NULL && mp->comment->nitems > 0)
/* Size of the joined text: every comment line plus one extra byte each.  */
752 for (j = 0; j < mp->comment->nitems; j++)
753 length += strlen (mp->comment->item[j]) + 1;
754 total_comment = (char *) xmalloca (length);
/* Copy the comment lines one after another into the buffer.  */
757 for (j = 0; j < mp->comment->nitems; j++)
759 size_t l = strlen (mp->comment->item[j]);
761 memcpy (q, mp->comment->item[j], l);
/* Consistency check: the write pointer must end exactly at the end of the
   buffer.  */
765 if (q != total_comment + length)
768 selected = is_string_selected (3, total_comment, length);
/* Release the buffer obtained from xmalloca.  */
770 freea (total_comment);
776 /* Test extracted comments using the --extracted-comment arguments. */
/* Same procedure as for the translator comments, applied to comment_dot
   and grep pass 4.  */
777 if (grep_task[4].pattern_count > 0
778 && mp->comment_dot != NULL && mp->comment_dot->nitems > 0)
787 for (j = 0; j < mp->comment_dot->nitems; j++)
788 length += strlen (mp->comment_dot->item[j]) + 1;
789 total_comment = (char *) xmalloca (length);
792 for (j = 0; j < mp->comment_dot->nitems; j++)
794 size_t l = strlen (mp->comment_dot->item[j]);
796 memcpy (q, mp->comment_dot->item[j], l);
800 if (q != total_comment + length)
803 selected = is_string_selected (4, total_comment, length);
805 freea (total_comment);
/* Selection predicate handed to message_list_remove_if_not by
   process_message_list.  It starts from the result of
   is_message_selected_no_invert.
   NOTE(review): the test for the header entry and any use of invert_match
   are not visible in this view -- confirm against the full function.  */
815 /* Return true if a message matches. */
817 is_message_selected (const message_ty *mp)
821 /* Always keep the header entry. */
825 result = is_message_selected_no_invert (mp);
/* Filter MLP, the message list that belongs to DOMAIN.
   When DOMAIN is a member of domain_names the list is left as it is;
   otherwise message_list_remove_if_not is called with is_message_selected
   as the predicate.  */
835 process_message_list (const char *domain, message_list_ty *mlp)
837 if (string_list_member (domain_names, domain))
838 /* Keep all the messages in the list. */
841 /* Keep only the selected messages. */
842 message_list_remove_if_not (mlp, is_message_selected);
/* Apply process_message_list to the domain name and message list of every
   item in MDLP.
   NOTE(review): the return statement is not visible in this view; main
   continues with the returned list -- presumably MDLP itself, confirm.  */
846 static msgdomain_list_ty *
847 process_msgdomain_list (msgdomain_list_ty *mdlp)
851 for (k = 0; k < mdlp->nitems; k++)
852 process_message_list (mdlp->item[k]->domain, mdlp->item[k]->messages);