1 /* Extract some translations of a translation catalog.
2 Copyright (C) 2001-2007, 2009-2010, 2012, 2015 Free Software Foundation, Inc.
4 Written by Bruno Haible <haible@clisp.cons.org>, 2001.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
35 #if defined _MSC_VER || defined __MINGW32__
44 #include "error-progname.h"
46 #include "relocatable.h"
49 #include "read-catalog.h"
51 #include "read-properties.h"
52 #include "read-stringtable.h"
53 #include "write-catalog.h"
55 #include "write-properties.h"
56 #include "write-stringtable.h"
59 #include "msgl-charset.h"
63 #include "propername.h"
66 #define _(str) gettext (str)
/* NOTE(review): the declaration that the next comment belongs to is not
   visible in this listing.  The identifier force_po is used further down:
   the force-po row of long_options takes its address, and main passes it to
   msgdomain_list_print.  */
69 /* Force output of PO file even if empty. */
72 /* Output only non-matching messages. */
73 static bool invert_match = false;
75 /* Selected source files.  Filled in main with string_list_append; each
   entry is later used by filename_list_match as an fnmatch pattern against
   the file names recorded in a message.  */
76 static string_list_ty *location_files;
78 /* Selected domain names.  Filled in main with string_list_append;
   process_message_list tests membership with string_list_member.  */
79 static string_list_ty *domain_names;
81 /* Task for each grep pass. */
/* NOTE(review): the opening of struct grep_task and its first members are
   not visible in this listing.  Besides the two members below, main also
   uses the members matcher, pattern_count, patterns and patterns_size.  */
/* Passed to the compile function of the matcher.  */
87 bool case_insensitive;
/* Result of the compile function of the matcher; later handed to its
   execute function by is_string_selected.  */
88 void *compiled_patterns;
/* One task per pass.  The indices used with is_string_selected are:
   0 = msgctxt, 1 = msgid and msgid_plural, 2 = msgstr,
   3 = translator comment, 4 = extracted comment.  */
90 static struct grep_task grep_task[5];
/* Long-option table in the struct option layout used by getopt_long:
   { name, has_arg, flag, val }.  For rows whose flag is NULL, getopt_long
   returns val: either the letter of the equivalent short option or a value
   above CHAR_MAX for options that have no short form.  The force-po row is
   the only one with a non-NULL flag, so getopt_long stores 1 into force_po
   and returns 0 for it.
   NOTE(review): the lines that open and terminate the initializer are not
   visible in this listing, and the call that consumes the table is only
   partly visible in main - presumably it is passed to getopt_long there;
   confirm.
   NOTE(review): add-location is declared with optional_argument here, while
   the short-option string in main lists n followed by a single colon and
   the usage text shows -n without an argument - confirm that this
   difference is intended.  */
93 static const struct option long_options[] =
95 { "add-location", optional_argument, NULL, 'n' },
96 { "color", optional_argument, NULL, CHAR_MAX + 9 },
97 { "comment", no_argument, NULL, 'C' },
98 { "directory", required_argument, NULL, 'D' },
99 { "domain", required_argument, NULL, 'M' },
100 { "escape", no_argument, NULL, CHAR_MAX + 1 },
101 { "extended-regexp", no_argument, NULL, 'E' },
102 { "extracted-comment", no_argument, NULL, 'X' },
103 { "file", required_argument, NULL, 'f' },
104 { "fixed-strings", no_argument, NULL, 'F' },
105 { "force-po", no_argument, &force_po, 1 },
106 { "help", no_argument, NULL, 'h' },
107 { "ignore-case", no_argument, NULL, 'i' },
108 { "indent", no_argument, NULL, CHAR_MAX + 2 },
109 { "invert-match", no_argument, NULL, 'v' },
110 { "location", required_argument, NULL, 'N' },
111 { "msgctxt", no_argument, NULL, 'J' },
112 { "msgid", no_argument, NULL, 'K' },
113 { "msgstr", no_argument, NULL, 'T' },
114 { "no-escape", no_argument, NULL, CHAR_MAX + 3 },
115 { "no-location", no_argument, NULL, CHAR_MAX + 11 },
116 { "no-wrap", no_argument, NULL, CHAR_MAX + 6 },
117 { "output-file", required_argument, NULL, 'o' },
118 { "properties-input", no_argument, NULL, 'P' },
119 { "properties-output", no_argument, NULL, 'p' },
120 { "regexp", required_argument, NULL, 'e' },
121 { "sort-by-file", no_argument, NULL, CHAR_MAX + 4 },
122 { "sort-output", no_argument, NULL, CHAR_MAX + 5 },
123 { "strict", no_argument, NULL, 'S' },
124 { "stringtable-input", no_argument, NULL, CHAR_MAX + 7 },
125 { "stringtable-output", no_argument, NULL, CHAR_MAX + 8 },
126 { "style", required_argument, NULL, CHAR_MAX + 10 },
127 { "version", no_argument, NULL, 'V' },
128 { "width", required_argument, NULL, 'w' },
133 /* Forward declaration of local functions. */
/* no_pass and usage both take an int and return nothing.  When compiling
   with GCC 2.5 or newer, each prototype additionally carries
   __attribute__ ((noreturn)).
   NOTE(review): the lines that close each conditional and terminate each
   prototype are not visible in this listing.  */
134 static void no_pass (int opt)
135 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
136 __attribute__ ((noreturn))
139 static void usage (int status)
140 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
141 __attribute__ ((noreturn))
/* Called by main on the catalog returned by read_catalog_file; main keeps
   using the pointer it returns.  */
144 static msgdomain_list_ty *process_msgdomain_list (msgdomain_list_ty *mdlp);
/* Program entry point.
   NOTE(review): this listing omits many lines of the function (braces, some
   declarations, the labels of the option switch), so the comments added
   here describe only what is visible.
   Visible flow: set the program name, locale and text domains; register
   close_stdout with atexit; allocate the two selection lists and reset the
   five grep tasks; parse the command line with getopt_long; compile the
   patterns of every task that has some; read the catalog with
   read_catalog_file; filter it with process_msgdomain_list; optionally
   sort; write the result with msgdomain_list_print.  */
148 main (int argc, char **argv)
154 const char *input_file;
156 msgdomain_list_ty *result;
157 catalog_input_format_ty input_syntax = &input_format_po;
158 catalog_output_format_ty output_syntax = &output_format_po;
159 bool sort_by_filepos = false;
160 bool sort_by_msgid = false;
163 /* Set program name for messages. */
164 set_program_name (argv[0]);
165 error_print_progname = maybe_print_progname;
167 #ifdef HAVE_SETLOCALE
168 /* Set locale via LC_ALL. */
169 setlocale (LC_ALL, "");
172 /* Set the text message domain. */
173 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
174 bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
175 textdomain (PACKAGE);
177 /* Ensure that write errors on stdout are detected. */
178 atexit (close_stdout);
180 /* Set default values for variables. */
186 location_files = string_list_alloc ();
187 domain_names = string_list_alloc ();
/* Every pass starts with the matcher_grep matcher, a pattern count and
   pattern size of zero, and case-sensitive matching.  */
189 for (i = 0; i < 5; i++)
191 struct grep_task *gt = &grep_task[i];
193 gt->matcher = &matcher_grep;
194 gt->pattern_count = 0;
196 gt->patterns_size = 0;
197 gt->case_insensitive = false;
/* NOTE(review): the short-option string lists n followed by a single colon
   (a required argument in the getopt convention), while the long_options
   row for add-location uses optional_argument and the usage text shows -n
   without an argument - confirm that this difference is intended.  */
200 while ((opt = getopt_long (argc, argv, "CD:e:Ef:FhiJKM:n:N:o:pPTvVw:X",
205 case '\0': /* Long option. */
213 dir_list_append (optarg);
/* grep_pass selects the task that the pattern options below apply to.
   NOTE(review): the code that assigns grep_pass is not visible in this
   listing.  */
220 struct grep_task *gt = &grep_task[grep_pass];
221 /* Append optarg and a newline to gt->patterns. */
222 size_t len = strlen (optarg);
224 (char *) xrealloc (gt->patterns, gt->patterns_size + len + 1);
225 memcpy (gt->patterns + gt->patterns_size, optarg, len);
226 gt->patterns_size += len;
227 *(gt->patterns + gt->patterns_size) = '\n';
228 gt->patterns_size += 1;
236 grep_task[grep_pass].matcher = &matcher_egrep;
243 struct grep_task *gt = &grep_task[grep_pass];
244 /* Append the contents of the specified file to gt->patterns. */
245 FILE *fp = fopen (optarg, "r");
248 error (EXIT_FAILURE, errno, _("\
249 error while opening \"%s\" for reading"), optarg);
254 size_t count = fread (buf, 1, sizeof buf, fp);
259 error (EXIT_FAILURE, errno, _("\
260 error while reading \"%s\""), optarg);
266 (char *) xrealloc (gt->patterns, gt->patterns_size + count);
267 memcpy (gt->patterns + gt->patterns_size, buf, count);
268 gt->patterns_size += count;
271 /* Append a final newline if file ended in a non-newline. */
272 if (gt->patterns_size > 0
273 && *(gt->patterns + gt->patterns_size - 1) != '\n')
276 (char *) xrealloc (gt->patterns, gt->patterns_size + 1);
277 *(gt->patterns + gt->patterns_size) = '\n';
278 gt->patterns_size += 1;
289 grep_task[grep_pass].matcher = &matcher_fgrep;
299 grep_task[grep_pass].case_insensitive = true;
311 string_list_append (domain_names, optarg);
315 if (handle_filepos_comment_option (optarg))
316 usage (EXIT_FAILURE);
320 string_list_append (location_files, optarg);
324 output_file = optarg;
328 output_syntax = &output_format_properties;
332 input_syntax = &input_format_properties;
336 message_print_style_uniforum ();
/* Base-10 conversion of the option argument, which is then used as the page
   width.  NOTE(review): any validation of endp is not visible in this
   listing.  */
355 value = strtol (optarg, &endp, 10);
357 message_page_width_set (value);
366 message_print_style_escape (true);
370 message_print_style_indent ();
374 message_print_style_escape (false);
378 sort_by_filepos = true;
382 sort_by_msgid = true;
385 case CHAR_MAX + 6: /* --no-wrap */
386 message_page_width_ignore ();
389 case CHAR_MAX + 7: /* --stringtable-input */
390 input_syntax = &input_format_stringtable;
393 case CHAR_MAX + 8: /* --stringtable-output */
394 output_syntax = &output_format_stringtable;
397 case CHAR_MAX + 9: /* --color */
398 if (handle_color_option (optarg) || color_test_mode)
399 usage (EXIT_FAILURE);
402 case CHAR_MAX + 10: /* --style */
403 handle_style_option (optarg);
406 case CHAR_MAX + 11: /* --no-location */
407 message_print_style_filepos (filepos_comment_none);
411 usage (EXIT_FAILURE);
415 /* Version information is requested. */
418 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
419 /* xgettext: no-wrap */
420 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
421 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
422 This is free software: you are free to change and redistribute it.\n\
423 There is NO WARRANTY, to the extent permitted by law.\n\
426 printf (_("Written by %s.\n"), proper_name ("Bruno Haible"));
430 /* Help is requested. */
432 usage (EXIT_SUCCESS);
434 /* Test whether we have an .po file name as argument. */
437 else if (optind + 1 == argc)
438 input_file = argv[optind];
441 error (EXIT_SUCCESS, 0, _("at most one input file allowed"));
442 usage (EXIT_FAILURE);
445 /* Verify selected options. */
446 if (sort_by_msgid && sort_by_filepos)
447 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
448 "--sort-output", "--sort-by-file");
450 /* Compile the patterns. */
451 for (grep_pass = 0; grep_pass < 5; grep_pass++)
453 struct grep_task *gt = &grep_task[grep_pass];
455 if (gt->pattern_count > 0)
457 if (gt->patterns_size > 0)
459 /* Strip trailing newline. */
460 assert (gt->patterns[gt->patterns_size - 1] == '\n');
463 gt->compiled_patterns =
464 gt->matcher->compile (gt->patterns, gt->patterns_size,
465 gt->case_insensitive, false, false, '\n');
469 /* Read input file. */
470 result = read_catalog_file (input_file, input_syntax);
472 if (grep_task[0].pattern_count > 0
473 || grep_task[1].pattern_count > 0
474 || grep_task[2].pattern_count > 0
475 || grep_task[3].pattern_count > 0
476 || grep_task[4].pattern_count > 0)
478 /* Warn if the current locale is not suitable for this PO file. */
479 compare_po_locale_charsets (result);
482 /* Select the messages. */
483 result = process_msgdomain_list (result);
485 /* Sort the results. */
487 msgdomain_list_sort_by_filepos (result);
488 else if (sort_by_msgid)
489 msgdomain_list_sort_by_msgid (result);
491 /* Write the merged message list out. */
492 msgdomain_list_print (result, output_file, output_syntax, force_po, false);
/* NOTE(review): the header of the function containing these statements is
   not visible in this listing; given the forward declaration of
   no_pass (int opt), this is presumably its body - confirm.
   The visible code reports that an option was used before any of J, K, T, C
   or X was given, and then calls usage with EXIT_FAILURE.  The first
   argument of error is EXIT_SUCCESS, which presumably keeps error from
   exiting by itself so that usage still runs - confirm against the
   documentation of error.  */
501 error (EXIT_SUCCESS, 0,
502 _("option '%c' cannot be used before 'J' or 'K' or 'T' or 'C' or 'X' has been specified"),
504 usage (EXIT_FAILURE);
508 /* Display usage information and exit.
   When STATUS is not EXIT_SUCCESS, a one-line hint pointing at the --help
   option is written to stderr.  The remaining lines are fragments of the
   translated help text.
   NOTE(review): the function header, the branch structure and the calls
   that print the help fragments are not visible in this listing.  */
512 if (status != EXIT_SUCCESS)
513 fprintf (stderr, _("Try '%s --help' for more information.\n"),
518 Usage: %s [OPTION] [INPUTFILE]\n\
521 /* xgettext: no-wrap */
523 Extracts all messages of a translation catalog that match a given pattern\n\
524 or belong to some given source files.\n\
528 Mandatory arguments to long options are mandatory for short options too.\n"));
531 Input file location:\n"));
533 INPUTFILE input PO file\n"));
535 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
537 If no input file is given or if it is -, standard input is read.\n"));
540 Output file location:\n"));
542 -o, --output-file=FILE write output to specified file\n"));
544 The results are written to standard output if no output file is specified\n\
547 /* xgettext: no-wrap */
549 Message selection:\n\
550 [-N SOURCEFILE]... [-M DOMAINNAME]...\n\
551 [-J MSGCTXT-PATTERN] [-K MSGID-PATTERN] [-T MSGSTR-PATTERN]\n\
552 [-C COMMENT-PATTERN] [-X EXTRACTED-COMMENT-PATTERN]\n\
553 A message is selected if it comes from one of the specified source files,\n\
554 or if it comes from one of the specified domains,\n\
555 or if -J is given and its context (msgctxt) matches MSGCTXT-PATTERN,\n\
556 or if -K is given and its key (msgid or msgid_plural) matches MSGID-PATTERN,\n\
557 or if -T is given and its translation (msgstr) matches MSGSTR-PATTERN,\n\
558 or if -C is given and the translator's comment matches COMMENT-PATTERN,\n\
559 or if -X is given and the extracted comment matches EXTRACTED-COMMENT-PATTERN.\n\
561 When more than one selection criterion is specified, the set of selected\n\
562 messages is the union of the selected messages of each criterion.\n\
564 MSGCTXT-PATTERN or MSGID-PATTERN or MSGSTR-PATTERN or COMMENT-PATTERN or\n\
565 EXTRACTED-COMMENT-PATTERN syntax:\n\
566 [-E | -F] [-e PATTERN | -f FILE]...\n\
567 PATTERNs are basic regular expressions by default, or extended regular\n\
568 expressions if -E is given, or fixed strings if -F is given.\n\
570 -N, --location=SOURCEFILE select messages extracted from SOURCEFILE\n\
571 -M, --domain=DOMAINNAME select messages belonging to domain DOMAINNAME\n\
572 -J, --msgctxt start of patterns for the msgctxt\n\
573 -K, --msgid start of patterns for the msgid\n\
574 -T, --msgstr start of patterns for the msgstr\n\
575 -C, --comment start of patterns for the translator's comment\n\
576 -X, --extracted-comment start of patterns for the extracted comment\n\
577 -E, --extended-regexp PATTERN is an extended regular expression\n\
578 -F, --fixed-strings PATTERN is a set of newline-separated strings\n\
579 -e, --regexp=PATTERN use PATTERN as a regular expression\n\
580 -f, --file=FILE obtain PATTERN from FILE\n\
581 -i, --ignore-case ignore case distinctions\n\
582 -v, --invert-match output only the messages that do not match any\n\
583 selection criterion\n\
587 Input file syntax:\n"));
589 -P, --properties-input input file is in Java .properties syntax\n"));
591 --stringtable-input input file is in NeXTstep/GNUstep .strings syntax\n"));
594 Output details:\n"));
596 --color use colors and other text attributes always\n\
597 --color=WHEN use colors and other text attributes if WHEN.\n\
598 WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
600 --style=STYLEFILE specify CSS style rule file for --color\n"));
602 --no-escape do not use C escapes in output (default)\n"));
604 --escape use C escapes in output, no extended chars\n"));
606 --force-po write PO file even if empty\n"));
608 --indent indented output style\n"));
610 --no-location suppress '#: filename:line' lines\n"));
612 -n, --add-location preserve '#: filename:line' lines (default)\n"));
614 --strict strict Uniforum output style\n"));
616 -p, --properties-output write out a Java .properties file\n"));
618 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
620 -w, --width=NUMBER set output page width\n"));
622 --no-wrap do not break long message lines, longer than\n\
623 the output page width, into several lines\n"));
625 --sort-output generate sorted output\n"));
627 --sort-by-file sort output by file location\n"));
630 Informative output:\n"));
632 -h, --help display this help and exit\n"));
634 -V, --version output version information and exit\n"));
636 /* TRANSLATORS: The placeholder indicates the bug-reporting address
637 for this package. Please add _another line_ saying
638 "Report translation bugs to <...>\n" with the address for translation
639 bugs (typically your translation team's web or email address). */
640 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
648 /* Return 1 if FILENAME is contained in a list of filename patterns, 0 otherwise. */
/* Each of the slp->nitems entries of SLP is used in turn as the pattern
   argument of fnmatch, with FILENAME as the string and FNM_PATHNAME as the
   flags; a result of 0 from fnmatch is the success case.
   NOTE(review): the return type and the return statements are not visible
   in this listing.  */
651 filename_list_match (const string_list_ty *slp, const char *filename)
655 for (j = 0; j < slp->nitems; ++j)
656 if (fnmatch (slp->item[j], filename, FNM_PATHNAME) == 0)
664 /* EINTR handling for close().
665 These functions can return -1/EINTR even though we don't have any
666 signal handlers set up, namely when we get interrupted via SIGSTOP. */
/* NOTE(review): only the header and the loop condition are visible in this
   listing.  The loop repeats while retval is negative and errno equals
   EINTR; the statement that assigns retval is presumably the close call on
   FD - confirm.  */
669 nonintr_close (int fd)
675 while (retval < 0 && errno == EINTR);
/* From this point on, the name close expands to nonintr_close.  */
679 #define close nonintr_close
684 /* Process a string STR of size LEN bytes through grep, and return true if it matches. */
/* GREP_PASS is the index into grep_task.  STR and LEN are handed to the
   execute function of that task's matcher, together with the patterns that
   were compiled for the task.  */
687 is_string_selected (int grep_pass, const char *str, size_t len)
689 const struct grep_task *gt = &grep_task[grep_pass];
/* The matcher is consulted only when the pass has at least one pattern.
   NOTE(review): the result for a pass without patterns, and the remaining
   arguments of the execute call, are not visible in this listing.  */
691 if (gt->pattern_count > 0)
697 gt->matcher->execute (gt->compiled_patterns, str, len,
/* A match offset of (size_t) -1 is the no-match value.  */
699 return (match_offset != (size_t) -1);
706 /* Return true if a message matches, considering only the positive selection
707 criteria and ignoring --invert-match. */
709 is_message_selected_no_invert (const message_ty *mp)
/* The tests appear in this order: source-file location, msgctxt (pass 0),
   msgid and msgid_plural (pass 1), each NUL-delimited piece of msgstr
   (pass 2), translator comments (pass 3), extracted comments (pass 4).
   NOTE(review): the return statements and several declarations are not
   visible in this listing.  */
716 /* Test whether one of mp->filepos[] is selected. */
717 for (i = 0; i < mp->filepos_count; i++)
718 if (filename_list_match (location_files, mp->filepos[i].file_name))
721 /* Test msgctxt using the --msgctxt arguments. */
722 if (mp->msgctxt != NULL
723 && is_string_selected (0, mp->msgctxt, strlen (mp->msgctxt)))
726 /* Test msgid and msgid_plural using the --msgid arguments. */
727 if (is_string_selected (1, mp->msgid, strlen (mp->msgid)))
729 if (mp->msgid_plural != NULL
730 && is_string_selected (1, mp->msgid_plural, strlen (mp->msgid_plural)))
733 /* Test msgstr using the --msgstr arguments. */
735 msgstr_len = mp->msgstr_len;
736 /* Process each NUL delimited substring separately. */
737 for (p = msgstr; p < msgstr + msgstr_len; )
739 size_t length = strlen (p);
741 if (is_string_selected (2, p, length))
747 /* Test translator comments using the --comment arguments. */
/* The comment lines are only joined when pass 3 has patterns and the
   message has at least one comment line.  */
748 if (grep_task[3].pattern_count > 0
749 && mp->comment != NULL && mp->comment->nitems > 0)
/* First loop: total size, counting one extra byte per comment line.
   NOTE(review): the extra byte is presumably a separator written after each
   line; that store is not visible in this listing.  */
758 for (j = 0; j < mp->comment->nitems; j++)
759 length += strlen (mp->comment->item[j]) + 1;
760 total_comment = (char *) xmalloca (length);
/* Second loop: copy every comment line into the buffer at cursor q.  */
763 for (j = 0; j < mp->comment->nitems; j++)
765 size_t l = strlen (mp->comment->item[j]);
767 memcpy (q, mp->comment->item[j], l);
/* Consistency check: the cursor must end exactly at the computed size.  */
771 if (q != total_comment + length)
774 selected = is_string_selected (3, total_comment, length);
776 freea (total_comment);
782 /* Test extracted comments using the --extracted-comment arguments. */
/* Same joining scheme as for the translator comments, applied to
   mp->comment_dot and matched with pass 4.  */
783 if (grep_task[4].pattern_count > 0
784 && mp->comment_dot != NULL && mp->comment_dot->nitems > 0)
793 for (j = 0; j < mp->comment_dot->nitems; j++)
794 length += strlen (mp->comment_dot->item[j]) + 1;
795 total_comment = (char *) xmalloca (length);
798 for (j = 0; j < mp->comment_dot->nitems; j++)
800 size_t l = strlen (mp->comment_dot->item[j]);
802 memcpy (q, mp->comment_dot->item[j], l);
806 if (q != total_comment + length)
809 selected = is_string_selected (4, total_comment, length);
811 freea (total_comment);
821 /* Return true if a message matches.
   process_message_list passes this function to message_list_remove_if_not
   as the predicate.  */
823 is_message_selected (const message_ty *mp)
/* NOTE(review): the test that recognises the header entry is not visible in
   this listing.  */
827 /* Always keep the header entry. */
/* Start from the positive selection criteria.  NOTE(review): the place
   where invert_match is applied to result is not visible here.  */
831 result = is_message_selected_no_invert (mp);
/* Filter the message list MLP that belongs to DOMAIN.
   When DOMAIN is a member of domain_names, the whole list is kept;
   message_list_remove_if_not with is_message_selected as the predicate is
   what removes the unselected messages.
   NOTE(review): the return type and the lines between the two branches
   (presumably an empty statement and an else) are not visible in this
   listing - confirm.  */
841 process_message_list (const char *domain, message_list_ty *mlp)
843 if (string_list_member (domain_names, domain))
844 /* Keep all the messages in the list. */
847 /* Keep only the selected messages. */
848 message_list_remove_if_not (mlp, is_message_selected);
852 static msgdomain_list_ty *
853 process_msgdomain_list (msgdomain_list_ty *mdlp)
857 for (k = 0; k < mdlp->nitems; k++)
858 process_message_list (mdlp->item[k]->domain, mdlp->item[k]->messages);