1 /* Extracts strings from C source file to Uniforum style .po file.
2 Copyright (C) 1995-1998, 2000-2015 Free Software Foundation,
4 Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
39 #include "file-list.h"
42 #include "error-progname.h"
44 #include "relocatable.h"
47 #include "xvasprintf.h"
54 #include "concat-filename.h"
55 #include "c-strcase.h"
56 #include "open-catalog.h"
57 #include "read-catalog-abstract.h"
60 #include "po-charset.h"
61 #include "msgl-iconv.h"
62 #include "msgl-ascii.h"
63 #include "msgl-check.h"
64 #include "po-xerror.h"
66 #include "write-catalog.h"
68 #include "write-properties.h"
69 #include "write-stringtable.h"
72 #include "propername.h"
76 #include "locating-rule.h"
79 /* A convenience macro. I don't like writing gettext() every time. */
80 #define _(str) gettext (str)
91 #include "x-smalltalk.h"
93 #include "x-properties.h"
95 #include "x-appdata.h"
101 #include "x-stringtable.h"
105 #include "x-javascript.h"
107 #include "x-gsettings.h"
108 #include "x-desktop.h"
/* SIZEOF (a) is the number of elements of the array A, and ENDOF (a) points
   just past its last element.  Both are computed with sizeof, so A must be a
   true array object, not a pointer. */
111 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
112 #define ENDOF(a) ((a) + SIZEOF(a))
/* File-scope state.  Most of these variables are filled in by the option
   parsing loop in main. */
115 /* If nonzero add all comments immediately preceding one of the keywords. */
116 static bool add_all_comments = false;
118 /* Tag used in comment of prevailing domain. */
119 static char *comment_tag;
121 /* Name of default domain file. If not set defaults to messages.po. */
122 static const char *default_domain;
124 /* If called with --debug option the output reflects whether format
125 string recognition is done automatically or forced by the user. */
128 /* Content of .po files with symbols to be excluded. */
129 message_list_ty *exclude;
131 /* Force output of PO file even if empty. */
134 /* Copyright holder of the output file and the translations. */
135 static const char *copyright_holder = "THE PACKAGE'S COPYRIGHT HOLDER";
/* Package name, set by --package-name; NULL when the option is not given. */
138 static const char *package_name = NULL;
140 /* Package version. */
141 static const char *package_version = NULL;
143 /* Email address or URL for reports of bugs in msgids. */
144 static const char *msgid_bugs_address = NULL;
146 /* String used as prefix for msgstr. */
147 static const char *msgstr_prefix;
149 /* String used as suffix for msgstr. */
150 static const char *msgstr_suffix;
152 /* Directory in which output files are created. */
153 static char *output_dir;
155 /* The output syntax: .pot or .properties or .strings. */
156 static catalog_output_format_ty output_syntax = &output_format_po;
158 /* If nonzero omit header with information about this run. */
/* Declared int because long_options stores 1 into it directly for
   --omit-header. */
159 int xgettext_omit_header;
161 /* Table of flag_context_list_ty tables. */
162 static flag_context_list_table_ty flag_table_c;
163 static flag_context_list_table_ty flag_table_cxx_qt;
164 static flag_context_list_table_ty flag_table_cxx_kde;
165 static flag_context_list_table_ty flag_table_cxx_boost;
166 static flag_context_list_table_ty flag_table_objc;
167 static flag_context_list_table_ty flag_table_gcc_internal;
168 static flag_context_list_table_ty flag_table_sh;
169 static flag_context_list_table_ty flag_table_python;
170 static flag_context_list_table_ty flag_table_lisp;
171 static flag_context_list_table_ty flag_table_elisp;
172 static flag_context_list_table_ty flag_table_librep;
173 static flag_context_list_table_ty flag_table_scheme;
174 static flag_context_list_table_ty flag_table_java;
175 static flag_context_list_table_ty flag_table_csharp;
176 static flag_context_list_table_ty flag_table_awk;
177 static flag_context_list_table_ty flag_table_ycp;
178 static flag_context_list_table_ty flag_table_tcl;
179 static flag_context_list_table_ty flag_table_perl;
180 static flag_context_list_table_ty flag_table_php;
181 static flag_context_list_table_ty flag_table_lua;
182 static flag_context_list_table_ty flag_table_javascript;
183 static flag_context_list_table_ty flag_table_vala;
185 /* If true, recognize Qt format strings. */
186 static bool recognize_format_qt;
188 /* If true, recognize KDE format strings. */
189 static bool recognize_format_kde;
191 /* If true, recognize Boost format strings. */
192 static bool recognize_format_boost;
194 /* Syntax checks enabled by default. */
/* Individual entries are switched to `yes' by the --check option. */
195 static enum is_syntax_check default_syntax_check[NSYNTAXCHECKS];
197 /* Canonicalized encoding name for all input files. */
198 const char *xgettext_global_source_encoding;
201 /* Converter from xgettext_global_source_encoding to UTF-8 (except from
202 ASCII or UTF-8, when this conversion is a no-op). */
203 iconv_t xgettext_global_source_iconv;
206 /* Canonicalized encoding name for the current input file. */
207 const char *xgettext_current_source_encoding;
210 /* Converter from xgettext_current_source_encoding to UTF-8 (except from
211 ASCII or UTF-8, when this conversion is a no-op). */
212 iconv_t xgettext_current_source_iconv;
/* Locating rules that main loads from the "its" directories; they are
   consulted to find an ITS rule file for an input file whose language could
   not be derived from its extension. */
215 static locating_rule_list_ty *its_locating_rules;
/* ITS rule text with a translateRule that marks the root element ("/*") as
   not translatable.  main adds it to the rule list when a language was
   given explicitly. */
217 #define ITS_ROOT_UNTRANSLATABLE \
218 "<its:rules xmlns:its=\"http://www.w3.org/2005/11/its\"" \
219 " version=\"2.0\">" \
220 " <its:translateRule selector=\"/*\" translate=\"no\"/>" \
223 /* If nonzero add comments used by itstool. */
224 static bool add_itstool_comments = false;
/* Long options passed to getopt_long in main.  Entries with a NULL flag
   pointer make getopt_long return the last field: either the equivalent
   short option letter or, for options without a short form, a value of the
   form CHAR_MAX + n.  Entries with a flag pointer (--debug, --force-po,
   --omit-header) store 1 into the named variable instead. */
227 static const struct option long_options[] =
229 { "add-comments", optional_argument, NULL, 'c' },
230 { "add-location", optional_argument, NULL, 'n' },
231 { "boost", no_argument, NULL, CHAR_MAX + 11 },
232 { "c++", no_argument, NULL, 'C' },
233 { "check", required_argument, NULL, CHAR_MAX + 17 },
234 { "color", optional_argument, NULL, CHAR_MAX + 14 },
235 { "copyright-holder", required_argument, NULL, CHAR_MAX + 1 },
236 { "debug", no_argument, &do_debug, 1 },
237 { "default-domain", required_argument, NULL, 'd' },
238 { "directory", required_argument, NULL, 'D' },
239 { "escape", no_argument, NULL, 'E' },
240 { "exclude-file", required_argument, NULL, 'x' },
241 { "extract-all", no_argument, NULL, 'a' },
242 { "files-from", required_argument, NULL, 'f' },
243 { "flag", required_argument, NULL, CHAR_MAX + 8 },
244 { "force-po", no_argument, &force_po, 1 },
245 { "foreign-user", no_argument, NULL, CHAR_MAX + 2 },
246 { "from-code", required_argument, NULL, CHAR_MAX + 3 },
247 { "help", no_argument, NULL, 'h' },
248 { "indent", no_argument, NULL, 'i' },
249 { "its", required_argument, NULL, CHAR_MAX + 20 },
250 { "itstool", no_argument, NULL, CHAR_MAX + 19 },
251 { "join-existing", no_argument, NULL, 'j' },
252 { "kde", no_argument, NULL, CHAR_MAX + 10 },
253 { "keyword", optional_argument, NULL, 'k' },
254 { "language", required_argument, NULL, 'L' },
255 { "msgid-bugs-address", required_argument, NULL, CHAR_MAX + 5 },
256 { "msgstr-prefix", optional_argument, NULL, 'm' },
257 { "msgstr-suffix", optional_argument, NULL, 'M' },
258 { "no-escape", no_argument, NULL, 'e' },
259 { "no-location", no_argument, NULL, CHAR_MAX + 16 },
260 { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
261 { "omit-header", no_argument, &xgettext_omit_header, 1 },
262 { "output", required_argument, NULL, 'o' },
263 { "output-dir", required_argument, NULL, 'p' },
264 { "package-name", required_argument, NULL, CHAR_MAX + 12 },
265 { "package-version", required_argument, NULL, CHAR_MAX + 13 },
266 { "properties-output", no_argument, NULL, CHAR_MAX + 6 },
267 { "qt", no_argument, NULL, CHAR_MAX + 9 },
268 { "sentence-end", required_argument, NULL, CHAR_MAX + 18 },
269 { "sort-by-file", no_argument, NULL, 'F' },
270 { "sort-output", no_argument, NULL, 's' },
271 { "strict", no_argument, NULL, 'S' },
272 { "string-limit", required_argument, NULL, 'l' },
273 { "stringtable-output", no_argument, NULL, CHAR_MAX + 7 },
274 { "style", required_argument, NULL, CHAR_MAX + 15 },
275 { "trigraphs", no_argument, NULL, 'T' },
276 { "version", no_argument, NULL, 'V' },
277 { "width", required_argument, NULL, 'w', },
282 /* The extractors must all be functions returning void. They take the
input stream together with its real and logical file names, a flag context
list table, and one message domain list argument in which to add the
messages. */
285 typedef void (*extractor_func) (FILE *fp, const char *real_filename,
286 const char *logical_filename,
287 flag_context_list_table_ty *flag_table,
288 msgdomain_list_ty *mdlp);
290 typedef struct extractor_ty extractor_ty;
/* Members of struct extractor_ty visible in this listing: the flag table
   handed to the extractor, up to three format string parsers, and a literal
   string parser. */
294 flag_context_list_table_ty *flag_table;
295 struct formatstring_parser *formatstring_parser1;
296 struct formatstring_parser *formatstring_parser2;
297 struct formatstring_parser *formatstring_parser3;
298 struct literalstring_parser *literalstring_parser;
302 /* Forward declaration of local functions. */
303 static void usage (int status)
/* The noreturn attribute is only applied for GCC 2.5 and newer. */
304 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ > 4) || __GNUC__ > 2)
305 __attribute__ ((noreturn))
308 static void read_exclusion_file (char *file_name);
/* extract_from_file runs a language extractor over one input file, while
   extract_from_xml_file applies a list of ITS rules instead; both add the
   messages they find to MDLP. */
309 static void extract_from_file (const char *file_name, extractor_ty extractor,
310 msgdomain_list_ty *mdlp);
311 static void extract_from_xml_file (const char *file_name,
312 its_rule_list_ty *rules,
313 msgdomain_list_ty *mdlp);
314 static message_ty *construct_header (void);
315 static void finalize_header (msgdomain_list_ty *mdlp);
316 static extractor_ty language_to_extractor (const char *name);
317 static const char *extension_to_language (const char *extension);
/* Program entry point.  In order, it:
   - sets the program name, locale and text domains, and initializes the
     default domain, the source encoding and the per-language flag tables;
   - parses the command line with getopt_long, then handles --version and
     --help;
   - rejects contradictory option combinations and a missing input file;
   - loads the ITS locating rules from the "its" data directories;
   - derives the output file name and the list of input files;
   - opens an iconv converter when --from-code is neither ASCII nor UTF-8;
   - allocates the message domain list, adds a header unless it is omitted,
     and with --join-existing reads the old output file first;
   - extracts from every input file, choosing the extractor from -L, --its,
     the file name extension or the ITS locating rules;
   - finalizes the header, sorts, runs the syntax checks, writes the
     catalog and exits.
   NOTE(review): this listing is incomplete (braces, break statements and
   some declarations are not shown); only lines visible here were changed. */
321 main (int argc, char *argv[])
324 bool do_help = false;
325 bool do_version = false;
326 msgdomain_list_ty *mdlp;
327 bool join_existing = false;
328 bool no_default_keywords = false;
329 bool some_additional_keywords = false;
330 bool sort_by_msgid = false;
331 bool sort_by_filepos = false;
332 char *its_dirs[2] = { NULL, NULL };
333 char *explicit_its_filename = NULL;
334 const char *file_name;
335 const char *files_from = NULL;
336 string_list_ty *file_list;
337 char *output_file = NULL;
338 const char *language = NULL;
339 extractor_ty extractor = { NULL, NULL, NULL, NULL };
343 /* Set program name for messages. */
344 set_program_name (argv[0]);
345 error_print_progname = maybe_print_progname;
347 #ifdef HAVE_SETLOCALE
348 /* Set locale via LC_ALL. */
349 setlocale (LC_ALL, "");
352 /* Set the text message domain. */
353 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
354 bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
355 textdomain (PACKAGE);
357 /* Ensure that write errors on stdout are detected. */
358 atexit (close_stdout);
360 /* Set initial value of variables. */
361 default_domain = MESSAGE_DOMAIN_DEFAULT;
362 xgettext_global_source_encoding = po_charset_ascii;
363 init_flag_table_c ();
364 init_flag_table_objc ();
365 init_flag_table_gcc_internal ();
366 init_flag_table_kde ();
367 init_flag_table_sh ();
368 init_flag_table_python ();
369 init_flag_table_lisp ();
370 init_flag_table_elisp ();
371 init_flag_table_librep ();
372 init_flag_table_scheme ();
373 init_flag_table_java ();
374 init_flag_table_csharp ();
375 init_flag_table_awk ();
376 init_flag_table_ycp ();
377 init_flag_table_tcl ();
378 init_flag_table_perl ();
379 init_flag_table_php ();
380 init_flag_table_lua ();
381 init_flag_table_javascript ();
382 init_flag_table_vala ();
384 while ((optchar = getopt_long (argc, argv,
385 "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:W:x:",
386 long_options, NULL)) != EOF)
389 case '\0': /* Long option. */
395 x_python_extract_all ();
396 x_lisp_extract_all ();
397 x_elisp_extract_all ();
398 x_librep_extract_all ();
399 x_scheme_extract_all ();
400 x_java_extract_all ();
401 x_csharp_extract_all ();
402 x_awk_extract_all ();
403 x_tcl_extract_all ();
404 x_perl_extract_all ();
405 x_php_extract_all ();
406 x_lua_extract_all ();
407 x_javascript_extract_all ();
408 x_vala_extract_all ();
414 add_all_comments = true;
419 add_all_comments = false;
420 comment_tag = optarg;
421 /* We ignore leading white space. */
422 while (isspace ((unsigned char) *comment_tag))
432 default_domain = optarg;
436 dir_list_append (optarg);
440 message_print_style_escape (false);
444 message_print_style_escape (true);
452 sort_by_filepos = true;
460 message_print_style_indent ();
464 join_existing = true;
468 if (optarg != NULL && *optarg == '\0')
469 /* Make "--keyword=" work like "--keyword" and "-k". */
471 x_c_keyword (optarg);
472 x_objc_keyword (optarg);
473 x_sh_keyword (optarg);
474 x_python_keyword (optarg);
475 x_lisp_keyword (optarg);
476 x_elisp_keyword (optarg);
477 x_librep_keyword (optarg);
478 x_scheme_keyword (optarg);
479 x_java_keyword (optarg);
480 x_csharp_keyword (optarg);
481 x_awk_keyword (optarg);
482 x_tcl_keyword (optarg);
483 x_perl_keyword (optarg);
484 x_php_keyword (optarg);
485 x_lua_keyword (optarg);
486 x_javascript_keyword (optarg);
487 x_vala_keyword (optarg);
488 x_desktop_keyword (optarg);
490 no_default_keywords = true;
492 some_additional_keywords = true;
496 /* Accepted for backward compatibility with 0.10.35. */
504 /* -m takes an optional argument. If none is given "" is assumed. */
505 msgstr_prefix = optarg == NULL ? "" : optarg;
509 /* -M takes an optional argument. If none is given "" is assumed. */
510 msgstr_suffix = optarg == NULL ? "" : optarg;
514 if (handle_filepos_comment_option (optarg))
515 usage (EXIT_FAILURE);
519 output_file = optarg;
524 size_t len = strlen (optarg);
526 if (output_dir != NULL)
/* An empty argument (--output-dir= or -p "") must not be indexed at
   len - 1, which would read before the start of the string.  It is kept
   as an empty prefix; appending '/' would turn it into the root
   directory. */
529 if (len == 0 || optarg[len - 1] == '/')
530 output_dir = xstrdup (optarg);
532 output_dir = xasprintf ("%s/", optarg);
537 sort_by_msgid = true;
541 message_print_style_uniforum ();
556 value = strtol (optarg, &endp, 10);
558 message_page_width_set (value);
563 read_exclusion_file (optarg);
566 case CHAR_MAX + 1: /* --copyright-holder */
567 copyright_holder = optarg;
570 case CHAR_MAX + 2: /* --foreign-user */
571 copyright_holder = "";
574 case CHAR_MAX + 3: /* --from-code */
575 xgettext_global_source_encoding = po_charset_canonicalize (optarg);
576 if (xgettext_global_source_encoding == NULL)
578 multiline_warning (xasprintf (_("warning: ")),
580 '%s' is not a valid encoding name. Using ASCII as fallback.\n"),
582 xgettext_global_source_encoding = po_charset_ascii;
586 case CHAR_MAX + 4: /* --no-wrap */
587 message_page_width_ignore ();
590 case CHAR_MAX + 5: /* --msgid-bugs-address */
591 msgid_bugs_address = optarg;
594 case CHAR_MAX + 6: /* --properties-output */
595 output_syntax = &output_format_properties;
598 case CHAR_MAX + 7: /* --stringtable-output */
599 output_syntax = &output_format_stringtable;
602 case CHAR_MAX + 8: /* --flag */
603 xgettext_record_flag (optarg);
606 case CHAR_MAX + 9: /* --qt */
607 recognize_format_qt = true;
610 case CHAR_MAX + 10: /* --kde */
611 recognize_format_kde = true;
612 activate_additional_keywords_kde ();
615 case CHAR_MAX + 11: /* --boost */
616 recognize_format_boost = true;
619 case CHAR_MAX + 12: /* --package-name */
620 package_name = optarg;
623 case CHAR_MAX + 13: /* --package-version */
624 package_version = optarg;
627 case CHAR_MAX + 14: /* --color */
628 if (handle_color_option (optarg) || color_test_mode)
629 usage (EXIT_FAILURE);
632 case CHAR_MAX + 15: /* --style */
633 handle_style_option (optarg);
636 case CHAR_MAX + 16: /* --no-location */
637 message_print_style_filepos (filepos_comment_none);
640 case CHAR_MAX + 17: /* --check */
641 if (strcmp (optarg, "ellipsis-unicode") == 0)
642 default_syntax_check[sc_ellipsis_unicode] = yes;
643 else if (strcmp (optarg, "space-ellipsis") == 0)
644 default_syntax_check[sc_space_ellipsis] = yes;
645 else if (strcmp (optarg, "quote-unicode") == 0)
646 default_syntax_check[sc_quote_unicode] = yes;
648 error (EXIT_FAILURE, 0, _("syntax check '%s' unknown"), optarg);
651 case CHAR_MAX + 18: /* --sentence-end */
652 if (strcmp (optarg, "single-space") == 0)
653 sentence_end_required_spaces = 1;
654 else if (strcmp (optarg, "double-space") == 0)
655 sentence_end_required_spaces = 2;
657 error (EXIT_FAILURE, 0, _("sentence end type '%s' unknown"), optarg);
660 case CHAR_MAX + 20: /* --its */
661 explicit_its_filename = optarg;
664 case CHAR_MAX + 19: /* --itstool */
665 add_itstool_comments = true;
669 usage (EXIT_FAILURE);
673 /* Version information requested. */
676 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
677 /* xgettext: no-wrap */
678 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
679 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
680 This is free software: you are free to change and redistribute it.\n\
681 There is NO WARRANTY, to the extent permitted by law.\n\
683 "1995-1998, 2000-2013");
684 printf (_("Written by %s.\n"), proper_name ("Ulrich Drepper"));
688 /* Help is requested. */
690 usage (EXIT_SUCCESS);
692 /* Verify selected options. */
693 if (sort_by_msgid && sort_by_filepos)
694 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
695 "--sort-output", "--sort-by-file");
697 /* We cannot support both Qt and KDE, or Qt and Boost, or KDE and Boost
698 format strings, because there are only two formatstring parsers per
699 language, and formatstring_c is the first one for C++. */
/* NOTE(review): extractor_ty in this file has three formatstring_parser
   slots; confirm whether "only two" above is still accurate. */
700 if (recognize_format_qt && recognize_format_kde)
701 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
703 if (recognize_format_qt && recognize_format_boost)
704 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
706 if (recognize_format_kde && recognize_format_boost)
707 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
710 if (join_existing && strcmp (default_domain, "-") == 0)
711 error (EXIT_FAILURE, 0, _("\
712 --join-existing cannot be used when output is written to stdout"));
714 if (no_default_keywords && !some_additional_keywords)
717 xgettext cannot work without keywords to look for"));
718 usage (EXIT_FAILURE);
721 /* Test whether we have some input files given. */
722 if (files_from == NULL && optind >= argc)
/* error is called with status EXIT_SUCCESS so that it only prints the
   message; the usage call that follows does the exiting. */
724 error (EXIT_SUCCESS, 0, _("no input file given"));
725 usage (EXIT_FAILURE);
729 const char *gettextdatadir;
730 char *versioned_gettextdatadir;
732 /* Make it possible to override the locator file location. This
733 is necessary for running the testsuite before "make
735 gettextdatadir = getenv ("GETTEXTDATADIR");
736 if (gettextdatadir == NULL || gettextdatadir[0] == '\0')
737 gettextdatadir = relocate (GETTEXTDATADIR);
739 its_dirs[0] = xconcatenated_filename (gettextdatadir, "its", NULL);
741 versioned_gettextdatadir =
742 xasprintf ("%s%s", relocate (GETTEXTDATADIR), PACKAGE_SUFFIX);
743 its_dirs[1] = xconcatenated_filename (versioned_gettextdatadir, "its",
745 free (versioned_gettextdatadir);
747 its_locating_rules = locating_rule_list_alloc ();
748 for (i = 0; i < SIZEOF (its_dirs); i++)
749 locating_rule_list_add_from_directory (its_locating_rules, its_dirs[i]);
752 /* Explicit ITS file selection and language specification are
753 mutually exclusive. */
754 if (explicit_its_filename != NULL && language != NULL)
755 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
756 "--its", "--language");
758 /* Determine extractor from language. */
759 if (language != NULL)
760 extractor = language_to_extractor (language);
762 /* Canonize msgstr prefix/suffix. */
763 if (msgstr_prefix != NULL && msgstr_suffix == NULL)
765 else if (msgstr_prefix == NULL && msgstr_suffix != NULL)
768 /* Default output directory is the current directory. */
769 if (output_dir == NULL)
772 /* Construct the name of the output file. If the default domain has
773 the special name "-" we write to stdout. */
776 if (IS_ABSOLUTE_PATH (output_file) || strcmp (output_file, "-") == 0)
777 file_name = xstrdup (output_file);
779 /* Please do NOT add a .po suffix! */
780 file_name = xconcatenated_filename (output_dir, output_file, NULL);
782 else if (strcmp (default_domain, "-") == 0)
785 file_name = xconcatenated_filename (output_dir, default_domain, ".po");
787 /* Determine list of files we have to process. */
788 if (files_from != NULL)
789 file_list = read_names_from_file (files_from);
791 file_list = string_list_alloc ();
792 /* Append names from command line. */
793 for (cnt = optind; cnt < argc; ++cnt)
794 string_list_append_unique (file_list, argv[cnt]);
796 /* Allocate converter from xgettext_global_source_encoding to UTF-8 (except
797 from ASCII or UTF-8, when this conversion is a no-op). */
798 if (xgettext_global_source_encoding != po_charset_ascii
799 && xgettext_global_source_encoding != po_charset_utf8)
804 /* Avoid glibc-2.1 bug with EUC-KR. */
805 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
806 && !defined _LIBICONV_VERSION
807 if (strcmp (xgettext_global_source_encoding, "EUC-KR") == 0)
811 cd = iconv_open (po_charset_utf8, xgettext_global_source_encoding);
812 if (cd == (iconv_t)(-1))
813 error (EXIT_FAILURE, 0, _("\
814 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
815 and iconv() does not support this conversion."),
816 xgettext_global_source_encoding, po_charset_utf8,
817 basename (program_name));
818 xgettext_global_source_iconv = cd;
820 error (EXIT_FAILURE, 0, _("\
821 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
822 This version was built without iconv()."),
823 xgettext_global_source_encoding, po_charset_utf8,
824 basename (program_name));
828 /* Allocate a message list to remember all the messages. */
829 mdlp = msgdomain_list_alloc (true);
831 /* Generate a header, so that we know how and when this PO file was
833 if (!xgettext_omit_header)
834 message_list_append (mdlp->item[0]->messages, construct_header ());
836 /* Read in the old messages, so that we can add to them. */
839 /* Temporarily reset the directory list to empty, because file_name
840 is an output file and therefore should not be searched for. */
841 void *saved_directory_list = dir_list_save_reset ();
842 extractor_ty po_extractor = { extract_po, NULL, NULL, NULL };
844 extract_from_file (file_name, po_extractor, mdlp);
845 if (!is_ascii_msgdomain_list (mdlp))
846 mdlp = iconv_msgdomain_list (mdlp, "UTF-8", true, file_name);
848 dir_list_restore (saved_directory_list);
851 /* Process all input files. */
852 for (i = 0; i < file_list->nitems; i++)
854 const char *filename;
855 extractor_ty this_file_extractor;
856 its_rule_list_ty *its_rules = NULL;
858 filename = file_list->item[i];
861 this_file_extractor = extractor;
862 else if (explicit_its_filename != NULL)
864 its_rules = its_rule_list_alloc ();
865 if (!its_rule_list_add_from_file (its_rules,
866 explicit_its_filename))
868 error (EXIT_FAILURE, 0, _("\
869 warning: ITS rule file '%s' does not exist"), explicit_its_filename);
874 const char *language_from_extension = NULL;
878 base = strrchr (filename, '/');
882 reduced = xstrdup (base);
883 /* Remove a trailing ".in" - it's a generic suffix. */
884 while (strlen (reduced) >= 3
885 && memcmp (reduced + strlen (reduced) - 3, ".in", 3) == 0)
886 reduced[strlen (reduced) - 3] = '\0';
888 /* If no language is specified with -L, deduce it from the extension. */
889 if (language == NULL)
893 /* Work out what the file extension is. */
894 p = reduced + strlen (reduced);
895 for (; p > reduced && language_from_extension == NULL; p--)
899 const char *extension = p + 1;
901 /* Derive the language from the extension, and
902 the extractor function from the language. */
903 language_from_extension =
904 extension_to_language (extension);
909 /* If language is not determined from the file name
910 extension, check ITS locating rules. */
911 if (language_from_extension == NULL
912 && strcmp (filename, "-") != 0)
914 const char *its_basename;
916 its_basename = locating_rule_list_locate (its_locating_rules,
920 if (its_basename != NULL)
924 its_rules = its_rule_list_alloc ();
926 /* If the ITS file is identified by the name,
927 set the root element untranslatable. */
928 if (language != NULL)
929 its_rule_list_add_from_string (its_rules,
930 ITS_ROOT_UNTRANSLATABLE);
932 for (j = 0; j < SIZEOF (its_dirs); j++)
935 xconcatenated_filename (its_dirs[j], its_basename,
940 if (stat (its_filename, &statbuf) == 0)
941 ok = its_rule_list_add_from_file (its_rules,
947 if (j == SIZEOF (its_dirs))
950 warning: ITS rule file '%s' does not exist; check your gettext installation"),
952 its_rule_list_free (its_rules);
958 if (its_rules == NULL)
960 if (language_from_extension == NULL)
962 const char *extension = strrchr (reduced, '.');
963 if (extension == NULL)
968 warning: file '%s' extension '%s' is unknown; will try C"), filename, extension);
969 language_from_extension = "C";
972 this_file_extractor =
973 language_to_extractor (language_from_extension);
979 if (its_rules != NULL)
981 /* Extract the strings from the file, using ITS. */
982 extract_from_xml_file (filename, its_rules, mdlp);
983 its_rule_list_free (its_rules);
986 /* Extract the strings from the file. */
987 extract_from_file (filename, this_file_extractor, mdlp);
989 string_list_free (file_list);
991 /* Finalize the constructed header. */
992 if (!xgettext_omit_header)
993 finalize_header (mdlp);
995 /* Free the allocated converter. */
997 if (xgettext_global_source_encoding != po_charset_ascii
998 && xgettext_global_source_encoding != po_charset_utf8)
999 iconv_close (xgettext_global_source_iconv);
1002 /* Sorting the list of messages. */
1003 if (sort_by_filepos)
1004 msgdomain_list_sort_by_filepos (mdlp);
1005 else if (sort_by_msgid)
1006 msgdomain_list_sort_by_msgid (mdlp);
1008 /* Check syntax of messages. */
1012 for (i = 0; i < mdlp->nitems; i++)
1014 message_list_ty *mlp = mdlp->item[i]->messages;
/* Accumulate over all domains.  A plain assignment kept only the count of
   the last domain, so errors found in earlier domains were ignored.
   NOTE(review): this relies on nerrors starting at 0; its declaration is
   not part of this listing - confirm. */
1015 nerrors += syntax_check_message_list (mlp);
1018 /* Exit with status 1 on any error. */
1020 error (EXIT_FAILURE, 0,
1021 ngettext ("found %d fatal error", "found %d fatal errors",
1026 /* Write the PO file. */
1027 msgdomain_list_print (mdlp, file_name, output_syntax, force_po, do_debug);
1029 if (its_locating_rules)
1030 locating_rule_list_free (its_locating_rules);
1032 for (i = 0; i < SIZEOF (its_dirs); i++)
1035 exit (EXIT_SUCCESS);
1039 /* Display usage information and exit. */
/* For a STATUS other than EXIT_SUCCESS only a short hint pointing at --help
   is written to stderr; otherwise the full option summary below is printed.
   The help texts are translatable, so they must stay in sync with the
   options handled in main.
   NOTE(review): the function header and the output calls that wrap these
   strings are not part of this listing. */
1043 if (status != EXIT_SUCCESS)
1044 fprintf (stderr, _("Try '%s --help' for more information.\n"),
1049 Usage: %s [OPTION] [INPUTFILE]...\n\
1053 Extract translatable strings from given input files.\n\
1056 /* xgettext: no-wrap */
1058 Mandatory arguments to long options are mandatory for short options too.\n\
1059 Similarly for optional arguments.\n\
1063 Input file location:\n"));
1065 INPUTFILE ... input files\n"));
1067 -f, --files-from=FILE get list of input files from FILE\n"));
1069 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
1071 If input file is -, standard input is read.\n"));
1074 Output file location:\n"));
1076 -d, --default-domain=NAME use NAME.po for output (instead of messages.po)\n"));
1078 -o, --output=FILE write output to specified file\n"));
1080 -p, --output-dir=DIR output files will be placed in directory DIR\n"));
1082 If output file is -, output is written to standard output.\n"));
1085 Choice of input file language:\n"));
1087 -L, --language=NAME recognise the specified language\n\
1088 (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\
1089 EmacsLisp, librep, Scheme, Smalltalk, Java,\n\
1090 JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\
1091 GCC-source, NXStringTable, RST, Glade, Lua,\n\
1092 JavaScript, Vala, Desktop)\n"));
1094 -C, --c++ shorthand for --language=C++\n"));
1096 By default the language is guessed depending on the input file name extension.\n"));
1099 Input file interpretation:\n"));
1101 --from-code=NAME encoding of input files\n\
1102 (except for Python, Tcl, Glade)\n"));
1104 By default the input files are assumed to be in ASCII.\n"));
1107 Operation mode:\n"));
1109 -j, --join-existing join messages with existing file\n"));
1111 -x, --exclude-file=FILE.po entries from FILE.po are not extracted\n"));
1113 -cTAG, --add-comments=TAG place comment blocks starting with TAG and\n\
1114 preceding keyword lines in output file\n\
1115 -c, --add-comments place all comment blocks preceding keyword lines\n\
1116 in output file\n"));
1118 --check=NAME perform syntax check on messages\n\
1119 (ellipsis-unicode, space-ellipsis,\n\
1120 quote-unicode)\n"));
1122 --sentence-end=TYPE type describing the end of sentence\n\
1123 (single-space, which is the default, \n\
1124 or double-space)\n"));
1127 Language specific options:\n"));
1129 -a, --extract-all extract all strings\n"));
1131 (only languages C, C++, ObjectiveC, Shell,\n\
1132 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
1133 C#, awk, Tcl, Perl, PHP, GCC-source, Glade,\n\
1134 Lua, JavaScript, Vala)\n"));
1136 -kWORD, --keyword=WORD look for WORD as an additional keyword\n\
1137 -k, --keyword do not use default keywords\n"));
1139 (only languages C, C++, ObjectiveC, Shell,\n\
1140 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
1141 C#, awk, Tcl, Perl, PHP, GCC-source, Glade,\n\
1142 Lua, JavaScript, Vala, Desktop)\n"));
1144 --flag=WORD:ARG:FLAG additional flag for strings inside the argument\n\
1145 number ARG of keyword WORD\n"));
1147 (only languages C, C++, ObjectiveC, Shell,\n\
1148 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
1149 C#, awk, YCP, Tcl, Perl, PHP, GCC-source,\n\
1150 Lua, JavaScript, Vala)\n"));
1152 -T, --trigraphs understand ANSI C trigraphs for input\n"));
1154 (only languages C, C++, ObjectiveC)\n"));
1156 --qt recognize Qt format strings\n"));
1158 (only language C++)\n"));
1160 --kde recognize KDE 4 format strings\n"));
1162 (only language C++)\n"));
1164 --boost recognize Boost format strings\n"));
1166 (only language C++)\n"));
1168 --debug more detailed formatstring recognition result\n"));
1171 Output details:\n"));
1173 --color use colors and other text attributes always\n\
1174 --color=WHEN use colors and other text attributes if WHEN.\n\
1175 WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
1177 --style=STYLEFILE specify CSS style rule file for --color\n"));
1179 -e, --no-escape do not use C escapes in output (default)\n"));
1181 -E, --escape use C escapes in output, no extended chars\n"));
1183 --force-po write PO file even if empty\n"));
1185 -i, --indent write the .po file using indented style\n"));
1187 --no-location do not write '#: filename:line' lines\n"));
1189 -n, --add-location generate '#: filename:line' lines (default)\n"));
1191 --strict write out strict Uniforum conforming .po file\n"));
1193 --properties-output write out a Java .properties file\n"));
1195 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
1197 --its=FILE apply ITS rules from FILE\n"));
1199 --itstool write out itstool comments\n"));
1201 -w, --width=NUMBER set output page width\n"));
1203 --no-wrap do not break long message lines, longer than\n\
1204 the output page width, into several lines\n"));
1206 -s, --sort-output generate sorted output\n"));
1208 -F, --sort-by-file sort output by file location\n"));
1210 --omit-header don't write header with 'msgid \"\"' entry\n"));
1212 --copyright-holder=STRING set copyright holder in output\n"));
1214 --foreign-user omit FSF copyright in output for foreign user\n"));
1216 --package-name=PACKAGE set package name in output\n"));
1218 --package-version=VERSION set package version in output\n"));
1220 --msgid-bugs-address=EMAIL@ADDRESS set report address for msgid bugs\n"));
1222 -m[STRING], --msgstr-prefix[=STRING] use STRING or \"\" as prefix for msgstr\n\
1225 -M[STRING], --msgstr-suffix[=STRING] use STRING or \"\" as suffix for msgstr\n\
1229 Informative output:\n"));
1231 -h, --help display this help and exit\n"));
1233 -V, --version output version information and exit\n"));
1235 /* TRANSLATORS: The placeholder indicates the bug-reporting address
1236 for this package. Please add _another line_ saying
1237 "Report translation bugs to <...>\n" with the address for translation
1238 bugs (typically your translation team's web or email address). */
1239 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
/* Callback invoked for a domain directive while reading an exclusion
   file.  It reports a grammar error at gram_pos, because such a file may
   not contain domain directives.  Neither POP nor NAME is referenced in
   the lines visible here.  */
1248 exclude_directive_domain (abstract_catalog_reader_ty *pop, char *name)
1250 po_gram_error_at_line (&gram_pos,
1251 _("this file may not contain domain directives"));
/* Callback invoked for each message while reading an exclusion file.
   The global list exclude is allocated on first use and searched for an
   entry with the same MSGCTXT and MSGID; a message carrying an empty
   msgstr is appended to that list.
   NOTE(review): the test guarding the append (presumably: nothing was
   found by the search) is not visible in this listing - confirm against
   the full source.  */
1256 exclude_directive_message (abstract_catalog_reader_ty *pop,
1259 lex_pos_ty *msgid_pos,
1261 char *msgstr, size_t msgstr_len,
1262 lex_pos_ty *msgstr_pos,
1265 char *prev_msgid_plural,
1266 bool force_fuzzy, bool obsolete)
1270 /* See if this message ID has been seen before. */
1271 if (exclude == NULL)
1272 exclude = message_list_alloc (true);
1273 mp = message_list_search (exclude, msgctxt, msgid);
1278 mp = message_alloc (msgctxt, msgid, msgid_plural, "", 1, msgstr_pos);
1279 /* Do not free msgid. */
1280 message_list_append (exclude, mp);
1283 /* All we care about is the msgid. Throw the msgstr away.
1284 Don't even check for duplicate msgids. */
1289 /* So that the one parser can be used for multiple programs, and also
1290 use good data hiding and encapsulation practices, an object
1291 oriented approach has been taken. An object instance is allocated,
1292 and all actions resulting from the parse will be through
1293 invocations of method functions of that object. */
/* Method table handed to catalog_reader_alloc() by read_exclusion_file().
   Only the domain-directive and message callbacks are provided; every
   other slot visible here is NULL.  */
1295 static abstract_catalog_reader_class_ty exclude_methods =
1297 sizeof (abstract_catalog_reader_ty),
1298 NULL, /* constructor */
1299 NULL, /* destructor */
1300 NULL, /* parse_brief */
1301 NULL, /* parse_debrief */
1302 exclude_directive_domain,
1303 exclude_directive_message,
1305 NULL, /* comment_dot */
1306 NULL, /* comment_filepos */
1307 NULL, /* comment_special */
/* Read the PO file FILENAME and feed its contents through the
   exclude_methods callbacks, so that its messages are collected in the
   exclusion list.  The file is opened with open_catalog_file(), parsed
   as input_format_po, and the reader object is freed afterwards.  */
1312 read_exclusion_file (char *filename)
1314 char *real_filename;
1315 FILE *fp = open_catalog_file (filename, &real_filename, true);
1316 abstract_catalog_reader_ty *pop;
1318 pop = catalog_reader_alloc (&exclude_methods);
1319 catalog_reader_parse (pop, fp, real_filename, filename, &input_format_po);
1320 catalog_reader_free (pop);
1328 split_keywordspec (const char *spec,
1329 const char **endp, struct callshape *shapep)
1335 bool argnum1_glib_context = false;
1336 bool argnum2_glib_context = false;
1338 string_list_ty xcomments;
1340 string_list_init (&xcomments);
1342 /* Start parsing from the end. */
1343 p = spec + strlen (spec);
1346 if (isdigit ((unsigned char) p[-1])
1347 || ((p[-1] == 'c' || p[-1] == 'g' || p[-1] == 't')
1348 && p - 1 > spec && isdigit ((unsigned char) p[-2])))
1350 bool contextp = (p[-1] == 'c');
1351 bool glibp = (p[-1] == 'g');
1352 bool totalp = (p[-1] == 't');
1356 while (p > spec && isdigit ((unsigned char) p[-1]));
1358 if (p > spec && (p[-1] == ',' || p[-1] == ':'))
1361 int arg = strtol (p, &dummy, 10);
1366 /* Only one context argument can be given. */
1373 /* Only one total number of arguments can be given. */
1380 /* At most two normal arguments can be given. */
1383 argnum2_glib_context = argnum1_glib_context;
1385 argnum1_glib_context = glibp;
1391 else if (p[-1] == '"')
1393 const char *xcomment_end;
1398 while (p > spec && p[-1] != '"')
1401 if (p > spec /* && p[-1] == '"' */)
1403 const char *xcomment_start;
1407 if (p > spec && (p[-1] == ',' || p[-1] == ':'))
1409 size_t xcomment_len = xcomment_end - xcomment_start;
1410 char *xcomment = XNMALLOC (xcomment_len + 1, char);
1412 memcpy (xcomment, xcomment_start, xcomment_len);
1413 xcomment[xcomment_len] = '\0';
1414 string_list_append (&xcomments, xcomment);
1425 /* Here an element of the comma-separated list has been parsed. */
1426 if (!(p > spec && (p[-1] == ',' || p[-1] == ':')))
1433 if (argnum1 == 0 && argnum2 == 0)
1434 /* At least one non-context argument must be given. */
1437 && (argnum1_glib_context || argnum2_glib_context))
1438 /* Incompatible ways to specify the context. */
1441 shapep->argnum1 = (argnum1 > 0 ? argnum1 : 1);
1442 shapep->argnum2 = argnum2;
1443 shapep->argnumc = argnumc;
1444 shapep->argnum1_glib_context = argnum1_glib_context;
1445 shapep->argnum2_glib_context = argnum2_glib_context;
1446 shapep->argtotal = argtotal;
1447 /* Reverse the order of the xcomments. */
1448 string_list_init (&shapep->xcomments);
1449 for (i = xcomments.nitems; i > 0; )
1450 string_list_append (&shapep->xcomments, xcomments.item[--i]);
1451 string_list_destroy (&xcomments);
1456 /* Couldn't parse the desired syntax. */
1457 *endp = spec + strlen (spec);
1458 shapep->argnum1 = 1;
1459 shapep->argnum2 = 0;
1460 shapep->argnumc = 0;
1461 shapep->argnum1_glib_context = false;
1462 shapep->argnum2_glib_context = false;
1463 shapep->argtotal = 0;
1464 string_list_init (&shapep->xcomments);
1465 string_list_destroy (&xcomments);
/* Register SHAPE as a call shape of the keyword KEYWORD (KEYWORD_LEN
   bytes) in TABLE.
   - If hash_find_entry() does not find the keyword, a one-element
     struct callshapes is created and inserted.
   - If the existing entry already holds a shape with the same argnum1,
     argnum2, argnumc, glib-context flags and argtotal, that shape's
     xcomments are replaced by those of SHAPE.
   - Otherwise a copy of the entry, enlarged by one slot holding SHAPE,
     is stored in its place with hash_set_value().
   NOTE(review): the tests that separate the second and third case are
   not visible in this listing - confirm against the full source.  */
1470 insert_keyword_callshape (hash_table *table,
1471 const char *keyword, size_t keyword_len,
1472 const struct callshape *shape)
1476 if (hash_find_entry (table, keyword, keyword_len, &old_value))
1478 /* Create a one-element 'struct callshapes'. */
1479 struct callshapes *shapes = XMALLOC (struct callshapes);
1480 shapes->nshapes = 1;
1481 shapes->shapes[0] = *shape;
1483 (const char *) hash_insert_entry (table, keyword, keyword_len, shapes);
1484 if (keyword == NULL)
1486 shapes->keyword = keyword;
1487 shapes->keyword_len = keyword_len;
1491 /* Found a 'struct callshapes'. See whether it already contains the
1493 struct callshapes *old_shapes = (struct callshapes *) old_value;
1498 for (i = 0; i < old_shapes->nshapes; i++)
1499 if (old_shapes->shapes[i].argnum1 == shape->argnum1
1500 && old_shapes->shapes[i].argnum2 == shape->argnum2
1501 && old_shapes->shapes[i].argnumc == shape->argnumc
1502 && old_shapes->shapes[i].argnum1_glib_context
1503 == shape->argnum1_glib_context
1504 && old_shapes->shapes[i].argnum2_glib_context
1505 == shape->argnum2_glib_context
1506 && old_shapes->shapes[i].argtotal == shape->argtotal)
1508 old_shapes->shapes[i].xcomments = shape->xcomments;
1515 /* Replace the existing 'struct callshapes' with a new one. */
1516 struct callshapes *shapes =
1517 (struct callshapes *)
1518 xmalloc (xsum (sizeof (struct callshapes),
1519 xtimes (old_shapes->nshapes,
1520 sizeof (struct callshape))));
1522 shapes->keyword = old_shapes->keyword;
1523 shapes->keyword_len = old_shapes->keyword_len;
1524 shapes->nshapes = old_shapes->nshapes + 1;
1525 for (i = 0; i < old_shapes->nshapes; i++)
1526 shapes->shapes[i] = old_shapes->shapes[i];
1527 shapes->shapes[i] = *shape;
1528 if (hash_set_value (table, keyword, keyword_len, shapes))
1537 flag_context_ty null_context = { undecided, false, undecided, false };
1539 /* Transparent context. */
1540 flag_context_ty passthrough_context = { undecided, true, undecided, true };
/* Combine OUTER_CONTEXT with MODIFIER_CONTEXT.  The local result starts
   as a copy of MODIFIER_CONTEXT; for each of the three format slots whose
   pass_format flag is set, is_format is taken from OUTER_CONTEXT instead
   and the pass flag is cleared.  */
1544 inherited_context (flag_context_ty outer_context,
1545 flag_context_ty modifier_context)
1547 flag_context_ty result = modifier_context;
1549 if (result.pass_format1)
1551 result.is_format1 = outer_context.is_format1;
1552 result.pass_format1 = false;
1554 if (result.pass_format2)
1556 result.is_format2 = outer_context.is_format2;
1557 result.pass_format2 = false;
1559 if (result.pass_format3)
1561 result.is_format3 = outer_context.is_format3;
1562 result.pass_format3 = false;
1568 /* Null context list iterator. */
1569 flag_context_list_iterator_ty null_context_list_iterator = { 1, NULL };
1571 /* Transparent context list iterator. */
1572 static flag_context_list_ty passthrough_context_circular_list =
1575 { undecided, true, undecided, true },
1576 &passthrough_context_circular_list
1578 flag_context_list_iterator_ty passthrough_context_list_iterator =
1581 &passthrough_context_circular_list
1585 flag_context_list_iterator_ty
1586 flag_context_list_iterator (flag_context_list_ty *list)
1588 flag_context_list_iterator_ty result;
/* Step ITER and yield a flag context.  With an exhausted iterator
   (head == NULL) null_context is returned.  When the argnum of ITER
   equals the argnum of the head element, the flags of that element are
   picked up and head moves on to the next element, unless the element
   links to itself (circular list).
   NOTE(review): the update of ITER's argnum and the return of the
   picked-up flags are not visible in this listing.  */
1597 flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter)
1599 if (iter->head == NULL)
1600 return null_context;
1601 if (iter->argnum == iter->head->argnum)
1603 flag_context_ty result = iter->head->flags;
1605 /* Special casing of circular list. */
1606 if (iter->head != iter->head->next)
1608 iter->head = iter->head->next;
1617 return null_context;
/* Look up the KEYLEN bytes at KEY in FLAG_TABLE.  When the table has
   been initialized (table != NULL) and hash_find_entry() returns 0, the
   stored entry is returned as a flag_context_list_ty pointer.
   NOTE(review): the result for a missing key is not visible in this
   listing, presumably NULL.  */
1622 flag_context_list_ty *
1623 flag_context_list_table_lookup (flag_context_list_table_ty *flag_table,
1624 const void *key, size_t keylen)
1628 if (flag_table->table != NULL
1629 && hash_find_entry (flag_table, key, keylen, &entry) == 0)
1630 return (flag_context_list_ty *) entry;
1637 flag_context_list_table_insert (flag_context_list_table_ty *table,
1639 const char *name_start, const char *name_end,
1640 int argnum, enum is_format value, bool pass)
1642 char *allocated_name = NULL;
1644 if (table == &flag_table_lisp)
1646 /* Convert NAME to upper case. */
1647 size_t name_len = name_end - name_start;
1648 char *name = allocated_name = (char *) xmalloca (name_len);
1651 for (i = 0; i < name_len; i++)
1652 name[i] = (name_start[i] >= 'a' && name_start[i] <= 'z'
1653 ? name_start[i] - 'a' + 'A'
1656 name_end = name + name_len;
1658 else if (table == &flag_table_tcl)
1660 /* Remove redundant "::" prefix. */
1661 if (name_end - name_start > 2
1662 && name_start[0] == ':' && name_start[1] == ':')
1666 /* Insert the pair (VALUE, PASS) at INDEX in the element numbered ARGNUM
1667 of the list corresponding to NAME in the TABLE. */
1668 if (table->table == NULL)
1669 hash_init (table, 100);
1673 if (hash_find_entry (table, name_start, name_end - name_start, &entry) != 0)
1675 /* Create new hash table entry. */
1676 flag_context_list_ty *list = XMALLOC (flag_context_list_ty);
1677 list->argnum = argnum;
1678 memset (&list->flags, '\0', sizeof (list->flags));
1682 list->flags.is_format1 = value;
1683 list->flags.pass_format1 = pass;
1686 list->flags.is_format2 = value;
1687 list->flags.pass_format2 = pass;
1690 list->flags.is_format3 = value;
1691 list->flags.pass_format3 = pass;
1697 hash_insert_entry (table, name_start, name_end - name_start, list);
1701 flag_context_list_ty *list = (flag_context_list_ty *)entry;
1702 flag_context_list_ty **lastp = NULL;
1703 /* Invariant: list == (lastp != NULL ? *lastp : entry). */
1705 while (list != NULL && list->argnum < argnum)
1707 lastp = &list->next;
1710 if (list != NULL && list->argnum == argnum)
1712 /* Add this flag to the current argument number. */
1716 list->flags.is_format1 = value;
1717 list->flags.pass_format1 = pass;
1720 list->flags.is_format2 = value;
1721 list->flags.pass_format2 = pass;
1724 list->flags.is_format3 = value;
1725 list->flags.pass_format3 = pass;
1731 else if (lastp != NULL)
1733 /* Add a new list entry for this argument number. */
1734 list = XMALLOC (flag_context_list_ty);
1735 list->argnum = argnum;
1736 memset (&list->flags, '\0', sizeof (list->flags));
1740 list->flags.is_format1 = value;
1741 list->flags.pass_format1 = pass;
1744 list->flags.is_format2 = value;
1745 list->flags.pass_format2 = pass;
1748 list->flags.is_format3 = value;
1749 list->flags.pass_format3 = pass;
1754 list->next = *lastp;
1759 /* Add a new list entry for this argument number, at the beginning
1760 of the list. Since we don't have an API for replacing the
1761 value of a key in the hash table, we have to copy the first
1763 flag_context_list_ty *copy = XMALLOC (flag_context_list_ty);
1766 list->argnum = argnum;
1767 memset (&list->flags, '\0', sizeof (list->flags));
1771 list->flags.is_format1 = value;
1772 list->flags.pass_format1 = pass;
1775 list->flags.is_format2 = value;
1776 list->flags.pass_format2 = pass;
1779 list->flags.is_format3 = value;
1780 list->flags.pass_format3 = pass;
1790 if (allocated_name != NULL)
1791 freea (allocated_name);
1796 xgettext_record_flag (const char *optionstring)
1798 /* Check the string has at least two colons. (Colons in the name are
1799 allowed, needed for the Lisp and the Tcl backends.) */
1803 for (colon2 = optionstring + strlen (optionstring); ; )
1805 if (colon2 == optionstring)
1811 for (colon1 = colon2; ; )
1813 if (colon1 == optionstring)
1820 const char *name_start = optionstring;
1821 const char *name_end = colon1;
1822 const char *argnum_start = colon1 + 1;
1823 const char *argnum_end = colon2;
1824 const char *flag = colon2 + 1;
1827 /* Check the parts' syntax. */
1828 if (name_end == name_start)
1830 if (argnum_end == argnum_start)
1834 argnum = strtol (argnum_start, &endp, 10);
1835 if (endp != argnum_end)
1841 /* Analyze the flag part. */
1846 if (strlen (flag) >= 5 && memcmp (flag, "pass-", 5) == 0)
1852 /* Unlike po_parse_comment_special(), we don't accept "fuzzy",
1853 "wrap", or "check" here - it makes no sense. */
1854 if (strlen (flag) >= 7
1855 && memcmp (flag + strlen (flag) - 7, "-format", 7) == 0)
1859 enum is_format value;
1863 n = strlen (flag) - 7;
1865 if (n >= 3 && memcmp (p, "no-", 3) == 0)
1871 else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
1877 else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
1884 value = yes_according_to_context;
1886 for (type = 0; type < NFORMATS; type++)
1887 if (strlen (format_language[type]) == n
1888 && memcmp (format_language[type], p, n) == 0)
1893 flag_context_list_table_insert (&flag_table_c, 0,
1894 name_start, name_end,
1895 argnum, value, pass);
1896 flag_context_list_table_insert (&flag_table_cxx_qt, 0,
1897 name_start, name_end,
1898 argnum, value, pass);
1899 flag_context_list_table_insert (&flag_table_cxx_kde, 0,
1900 name_start, name_end,
1901 argnum, value, pass);
1902 flag_context_list_table_insert (&flag_table_cxx_boost, 0,
1903 name_start, name_end,
1904 argnum, value, pass);
1905 flag_context_list_table_insert (&flag_table_objc, 0,
1906 name_start, name_end,
1907 argnum, value, pass);
1910 flag_context_list_table_insert (&flag_table_objc, 1,
1911 name_start, name_end,
1912 argnum, value, pass);
1915 flag_context_list_table_insert (&flag_table_sh, 0,
1916 name_start, name_end,
1917 argnum, value, pass);
1920 flag_context_list_table_insert (&flag_table_python, 0,
1921 name_start, name_end,
1922 argnum, value, pass);
1924 case format_python_brace:
1925 flag_context_list_table_insert (&flag_table_python, 0,
1926 name_start, name_end,
1927 argnum, value, pass);
1930 flag_context_list_table_insert (&flag_table_lisp, 0,
1931 name_start, name_end,
1932 argnum, value, pass);
1935 flag_context_list_table_insert (&flag_table_elisp, 0,
1936 name_start, name_end,
1937 argnum, value, pass);
1940 flag_context_list_table_insert (&flag_table_librep, 0,
1941 name_start, name_end,
1942 argnum, value, pass);
1945 flag_context_list_table_insert (&flag_table_scheme, 0,
1946 name_start, name_end,
1947 argnum, value, pass);
1949 case format_smalltalk:
1952 flag_context_list_table_insert (&flag_table_java, 0,
1953 name_start, name_end,
1954 argnum, value, pass);
1957 flag_context_list_table_insert (&flag_table_csharp, 0,
1958 name_start, name_end,
1959 argnum, value, pass);
1962 flag_context_list_table_insert (&flag_table_awk, 0,
1963 name_start, name_end,
1964 argnum, value, pass);
1969 flag_context_list_table_insert (&flag_table_ycp, 0,
1970 name_start, name_end,
1971 argnum, value, pass);
1974 flag_context_list_table_insert (&flag_table_tcl, 0,
1975 name_start, name_end,
1976 argnum, value, pass);
1979 flag_context_list_table_insert (&flag_table_perl, 0,
1980 name_start, name_end,
1981 argnum, value, pass);
1983 case format_perl_brace:
1984 flag_context_list_table_insert (&flag_table_perl, 1,
1985 name_start, name_end,
1986 argnum, value, pass);
1989 flag_context_list_table_insert (&flag_table_php, 0,
1990 name_start, name_end,
1991 argnum, value, pass);
1993 case format_gcc_internal:
1994 flag_context_list_table_insert (&flag_table_gcc_internal, 0,
1995 name_start, name_end,
1996 argnum, value, pass);
1998 case format_gfc_internal:
1999 flag_context_list_table_insert (&flag_table_gcc_internal, 1,
2000 name_start, name_end,
2001 argnum, value, pass);
2004 flag_context_list_table_insert (&flag_table_cxx_qt, 1,
2005 name_start, name_end,
2006 argnum, value, pass);
2008 case format_qt_plural:
2009 flag_context_list_table_insert (&flag_table_cxx_qt, 2,
2010 name_start, name_end,
2011 argnum, value, pass);
2014 flag_context_list_table_insert (&flag_table_cxx_kde, 1,
2015 name_start, name_end,
2016 argnum, value, pass);
2018 case format_kde_kuit:
2019 flag_context_list_table_insert (&flag_table_cxx_kde, 2,
2020 name_start, name_end,
2021 argnum, value, pass);
2024 flag_context_list_table_insert (&flag_table_cxx_boost, 1,
2025 name_start, name_end,
2026 argnum, value, pass);
2029 flag_context_list_table_insert (&flag_table_lua, 0,
2030 name_start, name_end,
2031 argnum, value, pass);
2033 case format_javascript:
2034 flag_context_list_table_insert (&flag_table_javascript, 0,
2035 name_start, name_end,
2036 argnum, value, pass);
2043 /* If the flag is not among the valid values, the optionstring is
2050 error (EXIT_FAILURE, 0, _("\
2051 A --flag argument doesn't have the <keyword>:<argnum>:[pass-]<flag> syntax: %s"),
2056 /* Comment handling: There is a list of automatic comments that may be appended
2057 to the next message. Used by remember_a_message(). */
2059 static string_list_ty *comment;
/* Append STR to the list of automatic comments, allocating the list on
   first use.  */
2062 xgettext_comment_add (const char *str)
2064 if (comment == NULL)
2065 comment = string_list_alloc ();
2066 string_list_append (comment, str);
/* Return the N-th automatic comment.  A missing list or an N that is not
   below the number of items is handled as a separate case.
   NOTE(review): the value returned in that case is not visible in this
   listing, presumably NULL.  */
2070 xgettext_comment (size_t n)
2072 if (comment == NULL || n >= comment->nitems)
2074 return comment->item[n];
/* Free the list of automatic comments, if there is one.
   NOTE(review): clearing the comment pointer after string_list_free()
   is not visible in this listing - confirm against the full source.  */
2078 xgettext_comment_reset ()
2080 if (comment != NULL)
2082 string_list_free (comment);
2088 refcounted_string_list_ty *savable_comment;
/* Append STR to savable_comment.  A list with refcount 1 is created when
   none exists.  When the current list is shared (refcount > 1), it loses
   one reference and a private list with copies of its items replaces it
   before STR is appended.  */
2091 savable_comment_add (const char *str)
2093 if (savable_comment == NULL)
2095 savable_comment = XMALLOC (refcounted_string_list_ty);
2096 savable_comment->refcount = 1;
2097 string_list_init (&savable_comment->contents);
2099 else if (savable_comment->refcount > 1)
2101 /* Unshare the list by making copies. */
2102 struct string_list_ty *oldcontents;
2105 savable_comment->refcount--;
2106 oldcontents = &savable_comment->contents;
2108 savable_comment = XMALLOC (refcounted_string_list_ty);
2109 savable_comment->refcount = 1;
2110 string_list_init (&savable_comment->contents);
2111 for (i = 0; i < oldcontents->nitems; i++)
2112 string_list_append (&savable_comment->contents, oldcontents->item[i]);
2114 string_list_append (&savable_comment->contents, str);
/* Give up the reference held in savable_comment via drop_reference() and
   set savable_comment to NULL.  */
2118 savable_comment_reset ()
2120 drop_reference (savable_comment);
2121 savable_comment = NULL;
/* Replace the automatic comment list by the contents of RSLP: the list
   is reset, then every item of RSLP is added in order.
   NOTE(review): a NULL check on RSLP is not visible in this listing.  */
2125 savable_comment_to_xgettext_comment (refcounted_string_list_ty *rslp)
2127 xgettext_comment_reset ();
2132 for (i = 0; i < rslp->contents.nitems; i++)
2133 xgettext_comment_add (rslp->contents.item[i]);
/* Build a new list (refcount 1) whose items are the items of COMMENT
   passed through from_current_source_encoding(), in the same order.
   NOTE(review): the remaining parameters, the handling of a converted
   string that differs from the original, and the return statement are
   not visible in this listing.  */
2137 refcounted_string_list_ty *
2138 savable_comment_convert_encoding (refcounted_string_list_ty *comment,
2141 refcounted_string_list_ty *result;
2144 result = XMALLOC (refcounted_string_list_ty);
2145 result->refcount = 1;
2146 string_list_init (&result->contents);
2148 for (i = 0; i < comment->contents.nitems; i++)
2150 const char *old_string = comment->contents.item[i];
2151 char *string = from_current_source_encoding (old_string,
2155 string_list_append (&result->contents, string);
2156 if (string != old_string)
/* Open the input file FN and report its names through
   LOGICAL_FILE_NAME_P and REAL_FILE_NAME_P; both strings are freshly
   allocated.
   - FN equal to a single dash: both names are the translated text for
     standard input.
   - FN absolute: it is opened directly with fopen(); both names are
     copies of FN.
   - Otherwise FN is combined with directories obtained from
     dir_list_nth() and opened with fopen(); an errno other than ENOENT
     is fatal.  The real name is the path that was opened, the logical
     name is a copy of FN.
   NOTE(review): the conditions guarding the fatal error() calls (for
   instance the end of the directory list for the ENOENT one) are not
   visible in this listing.  */
2166 xgettext_open (const char *fn,
2167 char **logical_file_name_p, char **real_file_name_p)
2171 char *logical_file_name;
2173 if (strcmp (fn, "-") == 0)
2175 new_name = xstrdup (_("standard input"));
2176 logical_file_name = xstrdup (new_name);
2179 else if (IS_ABSOLUTE_PATH (fn))
2181 new_name = xstrdup (fn);
2182 fp = fopen (fn, "r");
2184 error (EXIT_FAILURE, errno, _("\
2185 error while opening \"%s\" for reading"), fn);
2186 logical_file_name = xstrdup (new_name);
2194 const char *dir = dir_list_nth (j);
2197 error (EXIT_FAILURE, ENOENT, _("\
2198 error while opening \"%s\" for reading"), fn);
2200 new_name = xconcatenated_filename (dir, fn, NULL);
2202 fp = fopen (new_name, "r");
2206 if (errno != ENOENT)
2207 error (EXIT_FAILURE, errno, _("\
2208 error while opening \"%s\" for reading"), new_name);
2212 /* Note that the NEW_NAME variable contains the actual file name
2213 and the logical file name is what is reported by xgettext. In
2214 this case NEW_NAME is set to the file which was found along the
2215 directory search path, and LOGICAL_FILE_NAME is set to the
2216 file name which was searched for. */
2217 logical_file_name = xstrdup (fn);
2220 *logical_file_name_p = logical_file_name;
2221 *real_file_name_p = new_name;
2226 /* Language dependent format string parser.
2227 NULL if the language has no notion of format strings. */
2228 static struct formatstring_parser *current_formatstring_parser1;
2229 static struct formatstring_parser *current_formatstring_parser2;
2230 static struct formatstring_parser *current_formatstring_parser3;
2232 static struct literalstring_parser *current_literalstring_parser;
/* Run EXTRACTOR over the file FILE_NAME.  The file is opened with
   xgettext_open(); the current source encoding and iconv state are set
   from the global ones, and the current format string and literal string
   parsers are taken from EXTRACTOR before EXTRACTOR.func is called on
   the stream.  The two file name strings are freed afterwards.  */
2235 extract_from_file (const char *file_name, extractor_ty extractor,
2236 msgdomain_list_ty *mdlp)
2238 char *logical_file_name;
2239 char *real_file_name;
2240 FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
2242 /* Set the default for the source file encoding. May be overridden by
2243 the extractor function. */
2244 xgettext_current_source_encoding = xgettext_global_source_encoding;
2246 xgettext_current_source_iconv = xgettext_global_source_iconv;
2249 current_formatstring_parser1 = extractor.formatstring_parser1;
2250 current_formatstring_parser2 = extractor.formatstring_parser2;
2251 current_formatstring_parser3 = extractor.formatstring_parser3;
2252 current_literalstring_parser = extractor.literalstring_parser;
2253 extractor.func (fp, real_file_name, logical_file_name, extractor.flag_table,
2258 free (logical_file_name);
2259 free (real_file_name);
/* Callback passed to its_rule_list_extract(): records one extracted
   string in MLP via remember_a_message(), handing it a copy of MSGCTXT
   when that is not NULL.  When add_itstool_comments is set, a dot
   comment of the form (itstool) path: MARKER is attached to the message.
   When WHITESPACE is ITS_WHITESPACE_PRESERVE, do_wrap of the message is
   set to no.  */
2263 xgettext_its_extract_callback (message_list_ty *mlp,
2264 const char *msgctxt,
2267 const char *extracted_comment,
2269 enum its_whitespace_type_ty whitespace)
2271 message_ty *message;
2273 message = remember_a_message (mlp,
2274 msgctxt == NULL ? NULL : xstrdup (msgctxt),
2277 extracted_comment, NULL);
2279 if (add_itstool_comments)
2281 char *dot = xasprintf ("(itstool) path: %s", marker);
2282 message_comment_dot_append (message, dot);
2285 if (whitespace == ITS_WHITESPACE_PRESERVE)
2286 message->do_wrap = no;
/* Extract messages from the XML file FILE_NAME by applying the ITS rule
   list RULES.  The file is opened with xgettext_open(), the current
   source encoding is set to UTF-8, and its_rule_list_extract() is run
   with xgettext_its_extract_callback as the callback.  The two file name
   strings are freed afterwards.  */
2293 extract_from_xml_file (const char *file_name,
2294 its_rule_list_ty *rules,
2295 msgdomain_list_ty *mdlp)
2297 char *logical_file_name;
2298 char *real_file_name;
2299 FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
2301 /* The default encoding for XML is UTF-8. It can be overridden by
2302 an XML declaration in the XML file itself, not through the
2303 --from-code option. */
2304 xgettext_current_source_encoding = po_charset_utf8;
2307 xgettext_current_source_iconv = xgettext_global_source_iconv;
2310 its_rule_list_extract (rules, fp, real_file_name, logical_file_name,
2313 xgettext_its_extract_callback);
2317 free (logical_file_name);
2318 free (real_file_name);
2323 /* Error message about non-ASCII character in a specific lexical context. */
/* A LINE_NUMBER of (size_t)(-1) is treated as a special case; otherwise
   a colon followed by the line number is formatted into a buffer that
   follows FILE_NAME in the message.  Three wordings exist: one for a
   character, one for a comment, and one for a string.
   NOTE(review): the selection of the wording from LCONTEXT and the
   return of the message are not visible in this listing.  */
2325 non_ascii_error_message (lexical_context_ty lcontext,
2326 const char *file_name, size_t line_number)
2331 if (line_number == (size_t)(-1))
2334 sprintf (buffer, ":%ld", (long) line_number);
2340 xasprintf (_("Non-ASCII character at %s%s."), file_name, buffer);
2344 xasprintf (_("Non-ASCII comment at or before %s%s."),
2349 xasprintf (_("Non-ASCII string at %s%s."), file_name, buffer);
2357 /* Convert the given string from xgettext_current_source_encoding to
2358 the output file encoding (i.e. ASCII or UTF-8).
2359 The resulting string is either the argument string, or freshly allocated.
2360 The file_name and line_number are only used for error message purposes. */
2362 from_current_source_encoding (const char *string,
2363 lexical_context_ty lcontext,
2364 const char *file_name, size_t line_number)
2366 if (xgettext_current_source_encoding == po_charset_ascii)
2368 if (!is_ascii_string (string))
2370 multiline_error (xstrdup (""),
2371 xasprintf ("%s\n%s\n",
2372 non_ascii_error_message (lcontext,
2376 Please specify the source encoding through --from-code.")));
2377 exit (EXIT_FAILURE);
2380 else if (xgettext_current_source_encoding != po_charset_utf8)
2383 struct conversion_context context;
2385 context.from_code = xgettext_current_source_encoding;
2386 context.to_code = po_charset_utf8;
2387 context.from_filename = file_name;
2388 context.message = NULL;
2390 string = convert_string_directly (xgettext_current_source_iconv, string,
2393 /* If we don't have iconv(), the only supported values for
2394 xgettext_global_source_encoding and thus also for
2395 xgettext_current_source_encoding are ASCII and UTF-8.
2396 convert_string_directly() should not be called in this case. */
2401 return (char *) string;
2404 #define CONVERT_STRING(string, lcontext) \
2405 string = from_current_source_encoding (string, lcontext, pos->file_name, \
2409 /* Update the is_format[] flags depending on the information given in the
2412 set_format_flags_from_context (enum is_format is_format[NFORMATS],
2413 flag_context_ty context, const char *string,
2414 lex_pos_ty *pos, const char *pretty_msgstr)
2418 if (context.is_format1 != undecided
2419 || context.is_format2 != undecided
2420 || context.is_format3 != undecided)
2421 for (i = 0; i < NFORMATS; i++)
2423 if (is_format[i] == undecided)
2425 if (formatstring_parsers[i] == current_formatstring_parser1
2426 && context.is_format1 != undecided)
2427 is_format[i] = (enum is_format) context.is_format1;
2428 if (formatstring_parsers[i] == current_formatstring_parser2
2429 && context.is_format2 != undecided)
2430 is_format[i] = (enum is_format) context.is_format2;
2431 if (formatstring_parsers[i] == current_formatstring_parser3
2432 && context.is_format3 != undecided)
2433 is_format[i] = (enum is_format) context.is_format3;
2435 if (possible_format_p (is_format[i]))
2437 struct formatstring_parser *parser = formatstring_parsers[i];
2438 char *invalid_reason = NULL;
2439 void *descr = parser->parse (string, false, NULL, &invalid_reason);
2442 parser->free (descr);
2445 /* The string is not a valid format string. */
2446 if (is_format[i] != possible)
2450 error_with_progname = false;
2451 if (pos->line_number == (size_t)(-1))
2454 sprintf (buffer, ":%ld", (long) pos->line_number);
2455 multiline_warning (xasprintf (_("%s%s: warning: "),
2456 pos->file_name, buffer),
2457 xasprintf (is_format[i] == yes_according_to_context
2458 ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n")
2459 : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
2461 format_language_pretty[i],
2463 error_with_progname = true;
2466 is_format[i] = impossible;
2467 free (invalid_reason);
2475 warn_format_string (enum is_format is_format[NFORMATS], const char *string,
2476 lex_pos_ty *pos, const char *pretty_msgstr)
2478 if (possible_format_p (is_format[format_python])
2479 && get_python_format_unnamed_arg_count (string) > 1)
2483 error_with_progname = false;
2484 if (pos->line_number == (size_t)(-1))
2487 sprintf (buffer, ":%ld", (long) pos->line_number);
2488 multiline_warning (xasprintf (_("%s%s: warning: "),
2489 pos->file_name, buffer),
2491 '%s' format string with unnamed arguments cannot be properly localized:\n\
2492 The translator cannot reorder the arguments.\n\
2493 Please consider using a format string with named arguments,\n\
2494 and a mapping instead of a tuple for the arguments.\n"),
2496 error_with_progname = true;
2502 remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
2503 flag_context_ty context, lex_pos_ty *pos,
2504 const char *extracted_comment,
2505 refcounted_string_list_ty *comment)
2507 enum is_format is_format[NFORMATS];
2508 struct argument_range range;
2509 enum is_wrap do_wrap;
2510 enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];
2515 /* See whether we shall exclude this message. */
2516 if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL)
2518 /* Tell the lexer to reset its comment buffer, so that the next
2519 message gets the correct comments. */
2520 xgettext_comment_reset ();
2521 savable_comment_reset ();
2523 if (msgctxt != NULL)
2530 savable_comment_to_xgettext_comment (comment);
2532 for (i = 0; i < NFORMATS; i++)
2533 is_format[i] = undecided;
2536 do_wrap = undecided;
2537 for (i = 0; i < NSYNTAXCHECKS; i++)
2538 do_syntax_check[i] = undecided;
2540 if (msgctxt != NULL)
2541 CONVERT_STRING (msgctxt, lc_string);
2542 CONVERT_STRING (msgid, lc_string);
2544 if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
2548 error_with_progname = false;
2549 if (pos->line_number == (size_t)(-1))
2552 sprintf (buffer, ":%ld", (long) pos->line_number);
2553 multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name,
2556 Empty msgid. It is reserved by GNU gettext:\n\
2557 gettext(\"\") returns the header entry with\n\
2558 meta information, not the empty string.\n")));
2559 error_with_progname = true;
2562 /* See if we have seen this message before. */
2563 mp = message_list_search (mlp, msgctxt, msgid);
2566 if (msgctxt != NULL)
2569 for (i = 0; i < NFORMATS; i++)
2570 is_format[i] = mp->is_format[i];
2571 do_wrap = mp->do_wrap;
2572 for (i = 0; i < NSYNTAXCHECKS; i++)
2573 do_syntax_check[i] = mp->do_syntax_check[i];
2577 /* Construct the msgstr from the prefix and suffix, otherwise use the
2580 msgstr = xasprintf ("%s%s%s", msgstr_prefix, msgid, msgstr_suffix);
2584 /* Allocate a new message and append the message to the list. */
2585 mp = message_alloc (msgctxt, msgid, NULL, msgstr, strlen (msgstr) + 1,
2587 /* Do not free msgctxt and msgid. */
2588 message_list_append (mlp, mp);
2591 /* Determine whether the context specifies that the msgid is a format
2593 set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid");
2595 /* Ask the lexer for the comments it has seen. */
2597 size_t nitems_before;
2598 size_t nitems_after;
2600 bool add_all_remaining_comments;
2601 /* The string before the comment tag. For example, if "** TRANSLATORS:"
2602 is seen and the comment tag is "TRANSLATORS:",
2603 then comment_tag_prefix is set to "** ". */
2604 const char *comment_tag_prefix = "";
2605 size_t comment_tag_prefix_length = 0;
2607 nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
2609 if (extracted_comment != NULL)
2611 char *copy = xstrdup (extracted_comment);
2615 while (*rest != '\0')
2617 char *newline = strchr (rest, '\n');
2619 if (newline != NULL)
2622 message_comment_dot_append (mp, rest);
2627 message_comment_dot_append (mp, rest);
2634 add_all_remaining_comments = add_all_comments;
2637 const char *s = xgettext_comment (j);
2642 CONVERT_STRING (s, lc_comment);
2644 /* To reduce the possibility of unwanted matches we do a two
2645 step match: the line must contain 'xgettext:' and one of
2646 the possible format description strings. */
2647 if ((t = c_strstr (s, "xgettext:")) != NULL)
2650 enum is_format tmp_format[NFORMATS];
2651 struct argument_range tmp_range;
2652 enum is_wrap tmp_wrap;
2653 enum is_syntax_check tmp_syntax_check[NSYNTAXCHECKS];
2656 t += strlen ("xgettext:");
2658 po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range,
2659 &tmp_wrap, tmp_syntax_check);
2661 interesting = false;
2662 for (i = 0; i < NFORMATS; i++)
2663 if (tmp_format[i] != undecided)
2665 is_format[i] = tmp_format[i];
2668 if (has_range_p (tmp_range))
2673 if (tmp_wrap != undecided)
2678 for (i = 0; i < NSYNTAXCHECKS; i++)
2679 if (tmp_syntax_check[i] != undecided)
2681 do_syntax_check[i] = tmp_syntax_check[i];
2685 /* If the "xgettext:" marker was followed by an interesting
2686 keyword, and we updated our is_format/do_wrap variables,
2687 we don't print the comment as a #. comment. */
2692 if (!add_all_remaining_comments && comment_tag != NULL)
2694 /* When the comment tag is seen, it drags in not only the line
2695 which it starts, but all remaining comment lines. */
2696 if ((t = c_strstr (s, comment_tag)) != NULL)
2698 add_all_remaining_comments = true;
2699 comment_tag_prefix = s;
2700 comment_tag_prefix_length = t - s;
2704 if (add_all_remaining_comments)
2706 if (strncmp (s, comment_tag_prefix, comment_tag_prefix_length) == 0)
2707 s += comment_tag_prefix_length;
2708 message_comment_dot_append (mp, s);
2712 nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
2714 /* Don't add the comments if they are a repetition of the tail of the
2715 already present comments. This avoids unneeded duplication if the
2716 same message appears several times, each time with the same comment. */
2717 if (nitems_before < nitems_after)
2719 size_t added = nitems_after - nitems_before;
2721 if (added <= nitems_before)
2723 bool repeated = true;
2725 for (i = 0; i < added; i++)
2726 if (strcmp (mp->comment_dot->item[nitems_before - added + i],
2727 mp->comment_dot->item[nitems_before + i]) != 0)
2735 for (i = 0; i < added; i++)
2736 free ((char *) mp->comment_dot->item[nitems_before + i]);
2737 mp->comment_dot->nitems = nitems_before;
2743 /* If it is not already decided, through programmer comments, whether the
2744 msgid is a format string, examine the msgid. This is a heuristic. */
2745 for (i = 0; i < NFORMATS; i++)
2747 if (is_format[i] == undecided
2748 && (formatstring_parsers[i] == current_formatstring_parser1
2749 || formatstring_parsers[i] == current_formatstring_parser2
2750 || formatstring_parsers[i] == current_formatstring_parser3)
2751 /* But avoid redundancy: objc-format is stronger than c-format. */
2752 && !(i == format_c && possible_format_p (is_format[format_objc]))
2753 && !(i == format_objc && possible_format_p (is_format[format_c]))
2754 /* Avoid flagging a string as c-format when it's known to be a
2755 qt-format or qt-plural-format or kde-format or boost-format
2758 && (possible_format_p (is_format[format_qt])
2759 || possible_format_p (is_format[format_qt_plural])
2760 || possible_format_p (is_format[format_kde])
2761 || possible_format_p (is_format[format_kde_kuit])
2762 || possible_format_p (is_format[format_boost])))
2763 /* Avoid flagging a string as kde-format when it's known to
2764 be a kde-kuit-format string. */
2765 && !(i == format_kde
2766 && possible_format_p (is_format[format_kde_kuit]))
2767 /* Avoid flagging a string as kde-kuit-format when it's
2768 known to be a kde-format string. Note that this relies
2769 on the fact that format_kde < format_kde_kuit, so a
2770 string will be marked as kde-format if both are
2772 && !(i == format_kde_kuit
2773 && possible_format_p (is_format[format_kde])))
2775 struct formatstring_parser *parser = formatstring_parsers[i];
2776 char *invalid_reason = NULL;
2777 void *descr = parser->parse (mp->msgid, false, NULL, &invalid_reason);
2781 /* msgid is a valid format string. We mark only those msgids
2782 as format strings which contain at least one format directive
2783 and thus are format strings with a high probability. We
2784 don't mark strings without directives as format strings,
2785 because that would force the programmer to add
2786 "xgettext: no-c-format" anywhere where a translator wishes
2787 to use a percent sign. So, the msgfmt checking will not be
2788 perfect. Oh well. */
2789 if (parser->get_number_of_directives (descr) > 0
2790 && !(parser->is_unlikely_intentional != NULL
2791 && parser->is_unlikely_intentional (descr)))
2792 is_format[i] = possible;
2794 parser->free (descr);
2798 /* msgid is not a valid format string. */
2799 is_format[i] = impossible;
2800 free (invalid_reason);
2803 mp->is_format[i] = is_format[i];
2806 if (has_range_p (range))
2808 if (has_range_p (mp->range))
2810 if (range.min < mp->range.min)
2811 mp->range.min = range.min;
2812 if (range.max > mp->range.max)
2813 mp->range.max = range.max;
2819 mp->do_wrap = do_wrap == no ? no : yes; /* By default we wrap. */
2821 for (i = 0; i < NSYNTAXCHECKS; i++)
2823 if (do_syntax_check[i] == undecided)
2824 do_syntax_check[i] = default_syntax_check[i] == yes ? yes : no;
2826 mp->do_syntax_check[i] = do_syntax_check[i];
2829 /* Warn about the use of non-reorderable format strings when the programming
2830 language also provides reorderable format strings. */
2831 warn_format_string (is_format, mp->msgid, pos, "msgid");
2833 /* Remember where we saw this msgid. */
2834 message_comment_filepos (mp, pos->file_name, pos->line_number);
2836 /* Tell the lexer to reset its comment buffer, so that the next
2837 message gets the correct comments. */
2838 xgettext_comment_reset ();
2839 savable_comment_reset ();
/* Attaches STRING as the plural form of the message MP.
   COMMENT is handed to savable_comment_to_xgettext_comment, and STRING is
   passed through CONVERT_STRING with lc_string.  When MP has no
   msgid_plural yet, STRING becomes MP's msgid_plural, a further msgstr
   form is appended to MP->msgstr, the format flags of MP are refined from
   CONTEXT and by test-parsing the msgid_plural with the current format
   string parsers, and warn_format_string is called.  When MP already has
   a msgid_plural, the string is freed instead.  In both cases the comment
   buffers are reset at the end.  */
2846 remember_a_message_plural (message_ty *mp, char *string,
2847 flag_context_ty context, lex_pos_ty *pos,
2848 refcounted_string_list_ty *comment)
2856 msgid_plural = string;
2858 savable_comment_to_xgettext_comment (comment);
2860 CONVERT_STRING (msgid_plural, lc_string);
2862 /* See if the message is already a plural message. */
2863 if (mp->msgid_plural == NULL)
2865 mp->msgid_plural = msgid_plural;
2867 /* Construct the first plural form from the prefix and suffix,
2868 otherwise use the empty string. The translator will have to
2869 provide additional plural forms. */
2872 xasprintf ("%s%s%s", msgstr_prefix, msgid_plural, msgstr_suffix);
/* Append the new form, including its terminating NUL, behind the
   msgstr bytes that MP already has.  */
2875 msgstr1_len = strlen (msgstr1) + 1;
2876 msgstr = XNMALLOC (mp->msgstr_len + msgstr1_len, char);
2877 memcpy (msgstr, mp->msgstr, mp->msgstr_len);
2878 memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len);
2879 mp->msgstr = msgstr;
2880 mp->msgstr_len = mp->msgstr_len + msgstr1_len;
2884 /* Determine whether the context specifies that the msgid_plural is a
2886 set_format_flags_from_context (mp->is_format, context, mp->msgid_plural,
2887 pos, "msgid_plural");
2889 /* If it is not already decided, through programmer comments or
2890 the msgid, whether the msgid is a format string, examine the
2891 msgid_plural. This is a heuristic. */
2892 for (i = 0; i < NFORMATS; i++)
2893 if ((formatstring_parsers[i] == current_formatstring_parser1
2894 || formatstring_parsers[i] == current_formatstring_parser2
2895 || formatstring_parsers[i] == current_formatstring_parser3)
2896 && (mp->is_format[i] == undecided || mp->is_format[i] == possible)
2897 /* But avoid redundancy: objc-format is stronger than c-format. */
2899 && possible_format_p (mp->is_format[format_objc]))
2900 && !(i == format_objc
2901 && possible_format_p (mp->is_format[format_c]))
2902 /* Avoid flagging a string as c-format when it's known to be a
2903 qt-format or qt-plural-format or kde-format or kde-kuit-format or boost-format string. */
2905 && (possible_format_p (mp->is_format[format_qt])
2906 || possible_format_p (mp->is_format[format_qt_plural])
2907 || possible_format_p (mp->is_format[format_kde])
2908 || possible_format_p (mp->is_format[format_kde_kuit])
2909 || possible_format_p (mp->is_format[format_boost])))
2910 /* Avoid flagging a string as kde-format when it's known
2911 to be a kde-kuit-format string. */
2912 && !(i == format_kde
2913 && possible_format_p (mp->is_format[format_kde_kuit]))
2914 /* Avoid flagging a string as kde-kuit-format when it's
2915 known to be a kde-format string. Note that this relies
2916 on the fact that format_kde < format_kde_kuit, so a
2917 string will be marked as kde-format if both are
2919 && !(i == format_kde_kuit
2920 && possible_format_p (mp->is_format[format_kde])))
2922 struct formatstring_parser *parser = formatstring_parsers[i];
2923 char *invalid_reason = NULL;
2925 parser->parse (mp->msgid_plural, false, NULL, &invalid_reason);
2929 /* Same heuristic as in remember_a_message. */
2930 if (parser->get_number_of_directives (descr) > 0
2931 && !(parser->is_unlikely_intentional != NULL
2932 && parser->is_unlikely_intentional (descr)))
2933 mp->is_format[i] = possible;
2935 parser->free (descr);
2939 /* msgid_plural is not a valid format string. */
2940 mp->is_format[i] = impossible;
2941 free (invalid_reason);
2945 /* Warn about the use of non-reorderable format strings when the programming
2946 language also provides reorderable format strings. */
2947 warn_format_string (mp->is_format, mp->msgid_plural, pos, "msgid_plural");
2950 free (msgid_plural);
2952 /* Tell the lexer to reset its comment buffer, so that the next
2953 message gets the correct comments. */
2954 xgettext_comment_reset ();
2955 savable_comment_reset ();
2959 struct arglist_parser *
2960 arglist_parser_alloc (message_list_ty *mlp, const struct callshapes *shapes)
2962 if (shapes == NULL || shapes->nshapes == 0)
2964 struct arglist_parser *ap =
2965 (struct arglist_parser *)
2966 xmalloc (offsetof (struct arglist_parser, alternative[0]));
2970 ap->keyword_len = 0;
2971 ap->nalternatives = 0;
2977 struct arglist_parser *ap =
2978 (struct arglist_parser *)
2979 xmalloc (xsum (sizeof (struct arglist_parser),
2980 xtimes (shapes->nshapes - 1,
2981 sizeof (struct partial_call))));
2985 ap->keyword = shapes->keyword;
2986 ap->keyword_len = shapes->keyword_len;
2987 ap->nalternatives = shapes->nshapes;
2988 for (i = 0; i < shapes->nshapes; i++)
2990 ap->alternative[i].argnumc = shapes->shapes[i].argnumc;
2991 ap->alternative[i].argnum1 = shapes->shapes[i].argnum1;
2992 ap->alternative[i].argnum2 = shapes->shapes[i].argnum2;
2993 ap->alternative[i].argnum1_glib_context =
2994 shapes->shapes[i].argnum1_glib_context;
2995 ap->alternative[i].argnum2_glib_context =
2996 shapes->shapes[i].argnum2_glib_context;
2997 ap->alternative[i].argtotal = shapes->shapes[i].argtotal;
2998 ap->alternative[i].xcomments = shapes->shapes[i].xcomments;
2999 ap->alternative[i].msgctxt = NULL;
3000 ap->alternative[i].msgctxt_escape = LET_NONE;
3001 ap->alternative[i].msgctxt_pos.file_name = NULL;
3002 ap->alternative[i].msgctxt_pos.line_number = (size_t)(-1);
3003 ap->alternative[i].msgid = NULL;
3004 ap->alternative[i].msgid_escape = LET_NONE;
3005 ap->alternative[i].msgid_context = null_context;
3006 ap->alternative[i].msgid_pos.file_name = NULL;
3007 ap->alternative[i].msgid_pos.line_number = (size_t)(-1);
3008 ap->alternative[i].msgid_comment = NULL;
3009 ap->alternative[i].msgid_plural = NULL;
3010 ap->alternative[i].msgid_plural_escape = LET_NONE;
3011 ap->alternative[i].msgid_plural_context = null_context;
3012 ap->alternative[i].msgid_plural_pos.file_name = NULL;
3013 ap->alternative[i].msgid_plural_pos.line_number = (size_t)(-1);
3021 struct arglist_parser *
3022 arglist_parser_clone (struct arglist_parser *ap)
3024 struct arglist_parser *copy =
3025 (struct arglist_parser *)
3026 xmalloc (xsum (sizeof (struct arglist_parser) - sizeof (struct partial_call),
3027 xtimes (ap->nalternatives, sizeof (struct partial_call))));
3030 copy->mlp = ap->mlp;
3031 copy->keyword = ap->keyword;
3032 copy->keyword_len = ap->keyword_len;
3033 copy->nalternatives = ap->nalternatives;
3034 for (i = 0; i < ap->nalternatives; i++)
3036 const struct partial_call *cp = &ap->alternative[i];
3037 struct partial_call *ccp = ©->alternative[i];
3039 ccp->argnumc = cp->argnumc;
3040 ccp->argnum1 = cp->argnum1;
3041 ccp->argnum2 = cp->argnum2;
3042 ccp->argnum1_glib_context = cp->argnum1_glib_context;
3043 ccp->argnum2_glib_context = cp->argnum2_glib_context;
3044 ccp->argtotal = cp->argtotal;
3045 ccp->xcomments = cp->xcomments;
3046 ccp->msgctxt = (cp->msgctxt != NULL ? xstrdup (cp->msgctxt) : NULL);
3047 ccp->msgctxt_escape = cp->msgctxt_escape;
3048 ccp->msgctxt_pos = cp->msgctxt_pos;
3049 ccp->msgid = (cp->msgid != NULL ? xstrdup (cp->msgid) : NULL);
3050 ccp->msgid_escape = cp->msgid_escape;
3051 ccp->msgid_context = cp->msgid_context;
3052 ccp->msgid_pos = cp->msgctxt_pos;
3053 ccp->msgid_comment = add_reference (cp->msgid_comment);
3055 (cp->msgid_plural != NULL ? xstrdup (cp->msgid_plural) : NULL);
3056 ccp->msgid_plural_escape = cp->msgid_plural_escape;
3057 ccp->msgid_plural_context = cp->msgid_plural_context;
3058 ccp->msgid_plural_pos = cp->msgid_plural_pos;
3066 arglist_parser_remember_literal (struct arglist_parser *ap,
3067 int argnum, char *string,
3068 flag_context_ty context,
3069 char *file_name, size_t line_number,
3070 refcounted_string_list_ty *comment,
3071 enum literalstring_escape_type type)
3073 bool stored_string = false;
3074 size_t nalternatives = ap->nalternatives;
3079 for (i = 0; i < nalternatives; i++)
3081 struct partial_call *cp = &ap->alternative[i];
3083 if (argnum == cp->argnumc)
3085 cp->msgctxt = string;
3086 cp->msgctxt_escape = type;
3087 cp->msgctxt_pos.file_name = file_name;
3088 cp->msgctxt_pos.line_number = line_number;
3089 stored_string = true;
3090 /* Mark msgctxt as done. */
3095 if (argnum == cp->argnum1)
3098 cp->msgid_escape = type;
3099 cp->msgid_context = context;
3100 cp->msgid_pos.file_name = file_name;
3101 cp->msgid_pos.line_number = line_number;
3102 cp->msgid_comment = add_reference (comment);
3103 stored_string = true;
3104 /* Mark msgid as done. */
3107 if (argnum == cp->argnum2)
3109 cp->msgid_plural = string;
3110 cp->msgid_plural_escape = type;
3111 cp->msgid_plural_context = context;
3112 cp->msgid_plural_pos.file_name = file_name;
3113 cp->msgid_plural_pos.line_number = line_number;
3114 stored_string = true;
3115 /* Mark msgid_plural as done. */
3120 /* Note: There is a memory leak here: When string was stored but is later
3121 not used by arglist_parser_done, we don't free it. */
3127 arglist_parser_remember (struct arglist_parser *ap,
3128 int argnum, char *string,
3129 flag_context_ty context,
3130 char *file_name, size_t line_number,
3131 refcounted_string_list_ty *comment)
3133 arglist_parser_remember_literal (ap, argnum, string, context,
3134 file_name, line_number,
3139 arglist_parser_decidedp (struct arglist_parser *ap, int argnum)
3143 /* Test whether all alternatives are decided.
3144 Note: A decided alternative can be complete
3145 cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3146 && cp->argtotal == 0
3147 or it can be failed if no literal strings were found at the specified
3149 cp->argnumc <= argnum && cp->argnum1 <= argnum && cp->argnum2 <= argnum
3150 or it can be failed if the number of arguments is exceeded:
3151 cp->argtotal > 0 && cp->argtotal < argnum
3153 for (i = 0; i < ap->nalternatives; i++)
3155 struct partial_call *cp = &ap->alternative[i];
3157 if (!((cp->argnumc <= argnum
3158 && cp->argnum1 <= argnum
3159 && cp->argnum2 <= argnum)
3160 || (cp->argtotal > 0 && cp->argtotal < argnum)))
3161 /* cp is still undecided. */
/* Finishes the processing of one keyword call, ARGNUM being compared
   against the argtotal of each alternative.
   An alternative is complete when its argnumc, argnum1 and argnum2 are all
   0 (marked as done) and its argtotal is 0 or equal to ARGNUM.  Among the
   complete alternatives the most specific one is looked for: first one
   that provides msgctxt, msgid and msgid_plural, then one that provides
   msgctxt and msgid or msgid and msgid_plural, finally one that provides
   only msgid.  The "ambiguous argument specification" diagnostic is
   issued at the position of the chosen msgid.
   For the chosen alternative, strings in the glib "msgctxt|msgid" syntax
   are split, the strings are interpreted by the current literal string
   parser or converted with CONVERT_STRING, and the message is recorded
   through remember_a_message and, when a plural is present,
   remember_a_message_plural.  The alternative's xcomments are compared
   with the message's comment_dot items and appended with
   message_comment_dot_append.
   At the end the comment reference of every alternative is dropped.  */
3169 arglist_parser_done (struct arglist_parser *ap, int argnum)
3174 /* Determine the number of complete calls. */
3176 for (i = 0; i < ap->nalternatives; i++)
3178 struct partial_call *cp = &ap->alternative[i];
3180 if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3181 && (cp->argtotal == 0 || cp->argtotal == argnum))
3187 struct partial_call *best_cp = NULL;
3188 bool ambiguous = false;
3190 /* Find complete calls where msgctxt, msgid, msgid_plural are all
3192 for (i = 0; i < ap->nalternatives; i++)
3194 struct partial_call *cp = &ap->alternative[i];
3196 if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3197 && (cp->argtotal == 0 || cp->argtotal == argnum)
3198 && cp->msgctxt != NULL
3199 && cp->msgid != NULL
3200 && cp->msgid_plural != NULL)
3202 if (best_cp != NULL)
3211 if (best_cp == NULL)
3213 struct partial_call *best_cp1 = NULL;
3214 struct partial_call *best_cp2 = NULL;
3216 /* Find complete calls where msgctxt, msgid are provided. */
3217 for (i = 0; i < ap->nalternatives; i++)
3219 struct partial_call *cp = &ap->alternative[i];
3221 if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3222 && (cp->argtotal == 0 || cp->argtotal == argnum)
3223 && cp->msgctxt != NULL
3224 && cp->msgid != NULL)
3226 if (best_cp1 != NULL)
3235 /* Find complete calls where msgid, msgid_plural are provided. */
3236 for (i = 0; i < ap->nalternatives; i++)
3238 struct partial_call *cp = &ap->alternative[i];
3240 if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3241 && (cp->argtotal == 0 || cp->argtotal == argnum)
3242 && cp->msgid != NULL
3243 && cp->msgid_plural != NULL)
3245 if (best_cp2 != NULL)
3254 if (best_cp1 != NULL)
3256 if (best_cp2 != NULL)
3258 if (best_cp != NULL)
3265 if (best_cp == NULL)
3267 /* Find complete calls where msgid is provided. */
3268 for (i = 0; i < ap->nalternatives; i++)
3270 struct partial_call *cp = &ap->alternative[i];
3272 if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3273 && (cp->argtotal == 0 || cp->argtotal == argnum)
3274 && cp->msgid != NULL)
3276 if (best_cp != NULL)
/* Diagnostic for an ambiguous keyword specification; error_with_progname
   is switched off around the call and switched back on afterwards.  */
3288 error_with_progname = false;
3289 error_at_line (0, 0,
3290 best_cp->msgid_pos.file_name,
3291 best_cp->msgid_pos.line_number,
3292 _("ambiguous argument specification for keyword '%.*s'"),
3293 (int) ap->keyword_len, ap->keyword);
3294 error_with_progname = true;
3297 if (best_cp != NULL)
3299 /* best_cp indicates the best found complete call.
3300 Now call remember_a_message. */
3303 /* Split strings in the GNOME glib syntax "msgctxt|msgid". */
3304 if (best_cp->argnum1_glib_context || best_cp->argnum2_glib_context)
3305 /* split_keywordspec should not allow the context to be specified
3306 in two different ways. */
3307 if (best_cp->msgctxt != NULL)
3309 if (best_cp->argnum1_glib_context)
3311 const char *separator = strchr (best_cp->msgid, '|');
3313 if (separator == NULL)
3315 error_with_progname = false;
3316 error_at_line (0, 0,
3317 best_cp->msgid_pos.file_name,
3318 best_cp->msgid_pos.line_number,
3319 _("warning: missing context for keyword '%.*s'"),
3320 (int) ap->keyword_len, ap->keyword);
3321 error_with_progname = true;
3325 size_t ctxt_len = separator - best_cp->msgid;
3326 char *ctxt = XNMALLOC (ctxt_len + 1, char);
3328 memcpy (ctxt, best_cp->msgid, ctxt_len);
3329 ctxt[ctxt_len] = '\0';
3330 best_cp->msgctxt = ctxt;
3331 best_cp->msgid = xstrdup (separator + 1);
3334 if (best_cp->msgid_plural != NULL && best_cp->argnum2_glib_context)
3336 const char *separator = strchr (best_cp->msgid_plural, '|');
3338 if (separator == NULL)
3340 error_with_progname = false;
3341 error_at_line (0, 0,
3342 best_cp->msgid_plural_pos.file_name,
3343 best_cp->msgid_plural_pos.line_number,
3344 _("warning: missing context for plural argument of keyword '%.*s'"),
3345 (int) ap->keyword_len, ap->keyword);
3346 error_with_progname = true;
3350 size_t ctxt_len = separator - best_cp->msgid_plural;
3351 char *ctxt = XNMALLOC (ctxt_len + 1, char);
3353 memcpy (ctxt, best_cp->msgid_plural, ctxt_len);
3354 ctxt[ctxt_len] = '\0';
3355 if (best_cp->msgctxt == NULL)
3356 best_cp->msgctxt = ctxt;
3359 if (strcmp (ctxt, best_cp->msgctxt) != 0)
3361 error_with_progname = false;
3362 error_at_line (0, 0,
3363 best_cp->msgid_plural_pos.file_name,
3364 best_cp->msgid_plural_pos.line_number,
3365 _("context mismatch between singular and plural form"));
3366 error_with_progname = true;
3370 best_cp->msgid_plural = xstrdup (separator + 1);
3375 flag_context_ty msgid_context = best_cp->msgid_context;
3376 flag_context_ty msgid_plural_context = best_cp->msgid_plural_context;
3377 struct literalstring_parser *parser = current_literalstring_parser;
3378 const char *encoding;
3380 /* Special support for the 3-argument tr operator in Qt:
3381 When --qt and --keyword=tr:1,1,2c,3t are specified, add to the
3382 context the information that the argument is expected to be a
3383 qt-plural-format. */
3384 if (recognize_format_qt
3385 && current_formatstring_parser3 == &formatstring_qt_plural
3386 && best_cp->msgid_plural == best_cp->msgid)
3388 msgid_context.is_format3 = yes_according_to_context;
3389 msgid_plural_context.is_format3 = yes_according_to_context;
3392 if (best_cp->msgctxt != NULL)
3394 if (parser != NULL && best_cp->msgctxt_escape != 0)
3397 parser->parse (best_cp->msgctxt,
3398 &best_cp->msgctxt_pos,
3399 best_cp->msgctxt_escape);
3400 free (best_cp->msgctxt);
3401 best_cp->msgctxt = msgctxt;
3405 lex_pos_ty *pos = &best_cp->msgctxt_pos;
3406 CONVERT_STRING (best_cp->msgctxt, lc_string);
3410 if (parser != NULL && best_cp->msgid_escape != 0)
3412 char *msgid = parser->parse (best_cp->msgid,
3413 &best_cp->msgid_pos,
3414 best_cp->msgid_escape);
3415 if (best_cp->msgid_plural == best_cp->msgid)
3416 best_cp->msgid_plural = msgid;
3417 free (best_cp->msgid);
3418 best_cp->msgid = msgid;
3422 lex_pos_ty *pos = &best_cp->msgid_pos;
3423 CONVERT_STRING (best_cp->msgid, lc_string);
3426 if (best_cp->msgid_plural)
3428 /* best_cp->msgid_plural may point to best_cp->msgid.
3429 In that case, it is already interpreted and converted. */
3430 if (best_cp->msgid_plural != best_cp->msgid)
3433 && best_cp->msgid_plural_escape != 0)
3435 char *msgid_plural =
3436 parser->parse (best_cp->msgid_plural,
3437 &best_cp->msgid_plural_pos,
3438 best_cp->msgid_plural_escape);
3439 free (best_cp->msgid_plural);
3440 best_cp->msgid_plural = msgid_plural;
3444 lex_pos_ty *pos = &best_cp->msgid_plural_pos;
3445 CONVERT_STRING (best_cp->msgid_plural, lc_string);
3449 /* If best_cp->msgid_plural equals to best_cp->msgid,
3450 the ownership will be transferred to
3451 remember_a_message before it is passed to
3452 remember_a_message_plural.
3454 Make a copy of the string in that case. */
3455 if (best_cp->msgid_plural == best_cp->msgid)
3456 best_cp->msgid_plural = xstrdup (best_cp->msgid);
3459 if (best_cp->msgid_comment != NULL)
3461 refcounted_string_list_ty *msgid_comment =
3462 savable_comment_convert_encoding (best_cp->msgid_comment,
3463 &best_cp->msgid_pos);
3464 drop_reference (best_cp->msgid_comment);
3465 best_cp->msgid_comment = msgid_comment;
3468 /* best_cp->msgctxt, best_cp->msgid, and best_cp->msgid_plural
3469 are already in UTF-8. Prevent further conversion in
3470 remember_a_message. */
3471 encoding = xgettext_current_source_encoding;
3472 xgettext_current_source_encoding = po_charset_utf8;
3473 mp = remember_a_message (ap->mlp, best_cp->msgctxt, best_cp->msgid,
3475 &best_cp->msgid_pos,
3476 NULL, best_cp->msgid_comment);
3477 if (mp != NULL && best_cp->msgid_plural != NULL)
3478 remember_a_message_plural (mp,
3479 best_cp->msgid_plural,
3480 msgid_plural_context,
3481 &best_cp->msgid_plural_pos,
3483 xgettext_current_source_encoding = encoding;
3486 if (best_cp->xcomments.nitems > 0)
3488 /* Add best_cp->xcomments to mp->comment_dot, unless already
3492 for (i = 0; i < best_cp->xcomments.nitems; i++)
3494 const char *xcomment = best_cp->xcomments.item[i];
3497 if (mp != NULL && mp->comment_dot != NULL)
3501 for (j = 0; j < mp->comment_dot->nitems; j++)
3502 if (strcmp (xcomment, mp->comment_dot->item[j]) == 0)
3509 message_comment_dot_append (mp, xcomment);
3516 /* No complete call was parsed. */
3517 /* Note: There is a memory leak here: When there is more than one
3518 alternative, the same string can be stored in multiple alternatives,
3519 and it's not easy to free all strings reliably. */
3520 if (ap->nalternatives == 1)
3522 if (ap->alternative[0].msgctxt != NULL)
3523 free (ap->alternative[0].msgctxt);
3524 if (ap->alternative[0].msgid != NULL)
3525 free (ap->alternative[0].msgid);
3526 if (ap->alternative[0].msgid_plural != NULL)
3527 free (ap->alternative[0].msgid_plural);
/* Release the comment reference held by every alternative.  */
3531 for (i = 0; i < ap->nalternatives; i++)
3532 drop_reference (ap->alternative[i].msgid_comment);
3537 struct mixed_string_buffer *
3538 mixed_string_buffer_alloc (lexical_context_ty lcontext,
3539 const char *logical_file_name,
3542 struct mixed_string_buffer *bp = XMALLOC (struct mixed_string_buffer);
3543 bp->utf8_buffer = NULL;
3544 bp->utf8_buflen = 0;
3545 bp->utf8_allocated = 0;
3547 bp->curr_buffer = NULL;
3548 bp->curr_buflen = 0;
3549 bp->curr_allocated = 0;
3550 bp->lcontext = lcontext;
3551 bp->logical_file_name = logical_file_name;
3552 bp->line_number = line_number;
3556 /* Auxiliary function: Append a byte to bp->curr. */
3558 mixed_string_buffer_append_to_curr_buffer (struct mixed_string_buffer *bp,
3561 if (bp->curr_buflen == bp->curr_allocated)
3563 bp->curr_allocated = 2 * bp->curr_allocated + 10;
3564 bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
3566 bp->curr_buffer[bp->curr_buflen++] = c;
3569 /* Auxiliary function: Ensure count more bytes are available in bp->utf8. */
3571 mixed_string_buffer_grow_utf8_buffer (struct mixed_string_buffer *bp,
3574 if (bp->utf8_buflen + count > bp->utf8_allocated)
3576 size_t new_allocated = 2 * bp->utf8_allocated + 10;
3577 if (new_allocated < bp->utf8_buflen + count)
3578 new_allocated = bp->utf8_buflen + count;
3579 bp->utf8_allocated = new_allocated;
3580 bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
3584 /* Auxiliary function: Append a Unicode character to bp->utf8.
3585 uc must be < 0x110000. */
3587 mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp,
3590 unsigned char utf8buf[6];
3591 int count = u8_uctomb (utf8buf, uc, 6);
3594 /* The caller should have ensured that uc is not out-of-range. */
3597 mixed_string_buffer_grow_utf8_buffer (bp, count);
3598 memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
3599 bp->utf8_buflen += count;
3602 /* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer. */
3604 mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
3606 if (bp->utf16_surr != 0)
3608 /* A half surrogate is invalid, therefore use U+FFFD instead. */
3609 mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
3614 /* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer. */
3616 mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp,
3619 if (bp->curr_buflen > 0)
3624 mixed_string_buffer_append_to_curr_buffer (bp, '\0');
3626 /* Convert from the source encoding to UTF-8. */
3627 curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
3628 bp->logical_file_name,
3631 /* Append it to bp->utf8_buffer. */
3632 count = strlen (curr);
3633 mixed_string_buffer_grow_utf8_buffer (bp, count);
3634 memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
3635 bp->utf8_buflen += count;
3637 if (curr != bp->curr_buffer)
3639 bp->curr_buflen = 0;
3644 mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c)
3646 /* Switch from Unicode character mode to multibyte character mode. */
3647 mixed_string_buffer_flush_utf16_surr (bp);
3649 /* When a newline is seen, convert the accumulated multibyte sequence.
3650 This ensures a correct line number in the error message in case of
3651 a conversion error. The "- 1" is to account for the newline. */
3653 mixed_string_buffer_flush_curr_buffer (bp, bp->line_number - 1);
3655 mixed_string_buffer_append_to_curr_buffer (bp, (unsigned char) c);
3659 mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c)
3661 /* Switch from multibyte character mode to Unicode character mode. */
3662 mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
3664 /* Test whether this character and the previous one form a Unicode
3665 surrogate character pair. */
3666 if (bp->utf16_surr != 0 && (c >= 0xdc00 && c < 0xe000))
3668 unsigned short utf16buf[2];
3671 utf16buf[0] = bp->utf16_surr;
3673 if (u16_mbtouc (&uc, utf16buf, 2) != 2)
3676 mixed_string_buffer_append_to_utf8_buffer (bp, uc);
3681 mixed_string_buffer_flush_utf16_surr (bp);
3683 if (c >= 0xd800 && c < 0xdc00)
3685 else if (c >= 0xdc00 && c < 0xe000)
3687 /* A half surrogate is invalid, therefore use U+FFFD instead. */
3688 mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
3691 mixed_string_buffer_append_to_utf8_buffer (bp, c);
3696 mixed_string_buffer_done (struct mixed_string_buffer *bp)
3700 /* Flush all into bp->utf8_buffer. */
3701 mixed_string_buffer_flush_utf16_surr (bp);
3702 mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
3703 /* NUL-terminate it. */
3704 mixed_string_buffer_grow_utf8_buffer (bp, 1);
3705 bp->utf8_buffer[bp->utf8_buflen] = '\0';
3707 /* Free curr_buffer and bp itself. */
3708 utf8_buffer = bp->utf8_buffer;
3709 free (bp->curr_buffer);
/* Builds the header entry of the output (NOTE(review): presumably the
   body of the header constructor -- confirm against the enclosing
   definition).
   Project-Id-Version is "package_name package_version", just
   package_name, or the placeholder "PACKAGE VERSION" when no package name
   is set.  Report-Msgid-Bugs-To comes from msgid_bugs_address; a warning
   is issued when that address is the empty string.  The remaining fields
   are template values.  The entry gets a title/copyright comment template
   that depends on copyright_holder, and is marked fuzzy.  */
3720 char *project_id_version;
3726 static lex_pos_ty pos = { __FILE__, __LINE__ };
3728 if (package_name != NULL)
3730 if (package_version != NULL)
3731 project_id_version = xasprintf ("%s %s", package_name, package_version);
3733 project_id_version = xasprintf ("%s", package_name);
3736 project_id_version = xstrdup ("PACKAGE VERSION");
3738 if (msgid_bugs_address != NULL && msgid_bugs_address[0] == '\0')
3739 multiline_warning (xasprintf (_("warning: ")),
3741 The option --msgid-bugs-address was not specified.\n\
3742 If you are using a 'Makevars' file, please specify\n\
3743 the MSGID_BUGS_ADDRESS variable there; otherwise please\n\
3744 specify an --msgid-bugs-address command line option.\n\
3748 timestring = po_strftime (&now);
3750 msgstr = xasprintf ("\
3751 Project-Id-Version: %s\n\
3752 Report-Msgid-Bugs-To: %s\n\
3753 POT-Creation-Date: %s\n\
3754 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n\
3755 Last-Translator: FULL NAME <EMAIL@ADDRESS>\n\
3756 Language-Team: LANGUAGE <LL@li.org>\n\
3758 MIME-Version: 1.0\n\
3759 Content-Type: text/plain; charset=CHARSET\n\
3760 Content-Transfer-Encoding: 8bit\n",
3762 msgid_bugs_address != NULL ? msgid_bugs_address : "",
3765 free (project_id_version);
/* The length passed along counts the terminating NUL of msgstr.  */
3767 mp = message_alloc (NULL, "", NULL, msgstr, strlen (msgstr) + 1, &pos);
3769 if (copyright_holder[0] != '\0')
3770 comment = xasprintf ("\
3771 SOME DESCRIPTIVE TITLE.\n\
3772 Copyright (C) YEAR %s\n\
3773 This file is distributed under the same license as the %s package.\n\
3774 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n",
3776 package_name != NULL ? package_name : "PACKAGE");
3778 comment = xstrdup ("\
3779 SOME DESCRIPTIVE TITLE.\n\
3780 This file is put in the public domain.\n\
3781 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n");
3782 message_comment_append (mp, comment);
3785 mp->is_fuzzy = true;
/* Completes the header entry of MDLP once all messages are known.
   1. When some message in some domain has a msgid_plural, the header is
      looked up in mdlp->item[0]->messages as the entry with empty msgid;
      if it has no "Plural-Forms:" field yet, a Plural-Forms template is
      spliced in at the end of the header text.
   2. When some message list is not plain ASCII, or the output syntax
      requires UTF-8, iconv_message_list is invoked on the first domain's
      message list with UTF-8 as both charset arguments.  */
3791 finalize_header (msgdomain_list_ty *mdlp)
3793 /* If the generated PO file has plural forms, add a Plural-Forms template
3794 to the constructed header. */
3800 for (i = 0; i < mdlp->nitems; i++)
3802 message_list_ty *mlp = mdlp->item[i]->messages;
3804 for (j = 0; j < mlp->nitems; j++)
3806 message_ty *mp = mlp->item[j];
3808 if (mp->msgid_plural != NULL)
3820 message_ty *header =
3821 message_list_search (mdlp->item[0]->messages, NULL, "");
3823 && c_strstr (header->msgstr, "Plural-Forms:") == NULL)
3825 size_t insertpos = strlen (header->msgstr);
3830 suffix = "\nPlural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n";
3831 if (insertpos == 0 || header->msgstr[insertpos-1] == '\n')
/* Splice the template in at insertpos (the index of the first NUL) and
   keep the bytes that follow, from that NUL onward.  */
3833 suffix_len = strlen (suffix);
3834 new_msgstr = XNMALLOC (header->msgstr_len + suffix_len, char);
3835 memcpy (new_msgstr, header->msgstr, insertpos);
3836 memcpy (new_msgstr + insertpos, suffix, suffix_len);
3837 memcpy (new_msgstr + insertpos + suffix_len,
3838 header->msgstr + insertpos,
3839 header->msgstr_len - insertpos);
3840 header->msgstr = new_msgstr;
3841 header->msgstr_len = header->msgstr_len + suffix_len;
3846 /* If not all the strings were plain ASCII, or if the output syntax
3847 requires a charset conversion, set the charset in the header to UTF-8.
3848 All messages have already been converted to UTF-8 in remember_a_message
3849 and remember_a_message_plural. */
3851 bool has_nonascii = false;
3854 for (i = 0; i < mdlp->nitems; i++)
3856 message_list_ty *mlp = mdlp->item[i]->messages;
3858 if (!is_ascii_message_list (mlp))
3859 has_nonascii = true;
3862 if (has_nonascii || output_syntax->requires_utf8)
3864 message_list_ty *mlp = mdlp->item[0]->messages;
3866 iconv_message_list (mlp, po_charset_utf8, po_charset_utf8, NULL);
/* Looks up the extractor description for the language NAME.
   NAME is compared case-insensitively (c_strcasecmp) with the entries of
   a static table.  The result carries the matching entry's extractor
   function, flag table, first and second format string parser and
   literal string parser; the third format string parser starts as NULL.
   For the "C++" entry, recognize_format_qt, recognize_format_kde and
   recognize_format_boost each replace the flag table and the second
   format string parser; Qt and KDE also set the third one.
   An unknown NAME is reported through error (EXIT_FAILURE, ...).  */
3873 language_to_extractor (const char *name)
3878 extractor_func func;
3879 flag_context_list_table_ty *flag_table;
3880 struct formatstring_parser *formatstring_parser1;
3881 struct formatstring_parser *formatstring_parser2;
3882 struct literalstring_parser *literalstring_parser;
3884 typedef struct table_ty table_ty;
3886 static table_ty table[] =
3905 SCANNERS_STRINGTABLE
3914 /* Here may follow more languages and their scanners: pike, etc...
3915 Make sure new scanners honor the --exclude-file option. */
/* Linear search; language names are matched without regard to case.  */
3920 for (tp = table; tp < ENDOF(table); ++tp)
3921 if (c_strcasecmp (name, tp->name) == 0)
3923 extractor_ty result;
3925 result.func = tp->func;
3926 result.flag_table = tp->flag_table;
3927 result.formatstring_parser1 = tp->formatstring_parser1;
3928 result.formatstring_parser2 = tp->formatstring_parser2;
3929 result.formatstring_parser3 = NULL;
3930 result.literalstring_parser = tp->literalstring_parser;
3932 /* Handle --qt. It's preferable to handle this facility here rather
3933 than through an option --language=C++/Qt because the latter would
3934 conflict with the language "C++" regarding the file extensions. */
3935 if (recognize_format_qt && strcmp (tp->name, "C++") == 0)
3937 result.flag_table = &flag_table_cxx_qt;
3938 result.formatstring_parser2 = &formatstring_qt;
3939 result.formatstring_parser3 = &formatstring_qt_plural;
3941 /* Likewise for --kde. */
3942 if (recognize_format_kde && strcmp (tp->name, "C++") == 0)
3944 result.flag_table = &flag_table_cxx_kde;
3945 result.formatstring_parser2 = &formatstring_kde;
3946 result.formatstring_parser3 = &formatstring_kde_kuit;
3948 /* Likewise for --boost. */
3949 if (recognize_format_boost && strcmp (tp->name, "C++") == 0)
3951 result.flag_table = &flag_table_cxx_boost;
3952 result.formatstring_parser2 = &formatstring_boost;
3958 error (EXIT_FAILURE, 0, _("language '%s' unknown"), name);
3961 extractor_ty result = { NULL, NULL, NULL, NULL };
/* Maps the file name extension EXTENSION to a language name.
   The static table is searched linearly with strcmp, so the match is
   exact and case-sensitive; the language of the first matching entry is
   returned.  */
3968 extension_to_language (const char *extension)
3972 const char *extension;
3973 const char *language;
3975 typedef struct table_ty table_ty;
3977 static table_ty table[] =
3987 EXTENSIONS_SMALLTALK
3989 EXTENSIONS_PROPERTIES
3996 EXTENSIONS_STRINGTABLE
4000 EXTENSIONS_JAVASCRIPT
4002 EXTENSIONS_GSETTINGS
4005 /* Here may follow more file extensions... */
4010 for (tp = table; tp < ENDOF(table); ++tp)
4011 if (strcmp (extension, tp->extension) == 0)
4012 return tp->language;