1 /* GNU gettext - internationalization aids
2 Copyright (C) 1995-1998, 2000-2010, 2012, 2015 Free Software
4 This file was written by Peter Miller <millerp@canb.auug.org.au>
6 This program is free software: you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>. */
33 #include "error-progname.h"
35 #include "relocatable.h"
38 #include "read-catalog.h"
40 #include "read-properties.h"
41 #include "read-stringtable.h"
43 #include "po-charset.h"
44 #include "msgl-iconv.h"
45 #include "msgl-fsearch.h"
47 #include "c-strcase.h"
48 #include "propername.h"
/* Shorthand used around user-visible messages in this file:
   _(str) expands to gettext (str).  */
51 #define _(str) gettext (str)
54 /* Apply the .pot file to each of the domains in the PO file.
   Off by default; main turns it on for -m / --multi-domain.  */
55 static bool multi_domain_mode = false;
57 /* Determines whether to use fuzzy matching.
   On by default; main turns it off for -N / --no-fuzzy-matching.
   It guards the search for a similar definition in match_domain and the
   charset lookup in compare.  */
58 static bool use_fuzzy_matching = true;
60 /* Whether to consider fuzzy messages as translations.
   Off by default; main turns it on for --use-fuzzy.  While it is off,
   match_domain flags fuzzy definitions other than the header entry as
   needing review.  */
61 static bool include_fuzzies = false;
63 /* Whether to consider untranslated messages as translations.
   Off by default; main turns it on for --use-untranslated.  While it is
   off, match_domain flags definitions whose msgstr is empty.  */
64 static bool include_untranslated = false;
/* Long option table passed to getopt_long in main.
   Options that also have a short form use that letter as their code.
   The long-only options use codes CHAR_MAX + 1 .. CHAR_MAX + 3, which lie
   outside the range of short option characters.  */
67 static const struct option long_options[] =
69 { "directory", required_argument, NULL, 'D' },
70 { "help", no_argument, NULL, 'h' },
71 { "multi-domain", no_argument, NULL, 'm' },
72 { "no-fuzzy-matching", no_argument, NULL, 'N' },
73 { "properties-input", no_argument, NULL, 'P' },
74 { "stringtable-input", no_argument, NULL, CHAR_MAX + 1 },
75 { "use-fuzzy", no_argument, NULL, CHAR_MAX + 2 },
76 { "use-untranslated", no_argument, NULL, CHAR_MAX + 3 },
77 { "version", no_argument, NULL, 'V' },
82 /* Forward declarations of local functions. */
/* usage carries the noreturn attribute when compiled with GCC 2.5 or
   newer.  */
83 static void usage (int status)
84 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
85 __attribute__ ((noreturn))
/* compare reads the catalogs named FN1 and FN2 with the reader selected by
   INPUT_SYNTAX and checks them against each other.  */
88 static void compare (const char *fn1, const char *fn2,
89 catalog_input_format_ty input_syntax);
/* Program entry point.
   Sets the program name, locale and message domains, registers
   close_stdout to run at exit, and parses the command line into the
   file-level option flags and INPUT_SYNTAX.  Version and help requests are
   answered here.  Afterwards exactly two file name arguments must remain;
   they are handed to compare as the definitions file and the references
   file, in that order.
   The two argument-count diagnostics call error with status EXIT_SUCCESS,
   which prints the message without terminating; the following
   usage (EXIT_FAILURE) call is what ends the program.  */
93 main (int argc, char *argv[])
/* PO syntax is the default input format; the -P and --stringtable-input
   options below replace it.  */
98 catalog_input_format_ty input_syntax = &input_format_po;
100 /* Set program name for messages. */
101 set_program_name (argv[0]);
102 error_print_progname = maybe_print_progname;
/* NOTE(review): presumably lifts the limit on reported parse errors so
   that every mismatch is listed instead of stopping early -- confirm
   where gram_max_allowed_errors is consumed.  */
103 gram_max_allowed_errors = UINT_MAX;
105 #ifdef HAVE_SETLOCALE
106 /* Set locale via LC_ALL. */
107 setlocale (LC_ALL, "");
110 /* Set the text message domain. */
111 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
/* NOTE(review): the bison-runtime domain is bound too, presumably for
   messages coming from a bison-generated parser -- confirm.  */
112 bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
113 textdomain (PACKAGE);
115 /* Ensure that write errors on stdout are detected. */
116 atexit (close_stdout);
/* Option loop: the short option string mirrors the letters used in
   long_options; only D takes an argument.  */
120 while ((optchar = getopt_long (argc, argv, "D:hmNPV", long_options, NULL))
124 case '\0': /* long option */
/* -D / --directory: add the argument to the input file search list.  */
128 dir_list_append (optarg);
/* -m / --multi-domain.  */
136 multi_domain_mode = true;
/* -N / --no-fuzzy-matching.  */
140 use_fuzzy_matching = false;
/* -P / --properties-input.  */
144 input_syntax = &input_format_properties;
151 case CHAR_MAX + 1: /* --stringtable-input */
152 input_syntax = &input_format_stringtable;
155 case CHAR_MAX + 2: /* --use-fuzzy */
156 include_fuzzies = true;
159 case CHAR_MAX + 3: /* --use-untranslated */
160 include_untranslated = true;
164 usage (EXIT_FAILURE);
168 /* Version information is requested. */
171 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
172 /* xgettext: no-wrap */
173 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
174 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
175 This is free software: you are free to change and redistribute it.\n\
176 There is NO WARRANTY, to the extent permitted by law.\n\
178 "1995-1998, 2000-2010");
179 printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
183 /* Help is requested. */
185 usage (EXIT_SUCCESS);
187 /* Test whether we have an .po file name as argument. */
190 error (EXIT_SUCCESS, 0, _("no input files given"));
191 usage (EXIT_FAILURE);
193 if (optind + 2 != argc)
195 error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
196 usage (EXIT_FAILURE);
199 /* compare the two files */
200 compare (argv[optind], argv[optind + 1], input_syntax);
205 /* Display usage information and exit.
   A STATUS other than EXIT_SUCCESS produces a one-line hint on stderr
   that points the user at --help.  The remaining text is the full help
   message: synopsis, description, the option groups (input file location,
   operation modifiers, input file syntax, informative output) and the
   bug-reporting address.  */
209 if (status != EXIT_SUCCESS)
210 fprintf (stderr, _("Try '%s --help' for more information.\n"),
215 Usage: %s [OPTION] def.po ref.pot\n\
218 /* xgettext: no-wrap */
220 Compare two Uniforum style .po files to check that both contain the same\n\
221 set of msgid strings. The def.po file is an existing PO file with the\n\
222 translations. The ref.pot file is the last created PO file, or a PO Template\n\
223 file (generally created by xgettext). This is useful for checking that\n\
224 you have translated each and every message in your program. Where an exact\n\
225 match cannot be found, fuzzy matching is used to produce better diagnostics.\n\
229 Mandatory arguments to long options are mandatory for short options too.\n"));
232 Input file location:\n"));
234 def.po translations\n"));
236 ref.pot references to the sources\n"));
238 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
241 Operation modifiers:\n"));
243 -m, --multi-domain apply ref.pot to each of the domains in def.po\n"));
245 -N, --no-fuzzy-matching do not use fuzzy matching\n"));
247 --use-fuzzy consider fuzzy entries\n"));
249 --use-untranslated consider untranslated entries\n"));
252 Input file syntax:\n"));
254 -P, --properties-input input files are in Java .properties syntax\n"));
256 --stringtable-input input files are in NeXTstep/GNUstep .strings\n\
260 Informative output:\n"));
262 -h, --help display this help and exit\n"));
264 -V, --version output version information and exit\n"));
266 /* TRANSLATORS: The placeholder indicates the bug-reporting address
267 for this package. Please add _another line_ saying
268 "Report translation bugs to <...>\n" with the address for translation
269 bugs (typically your translation team's web or email address). */
270 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
277 /* Return true if a message should be kept.
   This is the predicate that remove_obsoletes passes to
   message_list_remove_if_not.  MP is the message being examined; a
   message is kept unless it is marked obsolete.  */
279 is_message_selected (const message_ty *mp)
281 /* Always keep the header entry. */
285 return !mp->obsolete;
289 /* Remove obsolete messages from a message list. Return the modified list.
   MDLP is the list of domains read from one catalog.  The message list of
   every domain in it is passed to message_list_remove_if_not with
   is_message_selected as the predicate.  compare applies this to both
   input files right after reading them.  */
290 static msgdomain_list_ty *
291 remove_obsoletes (msgdomain_list_ty *mdlp)
295 for (k = 0; k < mdlp->nitems; k++)
296 message_list_remove_if_not (mdlp->item[k]->messages, is_message_selected);
/* Check every message of REFMLP against the definitions in DEFMLP.
   Each reference message is looked up by msgctxt and msgid.
   - If a definition exists, a diagnostic is issued at the position of the
     definition when it has an empty msgstr (unless include_untranslated
     is set) or when it is fuzzy and not the header entry (unless
     include_fuzzies is set).
   - If no definition exists, the reference message is reported as used
     but not defined.  With use_fuzzy_matching set, a similar definition
     is searched for first, and if one is found the diagnostic points at
     it as well.
   FN1 is the name of the definitions file and appears in the plain
   not-defined diagnostic.
   DEFMLP_FINDEX points to the fuzzy index for DEFMLP.  The index is built
   only when a fuzzy search is first needed and *DEFMLP_FINDEX is still
   NULL; DEF_CANON_CHARSET is passed to the index constructor.  The callers
   in compare free the index afterwards.  */
303 match_domain (const char *fn1, const char *fn2,
304 message_list_ty *defmlp, message_fuzzy_index_ty **defmlp_findex,
305 const char *def_canon_charset,
306 message_list_ty *refmlp,
311 for (j = 0; j < refmlp->nitems; j++)
316 refmsg = refmlp->item[j];
318 /* See if it is in the other file. */
319 defmsg = message_list_search (defmlp, refmsg->msgctxt, refmsg->msgid);
/* An empty msgstr is what marks a definition as untranslated.  */
322 if (!include_untranslated && defmsg->msgstr[0] == '\0')
325 po_gram_error_at_line (&defmsg->pos, _("\
326 this message is untranslated"));
328 else if (!include_fuzzies && defmsg->is_fuzzy && !is_header (defmsg))
331 po_gram_error_at_line (&defmsg->pos, _("\
332 this message needs to be reviewed by the translator"));
339 /* If the message was not defined at all, try to find a very
340 similar message, it could be a typo, or the suggestion may
343 if (use_fuzzy_matching)
347 /* Old, slow code. */
349 message_list_search_fuzzy (defmlp,
350 refmsg->msgctxt, refmsg->msgid);
354 /* Speedup through early abort in fstrcmp(), combined with
355 pre-sorting of the messages through a hashed index. */
356 /* Create the fuzzy index lazily. */
357 if (*defmlp_findex == NULL)
359 message_fuzzy_index_alloc (defmlp, def_canon_charset);
361 message_fuzzy_index_search (*defmlp_findex,
362 refmsg->msgctxt, refmsg->msgid,
363 FUZZY_THRESHOLD, false);
370 po_gram_error_at_line (&refmsg->pos, _("\
371 this message is used but not defined..."));
/* NOTE(review): presumably po_gram_error_at_line increments
   error_message_count, and this decrement makes the two-part diagnostic
   count as a single error -- confirm.  */
372 error_message_count--;
373 po_gram_error_at_line (&defmsg->pos, _("\
374 ...but this definition is similar"));
378 po_gram_error_at_line (&refmsg->pos, _("\
379 this message is used but not defined in %s"), fn1);
386 compare (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax)
388 msgdomain_list_ty *def;
389 msgdomain_list_ty *ref;
392 const char *def_canon_charset;
393 message_list_ty *empty_list;
395 /* This is the master file, created by a human. */
396 def = remove_obsoletes (read_catalog_file (fn1, input_syntax));
398 /* This is the generated file, created by groping the sources with
399 the xgettext program. */
400 ref = remove_obsoletes (read_catalog_file (fn2, input_syntax));
402 /* The references file can be either in ASCII or in UTF-8. If it is
403 in UTF-8, we have to convert the definitions to UTF-8 as well. */
405 bool was_utf8 = false;
406 for (k = 0; k < ref->nitems; k++)
408 message_list_ty *mlp = ref->item[k]->messages;
410 for (j = 0; j < mlp->nitems; j++)
411 if (is_header (mlp->item[j]) /* && !mlp->item[j]->obsolete */)
413 const char *header = mlp->item[j]->msgstr;
417 const char *charsetstr = c_strstr (header, "charset=");
419 if (charsetstr != NULL)
423 charsetstr += strlen ("charset=");
424 len = strcspn (charsetstr, " \t\n");
425 if (len == strlen ("UTF-8")
426 && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
433 def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
436 /* Determine canonicalized encoding name of the definitions now, after
437 conversion. Only used for fuzzy matching. */
438 if (use_fuzzy_matching)
440 def_canon_charset = def->encoding;
441 if (def_canon_charset == NULL)
443 char *charset = NULL;
445 /* Get the encoding of the definitions file. */
446 for (k = 0; k < def->nitems; k++)
448 message_list_ty *mlp = def->item[k]->messages;
450 for (j = 0; j < mlp->nitems; j++)
451 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
453 const char *header = mlp->item[j]->msgstr;
457 const char *charsetstr = c_strstr (header, "charset=");
459 if (charsetstr != NULL)
463 charsetstr += strlen ("charset=");
464 len = strcspn (charsetstr, " \t\n");
465 charset = (char *) xmalloca (len + 1);
466 memcpy (charset, charsetstr, len);
476 def_canon_charset = po_charset_canonicalize (charset);
477 if (def_canon_charset == NULL)
478 /* Unspecified encoding. Assume unibyte encoding. */
479 def_canon_charset = po_charset_ascii;
483 def_canon_charset = NULL;
485 empty_list = message_list_alloc (false);
487 /* Every entry in the xgettext generated file must be matched by a
488 (single) entry in the human created file. */
490 if (!multi_domain_mode)
491 for (k = 0; k < ref->nitems; k++)
493 const char *domain = ref->item[k]->domain;
494 message_list_ty *refmlp = ref->item[k]->messages;
495 message_list_ty *defmlp;
496 message_fuzzy_index_ty *defmlp_findex;
498 defmlp = msgdomain_list_sublist (def, domain, false);
502 defmlp_findex = NULL;
504 match_domain (fn1, fn2, defmlp, &defmlp_findex, def_canon_charset,
507 if (defmlp_findex != NULL)
508 message_fuzzy_index_free (defmlp_findex);
512 /* Apply the references messages in the default domain to each of
513 the definition domains. */
514 message_list_ty *refmlp = ref->item[0]->messages;
516 for (k = 0; k < def->nitems; k++)
518 message_list_ty *defmlp = def->item[k]->messages;
520 /* Ignore the default message domain if it has no messages. */
521 if (k > 0 || defmlp->nitems > 0)
523 message_fuzzy_index_ty *defmlp_findex = NULL;
525 match_domain (fn1, fn2, defmlp, &defmlp_findex, def_canon_charset,
528 if (defmlp_findex != NULL)
529 message_fuzzy_index_free (defmlp_findex);
534 /* Look for messages in the definition file, which are not present
535 in the reference file, indicating messages which are defined but not
536 used in the program. */
537 for (k = 0; k < def->nitems; ++k)
539 message_list_ty *defmlp = def->item[k]->messages;
541 for (j = 0; j < defmlp->nitems; j++)
543 message_ty *defmsg = defmlp->item[j];
546 po_gram_error_at_line (&defmsg->pos,
547 _("warning: this message is not used"));
551 /* Exit with status 1 on any error. */
553 error (EXIT_FAILURE, 0,
554 ngettext ("found %d fatal error", "found %d fatal errors", nerrors),