1 /* GNU gettext - internationalization aids
2 Copyright (C) 1995-1998, 2000-2010 Free Software Foundation, Inc.
3 This file was written by Peter Miller <millerp@canb.auug.org.au>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
32 #include "error-progname.h"
34 #include "relocatable.h"
37 #include "read-catalog.h"
39 #include "read-properties.h"
40 #include "read-stringtable.h"
42 #include "po-charset.h"
43 #include "msgl-iconv.h"
44 #include "msgl-fsearch.h"
46 #include "c-strcase.h"
47 #include "propername.h"
/* Shorthand for gettext (str); wrapped around the user-visible message
   strings in this file.  */
50 #define _(str) gettext (str)
53 /* Apply the .pot file to each of the domains in the PO file.
   When false, compare() matches the two files domain by domain; when
   true, it matches the messages of the first reference domain against
   every definition domain.  */
54 static bool multi_domain_mode = false;
56 /* Determines whether to use fuzzy matching.
   On by default.  When set, match_domain() searches for a similar
   definition for each reference message that has no exact match.  */
57 static bool use_fuzzy_matching = true;
59 /* Whether to consider fuzzy messages as translations.
   Set by --use-fuzzy.  When false, match_domain() reports fuzzy
   definitions other than the header as needing review.  */
60 static bool include_fuzzies = false;
62 /* Whether to consider untranslated messages as translations.
   Set by --use-untranslated.  When false, match_domain() reports
   definitions whose msgstr is empty as untranslated.  */
63 static bool include_untranslated = false;
/* Long options handed to getopt_long() in main().  Options that also
   have a short form return that letter; the long-only ones return
   values above CHAR_MAX, which no option character can take.  */
66 static const struct option long_options[] =
68 { "directory", required_argument, NULL, 'D' },
69 { "help", no_argument, NULL, 'h' },
70 { "multi-domain", no_argument, NULL, 'm' },
71 { "no-fuzzy-matching", no_argument, NULL, 'N' },
72 { "properties-input", no_argument, NULL, 'P' },
73 { "stringtable-input", no_argument, NULL, CHAR_MAX + 1 },
74 { "use-fuzzy", no_argument, NULL, CHAR_MAX + 2 },
75 { "use-untranslated", no_argument, NULL, CHAR_MAX + 3 },
76 { "version", no_argument, NULL, 'V' },
81 /* Forward declaration of local functions.  usage() is declared
   noreturn when compiling with GCC 2.5 or newer.  */
82 static void usage (int status)
83 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
84 __attribute__ ((noreturn))
87 static void compare (const char *fn1, const char *fn2,
88 catalog_input_format_ty input_syntax);
/* Program entry point.  Sets up the program name, the locale and the
   message domains, parses the command-line options with getopt_long(),
   and then compares the two input files named by the remaining
   arguments.  Exactly two file arguments are required; otherwise an
   error is reported and usage (EXIT_FAILURE) is called.  */
92 main (int argc, char *argv[])
/* Input files are read as PO files unless an option selects the
   Java .properties or the .strings syntax instead.  */
97 catalog_input_format_ty input_syntax = &input_format_po;
99 /* Set program name for messages. */
100 set_program_name (argv[0]);
101 error_print_progname = maybe_print_progname;
/* NOTE(review): presumably lifts the limit on reported errors so that
   every mismatch between the two files is diagnosed -- confirm against
   the PO grammar code.  */
102 gram_max_allowed_errors = UINT_MAX;
104 #ifdef HAVE_SETLOCALE
105 /* Set locale via LC_ALL. */
106 setlocale (LC_ALL, "");
109 /* Set the text message domain. */
110 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
111 bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
112 textdomain (PACKAGE);
114 /* Ensure that write errors on stdout are detected. */
115 atexit (close_stdout);
119 while ((optchar = getopt_long (argc, argv, "D:hmNPV", long_options, NULL))
123 case '\0': /* long option */
127 dir_list_append (optarg);
135 multi_domain_mode = true;
139 use_fuzzy_matching = false;
143 input_syntax = &input_format_properties;
150 case CHAR_MAX + 1: /* --stringtable-input */
151 input_syntax = &input_format_stringtable;
154 case CHAR_MAX + 2: /* --use-fuzzy */
155 include_fuzzies = true;
158 case CHAR_MAX + 3: /* --use-untranslated */
159 include_untranslated = true;
163 usage (EXIT_FAILURE);
167 /* Version information is requested. */
170 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
171 /* xgettext: no-wrap */
172 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
173 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
174 This is free software: you are free to change and redistribute it.\n\
175 There is NO WARRANTY, to the extent permitted by law.\n\
177 "1995-1998, 2000-2010");
178 printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
182 /* Help is requested. */
184 usage (EXIT_SUCCESS);
186 /* Test whether we have an .po file name as argument. */
189 error (EXIT_SUCCESS, 0, _("no input files given"));
190 usage (EXIT_FAILURE);
192 if (optind + 2 != argc)
194 error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
195 usage (EXIT_FAILURE);
198 /* compare the two files */
199 compare (argv[optind], argv[optind + 1], input_syntax);
204 /* Display usage information and exit.  When STATUS is not
   EXIT_SUCCESS, a short hint pointing at --help is written to
   stderr.  */
208 if (status != EXIT_SUCCESS)
209 fprintf (stderr, _("Try `%s --help' for more information.\n"),
214 Usage: %s [OPTION] def.po ref.pot\n\
217 /* xgettext: no-wrap */
219 Compare two Uniforum style .po files to check that both contain the same\n\
220 set of msgid strings. The def.po file is an existing PO file with the\n\
221 translations. The ref.pot file is the last created PO file, or a PO Template\n\
222 file (generally created by xgettext). This is useful for checking that\n\
223 you have translated each and every message in your program. Where an exact\n\
224 match cannot be found, fuzzy matching is used to produce better diagnostics.\n\
228 Mandatory arguments to long options are mandatory for short options too.\n"));
231 Input file location:\n"));
233 def.po translations\n"));
235 ref.pot references to the sources\n"));
237 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
240 Operation modifiers:\n"));
242 -m, --multi-domain apply ref.pot to each of the domains in def.po\n"));
244 -N, --no-fuzzy-matching do not use fuzzy matching\n"));
246 --use-fuzzy consider fuzzy entries\n"));
248 --use-untranslated consider untranslated entries\n"));
251 Input file syntax:\n"));
253 -P, --properties-input input files are in Java .properties syntax\n"));
255 --stringtable-input input files are in NeXTstep/GNUstep .strings\n\
259 Informative output:\n"));
261 -h, --help display this help and exit\n"));
263 -V, --version output version information and exit\n"));
265 /* TRANSLATORS: The placeholder indicates the bug-reporting address
266 for this package. Please add _another line_ saying
267 "Report translation bugs to <...>\n" with the address for translation
268 bugs (typically your translation team's web or email address). */
269 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"), stdout);
276 /* Return true if a message should be kept.  Serves as the predicate
   passed to message_list_remove_if_not() in remove_obsoletes(): a
   message that is not marked obsolete is kept.  */
278 is_message_selected (const message_ty *mp)
280 /* Always keep the header entry. */
284 return !mp->obsolete;
288 /* Remove obsolete messages from a message list. Return the modified list.
   The message list of every domain in MDLP is passed through
   message_list_remove_if_not() with is_message_selected() as the
   predicate.  */
289 static msgdomain_list_ty *
290 remove_obsoletes (msgdomain_list_ty *mdlp)
294 for (k = 0; k < mdlp->nitems; k++)
295 message_list_remove_if_not (mdlp->item[k]->messages, is_message_selected);
/* Check the messages of one reference list against one definition list.

   FN1 is the name of the definitions file; it appears in the diagnostic
   for a reference that has no definition.  DEFMLP holds the definitions
   and REFMLP the references.  *DEFMLP_FINDEX caches the fuzzy index of
   DEFMLP: it is created lazily, on first need, from DEFMLP and
   DEF_CANON_CHARSET, and is left for the caller to free.

   Each message of REFMLP is looked up in DEFMLP by msgctxt and msgid.
   For a definition that is found, an error is reported at its position
   if its msgstr is empty and include_untranslated is false, or else if
   it is fuzzy, is not the header, and include_fuzzies is false.
   For a reference without a definition, a similar definition is
   searched for when use_fuzzy_matching is set.  The diagnostics then
   either name both the reference and the similar definition, with
   error_message_count decremented in between (presumably so that the
   pair counts as a single error -- confirm), or state that the
   reference is not defined in FN1.  */
302 match_domain (const char *fn1, const char *fn2,
303 message_list_ty *defmlp, message_fuzzy_index_ty **defmlp_findex,
304 const char *def_canon_charset,
305 message_list_ty *refmlp,
310 for (j = 0; j < refmlp->nitems; j++)
315 refmsg = refmlp->item[j];
317 /* See if it is in the other file. */
318 defmsg = message_list_search (defmlp, refmsg->msgctxt, refmsg->msgid);
321 if (!include_untranslated && defmsg->msgstr[0] == '\0')
324 po_gram_error_at_line (&defmsg->pos, _("\
325 this message is untranslated"));
327 else if (!include_fuzzies && defmsg->is_fuzzy && !is_header (defmsg))
330 po_gram_error_at_line (&defmsg->pos, _("\
331 this message needs to be reviewed by the translator"));
338 /* If the message was not defined at all, try to find a very
339 similar message, it could be a typo, or the suggestion may
342 if (use_fuzzy_matching)
346 /* Old, slow code. */
348 message_list_search_fuzzy (defmlp,
349 refmsg->msgctxt, refmsg->msgid);
353 /* Speedup through early abort in fstrcmp(), combined with
354 pre-sorting of the messages through a hashed index. */
355 /* Create the fuzzy index lazily. */
356 if (*defmlp_findex == NULL)
358 message_fuzzy_index_alloc (defmlp, def_canon_charset);
360 message_fuzzy_index_search (*defmlp_findex,
361 refmsg->msgctxt, refmsg->msgid,
362 FUZZY_THRESHOLD, false);
369 po_gram_error_at_line (&refmsg->pos, _("\
370 this message is used but not defined..."));
371 error_message_count--;
372 po_gram_error_at_line (&defmsg->pos, _("\
373 ...but this definition is similar"));
377 po_gram_error_at_line (&refmsg->pos, _("\
378 this message is used but not defined in %s"), fn1);
385 compare (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax)
387 msgdomain_list_ty *def;
388 msgdomain_list_ty *ref;
391 const char *def_canon_charset;
392 message_list_ty *empty_list;
394 /* This is the master file, created by a human. */
395 def = remove_obsoletes (read_catalog_file (fn1, input_syntax));
397 /* This is the generated file, created by groping the sources with
398 the xgettext program. */
399 ref = remove_obsoletes (read_catalog_file (fn2, input_syntax));
401 /* The references file can be either in ASCII or in UTF-8. If it is
402 in UTF-8, we have to convert the definitions to UTF-8 as well. */
404 bool was_utf8 = false;
405 for (k = 0; k < ref->nitems; k++)
407 message_list_ty *mlp = ref->item[k]->messages;
409 for (j = 0; j < mlp->nitems; j++)
410 if (is_header (mlp->item[j]) /* && !mlp->item[j]->obsolete */)
412 const char *header = mlp->item[j]->msgstr;
416 const char *charsetstr = c_strstr (header, "charset=");
418 if (charsetstr != NULL)
422 charsetstr += strlen ("charset=");
423 len = strcspn (charsetstr, " \t\n");
424 if (len == strlen ("UTF-8")
425 && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
432 def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
435 /* Determine canonicalized encoding name of the definitions now, after
436 conversion. Only used for fuzzy matching. */
437 if (use_fuzzy_matching)
439 def_canon_charset = def->encoding;
440 if (def_canon_charset == NULL)
442 char *charset = NULL;
444 /* Get the encoding of the definitions file. */
445 for (k = 0; k < def->nitems; k++)
447 message_list_ty *mlp = def->item[k]->messages;
449 for (j = 0; j < mlp->nitems; j++)
450 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
452 const char *header = mlp->item[j]->msgstr;
456 const char *charsetstr = c_strstr (header, "charset=");
458 if (charsetstr != NULL)
462 charsetstr += strlen ("charset=");
463 len = strcspn (charsetstr, " \t\n");
464 charset = (char *) xmalloca (len + 1);
465 memcpy (charset, charsetstr, len);
475 def_canon_charset = po_charset_canonicalize (charset);
476 if (def_canon_charset == NULL)
477 /* Unspecified encoding. Assume unibyte encoding. */
478 def_canon_charset = po_charset_ascii;
482 def_canon_charset = NULL;
484 empty_list = message_list_alloc (false);
486 /* Every entry in the xgettext generated file must be matched by a
487 (single) entry in the human created file. */
489 if (!multi_domain_mode)
490 for (k = 0; k < ref->nitems; k++)
492 const char *domain = ref->item[k]->domain;
493 message_list_ty *refmlp = ref->item[k]->messages;
494 message_list_ty *defmlp;
495 message_fuzzy_index_ty *defmlp_findex;
497 defmlp = msgdomain_list_sublist (def, domain, false);
501 defmlp_findex = NULL;
503 match_domain (fn1, fn2, defmlp, &defmlp_findex, def_canon_charset,
506 if (defmlp_findex != NULL)
507 message_fuzzy_index_free (defmlp_findex);
511 /* Apply the references messages in the default domain to each of
512 the definition domains. */
513 message_list_ty *refmlp = ref->item[0]->messages;
515 for (k = 0; k < def->nitems; k++)
517 message_list_ty *defmlp = def->item[k]->messages;
519 /* Ignore the default message domain if it has no messages. */
520 if (k > 0 || defmlp->nitems > 0)
522 message_fuzzy_index_ty *defmlp_findex = NULL;
524 match_domain (fn1, fn2, defmlp, &defmlp_findex, def_canon_charset,
527 if (defmlp_findex != NULL)
528 message_fuzzy_index_free (defmlp_findex);
533 /* Look for messages in the definition file which are not present
534 in the reference file, indicating messages which are defined but not
535 used in the program. */
536 for (k = 0; k < def->nitems; ++k)
538 message_list_ty *defmlp = def->item[k]->messages;
540 for (j = 0; j < defmlp->nitems; j++)
542 message_ty *defmsg = defmlp->item[j];
545 po_gram_error_at_line (&defmsg->pos,
546 _("warning: this message is not used"));
550 /* Exit with status 1 on any error. */
552 error (EXIT_FAILURE, 0,
553 ngettext ("found %d fatal error", "found %d fatal errors", nerrors),