1 /* GNU gettext - internationalization aids
2 Copyright (C) 1995-1998, 2000-2010, 2012 Free Software Foundation, Inc.
3 This file was written by Peter Miller <millerp@canb.auug.org.au>
5 This program is free software: you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 3 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program. If not, see <http://www.gnu.org/licenses/>. */
34 #include "error-progname.h"
36 #include "relocatable.h"
39 #include "read-catalog.h"
41 #include "read-properties.h"
42 #include "read-stringtable.h"
43 #include "write-catalog.h"
45 #include "write-properties.h"
46 #include "write-stringtable.h"
53 #include "c-strcase.h"
54 #include "po-charset.h"
55 #include "msgl-iconv.h"
56 #include "msgl-equal.h"
57 #include "msgl-fsearch.h"
58 #include "glthread/lock.h"
59 #include "lang-table.h"
60 #include "plural-exp.h"
61 #include "plural-count.h"
62 #include "msgl-check.h"
63 #include "po-xerror.h"
64 #include "backupfile.h"
65 #include "copy-file.h"
66 #include "propername.h"
69 #define _(str) gettext (str)
71 #define obstack_chunk_alloc xmalloc
72 #define obstack_chunk_free free
75 /* If true do not print unneeded messages. */
78 /* Verbosity level. */
79 static int verbosity_level;
81 /* Force output of PO file even if empty. */
84 /* Apply the .pot file to each of the domains in the PO file. */
85 static bool multi_domain_mode = false;
87 /* Determines whether to use fuzzy matching. */
88 static bool use_fuzzy_matching = true;
90 /* Determines whether to keep old msgids as previous msgids. */
91 static bool keep_previous = false;
93 /* Language (ISO-639 code) and optional territory (ISO-3166 code). */
94 static const char *catalogname = NULL;
96 /* List of user-specified compendiums. */
97 static message_list_list_ty *compendiums;
99 /* List of corresponding filenames. */
100 static string_list_ty *compendium_filenames;
103 static bool update_mode = false;
104 static const char *version_control_string;
105 static const char *backup_suffix_string;
108 static const struct option long_options[] =
110 { "add-location", no_argument, &line_comment, 1 },
111 { "backup", required_argument, NULL, CHAR_MAX + 1 },
112 { "color", optional_argument, NULL, CHAR_MAX + 9 },
113 { "compendium", required_argument, NULL, 'C', },
114 { "directory", required_argument, NULL, 'D' },
115 { "escape", no_argument, NULL, 'E' },
116 { "force-po", no_argument, &force_po, 1 },
117 { "help", no_argument, NULL, 'h' },
118 { "indent", no_argument, NULL, 'i' },
119 { "lang", required_argument, NULL, CHAR_MAX + 8 },
120 { "multi-domain", no_argument, NULL, 'm' },
121 { "no-escape", no_argument, NULL, 'e' },
122 { "no-fuzzy-matching", no_argument, NULL, 'N' },
123 { "no-location", no_argument, &line_comment, 0 },
124 { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
125 { "output-file", required_argument, NULL, 'o' },
126 { "previous", no_argument, NULL, CHAR_MAX + 7 },
127 { "properties-input", no_argument, NULL, 'P' },
128 { "properties-output", no_argument, NULL, 'p' },
129 { "quiet", no_argument, NULL, 'q' },
130 { "sort-by-file", no_argument, NULL, 'F' },
131 { "sort-output", no_argument, NULL, 's' },
132 { "silent", no_argument, NULL, 'q' },
133 { "strict", no_argument, NULL, CHAR_MAX + 2 },
134 { "stringtable-input", no_argument, NULL, CHAR_MAX + 5 },
135 { "stringtable-output", no_argument, NULL, CHAR_MAX + 6 },
136 { "style", required_argument, NULL, CHAR_MAX + 10 },
137 { "suffix", required_argument, NULL, CHAR_MAX + 3 },
138 { "update", no_argument, NULL, 'U' },
139 { "verbose", no_argument, NULL, 'v' },
140 { "version", no_argument, NULL, 'V' },
141 { "width", required_argument, NULL, 'w', },
155 /* Forward declaration of local functions. */
156 static void usage (int status)
157 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ >= 5) || __GNUC__ > 2)
158 __attribute__ ((noreturn))
161 static void compendium (const char *filename);
162 static void msgdomain_list_stablesort_by_obsolete (msgdomain_list_ty *mdlp);
163 static msgdomain_list_ty *merge (const char *fn1, const char *fn2,
164 catalog_input_format_ty input_syntax,
165 msgdomain_list_ty **defp);
/* Program entry point.  Processes the command-line options, requires exactly
   two input file arguments, rejects incompatible option combinations, merges
   argv[optind] (def.po) with argv[optind + 1] (ref.pot), optionally sorts
   the result and prints it.  When the result is written back to def.po, a
   backup may be made first, and nothing is written if the merged result
   equals the original.  */
169 main (int argc, char **argv)
175 msgdomain_list_ty *def;
176 msgdomain_list_ty *result;
177 catalog_input_format_ty input_syntax = &input_format_po;
178 catalog_output_format_ty output_syntax = &output_format_po;
179 bool sort_by_filepos = false;
180 bool sort_by_msgid = false;
182 /* Set program name for messages. */
183 set_program_name (argv[0]);
184 error_print_progname = maybe_print_progname;
187 gram_max_allowed_errors = UINT_MAX;
189 #ifdef HAVE_SETLOCALE
190 /* Set locale via LC_ALL. */
191 setlocale (LC_ALL, "");
194 /* Set the text message domain. */
195 bindtextdomain (PACKAGE, relocate (LOCALEDIR));
196 bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
197 textdomain (PACKAGE);
199 /* Ensure that write errors on stdout are detected. */
200 atexit (close_stdout);
202 /* Set default values for variables. */
207 while ((opt = getopt_long (argc, argv, "C:D:eEFhimNo:pPqsUvVw:",
212 case '\0': /* Long option. */
220 dir_list_append (optarg);
224 message_print_style_escape (false);
228 message_print_style_escape (true);
232 sort_by_filepos = true;
240 message_print_style_indent ();
244 multi_domain_mode = true;
248 use_fuzzy_matching = false;
252 output_file = optarg;
256 output_syntax = &output_format_properties;
260 input_syntax = &input_format_properties;
268 sort_by_msgid = true;
287 value = strtol (optarg, &endp, 10);
289 message_page_width_set (value);
293 case CHAR_MAX + 1: /* --backup */
294 version_control_string = optarg;
297 case CHAR_MAX + 2: /* --strict */
298 message_print_style_uniforum ();
301 case CHAR_MAX + 3: /* --suffix */
302 backup_suffix_string = optarg;
305 case CHAR_MAX + 4: /* --no-wrap */
306 message_page_width_ignore ();
309 case CHAR_MAX + 5: /* --stringtable-input */
310 input_syntax = &input_format_stringtable;
313 case CHAR_MAX + 6: /* --stringtable-output */
314 output_syntax = &output_format_stringtable;
317 case CHAR_MAX + 7: /* --previous */
318 keep_previous = true;
321 case CHAR_MAX + 8: /* --lang */
322 catalogname = optarg;
325 case CHAR_MAX + 9: /* --color */
326 if (handle_color_option (optarg) || color_test_mode)
327 usage (EXIT_FAILURE);
330 case CHAR_MAX + 10: /* --style */
331 handle_style_option (optarg);
335 usage (EXIT_FAILURE);
339 /* Version information is requested. */
342 printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
343 /* xgettext: no-wrap */
344 printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
345 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
346 This is free software: you are free to change and redistribute it.\n\
347 There is NO WARRANTY, to the extent permitted by law.\n\
349 "1995-1998, 2000-2010");
350 printf (_("Written by %s.\n"), proper_name ("Peter Miller"));
354 /* Help is requested. */
356 usage (EXIT_SUCCESS);
358 /* Test whether we have a .po file name as argument. */
361 error (EXIT_SUCCESS, 0, _("no input files given"));
362 usage (EXIT_FAILURE);
364 if (optind + 2 != argc)
366 error (EXIT_SUCCESS, 0, _("exactly 2 input files required"));
367 usage (EXIT_FAILURE);
370 /* Verify selected options. */
373 if (output_file != NULL)
375 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
376 "--update", "--output-file");
381 if (version_control_string != NULL)
383 error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
384 "--backup", "--update");
385 usage (EXIT_FAILURE);
387 if (backup_suffix_string != NULL)
389 error (EXIT_SUCCESS, 0, _("%s is only valid with %s"),
390 "--suffix", "--update");
391 usage (EXIT_FAILURE);
395 if (!line_comment && sort_by_filepos)
396 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
397 "--no-location", "--sort-by-file");
399 if (sort_by_msgid && sort_by_filepos)
400 error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
401 "--sort-output", "--sort-by-file");
403 /* In update mode, --properties-input implies --properties-output. */
404 if (update_mode && input_syntax == &input_format_properties)
405 output_syntax = &output_format_properties;
406 /* In update mode, --stringtable-input implies --stringtable-output. */
407 if (update_mode && input_syntax == &input_format_stringtable)
408 output_syntax = &output_format_stringtable;
410 /* Merge the two files. */
411 result = merge (argv[optind], argv[optind + 1], input_syntax, &def);
413 /* Sort the results. */
415 msgdomain_list_sort_by_filepos (result);
416 else if (sort_by_msgid)
417 msgdomain_list_sort_by_msgid (result);
421 /* Before comparing result with def, sort the result into the same order
422 as would be done implicitly by output_syntax->print. */
423 if (output_syntax->sorts_obsoletes_to_end)
424 msgdomain_list_stablesort_by_obsolete (result);
426 /* Do nothing if the original file and the result are equal. Also do
427 nothing if the original file and the result differ only by the
428 POT-Creation-Date in the header entry; this is needed for projects
429 which don't put the .pot file under CVS. */
430 if (!msgdomain_list_equal (def, result, true))
432 /* Back up def.po. */
433 enum backup_type backup_type;
436 output_file = argv[optind];
438 if (backup_suffix_string == NULL)
440 backup_suffix_string = getenv ("SIMPLE_BACKUP_SUFFIX");
441 if (backup_suffix_string != NULL
442 && backup_suffix_string[0] == '\0')
443 backup_suffix_string = NULL;
445 if (backup_suffix_string != NULL)
446 simple_backup_suffix = backup_suffix_string;
448 backup_type = xget_version (_("backup type"), version_control_string);
449 if (backup_type != none)
451 backup_file = find_backup_file_name (output_file, backup_type);
452 copy_file_preserving (output_file, backup_file);
455 /* Write the merged message list out. */
456 msgdomain_list_print (result, output_file, output_syntax, true,
462 /* Write the merged message list out. */
463 msgdomain_list_print (result, output_file, output_syntax, force_po,
471 /* Display usage information and exit. */
475 if (status != EXIT_SUCCESS)
476 fprintf (stderr, _("Try '%s --help' for more information.\n"),
481 Usage: %s [OPTION] def.po ref.pot\n\
484 /* xgettext: no-wrap */
486 Merges two Uniforum style .po files together. The def.po file is an\n\
487 existing PO file with translations which will be taken over to the newly\n\
488 created file as long as they still match; comments will be preserved,\n\
489 but extracted comments and file positions will be discarded. The ref.pot\n\
490 file is the last created PO file with up-to-date source references but\n\
491 old translations, or a PO Template file (generally created by xgettext);\n\
492 any translations or comments in the file will be discarded, however dot\n\
493 comments and file positions will be preserved. Where an exact match\n\
494 cannot be found, fuzzy matching is used to produce better results.\n\
498 Mandatory arguments to long options are mandatory for short options too.\n"));
501 Input file location:\n"));
503 def.po translations referring to old sources\n"));
505 ref.pot references to new sources\n"));
507 -D, --directory=DIRECTORY add DIRECTORY to list for input files search\n"));
509 -C, --compendium=FILE additional library of message translations,\n\
510 may be specified more than once\n"));
513 Operation mode:\n"));
515 -U, --update update def.po,\n\
516 do nothing if def.po already up to date\n"));
519 Output file location:\n"));
521 -o, --output-file=FILE write output to specified file\n"));
523 The results are written to standard output if no output file is specified\n\
527 Output file location in update mode:\n"));
529 The result is written back to def.po.\n"));
531 --backup=CONTROL make a backup of def.po\n"));
533 --suffix=SUFFIX override the usual backup suffix\n"));
535 The version control method may be selected via the --backup option or through\n\
536 the VERSION_CONTROL environment variable. Here are the values:\n\
537 none, off never make backups (even if --backup is given)\n\
538 numbered, t make numbered backups\n\
539 existing, nil numbered if numbered backups exist, simple otherwise\n\
540 simple, never always make simple backups\n"));
542 The backup suffix is '~', unless set with --suffix or the SIMPLE_BACKUP_SUFFIX\n\
543 environment variable.\n\
547 Operation modifiers:\n"));
549 -m, --multi-domain apply ref.pot to each of the domains in def.po\n"));
551 -N, --no-fuzzy-matching do not use fuzzy matching\n"));
553 --previous keep previous msgids of translated messages\n"));
556 Input file syntax:\n"));
558 -P, --properties-input input files are in Java .properties syntax\n"));
560 --stringtable-input input files are in NeXTstep/GNUstep .strings\n\
564 Output details:\n"));
566 --lang=CATALOGNAME set 'Language' field in the header entry\n"));
568 --color use colors and other text attributes always\n\
569 --color=WHEN use colors and other text attributes if WHEN.\n\
570 WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
572 --style=STYLEFILE specify CSS style rule file for --color\n"));
574 -e, --no-escape do not use C escapes in output (default)\n"));
576 -E, --escape use C escapes in output, no extended chars\n"));
578 --force-po write PO file even if empty\n"));
580 -i, --indent indented output style\n"));
582 --no-location suppress '#: filename:line' lines\n"));
584 --add-location preserve '#: filename:line' lines (default)\n"));
586 --strict strict Uniforum output style\n"));
588 -p, --properties-output write out a Java .properties file\n"));
590 --stringtable-output write out a NeXTstep/GNUstep .strings file\n"));
592 -w, --width=NUMBER set output page width\n"));
594 --no-wrap do not break long message lines, longer than\n\
595 the output page width, into several lines\n"));
597 -s, --sort-output generate sorted output\n"));
599 -F, --sort-by-file sort output by file location\n"));
602 Informative output:\n"));
604 -h, --help display this help and exit\n"));
606 -V, --version output version information and exit\n"));
608 -v, --verbose increase verbosity level\n"));
610 -q, --quiet, --silent suppress progress indicators\n"));
612 /* TRANSLATORS: The placeholder indicates the bug-reporting address
613 for this package. Please add _another line_ saying
614 "Report translation bugs to <...>\n" with the address for translation
615 bugs (typically your translation team's web or email address). */
616 fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
/* Read the catalog file FILENAME (always parsed with input_format_po) and
   register its message lists as compendiums.  The global lists compendiums
   and compendium_filenames are allocated on first use.  For every domain of
   the file, its message list is appended to compendiums and FILENAME is
   appended to compendium_filenames.  */
625 compendium (const char *filename)
627 msgdomain_list_ty *mdlp;
630 mdlp = read_catalog_file (filename, &input_format_po);
631 if (compendiums == NULL)
633 compendiums = message_list_list_alloc ();
634 compendium_filenames = string_list_alloc ();
636 for (k = 0; k < mdlp->nitems; k++)
638 message_list_list_append (compendiums, mdlp->item[k]->messages);
639 string_list_append (compendium_filenames, filename);
644 /* Sorts obsolete messages to the end, for every domain. */
646 msgdomain_list_stablesort_by_obsolete (msgdomain_list_ty *mdlp)
650 for (k = 0; k < mdlp->nitems; k++)
652 message_list_ty *mlp = mdlp->item[k]->messages;
654 /* Sort obsolete messages to the end. */
657 message_ty **l1 = XNMALLOC (mlp->nitems, message_ty *);
659 message_ty **l2 = XNMALLOC (mlp->nitems, message_ty *);
663 /* Sort the non-obsolete messages into l1 and the obsolete messages into l2. */
667 for (j = 0; j < mlp->nitems; j++)
669 message_ty *mp = mlp->item[j];
676 if (n1 > 0 && n2 > 0)
678 memcpy (mlp->item, l1, n1 * sizeof (message_ty *));
679 memcpy (mlp->item + n1, l2, n2 * sizeof (message_ty *));
688 /* Data structure representing the messages with known translations. It consists of:
690 - A message list from def.po,
- The message lists from the compendiums, if any.
692 The data structure is optimized for exact and fuzzy searches. */
693 typedef struct definitions_ty definitions_ty;
694 struct definitions_ty
696 /* A list of message lists. The first comes from def.po, the other ones
697 from the compendiums. Each message list has a built-in hash table,
698 for speed when doing the exact searches. */
699 message_list_list_ty *lists;
701 /* A fuzzy index of the current list of non-compendium messages, for speed
702 when doing fuzzy searches. Used only if use_fuzzy_matching is true. */
703 message_fuzzy_index_ty *curr_findex;
704 /* A once-only execution guard for the initialization of the fuzzy index.
705 Needed for OpenMP. */
706 gl_lock_define(, curr_findex_init_lock)
708 /* A fuzzy index of the compendiums, for speed when doing fuzzy searches.
709 Used only if use_fuzzy_matching is true and compendiums != NULL. */
710 message_fuzzy_index_ty *comp_findex;
711 /* A once-only execution guard for the initialization of the fuzzy index.
712 Needed for OpenMP. */
713 gl_lock_define(, comp_findex_init_lock)
715 /* The canonical encoding of the definitions and the compendiums.
716 Only used for fuzzy matching. */
717 const char *canon_charset;
/* Initialize DEFINITIONS.  The list of message lists is created with a NULL
   entry in slot 0, which is the slot read by definitions_current_list and
   written by definitions_set_current_list; the compendium lists, if any,
   are appended after it.  Both fuzzy indexes start out as NULL, their locks
   are initialized, and CANON_CHARSET is stored for the later creation of the
   fuzzy indexes.  */
721 definitions_init (definitions_ty *definitions, const char *canon_charset)
723 definitions->lists = message_list_list_alloc ();
/* Placeholder for the current list of non-compendium messages. */
724 message_list_list_append (definitions->lists, NULL);
725 if (compendiums != NULL)
726 message_list_list_append_list (definitions->lists, compendiums);
727 definitions->curr_findex = NULL;
728 gl_lock_init (definitions->curr_findex_init_lock);
729 definitions->comp_findex = NULL;
730 gl_lock_init (definitions->comp_findex_init_lock);
731 definitions->canon_charset = canon_charset;
734 /* Return the current list of non-compendium messages, that is, the first
entry of DEFINITIONS->lists. */
735 static inline message_list_ty *
736 definitions_current_list (const definitions_ty *definitions)
738 return definitions->lists->item[0];
741 /* Set the current list of non-compendium messages to MLP.
A fuzzy index that was built for the previous current list is freed and
reset to NULL, since it no longer describes the current list. */
743 definitions_set_current_list (definitions_ty *definitions, message_list_ty *mlp)
745 definitions->lists->item[0] = mlp;
746 if (definitions->curr_findex != NULL)
748 message_fuzzy_index_free (definitions->curr_findex);
749 definitions->curr_findex = NULL;
753 /* Create the fuzzy index for the current list of non-compendium messages.
754 Used only if use_fuzzy_matching is true.
The index is built from definitions_current_list and canon_charset. The
NULL test is done while holding curr_findex_init_lock, so the index is
allocated only if no index is present at that moment. */
756 definitions_init_curr_findex (definitions_ty *definitions)
758 /* Protect against concurrent execution. */
759 gl_lock_lock (definitions->curr_findex_init_lock);
760 if (definitions->curr_findex == NULL)
761 definitions->curr_findex =
762 message_fuzzy_index_alloc (definitions_current_list (definitions),
763 definitions->canon_charset);
764 gl_lock_unlock (definitions->curr_findex_init_lock);
767 /* Create the fuzzy index for the compendium messages.
768 Used only if use_fuzzy_matching is true and compendiums != NULL.
The messages of all compendium lists are appended to one temporary list,
from which a single index is built with canon_charset. The NULL test is
done while holding comp_findex_init_lock. */
770 definitions_init_comp_findex (definitions_ty *definitions)
772 /* Protect against concurrent execution. */
773 gl_lock_lock (definitions->comp_findex_init_lock);
774 if (definitions->comp_findex == NULL)
776 /* Combine all the compendium message lists into a single one. Don't
777 bother checking for duplicates. */
778 message_list_ty *all_compendium;
781 all_compendium = message_list_alloc (false);
782 for (i = 0; i < compendiums->nitems; i++)
784 message_list_ty *mlp = compendiums->item[i];
787 for (j = 0; j < mlp->nitems; j++)
788 message_list_append (all_compendium, mlp->item[j]);
791 /* Create the fuzzy index from it. */
/* NOTE(review): all_compendium is not freed in the visible code; presumably
   the fuzzy index keeps referring to it - confirm. */
792 definitions->comp_findex =
793 message_fuzzy_index_alloc (all_compendium, definitions->canon_charset);
795 gl_lock_unlock (definitions->comp_findex_init_lock);
/* Exact search.  Looks up the message with the given MSGCTXT and MSGID in
   the message lists of DEFINITIONS and returns the result of
   message_list_list_search.  */
799 static inline message_ty *
800 definitions_search (const definitions_ty *definitions,
801 const char *msgctxt, const char *msgid)
803 return message_list_list_search (definitions->lists, msgctxt, msgid);
807 /* Fuzzy search. Used only if use_fuzzy_matching is true. */
808 static inline message_ty *
809 definitions_search_fuzzy (definitions_ty *definitions,
810 const char *msgctxt, const char *msgid)
816 /* Old, slow code. */
818 message_list_search_fuzzy (definitions_current_list (definitions),
823 /* Speedup through early abort in fstrcmp(), combined with pre-sorting
824 of the messages through a hashed index. */
825 /* Create the fuzzy index lazily. */
826 if (definitions->curr_findex == NULL)
827 definitions_init_curr_findex (definitions);
/* First candidate mp1: best match in the current non-compendium list. */
828 mp1 = message_fuzzy_index_search (definitions->curr_findex,
830 FUZZY_THRESHOLD, false);
/* Second candidate mp2: searched in the compendiums, only when compendiums
   were specified. */
833 if (compendiums != NULL)
835 double lower_bound_for_mp2;
/* NOTE(review): the condition and the alternative value of this conditional
   expression are not visible here; judging from the check below, the value
   is expected to be at least FUZZY_THRESHOLD - confirm. */
838 lower_bound_for_mp2 =
840 ? fuzzy_search_goal_function (mp1, msgctxt, msgid, 0.0)
842 /* This lower bound must be >= FUZZY_THRESHOLD. */
843 if (!(lower_bound_for_mp2 >= FUZZY_THRESHOLD))
846 /* Create the fuzzy index lazily. */
847 if (definitions->comp_findex == NULL)
848 definitions_init_comp_findex (definitions);
850 mp2 = message_fuzzy_index_search (definitions->comp_findex,
852 lower_bound_for_mp2, true);
854 /* Choose the best among mp1, mp2. One of the visible conditions compares
the goal function value of mp2 against lower_bound_for_mp2. */
857 && (fuzzy_search_goal_function (mp2, msgctxt, msgid,
859 > lower_bound_for_mp2)))
/* Release what DEFINITIONS holds: the list of message lists and whichever of
   the two fuzzy indexes have been created.
   NOTE(review): the meaning of the second argument (2) of
   message_list_list_free is defined elsewhere - confirm what it implies for
   the contained message lists.  */
867 definitions_destroy (definitions_ty *definitions)
869 message_list_list_free (definitions->lists, 2);
870 if (definitions->curr_findex != NULL)
871 message_fuzzy_index_free (definitions->curr_findex);
872 if (definitions->comp_findex != NULL)
873 message_fuzzy_index_free (definitions->comp_findex);
877 /* A silent error logger. We are only interested in knowing whether errors occurred or not. */
880 silent_error_logger (const char *format, ...)
881 __attribute__ ((__format__ (__printf__, 1, 2)));
883 silent_error_logger (const char *format, ...)
888 /* Another silent error logger. */
890 silent_xerror (int severity,
891 const struct message_ty *message,
892 const char *filename, size_t lineno, size_t column,
893 int multiline_p, const char *message_text)
899 message_merge (message_ty *def, message_ty *ref, bool force_fuzzy,
900 const struct plural_distribution *distribution)
904 const char *prev_msgctxt;
905 const char *prev_msgid;
906 const char *prev_msgid_plural;
910 /* Take the msgid from the reference. When fuzzy matches are made,
911 the definition will not be unique, but the reference will be -
912 usually because it has only been slightly changed. */
914 /* Take the msgstr from the definition. The msgstr of the reference
915 is usually empty, as it was generated by xgettext. If we currently
916 process the header entry we have to merge the msgstr by using the
917 Report-Msgid-Bugs-To and POT-Creation-Date fields from the reference. */
920 /* Oh, oh. The header entry and we have something to fill in. */
927 { "Project-Id-Version:", sizeof ("Project-Id-Version:") - 1 },
929 { "Report-Msgid-Bugs-To:", sizeof ("Report-Msgid-Bugs-To:") - 1 },
930 #define REPORT_MSGID_BUGS_TO 1
931 { "POT-Creation-Date:", sizeof ("POT-Creation-Date:") - 1 },
932 #define POT_CREATION_DATE 2
933 { "PO-Revision-Date:", sizeof ("PO-Revision-Date:") - 1 },
934 #define PO_REVISION_DATE 3
935 { "Last-Translator:", sizeof ("Last-Translator:") - 1 },
936 #define LAST_TRANSLATOR 4
937 { "Language-Team:", sizeof ("Language-Team:") - 1 },
938 #define LANGUAGE_TEAM 5
939 { "Language:", sizeof ("Language:") - 1 },
941 { "MIME-Version:", sizeof ("MIME-Version:") - 1 },
942 #define MIME_VERSION 7
943 { "Content-Type:", sizeof ("Content-Type:") - 1 },
944 #define CONTENT_TYPE 8
945 { "Content-Transfer-Encoding:",
946 sizeof ("Content-Transfer-Encoding:") - 1 }
947 #define CONTENT_TRANSFER 9
954 } header_fields[UNKNOWN + 1];
960 /* Clear all fields. */
961 memset (header_fields, '\0', sizeof (header_fields));
963 /* Prepare a temporary memory pool. */
964 obstack_init (&pool);
969 const char *endp = strchr (cp, '\n');
970 int terminated = endp != NULL;
974 /* Add a trailing newline. */
976 endp = strchr (cp, '\0');
980 copy = (char *) obstack_alloc (&pool, len + 1);
981 stpcpy (stpcpy (copy, cp), "\n");
986 len = (endp - cp) + 1;
990 /* Compare with any of the known fields. */
992 cnt < sizeof (known_fields) / sizeof (known_fields[0]);
994 if (c_strncasecmp (cp, known_fields[cnt].name, known_fields[cnt].len)
998 if (cnt < sizeof (known_fields) / sizeof (known_fields[0]))
1000 header_fields[cnt].string = &cp[known_fields[cnt].len];
1001 header_fields[cnt].len = len - known_fields[cnt].len;
1005 /* It's an unknown field. Append content to what is already
1008 (char *) obstack_alloc (&pool,
1009 header_fields[UNKNOWN].len + len + 1);
1010 memcpy (extended, header_fields[UNKNOWN].string,
1011 header_fields[UNKNOWN].len);
1012 memcpy (&extended[header_fields[UNKNOWN].len], cp, len);
1013 extended[header_fields[UNKNOWN].len + len] = '\0';
1014 header_fields[UNKNOWN].string = extended;
1015 header_fields[UNKNOWN].len += len;
1021 /* Set the Language field if specified on the command line. */
1022 if (catalogname != NULL)
1024 /* Prepend a space and append a newline. */
1025 size_t len = strlen (catalogname);
1026 char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
1027 stpcpy (stpcpy (stpcpy (copy, " "), catalogname), "\n");
1028 header_fields[LANGUAGE].string = copy;
1029 header_fields[LANGUAGE].len = strlen (header_fields[LANGUAGE].string);
1031 /* Add a Language field to PO files that don't have one. The Language
1032 field was introduced in gettext-0.18. */
1033 else if (header_fields[LANGUAGE].string == NULL)
1035 const char *language_team_ptr = header_fields[LANGUAGE_TEAM].string;
1037 if (language_team_ptr != NULL)
1039 size_t language_team_len = header_fields[LANGUAGE_TEAM].len;
1041 /* Trim leading blanks. */
1042 while (language_team_len > 0
1043 && (*language_team_ptr == ' '
1044 || *language_team_ptr == '\t'))
1046 language_team_ptr++;
1047 language_team_len--;
1050 /* Trim trailing blanks. */
1051 while (language_team_len > 0
1052 && (language_team_ptr[language_team_len - 1] == ' '
1053 || language_team_ptr[language_team_len - 1] == '\t'))
1054 language_team_len--;
1056 /* Trim last word, if it looks like a URL or email address. */
1060 for (i = language_team_len; i > 0; i--)
1061 if (language_team_ptr[i - 1] == ' '
1062 || language_team_ptr[i - 1] == '\t')
1064 /* The last word: language_team_ptr[i..language_team_len-1]. */
1065 if (i < language_team_len
1066 && (language_team_ptr[i] == '<'
1067 || language_team_ptr[language_team_len - 1] == '>'
1068 || memchr (language_team_ptr, '@', language_team_len)
1070 || memchr (language_team_ptr, '/', language_team_len)
1073 /* Trim last word and blanks before it. */
1075 && (language_team_ptr[i - 1] == ' '
1076 || language_team_ptr[i - 1] == '\t'))
1078 language_team_len = i;
1082 /* The rest of the Language-Team field should be the English name
1083 of the language. Convert to ISO 639 and ISO 3166 syntax. */
1087 for (i = 0; i < language_variant_table_size; i++)
1088 if (strlen (language_variant_table[i].english)
1089 == language_team_len
1090 && memcmp (language_variant_table[i].english,
1091 language_team_ptr, language_team_len) == 0)
1093 header_fields[LANGUAGE].string =
1094 language_variant_table[i].code;
1098 if (header_fields[LANGUAGE].string == NULL)
1102 for (i = 0; i < language_table_size; i++)
1103 if (strlen (language_table[i].english) == language_team_len
1104 && memcmp (language_table[i].english,
1105 language_team_ptr, language_team_len) == 0)
1107 header_fields[LANGUAGE].string = language_table[i].code;
1111 if (header_fields[LANGUAGE].string != NULL)
1113 /* Prepend a space and append a newline. */
1114 const char *str = header_fields[LANGUAGE].string;
1115 size_t len = strlen (str);
1116 char *copy = (char *) obstack_alloc (&pool, 1 + len + 1 + 1);
1117 stpcpy (stpcpy (stpcpy (copy, " "), str), "\n");
1118 header_fields[LANGUAGE].string = copy;
1121 header_fields[LANGUAGE].string = " \n";
1122 header_fields[LANGUAGE].len =
1123 strlen (header_fields[LANGUAGE].string);
1128 const char *msgid_bugs_ptr;
1130 msgid_bugs_ptr = c_strstr (ref->msgstr, "Report-Msgid-Bugs-To:");
1131 if (msgid_bugs_ptr != NULL)
1133 size_t msgid_bugs_len;
1136 msgid_bugs_ptr += sizeof ("Report-Msgid-Bugs-To:") - 1;
1138 endp = strchr (msgid_bugs_ptr, '\n');
1141 /* Add a trailing newline. */
1143 endp = strchr (msgid_bugs_ptr, '\0');
1144 msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
1145 extended = (char *) obstack_alloc (&pool, msgid_bugs_len + 1);
1146 stpcpy (stpcpy (extended, msgid_bugs_ptr), "\n");
1147 msgid_bugs_ptr = extended;
1150 msgid_bugs_len = (endp - msgid_bugs_ptr) + 1;
1152 header_fields[REPORT_MSGID_BUGS_TO].string = msgid_bugs_ptr;
1153 header_fields[REPORT_MSGID_BUGS_TO].len = msgid_bugs_len;
1158 const char *pot_date_ptr;
1160 pot_date_ptr = c_strstr (ref->msgstr, "POT-Creation-Date:");
1161 if (pot_date_ptr != NULL)
1163 size_t pot_date_len;
1166 pot_date_ptr += sizeof ("POT-Creation-Date:") - 1;
1168 endp = strchr (pot_date_ptr, '\n');
1171 /* Add a trailing newline. */
1173 endp = strchr (pot_date_ptr, '\0');
1174 pot_date_len = (endp - pot_date_ptr) + 1;
1175 extended = (char *) obstack_alloc (&pool, pot_date_len + 1);
1176 stpcpy (stpcpy (extended, pot_date_ptr), "\n");
1177 pot_date_ptr = extended;
1180 pot_date_len = (endp - pot_date_ptr) + 1;
1182 header_fields[POT_CREATION_DATE].string = pot_date_ptr;
1183 header_fields[POT_CREATION_DATE].len = pot_date_len;
1187 /* Concatenate all the various fields. */
1189 for (cnt = 0; cnt < UNKNOWN; ++cnt)
1190 if (header_fields[cnt].string != NULL)
1191 len += known_fields[cnt].len + header_fields[cnt].len;
1192 len += header_fields[UNKNOWN].len;
1194 cp = newp = XNMALLOC (len + 1, char);
1197 #define IF_FILLED(idx) \
1198 if (header_fields[idx].string) \
1199 newp = stpncpy (stpcpy (newp, known_fields[idx].name), \
1200 header_fields[idx].string, header_fields[idx].len)
1202 IF_FILLED (PROJECT_ID);
1203 IF_FILLED (REPORT_MSGID_BUGS_TO);
1204 IF_FILLED (POT_CREATION_DATE);
1205 IF_FILLED (PO_REVISION_DATE);
1206 IF_FILLED (LAST_TRANSLATOR);
1207 IF_FILLED (LANGUAGE_TEAM);
1208 IF_FILLED (LANGUAGE);
1209 IF_FILLED (MIME_VERSION);
1210 IF_FILLED (CONTENT_TYPE);
1211 IF_FILLED (CONTENT_TRANSFER);
1212 if (header_fields[UNKNOWN].string != NULL)
1213 stpcpy (newp, header_fields[UNKNOWN].string);
1217 /* Free the temporary memory pool. */
1218 obstack_free (&pool, NULL);
1221 msgstr_len = strlen (cp) + 1;
1223 prev_msgctxt = NULL;
1225 prev_msgid_plural = NULL;
1229 msgstr = def->msgstr;
1230 msgstr_len = def->msgstr_len;
1234 prev_msgctxt = def->prev_msgctxt;
1235 prev_msgid = def->prev_msgid;
1236 prev_msgid_plural = def->prev_msgid_plural;
1240 prev_msgctxt = def->msgctxt;
1241 prev_msgid = def->msgid;
1242 prev_msgid_plural = def->msgid_plural;
1246 result = message_alloc (ref->msgctxt != NULL ? xstrdup (ref->msgctxt) : NULL,
1247 xstrdup (ref->msgid), ref->msgid_plural,
1248 msgstr, msgstr_len, &def->pos);
1250 /* Take the comments from the definition file. There will be none at
1251 all in the reference file, as it was generated by xgettext. */
1253 for (j = 0; j < def->comment->nitems; ++j)
1254 message_comment_append (result, def->comment->item[j]);
1256 /* Take the dot comments from the reference file, as they are
1257 generated by xgettext. Any in the definition file are old ones
1258 collected by previous runs of xgettext and msgmerge. */
1259 if (ref->comment_dot)
1260 for (j = 0; j < ref->comment_dot->nitems; ++j)
1261 message_comment_dot_append (result, ref->comment_dot->item[j]);
1263 /* The flags are mixed in a special way. Some information comes
1264 from the reference message (such as format/no-format), the rest
1265 comes from the definition file (fuzzy or not). */
1266 result->is_fuzzy = def->is_fuzzy | force_fuzzy;
1268 /* If ref and def have the same msgid but different msgid_plural, it's
1269 a reason to mark the result fuzzy. */
1270 if (!result->is_fuzzy
1271 && (ref->msgid_plural != NULL
1272 ? def->msgid_plural == NULL
1273 || strcmp (ref->msgid_plural, def->msgid_plural) != 0
1274 : def->msgid_plural != NULL))
1275 result->is_fuzzy = true;
1277 for (i = 0; i < NFORMATS; i++)
1279 result->is_format[i] = ref->is_format[i];
1281 /* If the reference message is marked as being a format specifier,
1282 but the definition message is not, we check if the resulting
1283 message would pass "msgfmt -c". If yes, then all is fine. If
1284 not, we add a fuzzy marker, because
1285 1. the message needs the translator's attention,
1286 2. msgmerge must not transform a PO file which passes "msgfmt -c"
1287 into a PO file which doesn't. */
1288 if (!result->is_fuzzy
1289 && possible_format_p (ref->is_format[i])
1290 && !possible_format_p (def->is_format[i])
1291 && check_msgid_msgstr_format_i (ref->msgid, ref->msgid_plural,
1292 msgstr, msgstr_len, i, ref->range,
1293 distribution, silent_error_logger)
1295 result->is_fuzzy = true;
1298 result->range = ref->range;
1299 /* If the definition message was assuming a certain range, but the reference
1300 message does not specify a range any more or specifies a range that is
1301 not the same or a subset, we add a fuzzy marker, because
1302 1. the message needs the translator's attention,
1303 2. msgmerge must not transform a PO file which passes "msgfmt -c"
1304 into a PO file which doesn't. */
1305 if (!result->is_fuzzy
1306 && has_range_p (def->range)
1307 && !(has_range_p (ref->range)
1308 && ref->range.min >= def->range.min
1309 && ref->range.max <= def->range.max))
1310 result->is_fuzzy = true;
1312 result->do_wrap = ref->do_wrap;
1314 /* Insert previous msgid, commented out with "#|".
1315 Do so only when --previous is specified, for backward compatibility.
1316 Since the "previous msgid" represents the original msgid that led to
1318 - we can omit it if the resulting message is not fuzzy or is
1319 untranslated (but do this in a later pass, since result->is_fuzzy
1320 is not finalized at this point),
1321 - otherwise, if the corresponding message from the definition file
1322 was translated (not fuzzy), we use that message's msgid,
1323 - otherwise, we use that message's prev_msgid. */
1326 result->prev_msgctxt = prev_msgctxt;
1327 result->prev_msgid = prev_msgid;
1328 result->prev_msgid_plural = prev_msgid_plural;
1331 /* If the reference message was obsolete, make the resulting message
1332 obsolete. This case doesn't occur for POT files, but users sometimes
1333 use PO files that are themselves the result of msgmerge instead of POT
1335 result->obsolete = ref->obsolete;
1337 /* Take the file position comments from the reference file, as they
1338 are generated by xgettext. Any in the definition file are old ones
1339 collected by previous runs of xgettext and msgmerge. */
1340 for (j = 0; j < ref->filepos_count; ++j)
1342 lex_pos_ty *pp = &ref->filepos[j];
1343 message_comment_filepos (result, pp->file_name, pp->line_number);
1346 /* Special postprocessing is needed if the reference message is a
1347 plural form and the definition message isn't, or vice versa. */
1348 if (ref->msgid_plural != NULL)
1350 if (def->msgid_plural == NULL)
1355 if (def->msgid_plural != NULL)
1359 /* All done, return the merged message to the caller. */
/* Progress-dot interval: match_domain writes one '.' to stderr whenever the
   running count of processed messages is a multiple of this value (and only
   when not quiet and verbosity_level <= 1). */
1364 #define DOT_FREQUENCY 10
/* Match the messages of the reference list REFMLP against DEFINITIONS and
   append the outcome to RESULTMLP.

   The work is done in two passes over REFMLP. The first pass only searches
   (definitions_search, then optionally definitions_search_fuzzy) and stores
   what it found in the search_results array; it carries an OpenMP pragma so
   that a capable compiler can distribute it over threads. The second pass
   walks the array in order and, per reference message, appends either the
   result of message_merge (non-fuzzy or fuzzy hit) or a message_copy of the
   reference message (no hit; apparently only for non-header messages).

   Afterwards the result list is postprocessed: messages flagged through
   mp->used get their plural forms adjusted and are marked fuzzy, and the
   "previous msgid" fields are cleared on every message that is not fuzzy or
   whose msgstr starts with a NUL byte.

   FN1 is used in the "not defined in %s" diagnostic. *PROCESSED is read to
   pace the progress dots. NOTE(review): no use of FN2 or STATS is visible
   in the lines shown here -- confirm how they are used. */
1367 match_domain (const char *fn1, const char *fn2,
1368 definitions_ty *definitions, message_list_ty *refmlp,
1369 message_list_ty *resultmlp,
1370 struct statistics *stats, unsigned int *processed)
1372 message_ty *header_entry;
1373 unsigned long int nplurals;
1374 const struct expression *plural_expr;
1375 char *untranslated_plural_msgstr;
1376 struct plural_distribution distribution;
1377 struct search_result { message_ty *found; bool fuzzy; } *search_results;
/* Look up the entry with no msgctxt and an empty msgid (the header) in the
   current definitions list; its msgstr, if any, is handed to
   extract_plural_expression to obtain plural_expr and nplurals. */
1381 message_list_search (definitions_current_list (definitions), NULL, "");
1382 extract_plural_expression (header_entry ? header_entry->msgstr : NULL,
1383 &plural_expr, &nplurals);
/* A buffer of nplurals NUL bytes, i.e. nplurals consecutive empty strings.
   It may become the msgstr of untranslated plural messages further down,
   so it is shared with the result and is not freed in the lines shown. */
1384 untranslated_plural_msgstr = XNMALLOC (nplurals, char);
1385 memset (untranslated_plural_msgstr, '\0', nplurals);
1387 /* Determine the plural distribution of the plural_expr formula. */
1389 /* Disable error output temporarily. */
1390 void (*old_po_xerror) (int, const struct message_ty *, const char *, size_t,
1391 size_t, int, const char *)
1393 po_xerror = silent_xerror;
/* The assignments after this call reset DISTRIBUTION to an empty state.
   NOTE(review): presumably they run only when check_plural_eval reports a
   problem -- confirm against the full condition. */
1395 if (check_plural_eval (plural_expr, nplurals, header_entry,
1398 distribution.expr = NULL;
1399 distribution.often = NULL;
1400 distribution.often_length = 0;
1401 distribution.histogram = NULL;
/* Re-enable error output by restoring the saved handler. */
1404 po_xerror = old_po_xerror;
1407 /* Most of the time is spent in definitions_search_fuzzy.
1408 Perform it in a separate loop that can be parallelized by an OpenMP
1409 capable compiler. */
/* One slot per reference message: written by the search loop below, read by
   the merge loop after it, and freed once that loop is done. */
1410 search_results = XNMALLOC (refmlp->nitems, struct search_result);
1412 long int nn = refmlp->nitems;
1415 /* Tell the OpenMP capable compiler to distribute this loop across
1416 several threads. The schedule is dynamic, because for some messages
1417 the loop body can be executed very quickly, whereas for others it takes
1419 Note: The Sun Workshop 6.2 C compiler does not allow a space between
1420 '#' and 'pragma'. */
1422 #pragma omp parallel for schedule(dynamic)
1424 for (jj = 0; jj < nn; jj++)
1426 message_ty *refmsg = refmlp->item[jj];
1429 /* Because merging can take a while we print something to signal
1431 if (!quiet && verbosity_level <= 1 && *processed % DOT_FREQUENCY == 0)
1432 fputc ('.', stderr);
1438 /* See if it is in the other file. */
1440 definitions_search (definitions, refmsg->msgctxt, refmsg->msgid);
/* Record the definition found by definitions_search as a non-fuzzy hit. */
1443 search_results[jj].found = defmsg;
1444 search_results[jj].fuzzy = false;
/* Otherwise, for non-header messages and only when use_fuzzy_matching is
   set, try definitions_search_fuzzy and record a fuzzy hit; if nothing is
   recorded by either search, the slot's found field is set to NULL. */
1446 else if (!is_header (refmsg)
1447 /* If the message was not defined at all, try to find a very
1448 similar message, it could be a typo, or the suggestion may
1450 && use_fuzzy_matching
1452 definitions_search_fuzzy (definitions,
1454 refmsg->msgid)) != NULL))
1456 search_results[jj].found = defmsg;
1457 search_results[jj].fuzzy = true;
1460 search_results[jj].found = NULL;
1464 for (j = 0; j < refmlp->nitems; j++)
1466 message_ty *refmsg = refmlp->item[j];
1468 /* See if it is in the other file.
1469 This used definitions_search. */
1470 if (search_results[j].found != NULL && !search_results[j].fuzzy)
1472 message_ty *defmsg = search_results[j].found;
1473 /* Merge the reference with the definition: take the #. and
1474 #: comments from the reference, take the # comments from
1475 the definition, take the msgstr from the definition. Add
1476 this merged entry to the output message list. */
1478 message_merge (defmsg, refmsg, false, &distribution);
1480 message_list_append (resultmlp, mp);
1482 /* Remember that this message has been used, when we scan
1483 later to see if anything was omitted. */
1487 else if (!is_header (refmsg))
1489 /* If the message was not defined at all, try to find a very
1490 similar message, it could be a typo, or the suggestion may
1491 help. This search assumed use_fuzzy_matching and used
1492 definitions_search_fuzzy. */
1493 if (search_results[j].found != NULL && search_results[j].fuzzy)
1495 message_ty *defmsg = search_results[j].found;
/* At verbosity above 1, report the fuzzy pairing as two diagnostics, one at
   the reference message and one at the similar definition. */
1498 if (verbosity_level > 1)
1500 po_gram_error_at_line (&refmsg->pos, _("\
1501 this message is used but not defined..."));
/* NOTE(review): the decrement presumably compensates for the first of the
   two diagnostics so that the pair counts once -- confirm. */
1502 error_message_count--;
1503 po_gram_error_at_line (&defmsg->pos, _("\
1504 ...but this definition is similar"));
1507 /* Merge the reference with the definition: take the #. and
1508 #: comments from the reference, take the # comments from
1509 the definition, take the msgstr from the definition. Add
1510 this merged entry to the output message list. */
1511 mp = message_merge (defmsg, refmsg, true, &distribution);
1513 message_list_append (resultmlp, mp);
1515 /* Remember that this message has been used, when we scan
1516 later to see if anything was omitted. */
1519 if (!quiet && verbosity_level <= 1)
1520 /* Always print a dot if we handled a fuzzy match. */
1521 fputc ('.', stderr);
/* No usable definition was recorded: the reference message is copied into
   the result; for plural messages whose msgstr is entirely NUL bytes the
   number of empty forms is normalized to nplurals. */
1526 bool is_untranslated;
1530 if (verbosity_level > 1)
1531 po_gram_error_at_line (&refmsg->pos, _("\
1532 this message is used but not defined in %s"), fn1);
1534 mp = message_copy (refmsg);
1536 if (mp->msgid_plural != NULL)
1538 /* Test if mp is untranslated. (It most likely is.) */
1539 is_untranslated = true;
1540 for (p = mp->msgstr, pend = p + mp->msgstr_len; p < pend; p++)
1543 is_untranslated = false;
1546 if (is_untranslated)
1548 /* Change mp->msgstr_len consecutive empty strings into
1549 nplurals consecutive empty strings. */
/* The shared zero buffer is substituted only when more bytes are needed than
   the current msgstr holds; the length is set to nplurals in either case. */
1550 if (nplurals > mp->msgstr_len)
1551 mp->msgstr = untranslated_plural_msgstr;
1552 mp->msgstr_len = nplurals;
1556 message_list_append (resultmlp, mp);
1562 free (search_results);
1564 /* Now postprocess the problematic merges. This is needed because we
1565 want the result to pass the "msgfmt -c -v" check. */
1567 /* message_merge sets mp->used to 1 or 2, depending on the problem.
1568 Compute the bitwise OR of all these. */
1569 int problematic = 0;
1571 for (j = 0; j < resultmlp->nitems; j++)
1572 problematic |= resultmlp->item[j]->used;
/* This nplurals (and the header_entry below) shadow the function-level
   variables of the same name: here they describe the result list, and
   nplurals stays 0 unless some message has bit 1 set. */
1576 unsigned long int nplurals = 0;
1578 if (problematic & 1)
1580 /* Need to know nplurals of the result domain. */
1581 message_ty *header_entry =
1582 message_list_search (resultmlp, NULL, "");
1584 nplurals = get_plural_count (header_entry
1585 ? header_entry->msgstr
1589 for (j = 0; j < resultmlp->nitems; j++)
1591 message_ty *mp = resultmlp->item[j];
/* Bit 1 is acted upon only when a positive plural count is known. */
1593 if ((mp->used & 1) && (nplurals > 0))
1595 /* ref->msgid_plural != NULL but def->msgid_plural == NULL.
1596 Use a copy of def->msgstr for each possible plural form. */
1597 size_t new_msgstr_len;
1602 if (verbosity_level > 1)
1604 po_gram_error_at_line (&mp->pos, _("\
1605 this message should define plural forms"));
/* Build nplurals back-to-back copies of the current msgstr, msgstr_len
   bytes each, in a freshly allocated buffer that replaces mp->msgstr. */
1608 new_msgstr_len = nplurals * mp->msgstr_len;
1609 new_msgstr = XNMALLOC (new_msgstr_len, char);
1610 for (i = 0, p = new_msgstr; i < nplurals; i++)
1612 memcpy (p, mp->msgstr, mp->msgstr_len);
1613 p += mp->msgstr_len;
1615 mp->msgstr = new_msgstr;
1616 mp->msgstr_len = new_msgstr_len;
1617 mp->is_fuzzy = true;
/* Bit 2 is acted upon only when msgstr extends beyond its first
   NUL-terminated string, i.e. when it holds more than one form. */
1620 if ((mp->used & 2) && (mp->msgstr_len > strlen (mp->msgstr) + 1))
1622 /* ref->msgid_plural == NULL but def->msgid_plural != NULL.
1623 Use only the first among the plural forms. */
1625 if (verbosity_level > 1)
1627 po_gram_error_at_line (&mp->pos, _("\
1628 this message should not define plural forms"));
/* Shorten the length to the first form plus its NUL; the buffer itself is
   left as it is. */
1631 mp->msgstr_len = strlen (mp->msgstr) + 1;
1632 mp->is_fuzzy = true;
1635 /* Postprocessing of this message is done. */
1641 /* Now that mp->is_fuzzy is finalized for all messages, remove the
1642 "previous msgid" information from all messages that are not fuzzy or
1643 are untranslated. */
1644 for (j = 0; j < resultmlp->nitems; j++)
1646 message_ty *mp = resultmlp->item[j];
1648 if (!mp->is_fuzzy || mp->msgstr[0] == '\0')
1650 mp->prev_msgctxt = NULL;
1651 mp->prev_msgid = NULL;
1652 mp->prev_msgid_plural = NULL;
1657 static msgdomain_list_ty *
1658 merge (const char *fn1, const char *fn2, catalog_input_format_ty input_syntax,
1659 msgdomain_list_ty **defp)
1661 msgdomain_list_ty *def;
1662 msgdomain_list_ty *ref;
1664 unsigned int processed;
1665 struct statistics stats;
1666 msgdomain_list_ty *result;
1667 const char *def_canon_charset;
1668 definitions_ty definitions;
1669 message_list_ty *empty_list;
1671 stats.merged = stats.fuzzied = stats.missing = stats.obsolete = 0;
1673 /* This is the definitions file, created by a human. */
1674 def = read_catalog_file (fn1, input_syntax);
1676 /* This is the references file, created by groping the sources with
1677 the xgettext program. */
1678 ref = read_catalog_file (fn2, input_syntax);
1679 /* Add a dummy header entry, if the references file contains none. */
1680 for (k = 0; k < ref->nitems; k++)
1681 if (message_list_search (ref->item[k]->messages, NULL, "") == NULL)
1683 static lex_pos_ty pos = { __FILE__, __LINE__ };
1684 message_ty *refheader = message_alloc (NULL, "", NULL, "", 1, &pos);
1686 message_list_prepend (ref->item[k]->messages, refheader);
1689 /* The references file can be either in ASCII or in UTF-8. If it is
1690 in UTF-8, we have to convert the definitions and the compendiums to
1693 bool was_utf8 = false;
1694 for (k = 0; k < ref->nitems; k++)
1696 message_list_ty *mlp = ref->item[k]->messages;
1698 for (j = 0; j < mlp->nitems; j++)
1699 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1701 const char *header = mlp->item[j]->msgstr;
1705 const char *charsetstr = c_strstr (header, "charset=");
1707 if (charsetstr != NULL)
1711 charsetstr += strlen ("charset=");
1712 len = strcspn (charsetstr, " \t\n");
1713 if (len == strlen ("UTF-8")
1714 && c_strncasecmp (charsetstr, "UTF-8", len) == 0)
1722 def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
1723 if (compendiums != NULL)
1724 for (k = 0; k < compendiums->nitems; k++)
1725 iconv_message_list (compendiums->item[k], NULL, po_charset_utf8,
1726 compendium_filenames->item[k]);
1728 else if (compendiums != NULL && compendiums->nitems > 0)
1730 /* Ensure that the definitions and the compendiums are in the same
1731 encoding. Prefer the encoding of the definitions file, if
1732 possible; otherwise, if the definitions file is empty and the
1733 compendiums are all in the same encoding, use that encoding;
1734 otherwise, use UTF-8. */
1735 bool conversion_done = false;
1737 char *charset = NULL;
1739 /* Get the encoding of the definitions file. */
1740 for (k = 0; k < def->nitems; k++)
1742 message_list_ty *mlp = def->item[k]->messages;
1744 for (j = 0; j < mlp->nitems; j++)
1745 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1747 const char *header = mlp->item[j]->msgstr;
1751 const char *charsetstr = c_strstr (header, "charset=");
1753 if (charsetstr != NULL)
1757 charsetstr += strlen ("charset=");
1758 len = strcspn (charsetstr, " \t\n");
1759 charset = (char *) xmalloca (len + 1);
1760 memcpy (charset, charsetstr, len);
1761 charset[len] = '\0';
1766 if (charset != NULL)
1769 if (charset != NULL)
1771 const char *canon_charset = po_charset_canonicalize (charset);
1773 if (canon_charset != NULL)
1775 bool all_compendiums_iconvable = true;
1777 if (compendiums != NULL)
1778 for (k = 0; k < compendiums->nitems; k++)
1779 if (!is_message_list_iconvable (compendiums->item[k],
1780 NULL, canon_charset))
1782 all_compendiums_iconvable = false;
1786 if (all_compendiums_iconvable)
1788 /* Convert the compendiums to def's encoding. */
1789 if (compendiums != NULL)
1790 for (k = 0; k < compendiums->nitems; k++)
1791 iconv_message_list (compendiums->item[k],
1792 NULL, canon_charset,
1793 compendium_filenames->item[k]);
1794 conversion_done = true;
1800 if (!conversion_done)
1802 if (def->nitems == 0
1803 || (def->nitems == 1 && def->item[0]->messages->nitems == 0))
1805 /* The definitions file is empty.
1806 Compare the encodings of the compendiums. */
1807 const char *common_canon_charset = NULL;
1809 for (k = 0; k < compendiums->nitems; k++)
1811 message_list_ty *mlp = compendiums->item[k];
1812 char *charset = NULL;
1813 const char *canon_charset = NULL;
1815 for (j = 0; j < mlp->nitems; j++)
1816 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1818 const char *header = mlp->item[j]->msgstr;
1822 const char *charsetstr =
1823 c_strstr (header, "charset=");
1825 if (charsetstr != NULL)
1829 charsetstr += strlen ("charset=");
1830 len = strcspn (charsetstr, " \t\n");
1831 charset = (char *) xmalloca (len + 1);
1832 memcpy (charset, charsetstr, len);
1833 charset[len] = '\0';
1839 if (charset != NULL)
1841 canon_charset = po_charset_canonicalize (charset);
1844 /* If no charset declaration was found in this file,
1845 or if it is not a valid encoding name, or if it
1846 differs from the common charset found so far,
1847 we have no common charset. */
1848 if (canon_charset == NULL
1849 || (common_canon_charset != NULL
1850 && canon_charset != common_canon_charset))
1852 common_canon_charset = NULL;
1855 common_canon_charset = canon_charset;
1858 if (common_canon_charset != NULL)
1859 /* No conversion needed in this case. */
1860 conversion_done = true;
1862 if (!conversion_done)
1864 /* It's too hairy to find out what would be the optimal target
1865 encoding. So, convert everything to UTF-8. */
1866 def = iconv_msgdomain_list (def, "UTF-8", true, fn1);
1867 if (compendiums != NULL)
1868 for (k = 0; k < compendiums->nitems; k++)
1869 iconv_message_list (compendiums->item[k],
1870 NULL, po_charset_utf8,
1871 compendium_filenames->item[k]);
1877 /* Determine canonicalized encoding name of the definitions now, after
1878 conversion. Only used for fuzzy matching. */
1879 if (use_fuzzy_matching)
1881 def_canon_charset = def->encoding;
1882 if (def_canon_charset == NULL)
1884 char *charset = NULL;
1886 /* Get the encoding of the definitions file. */
1887 for (k = 0; k < def->nitems; k++)
1889 message_list_ty *mlp = def->item[k]->messages;
1891 for (j = 0; j < mlp->nitems; j++)
1892 if (is_header (mlp->item[j]) && !mlp->item[j]->obsolete)
1894 const char *header = mlp->item[j]->msgstr;
1898 const char *charsetstr = c_strstr (header, "charset=");
1900 if (charsetstr != NULL)
1904 charsetstr += strlen ("charset=");
1905 len = strcspn (charsetstr, " \t\n");
1906 charset = (char *) xmalloca (len + 1);
1907 memcpy (charset, charsetstr, len);
1908 charset[len] = '\0';
1913 if (charset != NULL)
1916 if (charset != NULL)
1917 def_canon_charset = po_charset_canonicalize (charset);
1918 if (def_canon_charset == NULL)
1919 /* Unspecified encoding. Assume unibyte encoding. */
1920 def_canon_charset = po_charset_ascii;
1924 def_canon_charset = NULL;
1926 /* Initialize and preprocess the total set of message definitions. */
1927 definitions_init (&definitions, def_canon_charset);
1928 empty_list = message_list_alloc (false);
1930 result = msgdomain_list_alloc (false);
1933 /* Every reference must be matched with its definition. */
1934 if (!multi_domain_mode)
1935 for (k = 0; k < ref->nitems; k++)
1937 const char *domain = ref->item[k]->domain;
1938 message_list_ty *refmlp = ref->item[k]->messages;
1939 message_list_ty *resultmlp =
1940 msgdomain_list_sublist (result, domain, true);
1941 message_list_ty *defmlp;
1943 defmlp = msgdomain_list_sublist (def, domain, false);
1945 defmlp = empty_list;
1946 definitions_set_current_list (&definitions, defmlp);
1948 match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
1949 &stats, &processed);
1953 /* Apply the references messages in the default domain to each of
1954 the definition domains. */
1955 message_list_ty *refmlp = ref->item[0]->messages;
1957 for (k = 0; k < def->nitems; k++)
1959 const char *domain = def->item[k]->domain;
1960 message_list_ty *defmlp = def->item[k]->messages;
1962 /* Ignore the default message domain if it has no messages. */
1963 if (k > 0 || defmlp->nitems > 0)
1965 message_list_ty *resultmlp =
1966 msgdomain_list_sublist (result, domain, true);
1968 definitions_set_current_list (&definitions, defmlp);
1970 match_domain (fn1, fn2, &definitions, refmlp, resultmlp,
1971 &stats, &processed);
1976 definitions_destroy (&definitions);
1978 /* Look for messages in the definition file, which are not present
1979 in the reference file, indicating messages which defined but not
1980 used in the program. Don't scan the compendium(s). */
1981 for (k = 0; k < def->nitems; ++k)
1983 const char *domain = def->item[k]->domain;
1984 message_list_ty *defmlp = def->item[k]->messages;
1986 for (j = 0; j < defmlp->nitems; j++)
1988 message_ty *defmsg = defmlp->item[j];
1992 /* Remember the old translation although it is not used anymore.
1993 But we mark it as obsolete. */
1996 mp = message_copy (defmsg);
1997 /* Clear the extracted comments. */
1998 if (mp->comment_dot != NULL)
2000 string_list_free (mp->comment_dot);
2001 mp->comment_dot = NULL;
2003 /* Clear the file position comments. */
2004 if (mp->filepos != NULL)
2008 for (i = 0; i < mp->filepos_count; i++)
2009 free ((char *) mp->filepos[i].file_name);
2010 mp->filepos_count = 0;
2014 /* Mark as obsolete. */
2015 mp->obsolete = true;
2017 message_list_append (msgdomain_list_sublist (result, domain, true),
2024 /* Determine the known a-priori encoding, if any. */
2025 if (def->encoding == ref->encoding)
2026 result->encoding = def->encoding;
2028 /* Report some statistics. */
2029 if (verbosity_level > 0)
2030 fprintf (stderr, _("%s\
2031 Read %ld old + %ld reference, \
2032 merged %ld, fuzzied %ld, missing %ld, obsolete %ld.\n"),
2033 !quiet && verbosity_level <= 1 ? "\n" : "",
2034 (long) def->nitems, (long) ref->nitems,
2035 (long) stats.merged, (long) stats.fuzzied, (long) stats.missing,
2036 (long) stats.obsolete);
2038 fputs (_(" done.\n"), stderr);
2040 /* Return results. */