gettext-tools/src/xgettext.c

   1 /* Extracts strings from C source file to Uniforum style .po file.
   2    Copyright (C) 1995-1998, 2000-2015 Free Software Foundation,
   3    Inc.
   4    Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
   5
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include <config.h>
  21 #endif
  22 #include <alloca.h>
  23
  24 #include <ctype.h>
  25 #include <errno.h>
  26 #include <getopt.h>
  27 #include <stdio.h>
  28 #include <time.h>
  29 #include <stdlib.h>
  30 #include <stdbool.h>
  31 #include <string.h>
  32 #include <sys/stat.h>
  33 #include <locale.h>
  34 #include <limits.h>
  35
  36 #include "xgettext.h"
  37 #include "closeout.h"
  38 #include "dir-list.h"
  39 #include "file-list.h"
  40 #include "str-list.h"
  41 #include "error.h"
  42 #include "error-progname.h"
  43 #include "progname.h"
  44 #include "relocatable.h"
  45 #include "basename.h"
  46 #include "xerror.h"
  47 #include "xvasprintf.h"
  48 #include "xsize.h"
  49 #include "xalloc.h"
  50 #include "xmalloca.h"
  51 #include "c-strstr.h"
  52 #include "xerror.h"
  53 #include "filename.h"
  54 #include "concat-filename.h"
  55 #include "c-strcase.h"
  56 #include "open-catalog.h"
  57 #include "read-catalog-abstract.h"
  58 #include "read-po.h"
  59 #include "message.h"
  60 #include "po-charset.h"
  61 #include "msgl-iconv.h"
  62 #include "msgl-ascii.h"
  63 #include "msgl-check.h"
  64 #include "po-xerror.h"
  65 #include "po-time.h"
  66 #include "write-catalog.h"
  67 #include "write-po.h"
  68 #include "write-properties.h"
  69 #include "write-stringtable.h"
  70 #include "color.h"
  71 #include "format.h"
  72 #include "propername.h"
  73 #include "sentence.h"
  74 #include "unistr.h"
  75 #include "its.h"
  76 #include "locating-rule.h"
  77 #include "gettext.h"
  78
  79 /* A convenience macro: _(str) is shorthand for gettext (str).  */
  80 #define _(str) gettext (str)
  81
  82
  83 #include "x-c.h"
  84 #include "x-po.h"
  85 #include "x-sh.h"
  86 #include "x-python.h"
  87 #include "x-lisp.h"
  88 #include "x-elisp.h"
  89 #include "x-librep.h"
  90 #include "x-scheme.h"
  91 #include "x-smalltalk.h"
  92 #include "x-java.h"
  93 #include "x-properties.h"
  94 #include "x-csharp.h"
  95 #include "x-appdata.h"
  96 #include "x-awk.h"
  97 #include "x-ycp.h"
  98 #include "x-tcl.h"
  99 #include "x-perl.h"
 100 #include "x-php.h"
 101 #include "x-stringtable.h"
 102 #include "x-rst.h"
 103 #include "x-glade.h"
 104 #include "x-lua.h"
 105 #include "x-javascript.h"
 106 #include "x-vala.h"
 107 #include "x-gsettings.h"
 108 #include "x-desktop.h"
 109 /* SIZEOF(a) is the element count of array A; ENDOF(a) points just past  */
 110 /* its last element.  NOTE(review): `a` is not parenthesized in a[0].  */
 111 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
 112 #define ENDOF(a) ((a) + SIZEOF(a))
 113
 114
 115 /* If true, add all comments immediately preceding one of the keywords.  */
 116 static bool add_all_comments = false;
 117
 118 /* Tag given with --add-comments=TAG, leading white space skipped, or NULL.  */
 119 static char *comment_tag;
 120
 121 /* Name of default domain file.  If not set defaults to messages.po.  */
 122 static const char *default_domain;
 123
 124 /* If called with --debug option the output reflects whether format
 125    string recognition is done automatically or forced by the user.  */
 126 static int do_debug;
 127
 128 /* Content of .po files with symbols to be excluded.  */
 129 message_list_ty *exclude;
 130
 131 /* Force output of PO file even if empty (set by --force-po).  */
 132 static int force_po;
 133
 134 /* Copyright holder of the output file and the translations.  */
 135 static const char *copyright_holder = "THE PACKAGE'S COPYRIGHT HOLDER";
 136
 137 /* Package name (--package-name).  */
 138 static const char *package_name = NULL;
 139
 140 /* Package version (--package-version).  */
 141 static const char *package_version = NULL;
 142
 143 /* Email address or URL for reports of bugs in msgids.  */
 144 static const char *msgid_bugs_address = NULL;
 145
 146 /* String used as prefix for msgstr.  */
 147 static const char *msgstr_prefix;
 148
 149 /* String used as suffix for msgstr.  */
 150 static const char *msgstr_suffix;
 151
 152 /* Directory in which output files are created (--output-dir).  */
 153 static char *output_dir;
 154
 155 /* The output syntax: .pot or .properties or .strings.  */
 156 static catalog_output_format_ty output_syntax = &output_format_po;
 157
 158 /* If nonzero, omit the header describing this run (--omit-header).  */
 159 int xgettext_omit_header;
 160
 161 /* Flag context list tables, one per language or dialect named below.  */
 162 static flag_context_list_table_ty flag_table_c;
 163 static flag_context_list_table_ty flag_table_cxx_qt;
 164 static flag_context_list_table_ty flag_table_cxx_kde;
 165 static flag_context_list_table_ty flag_table_cxx_boost;
 166 static flag_context_list_table_ty flag_table_objc;
 167 static flag_context_list_table_ty flag_table_gcc_internal;
 168 static flag_context_list_table_ty flag_table_sh;
 169 static flag_context_list_table_ty flag_table_python;
 170 static flag_context_list_table_ty flag_table_lisp;
 171 static flag_context_list_table_ty flag_table_elisp;
 172 static flag_context_list_table_ty flag_table_librep;
 173 static flag_context_list_table_ty flag_table_scheme;
 174 static flag_context_list_table_ty flag_table_java;
 175 static flag_context_list_table_ty flag_table_csharp;
 176 static flag_context_list_table_ty flag_table_awk;
 177 static flag_context_list_table_ty flag_table_ycp;
 178 static flag_context_list_table_ty flag_table_tcl;
 179 static flag_context_list_table_ty flag_table_perl;
 180 static flag_context_list_table_ty flag_table_php;
 181 static flag_context_list_table_ty flag_table_lua;
 182 static flag_context_list_table_ty flag_table_javascript;
 183 static flag_context_list_table_ty flag_table_vala;
 184
 185 /* If true, recognize Qt format strings (--qt).  */
 186 static bool recognize_format_qt;
 187
 188 /* If true, recognize KDE format strings (--kde).  */
 189 static bool recognize_format_kde;
 190
 191 /* If true, recognize Boost format strings (--boost).  */
 192 static bool recognize_format_boost;
 193
 194 /* Syntax checks enabled by default; --check sets entries to yes.  */
 195 static enum is_syntax_check default_syntax_check[NSYNTAXCHECKS];
 196
 197 /* Canonicalized encoding name for all input files (--from-code).  */
 198 const char *xgettext_global_source_encoding;
 199
 200 #if HAVE_ICONV
 201 /* Converter from xgettext_global_source_encoding to UTF-8 (except from
 202    ASCII or UTF-8, when this conversion is a no-op).  */
 203 iconv_t xgettext_global_source_iconv;
 204 #endif
 205
 206 /* Canonicalized encoding name for the current input file.  */
 207 const char *xgettext_current_source_encoding;
 208
 209 #if HAVE_ICONV
 210 /* Converter from xgettext_current_source_encoding to UTF-8 (except from
 211    ASCII or UTF-8, when this conversion is a no-op).  */
 212 iconv_t xgettext_current_source_iconv;
 213 #endif
 214 /* ITS locating rules, loaded in main() from the ITS data directories.  */
 215 static locating_rule_list_ty *its_locating_rules;
 216 /* ITS rules that mark the document's root element as untranslatable.  */
 217 #define ITS_ROOT_UNTRANSLATABLE \
 218   "<its:rules xmlns:its=\"http://www.w3.org/2005/11/its\"" \
 219   "           version=\"2.0\">" \
 220   "  <its:translateRule selector=\"/*\" translate=\"no\"/>" \
 221   "</its:rules>"
 222
 223 /* If true, add comments used by itstool (--itstool).  */
 224 static bool add_itstool_comments = false;
 225
 226 /* Long options.  Codes above CHAR_MAX have no short-option equivalent.  */
 227 static const struct option long_options[] =
 228 {
 229   { "add-comments", optional_argument, NULL, 'c' },
 230   { "add-location", optional_argument, NULL, 'n' },
 231   { "boost", no_argument, NULL, CHAR_MAX + 11 },
 232   { "c++", no_argument, NULL, 'C' },
 233   { "check", required_argument, NULL, CHAR_MAX + 17 },
 234   { "color", optional_argument, NULL, CHAR_MAX + 14 },
 235   { "copyright-holder", required_argument, NULL, CHAR_MAX + 1 },
 236   { "debug", no_argument, &do_debug, 1 },
 237   { "default-domain", required_argument, NULL, 'd' },
 238   { "directory", required_argument, NULL, 'D' },
 239   { "escape", no_argument, NULL, 'E' },
 240   { "exclude-file", required_argument, NULL, 'x' },
 241   { "extract-all", no_argument, NULL, 'a' },
 242   { "files-from", required_argument, NULL, 'f' },
 243   { "flag", required_argument, NULL, CHAR_MAX + 8 },
 244   { "force-po", no_argument, &force_po, 1 },
 245   { "foreign-user", no_argument, NULL, CHAR_MAX + 2 },
 246   { "from-code", required_argument, NULL, CHAR_MAX + 3 },
 247   { "help", no_argument, NULL, 'h' },
 248   { "indent", no_argument, NULL, 'i' },
 249   { "its", required_argument, NULL, CHAR_MAX + 20 },
 250   { "itstool", no_argument, NULL, CHAR_MAX + 19 },
 251   { "join-existing", no_argument, NULL, 'j' },
 252   { "kde", no_argument, NULL, CHAR_MAX + 10 },
 253   { "keyword", optional_argument, NULL, 'k' },
 254   { "language", required_argument, NULL, 'L' },
 255   { "msgid-bugs-address", required_argument, NULL, CHAR_MAX + 5 },
 256   { "msgstr-prefix", optional_argument, NULL, 'm' },
 257   { "msgstr-suffix", optional_argument, NULL, 'M' },
 258   { "no-escape", no_argument, NULL, 'e' },
 259   { "no-location", no_argument, NULL, CHAR_MAX + 16 },
 260   { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
 261   { "omit-header", no_argument, &xgettext_omit_header, 1 },
 262   { "output", required_argument, NULL, 'o' },
 263   { "output-dir", required_argument, NULL, 'p' },
 264   { "package-name", required_argument, NULL, CHAR_MAX + 12 },
 265   { "package-version", required_argument, NULL, CHAR_MAX + 13 },
 266   { "properties-output", no_argument, NULL, CHAR_MAX + 6 },
 267   { "qt", no_argument, NULL, CHAR_MAX + 9 },
 268   { "sentence-end", required_argument, NULL, CHAR_MAX + 18 },
 269   { "sort-by-file", no_argument, NULL, 'F' },
 270   { "sort-output", no_argument, NULL, 's' },
 271   { "strict", no_argument, NULL, 'S' },
 272   { "string-limit", required_argument, NULL, 'l' },
 273   { "stringtable-output", no_argument, NULL, CHAR_MAX + 7 },
 274   { "style", required_argument, NULL, CHAR_MAX + 15 },
 275   { "trigraphs", no_argument, NULL, 'T' },
 276   { "version", no_argument, NULL, 'V' },
 277   { "width", required_argument, NULL, 'w', },
 278   { NULL, 0, NULL, 0 }
 279 };
 280
 281
 282 /* The extractors must all be functions returning void and taking the input
 283    stream with its real and logical file names, the flag context table to
 284    use, and the message domain list in which to add the messages.  */
 285 typedef void (*extractor_func) (FILE *fp, const char *real_filename,
 286                                 const char *logical_filename,
 287                                 flag_context_list_table_ty *flag_table,
 288                                 msgdomain_list_ty *mdlp);
 289 /* An extractor function together with its flag table and string parsers.  */
 290 typedef struct extractor_ty extractor_ty;
 291 struct extractor_ty
 292 {
 293   extractor_func func;
 294   flag_context_list_table_ty *flag_table;
 295   struct formatstring_parser *formatstring_parser1;
 296   struct formatstring_parser *formatstring_parser2;
 297   struct formatstring_parser *formatstring_parser3;
 298   struct literalstring_parser *literalstring_parser;
 299 };
 300
 301
 302 /* Forward declarations of local functions.  usage() is noreturn for GCC.  */
 303 static void usage (int status)
 304 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ > 4) || __GNUC__ > 2)
 305         __attribute__ ((noreturn))
 306 #endif
 307 ;
 308 static void read_exclusion_file (char *file_name);
 309 static void extract_from_file (const char *file_name, extractor_ty extractor,
 310                                msgdomain_list_ty *mdlp);
 311 static void extract_from_xml_file (const char *file_name,
 312                                    its_rule_list_ty *rules,
 313                                    msgdomain_list_ty *mdlp);
 314 static message_ty *construct_header (void);
 315 static void finalize_header (msgdomain_list_ty *mdlp);
 316 static extractor_ty language_to_extractor (const char *name);
 317 static const char *extension_to_language (const char *extension);
 318
 319
 320 int
 321 main (int argc, char *argv[])
 322 {
 323   int optchar;
 324   bool do_help = false;
 325   bool do_version = false;
 326   msgdomain_list_ty *mdlp;
 327   bool join_existing = false;
 328   bool no_default_keywords = false;
 329   bool some_additional_keywords = false;
 330   bool sort_by_msgid = false;
 331   bool sort_by_filepos = false;
 332   char *its_dirs[2] = { NULL, NULL };
 333   char *explicit_its_filename = NULL;
 334   const char *file_name;
 335   const char *files_from = NULL;
 336   string_list_ty *file_list;
 337   char *output_file = NULL;
 338   const char *language = NULL;
 339   extractor_ty extractor = { NULL, NULL, NULL, NULL };
 340   int cnt;
 341   size_t i;
 342
 343   /* Set program name for messages.  */
 344   set_program_name (argv[0]);
 345   error_print_progname = maybe_print_progname;
 346
 347 #ifdef HAVE_SETLOCALE
 348   /* Set locale via LC_ALL.  */
 349   setlocale (LC_ALL, "");
 350 #endif
 351
 352   /* Set the text message domain.  */
 353   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
 354   bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
 355   textdomain (PACKAGE);
 356
 357   /* Ensure that write errors on stdout are detected.  */
 358   atexit (close_stdout);
 359
 360   /* Set initial value of variables.  */
 361   default_domain = MESSAGE_DOMAIN_DEFAULT;
 362   xgettext_global_source_encoding = po_charset_ascii;
 363   init_flag_table_c ();
 364   init_flag_table_objc ();
 365   init_flag_table_gcc_internal ();
 366   init_flag_table_kde ();
 367   init_flag_table_sh ();
 368   init_flag_table_python ();
 369   init_flag_table_lisp ();
 370   init_flag_table_elisp ();
 371   init_flag_table_librep ();
 372   init_flag_table_scheme ();
 373   init_flag_table_java ();
 374   init_flag_table_csharp ();
 375   init_flag_table_awk ();
 376   init_flag_table_ycp ();
 377   init_flag_table_tcl ();
 378   init_flag_table_perl ();
 379   init_flag_table_php ();
 380   init_flag_table_lua ();
 381   init_flag_table_javascript ();
 382   init_flag_table_vala ();
 383
 384   while ((optchar = getopt_long (argc, argv,
 385                                  "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:W:x:",
 386                                  long_options, NULL)) != EOF)
 387     switch (optchar)
 388       {
 389       case '\0':                /* Long option.  */
 390         break;
 391
 392       case 'a':
 393         x_c_extract_all ();
 394         x_sh_extract_all ();
 395         x_python_extract_all ();
 396         x_lisp_extract_all ();
 397         x_elisp_extract_all ();
 398         x_librep_extract_all ();
 399         x_scheme_extract_all ();
 400         x_java_extract_all ();
 401         x_csharp_extract_all ();
 402         x_awk_extract_all ();
 403         x_tcl_extract_all ();
 404         x_perl_extract_all ();
 405         x_php_extract_all ();
 406         x_lua_extract_all ();
 407         x_javascript_extract_all ();
 408         x_vala_extract_all ();
 409         break;
 410
 411       case 'c':
 412         if (optarg == NULL)
 413           {
 414             add_all_comments = true;
 415             comment_tag = NULL;
 416           }
 417         else
 418           {
 419             add_all_comments = false;
 420             comment_tag = optarg;
 421             /* We ignore leading white space.  */
 422             while (isspace ((unsigned char) *comment_tag))
 423               ++comment_tag;
 424           }
 425         break;
 426
 427       case 'C':
 428         language = "C++";
 429         break;
 430
 431       case 'd':
 432         default_domain = optarg;
 433         break;
 434
 435       case 'D':
 436         dir_list_append (optarg);
 437         break;
 438
 439       case 'e':
 440         message_print_style_escape (false);
 441         break;
 442
 443       case 'E':
 444         message_print_style_escape (true);
 445         break;
 446
 447       case 'f':
 448         files_from = optarg;
 449         break;
 450
 451       case 'F':
 452         sort_by_filepos = true;
 453         break;
 454
 455       case 'h':
 456         do_help = true;
 457         break;
 458
 459       case 'i':
 460         message_print_style_indent ();
 461         break;
 462
 463       case 'j':
 464         join_existing = true;
 465         break;
 466
 467       case 'k':
 468         if (optarg != NULL && *optarg == '\0')
 469           /* Make "--keyword=" work like "--keyword" and "-k".  */
 470           optarg = NULL;
 471         x_c_keyword (optarg);
 472         x_objc_keyword (optarg);
 473         x_sh_keyword (optarg);
 474         x_python_keyword (optarg);
 475         x_lisp_keyword (optarg);
 476         x_elisp_keyword (optarg);
 477         x_librep_keyword (optarg);
 478         x_scheme_keyword (optarg);
 479         x_java_keyword (optarg);
 480         x_csharp_keyword (optarg);
 481         x_awk_keyword (optarg);
 482         x_tcl_keyword (optarg);
 483         x_perl_keyword (optarg);
 484         x_php_keyword (optarg);
 485         x_lua_keyword (optarg);
 486         x_javascript_keyword (optarg);
 487         x_vala_keyword (optarg);
 488         x_desktop_keyword (optarg);
 489         if (optarg == NULL)
 490           no_default_keywords = true;
 491         else
 492           some_additional_keywords = true;
 493         break;
 494
 495       case 'l':
 496         /* Accepted for backward compatibility with 0.10.35.  */
 497         break;
 498
 499       case 'L':
 500         language = optarg;
 501         break;
 502
 503       case 'm':
 504         /* -m takes an optional argument.  If none is given "" is assumed. */
 505         msgstr_prefix = optarg == NULL ? "" : optarg;
 506         break;
 507
 508       case 'M':
 509         /* -M takes an optional argument.  If none is given "" is assumed. */
 510         msgstr_suffix = optarg == NULL ? "" : optarg;
 511         break;
 512
 513       case 'n':
 514         if (handle_filepos_comment_option (optarg))
 515           usage (EXIT_FAILURE);
 516         break;
 517
 518       case 'o':
 519         output_file = optarg;
 520         break;
 521
 522       case 'p':
 523         {
 524           size_t len = strlen (optarg);
 525
 526           if (output_dir != NULL)
 527             free (output_dir);
 528
 529           if (optarg[len - 1] == '/')
 530             output_dir = xstrdup (optarg);
 531           else
 532             output_dir = xasprintf ("%s/", optarg);
 533         }
 534         break;
 535
 536       case 's':
 537         sort_by_msgid = true;
 538         break;
 539
 540       case 'S':
 541         message_print_style_uniforum ();
 542         break;
 543
 544       case 'T':
 545         x_c_trigraphs ();
 546         break;
 547
 548       case 'V':
 549         do_version = true;
 550         break;
 551
 552       case 'w':
 553         {
 554           int value;
 555           char *endp;
 556           value = strtol (optarg, &endp, 10);
 557           if (endp != optarg)
 558             message_page_width_set (value);
 559         }
 560         break;
 561
 562       case 'x':
 563         read_exclusion_file (optarg);
 564         break;
 565
 566       case CHAR_MAX + 1:        /* --copyright-holder */
 567         copyright_holder = optarg;
 568         break;
 569
 570       case CHAR_MAX + 2:        /* --foreign-user */
 571         copyright_holder = "";
 572         break;
 573
 574       case CHAR_MAX + 3:        /* --from-code */
 575         xgettext_global_source_encoding = po_charset_canonicalize (optarg);
 576         if (xgettext_global_source_encoding == NULL)
 577           {
 578             multiline_warning (xasprintf (_("warning: ")),
 579                                xasprintf (_("\
 580 '%s' is not a valid encoding name.  Using ASCII as fallback.\n"),
 581                                           optarg));
 582             xgettext_global_source_encoding = po_charset_ascii;
 583           }
 584         break;
 585
 586       case CHAR_MAX + 4:        /* --no-wrap */
 587         message_page_width_ignore ();
 588         break;
 589
 590       case CHAR_MAX + 5:        /* --msgid-bugs-address */
 591         msgid_bugs_address = optarg;
 592         break;
 593
 594       case CHAR_MAX + 6:        /* --properties-output */
 595         output_syntax = &output_format_properties;
 596         break;
 597
 598       case CHAR_MAX + 7:        /* --stringtable-output */
 599         output_syntax = &output_format_stringtable;
 600         break;
 601
 602       case CHAR_MAX + 8:        /* --flag */
 603         xgettext_record_flag (optarg);
 604         break;
 605
 606       case CHAR_MAX + 9:        /* --qt */
 607         recognize_format_qt = true;
 608         break;
 609
 610       case CHAR_MAX + 10:       /* --kde */
 611         recognize_format_kde = true;
 612         activate_additional_keywords_kde ();
 613         break;
 614
 615       case CHAR_MAX + 11:       /* --boost */
 616         recognize_format_boost = true;
 617         break;
 618
 619       case CHAR_MAX + 12:       /* --package-name */
 620         package_name = optarg;
 621         break;
 622
 623       case CHAR_MAX + 13:       /* --package-version */
 624         package_version = optarg;
 625         break;
 626
 627       case CHAR_MAX + 14: /* --color */
 628         if (handle_color_option (optarg) || color_test_mode)
 629           usage (EXIT_FAILURE);
 630         break;
 631
 632       case CHAR_MAX + 15: /* --style */
 633         handle_style_option (optarg);
 634         break;
 635
 636       case CHAR_MAX + 16: /* --no-location */
 637         message_print_style_filepos (filepos_comment_none);
 638         break;
 639
 640       case CHAR_MAX + 17: /* --check */
 641         if (strcmp (optarg, "ellipsis-unicode") == 0)
 642           default_syntax_check[sc_ellipsis_unicode] = yes;
 643         else if (strcmp (optarg, "space-ellipsis") == 0)
 644           default_syntax_check[sc_space_ellipsis] = yes;
 645         else if (strcmp (optarg, "quote-unicode") == 0)
 646           default_syntax_check[sc_quote_unicode] = yes;
 647         else
 648           error (EXIT_FAILURE, 0, _("syntax check '%s' unknown"), optarg);
 649         break;
 650
 651       case CHAR_MAX + 18: /* --sentence-end */
 652         if (strcmp (optarg, "single-space") == 0)
 653           sentence_end_required_spaces = 1;
 654         else if (strcmp (optarg, "double-space") == 0)
 655           sentence_end_required_spaces = 2;
 656         else
 657           error (EXIT_FAILURE, 0, _("sentence end type '%s' unknown"), optarg);
 658         break;
 659
 660       case CHAR_MAX + 20: /* --its */
 661         explicit_its_filename = optarg;
 662         break;
 663
 664       case CHAR_MAX + 19: /* --itstool */
 665         add_itstool_comments = true;
 666         break;
 667
 668       default:
 669         usage (EXIT_FAILURE);
 670         /* NOTREACHED */
 671       }
 672
 673   /* Version information requested.  */
 674   if (do_version)
 675     {
 676       printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
 677       /* xgettext: no-wrap */
 678       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
 679 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
 680 This is free software: you are free to change and redistribute it.\n\
 681 There is NO WARRANTY, to the extent permitted by law.\n\
 682 "),
 683               "1995-1998, 2000-2013");
 684       printf (_("Written by %s.\n"), proper_name ("Ulrich Drepper"));
 685       exit (EXIT_SUCCESS);
 686     }
 687
 688   /* Help is requested.  */
 689   if (do_help)
 690     usage (EXIT_SUCCESS);
 691
 692   /* Verify selected options.  */
 693   if (sort_by_msgid && sort_by_filepos)
 694     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 695            "--sort-output", "--sort-by-file");
 696
 697   /* We cannot support both Qt and KDE, or Qt and Boost, or KDE and Boost
 698      format strings, because there are only two formatstring parsers per
 699      language, and formatstring_c is the first one for C++.  */
 700   if (recognize_format_qt && recognize_format_kde)
 701     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 702            "--qt", "--kde");
 703   if (recognize_format_qt && recognize_format_boost)
 704     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 705            "--qt", "--boost");
 706   if (recognize_format_kde && recognize_format_boost)
 707     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 708            "--kde", "--boost");
 709
 710   if (join_existing && strcmp (default_domain, "-") == 0)
 711     error (EXIT_FAILURE, 0, _("\
 712 --join-existing cannot be used when output is written to stdout"));
 713
 714   if (no_default_keywords && !some_additional_keywords)
 715     {
 716       error (0, 0, _("\
 717 xgettext cannot work without keywords to look for"));
 718       usage (EXIT_FAILURE);
 719     }
 720
 721   /* Test whether we have some input files given.  */
 722   if (files_from == NULL && optind >= argc)
 723     {
 724       error (EXIT_SUCCESS, 0, _("no input file given"));
 725       usage (EXIT_FAILURE);
 726     }
 727
 728   {
 729     const char *gettextdatadir;
 730     char *versioned_gettextdatadir;
 731
 732     /* Make it possible to override the locator file location.  This
 733        is necessary for running the testsuite before "make
 734        install".  */
 735     gettextdatadir = getenv ("GETTEXTDATADIR");
 736     if (gettextdatadir == NULL || gettextdatadir[0] == '\0')
 737       gettextdatadir = relocate (GETTEXTDATADIR);
 738
 739     its_dirs[0] = xconcatenated_filename (gettextdatadir, "its", NULL);
 740
 741     versioned_gettextdatadir =
 742       xasprintf ("%s%s", relocate (GETTEXTDATADIR), PACKAGE_SUFFIX);
 743     its_dirs[1] = xconcatenated_filename (versioned_gettextdatadir, "its",
 744                                           NULL);
 745     free (versioned_gettextdatadir);
 746
 747     its_locating_rules = locating_rule_list_alloc ();
 748     for (i = 0; i < SIZEOF (its_dirs); i++)
 749       locating_rule_list_add_from_directory (its_locating_rules, its_dirs[i]);
 750   }
 751
 752   /* Explicit ITS file selection and language specification are
 753      mutually exclusive.  */
 754   if (explicit_its_filename != NULL && language != NULL)
 755     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 756            "--its", "--language");
 757
 758   /* Determine extractor from language.  */
 759   if (language != NULL)
 760     extractor = language_to_extractor (language);
 761
 762   /* Canonize msgstr prefix/suffix.  */
 763   if (msgstr_prefix != NULL && msgstr_suffix == NULL)
 764     msgstr_suffix = "";
 765   else if (msgstr_prefix == NULL && msgstr_suffix != NULL)
 766     msgstr_prefix = "";
 767
 768   /* Default output directory is the current directory.  */
 769   if (output_dir == NULL)
 770     output_dir = ".";
 771
 772   /* Construct the name of the output file.  If the default domain has
 773      the special name "-" we write to stdout.  */
 774   if (output_file)
 775     {
 776       if (IS_ABSOLUTE_PATH (output_file) || strcmp (output_file, "-") == 0)
 777         file_name = xstrdup (output_file);
 778       else
 779         /* Please do NOT add a .po suffix! */
 780         file_name = xconcatenated_filename (output_dir, output_file, NULL);
 781     }
 782   else if (strcmp (default_domain, "-") == 0)
 783     file_name = "-";
 784   else
 785     file_name = xconcatenated_filename (output_dir, default_domain, ".po");
 786
 787   /* Determine list of files we have to process.  */
 788   if (files_from != NULL)
 789     file_list = read_names_from_file (files_from);
 790   else
 791     file_list = string_list_alloc ();
 792   /* Append names from command line.  */
 793   for (cnt = optind; cnt < argc; ++cnt)
 794     string_list_append_unique (file_list, argv[cnt]);
 795
 796   /* Allocate converter from xgettext_global_source_encoding to UTF-8 (except
 797      from ASCII or UTF-8, when this conversion is a no-op).  */
 798   if (xgettext_global_source_encoding != po_charset_ascii
 799       && xgettext_global_source_encoding != po_charset_utf8)
 800     {
 801 #if HAVE_ICONV
 802       iconv_t cd;
 803
 804       /* Avoid glibc-2.1 bug with EUC-KR.  */
 805 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
 806      && !defined _LIBICONV_VERSION
 807       if (strcmp (xgettext_global_source_encoding, "EUC-KR") == 0)
 808         cd = (iconv_t)(-1);
 809       else
 810 # endif
 811       cd = iconv_open (po_charset_utf8, xgettext_global_source_encoding);
 812       if (cd == (iconv_t)(-1))
 813         error (EXIT_FAILURE, 0, _("\
 814 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
 815 and iconv() does not support this conversion."),
 816                xgettext_global_source_encoding, po_charset_utf8,
 817                basename (program_name));
 818       xgettext_global_source_iconv = cd;
 819 #else
 820       error (EXIT_FAILURE, 0, _("\
 821 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
 822 This version was built without iconv()."),
 823              xgettext_global_source_encoding, po_charset_utf8,
 824              basename (program_name));
 825 #endif
 826     }
 827
 828   /* Allocate a message list to remember all the messages.  */
 829   mdlp = msgdomain_list_alloc (true);
 830
 831   /* Generate a header, so that we know how and when this PO file was
 832      created.  */
 833   if (!xgettext_omit_header)
 834     message_list_append (mdlp->item[0]->messages, construct_header ());
 835
 836   /* Read in the old messages, so that we can add to them.  */
 837   if (join_existing)
 838     {
 839       /* Temporarily reset the directory list to empty, because file_name
 840          is an output file and therefore should not be searched for.  */
 841       void *saved_directory_list = dir_list_save_reset ();
 842       extractor_ty po_extractor = { extract_po, NULL, NULL, NULL };
 843
 844       extract_from_file (file_name, po_extractor, mdlp);
 845       if (!is_ascii_msgdomain_list (mdlp))
 846         mdlp = iconv_msgdomain_list (mdlp, "UTF-8", true, file_name);
 847
 848       dir_list_restore (saved_directory_list);
 849     }
 850
 851   /* Process all input files.  */
 852   for (i = 0; i < file_list->nitems; i++)
 853     {
 854       const char *filename;
 855       extractor_ty this_file_extractor;
 856       its_rule_list_ty *its_rules = NULL;
 857
 858       filename = file_list->item[i];
 859
 860       if (extractor.func)
 861         this_file_extractor = extractor;
 862       else if (explicit_its_filename != NULL)
 863         {
 864           its_rules = its_rule_list_alloc ();
 865           if (!its_rule_list_add_from_file (its_rules,
 866                                             explicit_its_filename))
 867             {
 868               error (EXIT_FAILURE, 0, _("\
 869 warning: ITS rule file '%s' does not exist"), explicit_its_filename);
 870             }
 871         }
 872       else
 873         {
 874           const char *language_from_extension = NULL;
 875           const char *base;
 876           char *reduced;
 877
 878           base = strrchr (filename, '/');
 879           if (!base)
 880             base = filename;
 881
 882           reduced = xstrdup (base);
 883           /* Remove a trailing ".in" - it's a generic suffix.  */
 884           while (strlen (reduced) >= 3
 885                  && memcmp (reduced + strlen (reduced) - 3, ".in", 3) == 0)
 886             reduced[strlen (reduced) - 3] = '\0';
 887
 888           /* If no language is specified with -L, deduce it the extension.  */
 889           if (language == NULL)
 890             {
 891               const char *p;
 892
 893               /* Work out what the file extension is.  */
 894               p = reduced + strlen (reduced);
 895               for (; p > reduced && language_from_extension == NULL; p--)
 896                 {
 897                   if (*p == '.')
 898                     {
 899                       const char *extension = p + 1;
 900
 901                       /* Derive the language from the extension, and
 902                          the extractor function from the language.  */
 903                       language_from_extension =
 904                         extension_to_language (extension);
 905                     }
 906                 }
 907             }
 908
 909           /* If language is not determined from the file name
 910              extension, check ITS locating rules.  */
 911           if (language_from_extension == NULL
 912               && strcmp (filename, "-") != 0)
 913             {
 914               const char *its_basename;
 915
 916               its_basename = locating_rule_list_locate (its_locating_rules,
 917                                                         filename,
 918                                                         language);
 919
 920               if (its_basename != NULL)
 921                 {
 922                   size_t j;
 923
 924                   its_rules = its_rule_list_alloc ();
 925
 926                   /* If the ITS file is identified by the name,
 927                      set the root element untranslatable.  */
 928                   if (language != NULL)
 929                     its_rule_list_add_from_string (its_rules,
 930                                                    ITS_ROOT_UNTRANSLATABLE);
 931
 932                   for (j = 0; j < SIZEOF (its_dirs); j++)
 933                     {
 934                       char *its_filename =
 935                         xconcatenated_filename (its_dirs[j], its_basename,
 936                                                 NULL);
 937                       struct stat statbuf;
 938                       bool ok = false;
 939
 940                       if (stat (its_filename, &statbuf) == 0)
 941                         ok = its_rule_list_add_from_file (its_rules,
 942                                                           its_filename);
 943                       free (its_filename);
 944                       if (ok)
 945                         break;
 946                     }
 947                   if (j == SIZEOF (its_dirs))
 948                     {
 949                       error (0, 0, _("\
 950 warning: ITS rule file '%s' does not exist; check your gettext installation"),
 951                              its_basename);
 952                       its_rule_list_free (its_rules);
 953                       its_rules = NULL;
 954                     }
 955                 }
 956             }
 957
 958           if (its_rules == NULL)
 959             {
 960               if (language_from_extension == NULL)
 961                 {
 962                   const char *extension = strrchr (reduced, '.');
 963                   if (extension == NULL)
 964                     extension = "";
 965                   else
 966                     extension++;
 967                   error (0, 0, _("\
 968 warning: file '%s' extension '%s' is unknown; will try C"), filename, extension);
 969                   language_from_extension = "C";
 970                 }
 971
 972               this_file_extractor =
 973                 language_to_extractor (language_from_extension);
 974             }
 975
 976           free (reduced);
 977         }
 978
 979       if (its_rules != NULL)
 980         {
 981           /* Extract the strings from the file, using ITS.  */
 982           extract_from_xml_file (filename, its_rules, mdlp);
 983           its_rule_list_free (its_rules);
 984         }
 985       else
 986         /* Extract the strings from the file.  */
 987         extract_from_file (filename, this_file_extractor, mdlp);
 988     }
 989   string_list_free (file_list);
 990
 991   /* Finalize the constructed header.  */
 992   if (!xgettext_omit_header)
 993     finalize_header (mdlp);
 994
 995   /* Free the allocated converter.  */
 996 #if HAVE_ICONV
 997   if (xgettext_global_source_encoding != po_charset_ascii
 998       && xgettext_global_source_encoding != po_charset_utf8)
 999     iconv_close (xgettext_global_source_iconv);
1000 #endif
1001
1002   /* Sorting the list of messages.  */
1003   if (sort_by_filepos)
1004     msgdomain_list_sort_by_filepos (mdlp);
1005   else if (sort_by_msgid)
1006     msgdomain_list_sort_by_msgid (mdlp);
1007
1008   /* Check syntax of messages.  */
1009   {
1010     int nerrors = 0;
1011
1012     for (i = 0; i < mdlp->nitems; i++)
1013       {
1014         message_list_ty *mlp = mdlp->item[i]->messages;
1015         nerrors = syntax_check_message_list (mlp);
1016       }
1017
1018     /* Exit with status 1 on any error.  */
1019     if (nerrors > 0)
1020       error (EXIT_FAILURE, 0,
1021              ngettext ("found %d fatal error", "found %d fatal errors",
1022                        nerrors),
1023              nerrors);
1024   }
1025
1026   /* Write the PO file.  */
1027   msgdomain_list_print (mdlp, file_name, output_syntax, force_po, do_debug);
1028
1029   if (its_locating_rules)
1030     locating_rule_list_free (its_locating_rules);
1031
1032   for (i = 0; i < SIZEOF (its_dirs); i++)
1033     free (its_dirs[i]);
1034
1035   exit (EXIT_SUCCESS);
1036 }
1037
1038
1039 /* Display usage information and exit.  */
1040 static void
1041 usage (int status)
1042 {
1043   if (status != EXIT_SUCCESS)
1044     fprintf (stderr, _("Try '%s --help' for more information.\n"),
1045              program_name);
1046   else
1047     {
1048       printf (_("\
1049 Usage: %s [OPTION] [INPUTFILE]...\n\
1050 "), program_name);
1051       printf ("\n");
1052       printf (_("\
1053 Extract translatable strings from given input files.\n\
1054 "));
1055       printf ("\n");
1056       /* xgettext: no-wrap */
1057       printf (_("\
1058 Mandatory arguments to long options are mandatory for short options too.\n\
1059 Similarly for optional arguments.\n\
1060 "));
1061       printf ("\n");
1062       printf (_("\
1063 Input file location:\n"));
1064       printf (_("\
1065   INPUTFILE ...               input files\n"));
1066       printf (_("\
1067   -f, --files-from=FILE       get list of input files from FILE\n"));
1068       printf (_("\
1069   -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
1070       printf (_("\
1071 If input file is -, standard input is read.\n"));
1072       printf ("\n");
1073       printf (_("\
1074 Output file location:\n"));
1075       printf (_("\
1076   -d, --default-domain=NAME   use NAME.po for output (instead of messages.po)\n"));
1077       printf (_("\
1078   -o, --output=FILE           write output to specified file\n"));
1079       printf (_("\
1080   -p, --output-dir=DIR        output files will be placed in directory DIR\n"));
1081       printf (_("\
1082 If output file is -, output is written to standard output.\n"));
1083       printf ("\n");
1084       printf (_("\
1085 Choice of input file language:\n"));
1086       printf (_("\
1087   -L, --language=NAME         recognise the specified language\n\
1088                                 (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\
1089                                 EmacsLisp, librep, Scheme, Smalltalk, Java,\n\
1090                                 JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\
1091                                 GCC-source, NXStringTable, RST, Glade, Lua,\n\
1092                                 JavaScript, Vala, Desktop)\n"));
1093       printf (_("\
1094   -C, --c++                   shorthand for --language=C++\n"));
1095       printf (_("\
1096 By default the language is guessed depending on the input file name extension.\n"));
1097       printf ("\n");
1098       printf (_("\
1099 Input file interpretation:\n"));
1100       printf (_("\
1101       --from-code=NAME        encoding of input files\n\
1102                                 (except for Python, Tcl, Glade)\n"));
1103       printf (_("\
1104 By default the input files are assumed to be in ASCII.\n"));
1105       printf ("\n");
1106       printf (_("\
1107 Operation mode:\n"));
1108       printf (_("\
1109   -j, --join-existing         join messages with existing file\n"));
1110       printf (_("\
1111   -x, --exclude-file=FILE.po  entries from FILE.po are not extracted\n"));
1112       printf (_("\
1113   -cTAG, --add-comments=TAG   place comment blocks starting with TAG and\n\
1114                                 preceding keyword lines in output file\n\
1115   -c, --add-comments          place all comment blocks preceding keyword lines\n\
1116                                 in output file\n"));
1117       printf (_("\
1118       --check=NAME            perform syntax check on messages\n\
1119                                 (ellipsis-unicode, space-ellipsis,\n\
1120                                  quote-unicode)\n"));
1121       printf (_("\
1122       --sentence-end=TYPE     type describing the end of sentence\n\
1123                                 (single-space, which is the default, \n\
1124                                  or double-space)\n"));
1125       printf ("\n");
1126       printf (_("\
1127 Language specific options:\n"));
1128       printf (_("\
1129   -a, --extract-all           extract all strings\n"));
1130       printf (_("\
1131                                 (only languages C, C++, ObjectiveC, Shell,\n\
1132                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
1133                                 C#, awk, Tcl, Perl, PHP, GCC-source, Glade,\n\
1134                                 Lua, JavaScript, Vala)\n"));
1135       printf (_("\
1136   -kWORD, --keyword=WORD      look for WORD as an additional keyword\n\
1137   -k, --keyword               do not to use default keywords\n"));
1138       printf (_("\
1139                                 (only languages C, C++, ObjectiveC, Shell,\n\
1140                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
1141                                 C#, awk, Tcl, Perl, PHP, GCC-source, Glade,\n\
1142                                 Lua, JavaScript, Vala, Desktop)\n"));
1143       printf (_("\
1144       --flag=WORD:ARG:FLAG    additional flag for strings inside the argument\n\
1145                               number ARG of keyword WORD\n"));
1146       printf (_("\
1147                                 (only languages C, C++, ObjectiveC, Shell,\n\
1148                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
1149                                 C#, awk, YCP, Tcl, Perl, PHP, GCC-source,\n\
1150                                 Lua, JavaScript, Vala)\n"));
1151       printf (_("\
1152   -T, --trigraphs             understand ANSI C trigraphs for input\n"));
1153       printf (_("\
1154                                 (only languages C, C++, ObjectiveC)\n"));
1155       printf (_("\
1156       --qt                    recognize Qt format strings\n"));
1157       printf (_("\
1158                                 (only language C++)\n"));
1159       printf (_("\
1160       --kde                   recognize KDE 4 format strings\n"));
1161       printf (_("\
1162                                 (only language C++)\n"));
1163       printf (_("\
1164       --boost                 recognize Boost format strings\n"));
1165       printf (_("\
1166                                 (only language C++)\n"));
1167       printf (_("\
1168       --debug                 more detailed formatstring recognition result\n"));
1169       printf ("\n");
1170       printf (_("\
1171 Output details:\n"));
1172       printf (_("\
1173       --color                 use colors and other text attributes always\n\
1174       --color=WHEN            use colors and other text attributes if WHEN.\n\
1175                               WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
1176       printf (_("\
1177       --style=STYLEFILE       specify CSS style rule file for --color\n"));
1178       printf (_("\
1179   -e, --no-escape             do not use C escapes in output (default)\n"));
1180       printf (_("\
1181   -E, --escape                use C escapes in output, no extended chars\n"));
1182       printf (_("\
1183       --force-po              write PO file even if empty\n"));
1184       printf (_("\
1185   -i, --indent                write the .po file using indented style\n"));
1186       printf (_("\
1187       --no-location           do not write '#: filename:line' lines\n"));
1188       printf (_("\
1189   -n, --add-location          generate '#: filename:line' lines (default)\n"));
1190       printf (_("\
1191       --strict                write out strict Uniforum conforming .po file\n"));
1192       printf (_("\
1193       --properties-output     write out a Java .properties file\n"));
1194       printf (_("\
1195       --stringtable-output    write out a NeXTstep/GNUstep .strings file\n"));
1196       printf (_("\
1197       --its=FILE              apply ITS rules from FILE\n"));
1198       printf (_("\
1199       --itstool               write out itstool comments\n"));
1200       printf (_("\
1201   -w, --width=NUMBER          set output page width\n"));
1202       printf (_("\
1203       --no-wrap               do not break long message lines, longer than\n\
1204                               the output page width, into several lines\n"));
1205       printf (_("\
1206   -s, --sort-output           generate sorted output\n"));
1207       printf (_("\
1208   -F, --sort-by-file          sort output by file location\n"));
1209       printf (_("\
1210       --omit-header           don't write header with 'msgid \"\"' entry\n"));
1211       printf (_("\
1212       --copyright-holder=STRING  set copyright holder in output\n"));
1213       printf (_("\
1214       --foreign-user          omit FSF copyright in output for foreign user\n"));
1215       printf (_("\
1216       --package-name=PACKAGE  set package name in output\n"));
1217       printf (_("\
1218       --package-version=VERSION  set package version in output\n"));
1219       printf (_("\
1220       --msgid-bugs-address=EMAIL@ADDRESS  set report address for msgid bugs\n"));
1221       printf (_("\
1222   -m[STRING], --msgstr-prefix[=STRING]  use STRING or \"\" as prefix for msgstr\n\
1223                                 values\n"));
1224       printf (_("\
1225   -M[STRING], --msgstr-suffix[=STRING]  use STRING or \"\" as suffix for msgstr\n\
1226                                 values\n"));
1227       printf ("\n");
1228       printf (_("\
1229 Informative output:\n"));
1230       printf (_("\
1231   -h, --help                  display this help and exit\n"));
1232       printf (_("\
1233   -V, --version               output version information and exit\n"));
1234       printf ("\n");
1235       /* TRANSLATORS: The placeholder indicates the bug-reporting address
1236          for this package.  Please add _another line_ saying
1237          "Report translation bugs to <...>\n" with the address for translation
1238          bugs (typically your translation team's web or email address).  */
1239       fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
1240              stdout);
1241     }
1242
1243   exit (status);
1244 }
1245
1246
1247 static void
1248 exclude_directive_domain (abstract_catalog_reader_ty *pop, char *name)
1249 {
1250   po_gram_error_at_line (&gram_pos,
1251                          _("this file may not contain domain directives"));
1252 }
1253
1254
1255 static void
1256 exclude_directive_message (abstract_catalog_reader_ty *pop,
1257                            char *msgctxt,
1258                            char *msgid,
1259                            lex_pos_ty *msgid_pos,
1260                            char *msgid_plural,
1261                            char *msgstr, size_t msgstr_len,
1262                            lex_pos_ty *msgstr_pos,
1263                            char *prev_msgctxt,
1264                            char *prev_msgid,
1265                            char *prev_msgid_plural,
1266                            bool force_fuzzy, bool obsolete)
1267 {
1268   message_ty *mp;
1269
1270   /* See if this message ID has been seen before.  */
1271   if (exclude == NULL)
1272     exclude = message_list_alloc (true);
1273   mp = message_list_search (exclude, msgctxt, msgid);
1274   if (mp != NULL)
1275     free (msgid);
1276   else
1277     {
1278       mp = message_alloc (msgctxt, msgid, msgid_plural, "", 1, msgstr_pos);
1279       /* Do not free msgid.  */
1280       message_list_append (exclude, mp);
1281     }
1282
1283   /* All we care about is the msgid.  Throw the msgstr away.
1284      Don't even check for duplicate msgids.  */
1285   free (msgstr);
1286 }
1287
1288
1289 /* So that the one parser can be used for multiple programs, and also
1290    use good data hiding and encapsulation practices, an object
1291    oriented approach has been taken.  An object instance is allocated,
1292    and all actions resulting from the parse will be through
1293    invocations of method functions of that object.  */
1294
1295 static abstract_catalog_reader_class_ty exclude_methods =
1296 {
1297   sizeof (abstract_catalog_reader_ty),
1298   NULL, /* constructor */
1299   NULL, /* destructor */
1300   NULL, /* parse_brief */
1301   NULL, /* parse_debrief */
1302   exclude_directive_domain,
1303   exclude_directive_message,
1304   NULL, /* comment */
1305   NULL, /* comment_dot */
1306   NULL, /* comment_filepos */
1307   NULL, /* comment_special */
1308 };
1309
1310
1311 static void
1312 read_exclusion_file (char *filename)
1313 {
1314   char *real_filename;
1315   FILE *fp = open_catalog_file (filename, &real_filename, true);
1316   abstract_catalog_reader_ty *pop;
1317
1318   pop = catalog_reader_alloc (&exclude_methods);
1319   catalog_reader_parse (pop, fp, real_filename, filename, &input_format_po);
1320   catalog_reader_free (pop);
1321
1322   if (fp != stdin)
1323     fclose (fp);
1324 }
1325
1326
1327 void
1328 split_keywordspec (const char *spec,
1329                    const char **endp, struct callshape *shapep)
1330 {
1331   const char *p;
1332   int argnum1 = 0;
1333   int argnum2 = 0;
1334   int argnumc = 0;
1335   bool argnum1_glib_context = false;
1336   bool argnum2_glib_context = false;
1337   int argtotal = 0;
1338   string_list_ty xcomments;
1339
1340   string_list_init (&xcomments);
1341
1342   /* Start parsing from the end.  */
1343   p = spec + strlen (spec);
1344   while (p > spec)
1345     {
1346       if (isdigit ((unsigned char) p[-1])
1347           || ((p[-1] == 'c' || p[-1] == 'g' || p[-1] == 't')
1348               && p - 1 > spec && isdigit ((unsigned char) p[-2])))
1349         {
1350           bool contextp = (p[-1] == 'c');
1351           bool glibp = (p[-1] == 'g');
1352           bool totalp = (p[-1] == 't');
1353
1354           do
1355             p--;
1356           while (p > spec && isdigit ((unsigned char) p[-1]));
1357
1358           if (p > spec && (p[-1] == ',' || p[-1] == ':'))
1359             {
1360               char *dummy;
1361               int arg = strtol (p, &dummy, 10);
1362
1363               if (contextp)
1364                 {
1365                   if (argnumc != 0)
1366                     /* Only one context argument can be given.  */
1367                     break;
1368                   argnumc = arg;
1369                 }
1370               else if (totalp)
1371                 {
1372                   if (argtotal != 0)
1373                     /* Only one total number of arguments can be given.  */
1374                     break;
1375                   argtotal = arg;
1376                 }
1377               else
1378                 {
1379                   if (argnum2 != 0)
1380                     /* At most two normal arguments can be given.  */
1381                     break;
1382                   argnum2 = argnum1;
1383                   argnum2_glib_context = argnum1_glib_context;
1384                   argnum1 = arg;
1385                   argnum1_glib_context = glibp;
1386                 }
1387             }
1388           else
1389             break;
1390         }
1391       else if (p[-1] == '"')
1392         {
1393           const char *xcomment_end;
1394
1395           p--;
1396           xcomment_end = p;
1397
1398           while (p > spec && p[-1] != '"')
1399             p--;
1400
1401           if (p > spec /* && p[-1] == '"' */)
1402             {
1403               const char *xcomment_start;
1404
1405               xcomment_start = p;
1406               p--;
1407               if (p > spec && (p[-1] == ',' || p[-1] == ':'))
1408                 {
1409                   size_t xcomment_len = xcomment_end - xcomment_start;
1410                   char *xcomment = XNMALLOC (xcomment_len + 1, char);
1411
1412                   memcpy (xcomment, xcomment_start, xcomment_len);
1413                   xcomment[xcomment_len] = '\0';
1414                   string_list_append (&xcomments, xcomment);
1415                 }
1416               else
1417                 break;
1418             }
1419           else
1420             break;
1421         }
1422       else
1423         break;
1424
1425       /* Here an element of the comma-separated list has been parsed.  */
1426       if (!(p > spec && (p[-1] == ',' || p[-1] == ':')))
1427         abort ();
1428       p--;
1429       if (*p == ':')
1430         {
1431           size_t i;
1432
1433           if (argnum1 == 0 && argnum2 == 0)
1434             /* At least one non-context argument must be given.  */
1435             break;
1436           if (argnumc != 0
1437               && (argnum1_glib_context || argnum2_glib_context))
1438             /* Incompatible ways to specify the context.  */
1439             break;
1440           *endp = p;
1441           shapep->argnum1 = (argnum1 > 0 ? argnum1 : 1);
1442           shapep->argnum2 = argnum2;
1443           shapep->argnumc = argnumc;
1444           shapep->argnum1_glib_context = argnum1_glib_context;
1445           shapep->argnum2_glib_context = argnum2_glib_context;
1446           shapep->argtotal = argtotal;
1447           /* Reverse the order of the xcomments.  */
1448           string_list_init (&shapep->xcomments);
1449           for (i = xcomments.nitems; i > 0; )
1450             string_list_append (&shapep->xcomments, xcomments.item[--i]);
1451           string_list_destroy (&xcomments);
1452           return;
1453         }
1454     }
1455
1456   /* Couldn't parse the desired syntax.  */
1457   *endp = spec + strlen (spec);
1458   shapep->argnum1 = 1;
1459   shapep->argnum2 = 0;
1460   shapep->argnumc = 0;
1461   shapep->argnum1_glib_context = false;
1462   shapep->argnum2_glib_context = false;
1463   shapep->argtotal = 0;
1464   string_list_init (&shapep->xcomments);
1465   string_list_destroy (&xcomments);
1466 }
1467
1468
1469 void
1470 insert_keyword_callshape (hash_table *table,
1471                           const char *keyword, size_t keyword_len,
1472                           const struct callshape *shape)
1473 {
1474   void *old_value;
1475
1476   if (hash_find_entry (table, keyword, keyword_len, &old_value))
1477     {
1478       /* Create a one-element 'struct callshapes'.  */
1479       struct callshapes *shapes = XMALLOC (struct callshapes);
1480       shapes->nshapes = 1;
1481       shapes->shapes[0] = *shape;
1482       keyword =
1483         (const char *) hash_insert_entry (table, keyword, keyword_len, shapes);
1484       if (keyword == NULL)
1485         abort ();
1486       shapes->keyword = keyword;
1487       shapes->keyword_len = keyword_len;
1488     }
1489   else
1490     {
1491       /* Found a 'struct callshapes'.  See whether it already contains the
1492          desired shape.  */
1493       struct callshapes *old_shapes = (struct callshapes *) old_value;
1494       bool found;
1495       size_t i;
1496
1497       found = false;
1498       for (i = 0; i < old_shapes->nshapes; i++)
1499         if (old_shapes->shapes[i].argnum1 == shape->argnum1
1500             && old_shapes->shapes[i].argnum2 == shape->argnum2
1501             && old_shapes->shapes[i].argnumc == shape->argnumc
1502             && old_shapes->shapes[i].argnum1_glib_context
1503                == shape->argnum1_glib_context
1504             && old_shapes->shapes[i].argnum2_glib_context
1505                == shape->argnum2_glib_context
1506             && old_shapes->shapes[i].argtotal == shape->argtotal)
1507           {
1508             old_shapes->shapes[i].xcomments = shape->xcomments;
1509             found = true;
1510             break;
1511           }
1512
1513       if (!found)
1514         {
1515           /* Replace the existing 'struct callshapes' with a new one.  */
1516           struct callshapes *shapes =
1517             (struct callshapes *)
1518             xmalloc (xsum (sizeof (struct callshapes),
1519                            xtimes (old_shapes->nshapes,
1520                                    sizeof (struct callshape))));
1521
1522           shapes->keyword = old_shapes->keyword;
1523           shapes->keyword_len = old_shapes->keyword_len;
1524           shapes->nshapes = old_shapes->nshapes + 1;
1525           for (i = 0; i < old_shapes->nshapes; i++)
1526             shapes->shapes[i] = old_shapes->shapes[i];
1527           shapes->shapes[i] = *shape;
1528           if (hash_set_value (table, keyword, keyword_len, shapes))
1529             abort ();
1530           free (old_shapes);
1531         }
1532     }
1533 }
1534
1535
1536 /* Null context.  */
1537 flag_context_ty null_context = { undecided, false, undecided, false };
1538
1539 /* Transparent context.  */
1540 flag_context_ty passthrough_context = { undecided, true, undecided, true };
1541
1542
1543 flag_context_ty
1544 inherited_context (flag_context_ty outer_context,
1545                    flag_context_ty modifier_context)
1546 {
1547   flag_context_ty result = modifier_context;
1548
1549   if (result.pass_format1)
1550     {
1551       result.is_format1 = outer_context.is_format1;
1552       result.pass_format1 = false;
1553     }
1554   if (result.pass_format2)
1555     {
1556       result.is_format2 = outer_context.is_format2;
1557       result.pass_format2 = false;
1558     }
1559   if (result.pass_format3)
1560     {
1561       result.is_format3 = outer_context.is_format3;
1562       result.pass_format3 = false;
1563     }
1564   return result;
1565 }
1566
1567
1568 /* Null context list iterator.  */
1569 flag_context_list_iterator_ty null_context_list_iterator = { 1, NULL };
1570
1571 /* Transparent context list iterator.  */
1572 static flag_context_list_ty passthrough_context_circular_list =
1573   {
1574     1,
1575     { undecided, true, undecided, true },
1576     &passthrough_context_circular_list
1577   };
1578 flag_context_list_iterator_ty passthrough_context_list_iterator =
1579   {
1580     1,
1581     &passthrough_context_circular_list
1582   };
1583
1584
1585 flag_context_list_iterator_ty
1586 flag_context_list_iterator (flag_context_list_ty *list)
1587 {
1588   flag_context_list_iterator_ty result;
1589
1590   result.argnum = 1;
1591   result.head = list;
1592   return result;
1593 }
1594
1595
1596 flag_context_ty
1597 flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter)
1598 {
1599   if (iter->head == NULL)
1600     return null_context;
1601   if (iter->argnum == iter->head->argnum)
1602     {
1603       flag_context_ty result = iter->head->flags;
1604
1605       /* Special casing of circular list.  */
1606       if (iter->head != iter->head->next)
1607         {
1608           iter->head = iter->head->next;
1609           iter->argnum++;
1610         }
1611
1612       return result;
1613     }
1614   else
1615     {
1616       iter->argnum++;
1617       return null_context;
1618     }
1619 }
1620
1621
1622 flag_context_list_ty *
1623 flag_context_list_table_lookup (flag_context_list_table_ty *flag_table,
1624                                 const void *key, size_t keylen)
1625 {
1626   void *entry;
1627
1628   if (flag_table->table != NULL
1629       && hash_find_entry (flag_table, key, keylen, &entry) == 0)
1630     return (flag_context_list_ty *) entry;
1631   else
1632     return NULL;
1633 }
1634
1635
1636 static void
1637 flag_context_list_table_insert (flag_context_list_table_ty *table,
1638                                 unsigned int index,
1639                                 const char *name_start, const char *name_end,
1640                                 int argnum, enum is_format value, bool pass)
1641 {
1642   char *allocated_name = NULL;
1643
1644   if (table == &flag_table_lisp)
1645     {
1646       /* Convert NAME to upper case.  */
1647       size_t name_len = name_end - name_start;
1648       char *name = allocated_name = (char *) xmalloca (name_len);
1649       size_t i;
1650
1651       for (i = 0; i < name_len; i++)
1652         name[i] = (name_start[i] >= 'a' && name_start[i] <= 'z'
1653                    ? name_start[i] - 'a' + 'A'
1654                    : name_start[i]);
1655       name_start = name;
1656       name_end = name + name_len;
1657     }
1658   else if (table == &flag_table_tcl)
1659     {
1660       /* Remove redundant "::" prefix.  */
1661       if (name_end - name_start > 2
1662           && name_start[0] == ':' && name_start[1] == ':')
1663         name_start += 2;
1664     }
1665
1666   /* Insert the pair (VALUE, PASS) at INDEX in the element numbered ARGNUM
1667      of the list corresponding to NAME in the TABLE.  */
1668   if (table->table == NULL)
1669     hash_init (table, 100);
1670   {
1671     void *entry;
1672
1673     if (hash_find_entry (table, name_start, name_end - name_start, &entry) != 0)
1674       {
1675         /* Create new hash table entry.  */
1676         flag_context_list_ty *list = XMALLOC (flag_context_list_ty);
1677         list->argnum = argnum;
1678         memset (&list->flags, '\0', sizeof (list->flags));
1679         switch (index)
1680           {
1681           case 0:
1682             list->flags.is_format1 = value;
1683             list->flags.pass_format1 = pass;
1684             break;
1685           case 1:
1686             list->flags.is_format2 = value;
1687             list->flags.pass_format2 = pass;
1688             break;
1689           case 2:
1690             list->flags.is_format3 = value;
1691             list->flags.pass_format3 = pass;
1692             break;
1693           default:
1694             abort ();
1695           }
1696         list->next = NULL;
1697         hash_insert_entry (table, name_start, name_end - name_start, list);
1698       }
1699     else
1700       {
1701         flag_context_list_ty *list = (flag_context_list_ty *)entry;
1702         flag_context_list_ty **lastp = NULL;
1703         /* Invariant: list == (lastp != NULL ? *lastp : entry).  */
1704
1705         while (list != NULL && list->argnum < argnum)
1706           {
1707             lastp = &list->next;
1708             list = *lastp;
1709           }
1710         if (list != NULL && list->argnum == argnum)
1711           {
1712             /* Add this flag to the current argument number.  */
1713             switch (index)
1714               {
1715               case 0:
1716                 list->flags.is_format1 = value;
1717                 list->flags.pass_format1 = pass;
1718                 break;
1719               case 1:
1720                 list->flags.is_format2 = value;
1721                 list->flags.pass_format2 = pass;
1722                 break;
1723               case 2:
1724                 list->flags.is_format3 = value;
1725                 list->flags.pass_format3 = pass;
1726                 break;
1727               default:
1728                 abort ();
1729               }
1730           }
1731         else if (lastp != NULL)
1732           {
1733             /* Add a new list entry for this argument number.  */
1734             list = XMALLOC (flag_context_list_ty);
1735             list->argnum = argnum;
1736             memset (&list->flags, '\0', sizeof (list->flags));
1737             switch (index)
1738               {
1739               case 0:
1740                 list->flags.is_format1 = value;
1741                 list->flags.pass_format1 = pass;
1742                 break;
1743               case 1:
1744                 list->flags.is_format2 = value;
1745                 list->flags.pass_format2 = pass;
1746                 break;
1747               case 2:
1748                 list->flags.is_format3 = value;
1749                 list->flags.pass_format3 = pass;
1750                 break;
1751               default:
1752                 abort ();
1753               }
1754             list->next = *lastp;
1755             *lastp = list;
1756           }
1757         else
1758           {
1759             /* Add a new list entry for this argument number, at the beginning
1760                of the list.  Since we don't have an API for replacing the
1761                value of a key in the hash table, we have to copy the first
1762                list element.  */
1763             flag_context_list_ty *copy = XMALLOC (flag_context_list_ty);
1764             *copy = *list;
1765
1766             list->argnum = argnum;
1767             memset (&list->flags, '\0', sizeof (list->flags));
1768             switch (index)
1769               {
1770               case 0:
1771                 list->flags.is_format1 = value;
1772                 list->flags.pass_format1 = pass;
1773                 break;
1774               case 1:
1775                 list->flags.is_format2 = value;
1776                 list->flags.pass_format2 = pass;
1777                 break;
1778               case 2:
1779                 list->flags.is_format3 = value;
1780                 list->flags.pass_format3 = pass;
1781                 break;
1782               default:
1783                 abort ();
1784               }
1785             list->next = copy;
1786           }
1787       }
1788   }
1789
1790   if (allocated_name != NULL)
1791     freea (allocated_name);
1792 }
1793
1794
1795 void
1796 xgettext_record_flag (const char *optionstring)
1797 {
1798   /* Check the string has at least two colons.  (Colons in the name are
1799      allowed, needed for the Lisp and the Tcl backends.)  */
1800   const char *colon1;
1801   const char *colon2;
1802
1803   for (colon2 = optionstring + strlen (optionstring); ; )
1804     {
1805       if (colon2 == optionstring)
1806         goto err;
1807       colon2--;
1808       if (*colon2 == ':')
1809         break;
1810     }
1811   for (colon1 = colon2; ; )
1812     {
1813       if (colon1 == optionstring)
1814         goto err;
1815       colon1--;
1816       if (*colon1 == ':')
1817         break;
1818     }
1819   {
1820     const char *name_start = optionstring;
1821     const char *name_end = colon1;
1822     const char *argnum_start = colon1 + 1;
1823     const char *argnum_end = colon2;
1824     const char *flag = colon2 + 1;
1825     int argnum;
1826
1827     /* Check the parts' syntax.  */
1828     if (name_end == name_start)
1829       goto err;
1830     if (argnum_end == argnum_start)
1831       goto err;
1832     {
1833       char *endp;
1834       argnum = strtol (argnum_start, &endp, 10);
1835       if (endp != argnum_end)
1836         goto err;
1837     }
1838     if (argnum <= 0)
1839       goto err;
1840
1841     /* Analyze the flag part.  */
1842     {
1843       bool pass;
1844
1845       pass = false;
1846       if (strlen (flag) >= 5 && memcmp (flag, "pass-", 5) == 0)
1847         {
1848           pass = true;
1849           flag += 5;
1850         }
1851
1852       /* Unlike po_parse_comment_special(), we don't accept "fuzzy",
1853          "wrap", or "check" here - it has no sense.  */
1854       if (strlen (flag) >= 7
1855           && memcmp (flag + strlen (flag) - 7, "-format", 7) == 0)
1856         {
1857           const char *p;
1858           size_t n;
1859           enum is_format value;
1860           size_t type;
1861
1862           p = flag;
1863           n = strlen (flag) - 7;
1864
1865           if (n >= 3 && memcmp (p, "no-", 3) == 0)
1866             {
1867               p += 3;
1868               n -= 3;
1869               value = no;
1870             }
1871           else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
1872             {
1873               p += 9;
1874               n -= 9;
1875               value = possible;
1876             }
1877           else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
1878             {
1879               p += 11;
1880               n -= 11;
1881               value = impossible;
1882             }
1883           else
1884             value = yes_according_to_context;
1885
1886           for (type = 0; type < NFORMATS; type++)
1887             if (strlen (format_language[type]) == n
1888                 && memcmp (format_language[type], p, n) == 0)
1889               {
1890                 switch (type)
1891                   {
1892                   case format_c:
1893                     flag_context_list_table_insert (&flag_table_c, 0,
1894                                                     name_start, name_end,
1895                                                     argnum, value, pass);
1896                     flag_context_list_table_insert (&flag_table_cxx_qt, 0,
1897                                                     name_start, name_end,
1898                                                     argnum, value, pass);
1899                     flag_context_list_table_insert (&flag_table_cxx_kde, 0,
1900                                                     name_start, name_end,
1901                                                     argnum, value, pass);
1902                     flag_context_list_table_insert (&flag_table_cxx_boost, 0,
1903                                                     name_start, name_end,
1904                                                     argnum, value, pass);
1905                     flag_context_list_table_insert (&flag_table_objc, 0,
1906                                                     name_start, name_end,
1907                                                     argnum, value, pass);
1908                     break;
1909                   case format_objc:
1910                     flag_context_list_table_insert (&flag_table_objc, 1,
1911                                                     name_start, name_end,
1912                                                     argnum, value, pass);
1913                     break;
1914                   case format_sh:
1915                     flag_context_list_table_insert (&flag_table_sh, 0,
1916                                                     name_start, name_end,
1917                                                     argnum, value, pass);
1918                     break;
1919                   case format_python:
1920                     flag_context_list_table_insert (&flag_table_python, 0,
1921                                                     name_start, name_end,
1922                                                     argnum, value, pass);
1923                     break;
1924                   case format_python_brace:
1925                     flag_context_list_table_insert (&flag_table_python, 0,
1926                                                     name_start, name_end,
1927                                                     argnum, value, pass);
1928                     break;
1929                   case format_lisp:
1930                     flag_context_list_table_insert (&flag_table_lisp, 0,
1931                                                     name_start, name_end,
1932                                                     argnum, value, pass);
1933                     break;
1934                   case format_elisp:
1935                     flag_context_list_table_insert (&flag_table_elisp, 0,
1936                                                     name_start, name_end,
1937                                                     argnum, value, pass);
1938                     break;
1939                   case format_librep:
1940                     flag_context_list_table_insert (&flag_table_librep, 0,
1941                                                     name_start, name_end,
1942                                                     argnum, value, pass);
1943                     break;
1944                   case format_scheme:
1945                     flag_context_list_table_insert (&flag_table_scheme, 0,
1946                                                     name_start, name_end,
1947                                                     argnum, value, pass);
1948                     break;
1949                   case format_smalltalk:
1950                     break;
1951                   case format_java:
1952                     flag_context_list_table_insert (&flag_table_java, 0,
1953                                                     name_start, name_end,
1954                                                     argnum, value, pass);
1955                     break;
1956                   case format_csharp:
1957                     flag_context_list_table_insert (&flag_table_csharp, 0,
1958                                                     name_start, name_end,
1959                                                     argnum, value, pass);
1960                     break;
1961                   case format_awk:
1962                     flag_context_list_table_insert (&flag_table_awk, 0,
1963                                                     name_start, name_end,
1964                                                     argnum, value, pass);
1965                     break;
1966                   case format_pascal:
1967                     break;
1968                   case format_ycp:
1969                     flag_context_list_table_insert (&flag_table_ycp, 0,
1970                                                     name_start, name_end,
1971                                                     argnum, value, pass);
1972                     break;
1973                   case format_tcl:
1974                     flag_context_list_table_insert (&flag_table_tcl, 0,
1975                                                     name_start, name_end,
1976                                                     argnum, value, pass);
1977                     break;
1978                   case format_perl:
1979                     flag_context_list_table_insert (&flag_table_perl, 0,
1980                                                     name_start, name_end,
1981                                                     argnum, value, pass);
1982                     break;
1983                   case format_perl_brace:
1984                     flag_context_list_table_insert (&flag_table_perl, 1,
1985                                                     name_start, name_end,
1986                                                     argnum, value, pass);
1987                     break;
1988                   case format_php:
1989                     flag_context_list_table_insert (&flag_table_php, 0,
1990                                                     name_start, name_end,
1991                                                     argnum, value, pass);
1992                     break;
1993                   case format_gcc_internal:
1994                     flag_context_list_table_insert (&flag_table_gcc_internal, 0,
1995                                                     name_start, name_end,
1996                                                     argnum, value, pass);
1997                     break;
1998                   case format_gfc_internal:
1999                     flag_context_list_table_insert (&flag_table_gcc_internal, 1,
2000                                                     name_start, name_end,
2001                                                     argnum, value, pass);
2002                     break;
2003                   case format_qt:
2004                     flag_context_list_table_insert (&flag_table_cxx_qt, 1,
2005                                                     name_start, name_end,
2006                                                     argnum, value, pass);
2007                     break;
2008                   case format_qt_plural:
2009                     flag_context_list_table_insert (&flag_table_cxx_qt, 2,
2010                                                     name_start, name_end,
2011                                                     argnum, value, pass);
2012                     break;
2013                   case format_kde:
2014                     flag_context_list_table_insert (&flag_table_cxx_kde, 1,
2015                                                     name_start, name_end,
2016                                                     argnum, value, pass);
2017                     break;
2018                   case format_kde_kuit:
2019                     flag_context_list_table_insert (&flag_table_cxx_kde, 2,
2020                                                     name_start, name_end,
2021                                                     argnum, value, pass);
2022                     break;
2023                   case format_boost:
2024                     flag_context_list_table_insert (&flag_table_cxx_boost, 1,
2025                                                     name_start, name_end,
2026                                                     argnum, value, pass);
2027                     break;
2028                   case format_lua:
2029                     flag_context_list_table_insert (&flag_table_lua, 0,
2030                                                     name_start, name_end,
2031                                                     argnum, value, pass);
2032                     break;
2033                   case format_javascript:
2034                     flag_context_list_table_insert (&flag_table_javascript, 0,
2035                                                     name_start, name_end,
2036                                                     argnum, value, pass);
2037                     break;
2038                   default:
2039                     abort ();
2040                   }
2041                 return;
2042               }
2043           /* If the flag is not among the valid values, the optionstring is
2044              invalid.  */
2045         }
2046     }
2047   }
2048
2049 err:
2050   error (EXIT_FAILURE, 0, _("\
2051 A --flag argument doesn't have the <keyword>:<argnum>:[pass-]<flag> syntax: %s"),
2052          optionstring);
2053 }
2054
2055
2056 /* Comment handling: There is a list of automatic comments that may be appended
2057    to the next message.  Used by remember_a_message().  */
2058
2059 static string_list_ty *comment;
2060
2061 static void
2062 xgettext_comment_add (const char *str)
2063 {
2064   if (comment == NULL)
2065     comment = string_list_alloc ();
2066   string_list_append (comment, str);
2067 }
2068
2069 static const char *
2070 xgettext_comment (size_t n)
2071 {
2072   if (comment == NULL || n >= comment->nitems)
2073     return NULL;
2074   return comment->item[n];
2075 }
2076
2077 static void
2078 xgettext_comment_reset ()
2079 {
2080   if (comment != NULL)
2081     {
2082       string_list_free (comment);
2083       comment = NULL;
2084     }
2085 }
2086
2087
2088 refcounted_string_list_ty *savable_comment;
2089
2090 void
2091 savable_comment_add (const char *str)
2092 {
2093   if (savable_comment == NULL)
2094     {
2095       savable_comment = XMALLOC (refcounted_string_list_ty);
2096       savable_comment->refcount = 1;
2097       string_list_init (&savable_comment->contents);
2098     }
2099   else if (savable_comment->refcount > 1)
2100     {
2101       /* Unshare the list by making copies.  */
2102       struct string_list_ty *oldcontents;
2103       size_t i;
2104
2105       savable_comment->refcount--;
2106       oldcontents = &savable_comment->contents;
2107
2108       savable_comment = XMALLOC (refcounted_string_list_ty);
2109       savable_comment->refcount = 1;
2110       string_list_init (&savable_comment->contents);
2111       for (i = 0; i < oldcontents->nitems; i++)
2112         string_list_append (&savable_comment->contents, oldcontents->item[i]);
2113     }
2114   string_list_append (&savable_comment->contents, str);
2115 }
2116
2117 void
2118 savable_comment_reset ()
2119 {
2120   drop_reference (savable_comment);
2121   savable_comment = NULL;
2122 }
2123
2124 static void
2125 savable_comment_to_xgettext_comment (refcounted_string_list_ty *rslp)
2126 {
2127   xgettext_comment_reset ();
2128   if (rslp != NULL)
2129     {
2130       size_t i;
2131
2132       for (i = 0; i < rslp->contents.nitems; i++)
2133         xgettext_comment_add (rslp->contents.item[i]);
2134     }
2135 }
2136
2137 refcounted_string_list_ty *
2138 savable_comment_convert_encoding (refcounted_string_list_ty *comment,
2139                                   lex_pos_ty *pos)
2140 {
2141   refcounted_string_list_ty *result;
2142   size_t i;
2143
2144   result = XMALLOC (refcounted_string_list_ty);
2145   result->refcount = 1;
2146   string_list_init (&result->contents);
2147
2148   for (i = 0; i < comment->contents.nitems; i++)
2149     {
2150       const char *old_string = comment->contents.item[i];
2151       char *string = from_current_source_encoding (old_string,
2152                                                    lc_comment,
2153                                                    pos->file_name,
2154                                                    pos->line_number);
2155       string_list_append (&result->contents, string);
2156       if (string != old_string)
2157         free (string);
2158     }
2159
2160   return result;
2161 }
2162
2163
2164
/* Open the input file FN for reading and return its stream.
   "-" designates standard input.  An absolute FN is opened as is.  Any
   other FN is looked up in the directories returned by dir_list_nth, in
   order; the first one in which it can be opened wins.
   *REAL_FILE_NAME_P receives the name of the file actually opened, and
   *LOGICAL_FILE_NAME_P the name under which it was requested; both are
   freshly allocated.
   Failure to open the file is reported through error (EXIT_FAILURE, ...).  */
static FILE *
xgettext_open (const char *fn,
               char **logical_file_name_p, char **real_file_name_p)
{
  FILE *stream;
  char *real_name;
  char *logical_name;

  if (strcmp (fn, "-") == 0)
    {
      real_name = xstrdup (_("standard input"));
      logical_name = xstrdup (real_name);
      stream = stdin;
    }
  else if (IS_ABSOLUTE_PATH (fn))
    {
      real_name = xstrdup (fn);
      stream = fopen (fn, "r");
      if (stream == NULL)
        error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), fn);
      logical_name = xstrdup (real_name);
    }
  else
    {
      int j = 0;

      do
        {
          const char *dir = dir_list_nth (j);

          /* The search path is exhausted.  */
          if (dir == NULL)
            error (EXIT_FAILURE, ENOENT, _("\
error while opening \"%s\" for reading"), fn);

          real_name = xconcatenated_filename (dir, fn, NULL);

          stream = fopen (real_name, "r");
          if (stream == NULL)
            {
              /* Only a missing file lets the search continue.  */
              if (errno != ENOENT)
                error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), real_name);
              free (real_name);
            }
          ++j;
        }
      while (stream == NULL);

      /* Here REAL_NAME is the file which was found along the directory
         search path, whereas the logical file name, which is what
         xgettext reports, is set to the file name which was searched
         for.  */
      logical_name = xstrdup (fn);
    }

  *logical_file_name_p = logical_name;
  *real_file_name_p = real_name;
  return stream;
}
2224
2225
2226 /* Language dependent format string parser.
2227    NULL if the language has no notion of format strings.  */
2228 static struct formatstring_parser *current_formatstring_parser1;
2229 static struct formatstring_parser *current_formatstring_parser2;
2230 static struct formatstring_parser *current_formatstring_parser3;
2231
2232 static struct literalstring_parser *current_literalstring_parser;
2233
2234 static void
2235 extract_from_file (const char *file_name, extractor_ty extractor,
2236                    msgdomain_list_ty *mdlp)
2237 {
2238   char *logical_file_name;
2239   char *real_file_name;
2240   FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
2241
2242   /* Set the default for the source file encoding.  May be overridden by
2243      the extractor function.  */
2244   xgettext_current_source_encoding = xgettext_global_source_encoding;
2245 #if HAVE_ICONV
2246   xgettext_current_source_iconv = xgettext_global_source_iconv;
2247 #endif
2248
2249   current_formatstring_parser1 = extractor.formatstring_parser1;
2250   current_formatstring_parser2 = extractor.formatstring_parser2;
2251   current_formatstring_parser3 = extractor.formatstring_parser3;
2252   current_literalstring_parser = extractor.literalstring_parser;
2253   extractor.func (fp, real_file_name, logical_file_name, extractor.flag_table,
2254                   mdlp);
2255
2256   if (fp != stdin)
2257     fclose (fp);
2258   free (logical_file_name);
2259   free (real_file_name);
2260 }
2261
2262 static message_ty *
2263 xgettext_its_extract_callback (message_list_ty *mlp,
2264                                const char *msgctxt,
2265                                const char *msgid,
2266                                lex_pos_ty *pos,
2267                                const char *extracted_comment,
2268                                const char *marker,
2269                                enum its_whitespace_type_ty whitespace)
2270 {
2271   message_ty *message;
2272
2273   message = remember_a_message (mlp,
2274                                 msgctxt == NULL ? NULL : xstrdup (msgctxt),
2275                                 xstrdup (msgid),
2276                                 null_context, pos,
2277                                 extracted_comment, NULL);
2278
2279   if (add_itstool_comments)
2280     {
2281       char *dot = xasprintf ("(itstool) path: %s", marker);
2282       message_comment_dot_append (message, dot);
2283       free (dot);
2284
2285       if (whitespace == ITS_WHITESPACE_PRESERVE)
2286         message->do_wrap = no;
2287     }
2288
2289   return message;
2290 }
2291
2292 static void
2293 extract_from_xml_file (const char *file_name,
2294                        its_rule_list_ty *rules,
2295                        msgdomain_list_ty *mdlp)
2296 {
2297   char *logical_file_name;
2298   char *real_file_name;
2299   FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
2300
2301   /* The default encoding for XML is UTF-8.  It can be overridden by
2302      an XML declaration in the XML file itself, not through the
2303      --from-code option.  */
2304   xgettext_current_source_encoding = po_charset_utf8;
2305
2306 #if HAVE_ICONV
2307   xgettext_current_source_iconv = xgettext_global_source_iconv;
2308 #endif
2309
2310   its_rule_list_extract (rules, fp, real_file_name, logical_file_name,
2311                          NULL,
2312                          mdlp,
2313                          xgettext_its_extract_callback);
2314
2315   if (fp != stdin)
2316     fclose (fp);
2317   free (logical_file_name);
2318   free (real_file_name);
2319 }
2320
2321
2322
2323 /* Error message about non-ASCII character in a specific lexical context.  */
2324 char *
2325 non_ascii_error_message (lexical_context_ty lcontext,
2326                          const char *file_name, size_t line_number)
2327 {
2328   char buffer[21];
2329   char *errmsg;
2330
2331   if (line_number == (size_t)(-1))
2332     buffer[0] = '\0';
2333   else
2334     sprintf (buffer, ":%ld", (long) line_number);
2335
2336   switch (lcontext)
2337     {
2338     case lc_outside:
2339       errmsg =
2340         xasprintf (_("Non-ASCII character at %s%s."), file_name, buffer);
2341       break;
2342     case lc_comment:
2343       errmsg =
2344         xasprintf (_("Non-ASCII comment at or before %s%s."),
2345                    file_name, buffer);
2346       break;
2347     case lc_string:
2348       errmsg =
2349         xasprintf (_("Non-ASCII string at %s%s."), file_name, buffer);
2350       break;
2351     default:
2352       abort ();
2353     }
2354   return errmsg;
2355 }
2356
2357 /* Convert the given string from xgettext_current_source_encoding to
2358    the output file encoding (i.e. ASCII or UTF-8).
2359    The resulting string is either the argument string, or freshly allocated.
2360    The file_name and line_number are only used for error message purposes.  */
2361 char *
2362 from_current_source_encoding (const char *string,
2363                               lexical_context_ty lcontext,
2364                               const char *file_name, size_t line_number)
2365 {
2366   if (xgettext_current_source_encoding == po_charset_ascii)
2367     {
2368       if (!is_ascii_string (string))
2369         {
2370           multiline_error (xstrdup (""),
2371                            xasprintf ("%s\n%s\n",
2372                                       non_ascii_error_message (lcontext,
2373                                                                file_name,
2374                                                                line_number),
2375                                       _("\
2376 Please specify the source encoding through --from-code.")));
2377           exit (EXIT_FAILURE);
2378         }
2379     }
2380   else if (xgettext_current_source_encoding != po_charset_utf8)
2381     {
2382 #if HAVE_ICONV
2383       struct conversion_context context;
2384
2385       context.from_code = xgettext_current_source_encoding;
2386       context.to_code = po_charset_utf8;
2387       context.from_filename = file_name;
2388       context.message = NULL;
2389
2390       string = convert_string_directly (xgettext_current_source_iconv, string,
2391                                         &context);
2392 #else
2393       /* If we don't have iconv(), the only supported values for
2394          xgettext_global_source_encoding and thus also for
2395          xgettext_current_source_encoding are ASCII and UTF-8.
2396          convert_string_directly() should not be called in this case.  */
2397       abort ();
2398 #endif
2399     }
2400
2401   return (char *) string;
2402 }
2403
/* Replace the variable STR by the result of from_current_source_encoding
   applied to it in the lexical context LCTX.  A pointer named 'pos' with
   file_name and line_number members must be in scope where the macro is
   used.  Note that the expansion already ends with a semicolon.  */
#define CONVERT_STRING(str, lctx) \
  str = from_current_source_encoding (str, lctx, \
                                      pos->file_name, pos->line_number);
2407
2408
2409 /* Update the is_format[] flags depending on the information given in the
2410    context.  */
2411 static void
2412 set_format_flags_from_context (enum is_format is_format[NFORMATS],
2413                                flag_context_ty context, const char *string,
2414                                lex_pos_ty *pos, const char *pretty_msgstr)
2415 {
2416   size_t i;
2417
2418   if (context.is_format1 != undecided
2419       || context.is_format2 != undecided
2420       || context.is_format3 != undecided)
2421     for (i = 0; i < NFORMATS; i++)
2422       {
2423         if (is_format[i] == undecided)
2424           {
2425             if (formatstring_parsers[i] == current_formatstring_parser1
2426                 && context.is_format1 != undecided)
2427               is_format[i] = (enum is_format) context.is_format1;
2428             if (formatstring_parsers[i] == current_formatstring_parser2
2429                 && context.is_format2 != undecided)
2430               is_format[i] = (enum is_format) context.is_format2;
2431             if (formatstring_parsers[i] == current_formatstring_parser3
2432                 && context.is_format3 != undecided)
2433               is_format[i] = (enum is_format) context.is_format3;
2434           }
2435         if (possible_format_p (is_format[i]))
2436           {
2437             struct formatstring_parser *parser = formatstring_parsers[i];
2438             char *invalid_reason = NULL;
2439             void *descr = parser->parse (string, false, NULL, &invalid_reason);
2440
2441             if (descr != NULL)
2442               parser->free (descr);
2443             else
2444               {
2445                 /* The string is not a valid format string.  */
2446                 if (is_format[i] != possible)
2447                   {
2448                     char buffer[21];
2449
2450                     error_with_progname = false;
2451                     if (pos->line_number == (size_t)(-1))
2452                       buffer[0] = '\0';
2453                     else
2454                       sprintf (buffer, ":%ld", (long) pos->line_number);
2455                     multiline_warning (xasprintf (_("%s%s: warning: "),
2456                                                   pos->file_name, buffer),
2457                                        xasprintf (is_format[i] == yes_according_to_context
2458                                                   ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n")
2459                                                   : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
2460                                                   pretty_msgstr,
2461                                                   format_language_pretty[i],
2462                                                   invalid_reason));
2463                     error_with_progname = true;
2464                   }
2465
2466                 is_format[i] = impossible;
2467                 free (invalid_reason);
2468               }
2469           }
2470       }
2471 }
2472
2473
2474 static void
2475 warn_format_string (enum is_format is_format[NFORMATS], const char *string,
2476                     lex_pos_ty *pos, const char *pretty_msgstr)
2477 {
2478   if (possible_format_p (is_format[format_python])
2479       && get_python_format_unnamed_arg_count (string) > 1)
2480     {
2481       char buffer[21];
2482
2483       error_with_progname = false;
2484       if (pos->line_number == (size_t)(-1))
2485         buffer[0] = '\0';
2486       else
2487         sprintf (buffer, ":%ld", (long) pos->line_number);
2488       multiline_warning (xasprintf (_("%s%s: warning: "),
2489                                     pos->file_name, buffer),
2490                          xasprintf (_("\
2491 '%s' format string with unnamed arguments cannot be properly localized:\n\
2492 The translator cannot reorder the arguments.\n\
2493 Please consider using a format string with named arguments,\n\
2494 and a mapping instead of a tuple for the arguments.\n"),
2495                                     pretty_msgstr));
2496       error_with_progname = true;
2497     }
2498 }
2499
2500
2501 message_ty *
2502 remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
2503                     flag_context_ty context, lex_pos_ty *pos,
2504                     const char *extracted_comment,
2505                     refcounted_string_list_ty *comment)
2506 {
2507   enum is_format is_format[NFORMATS];
2508   struct argument_range range;
2509   enum is_wrap do_wrap;
2510   enum is_syntax_check do_syntax_check[NSYNTAXCHECKS];
2511   message_ty *mp;
2512   char *msgstr;
2513   size_t i;
2514
2515   /* See whether we shall exclude this message.  */
2516   if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL)
2517     {
2518       /* Tell the lexer to reset its comment buffer, so that the next
2519          message gets the correct comments.  */
2520       xgettext_comment_reset ();
2521       savable_comment_reset ();
2522
2523       if (msgctxt != NULL)
2524         free (msgctxt);
2525       free (msgid);
2526
2527       return NULL;
2528     }
2529
2530   savable_comment_to_xgettext_comment (comment);
2531
2532   for (i = 0; i < NFORMATS; i++)
2533     is_format[i] = undecided;
2534   range.min = -1;
2535   range.max = -1;
2536   do_wrap = undecided;
2537   for (i = 0; i < NSYNTAXCHECKS; i++)
2538     do_syntax_check[i] = undecided;
2539
2540   if (msgctxt != NULL)
2541     CONVERT_STRING (msgctxt, lc_string);
2542   CONVERT_STRING (msgid, lc_string);
2543
2544   if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
2545     {
2546       char buffer[21];
2547
2548       error_with_progname = false;
2549       if (pos->line_number == (size_t)(-1))
2550         buffer[0] = '\0';
2551       else
2552         sprintf (buffer, ":%ld", (long) pos->line_number);
2553       multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name,
2554                                     buffer),
2555                          xstrdup (_("\
2556 Empty msgid.  It is reserved by GNU gettext:\n\
2557 gettext(\"\") returns the header entry with\n\
2558 meta information, not the empty string.\n")));
2559       error_with_progname = true;
2560     }
2561
2562   /* See if we have seen this message before.  */
2563   mp = message_list_search (mlp, msgctxt, msgid);
2564   if (mp != NULL)
2565     {
2566       if (msgctxt != NULL)
2567         free (msgctxt);
2568       free (msgid);
2569       for (i = 0; i < NFORMATS; i++)
2570         is_format[i] = mp->is_format[i];
2571       do_wrap = mp->do_wrap;
2572       for (i = 0; i < NSYNTAXCHECKS; i++)
2573         do_syntax_check[i] = mp->do_syntax_check[i];
2574     }
2575   else
2576     {
2577       /* Construct the msgstr from the prefix and suffix, otherwise use the
2578          empty string.  */
2579       if (msgstr_prefix)
2580         msgstr = xasprintf ("%s%s%s", msgstr_prefix, msgid, msgstr_suffix);
2581       else
2582         msgstr = "";
2583
2584       /* Allocate a new message and append the message to the list.  */
2585       mp = message_alloc (msgctxt, msgid, NULL, msgstr, strlen (msgstr) + 1,
2586                           pos);
2587       /* Do not free msgctxt and msgid.  */
2588       message_list_append (mlp, mp);
2589     }
2590
2591   /* Determine whether the context specifies that the msgid is a format
2592      string.  */
2593   set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid");
2594
2595   /* Ask the lexer for the comments it has seen.  */
2596   {
2597     size_t nitems_before;
2598     size_t nitems_after;
2599     int j;
2600     bool add_all_remaining_comments;
2601     /* The string before the comment tag.  For example, If "** TRANSLATORS:"
2602        is seen and the comment tag is "TRANSLATORS:",
2603        then comment_tag_prefix is set to "** ".  */
2604     const char *comment_tag_prefix = "";
2605     size_t comment_tag_prefix_length = 0;
2606
2607     nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
2608
2609     if (extracted_comment != NULL)
2610       {
2611         char *copy = xstrdup (extracted_comment);
2612         char *rest;
2613
2614         rest = copy;
2615         while (*rest != '\0')
2616           {
2617             char *newline = strchr (rest, '\n');
2618
2619             if (newline != NULL)
2620               {
2621                 *newline = '\0';
2622                 message_comment_dot_append (mp, rest);
2623                 rest = newline + 1;
2624               }
2625             else
2626               {
2627                 message_comment_dot_append (mp, rest);
2628                 break;
2629               }
2630           }
2631         free (copy);
2632       }
2633
2634     add_all_remaining_comments = add_all_comments;
2635     for (j = 0; ; ++j)
2636       {
2637         const char *s = xgettext_comment (j);
2638         const char *t;
2639         if (s == NULL)
2640           break;
2641
2642         CONVERT_STRING (s, lc_comment);
2643
2644         /* To reduce the possibility of unwanted matches we do a two
2645            step match: the line must contain 'xgettext:' and one of
2646            the possible format description strings.  */
2647         if ((t = c_strstr (s, "xgettext:")) != NULL)
2648           {
2649             bool tmp_fuzzy;
2650             enum is_format tmp_format[NFORMATS];
2651             struct argument_range tmp_range;
2652             enum is_wrap tmp_wrap;
2653             enum is_syntax_check tmp_syntax_check[NSYNTAXCHECKS];
2654             bool interesting;
2655
2656             t += strlen ("xgettext:");
2657
2658             po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range,
2659                                       &tmp_wrap, tmp_syntax_check);
2660
2661             interesting = false;
2662             for (i = 0; i < NFORMATS; i++)
2663               if (tmp_format[i] != undecided)
2664                 {
2665                   is_format[i] = tmp_format[i];
2666                   interesting = true;
2667                 }
2668             if (has_range_p (tmp_range))
2669               {
2670                 range = tmp_range;
2671                 interesting = true;
2672               }
2673             if (tmp_wrap != undecided)
2674               {
2675                 do_wrap = tmp_wrap;
2676                 interesting = true;
2677               }
2678             for (i = 0; i < NSYNTAXCHECKS; i++)
2679               if (tmp_syntax_check[i] != undecided)
2680                 {
2681                   do_syntax_check[i] = tmp_syntax_check[i];
2682                   interesting = true;
2683                 }
2684
2685             /* If the "xgettext:" marker was followed by an interesting
2686                keyword, and we updated our is_format/do_wrap variables,
2687                we don't print the comment as a #. comment.  */
2688             if (interesting)
2689               continue;
2690           }
2691
2692         if (!add_all_remaining_comments && comment_tag != NULL)
2693           {
2694             /* When the comment tag is seen, it drags in not only the line
2695                which it starts, but all remaining comment lines.  */
2696             if ((t = c_strstr (s, comment_tag)) != NULL)
2697               {
2698                 add_all_remaining_comments = true;
2699                 comment_tag_prefix = s;
2700                 comment_tag_prefix_length = t - s;
2701               }
2702           }
2703
2704         if (add_all_remaining_comments)
2705           {
2706             if (strncmp (s, comment_tag_prefix, comment_tag_prefix_length) == 0)
2707               s += comment_tag_prefix_length;
2708             message_comment_dot_append (mp, s);
2709           }
2710       }
2711
2712     nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
2713
2714     /* Don't add the comments if they are a repetition of the tail of the
2715        already present comments.  This avoids unneeded duplication if the
2716        same message appears several times, each time with the same comment.  */
2717     if (nitems_before < nitems_after)
2718       {
2719         size_t added = nitems_after - nitems_before;
2720
2721         if (added <= nitems_before)
2722           {
2723             bool repeated = true;
2724
2725             for (i = 0; i < added; i++)
2726               if (strcmp (mp->comment_dot->item[nitems_before - added + i],
2727                           mp->comment_dot->item[nitems_before + i]) != 0)
2728                 {
2729                   repeated = false;
2730                   break;
2731                 }
2732
2733             if (repeated)
2734               {
2735                 for (i = 0; i < added; i++)
2736                   free ((char *) mp->comment_dot->item[nitems_before + i]);
2737                 mp->comment_dot->nitems = nitems_before;
2738               }
2739           }
2740       }
2741   }
2742
2743   /* If it is not already decided, through programmer comments, whether the
2744      msgid is a format string, examine the msgid.  This is a heuristic.  */
2745   for (i = 0; i < NFORMATS; i++)
2746     {
2747       if (is_format[i] == undecided
2748           && (formatstring_parsers[i] == current_formatstring_parser1
2749               || formatstring_parsers[i] == current_formatstring_parser2
2750               || formatstring_parsers[i] == current_formatstring_parser3)
2751           /* But avoid redundancy: objc-format is stronger than c-format.  */
2752           && !(i == format_c && possible_format_p (is_format[format_objc]))
2753           && !(i == format_objc && possible_format_p (is_format[format_c]))
2754           /* Avoid flagging a string as c-format when it's known to be a
2755              qt-format or qt-plural-format or kde-format or boost-format
2756              string.  */
2757           && !(i == format_c
2758                && (possible_format_p (is_format[format_qt])
2759                    || possible_format_p (is_format[format_qt_plural])
2760                    || possible_format_p (is_format[format_kde])
2761                    || possible_format_p (is_format[format_kde_kuit])
2762                    || possible_format_p (is_format[format_boost])))
2763           /* Avoid flagging a string as kde-format when it's known to
2764              be a kde-kuit-format string.  */
2765           && !(i == format_kde
2766                && possible_format_p (is_format[format_kde_kuit]))
2767           /* Avoid flagging a string as kde-kuit-format when it's
2768              known to be a kde-format string.  Note that this relies
2769              on the fact that format_kde < format_kde_kuit, so a
2770              string will be marked as kde-format if both are
2771              undecided.  */
2772           && !(i == format_kde_kuit
2773                && possible_format_p (is_format[format_kde])))
2774         {
2775           struct formatstring_parser *parser = formatstring_parsers[i];
2776           char *invalid_reason = NULL;
2777           void *descr = parser->parse (mp->msgid, false, NULL, &invalid_reason);
2778
2779           if (descr != NULL)
2780             {
2781               /* msgid is a valid format string.  We mark only those msgids
2782                  as format strings which contain at least one format directive
2783                  and thus are format strings with a high probability.  We
2784                  don't mark strings without directives as format strings,
2785                  because that would force the programmer to add
2786                  "xgettext: no-c-format" anywhere where a translator wishes
2787                  to use a percent sign.  So, the msgfmt checking will not be
2788                  perfect.  Oh well.  */
2789               if (parser->get_number_of_directives (descr) > 0
2790                   && !(parser->is_unlikely_intentional != NULL
2791                        && parser->is_unlikely_intentional (descr)))
2792                 is_format[i] = possible;
2793
2794               parser->free (descr);
2795             }
2796           else
2797             {
2798               /* msgid is not a valid format string.  */
2799               is_format[i] = impossible;
2800               free (invalid_reason);
2801             }
2802         }
2803       mp->is_format[i] = is_format[i];
2804     }
2805
2806   if (has_range_p (range))
2807     {
2808       if (has_range_p (mp->range))
2809         {
2810           if (range.min < mp->range.min)
2811             mp->range.min = range.min;
2812           if (range.max > mp->range.max)
2813             mp->range.max = range.max;
2814         }
2815       else
2816         mp->range = range;
2817     }
2818
2819   mp->do_wrap = do_wrap == no ? no : yes;       /* By default we wrap.  */
2820
2821   for (i = 0; i < NSYNTAXCHECKS; i++)
2822     {
2823       if (do_syntax_check[i] == undecided)
2824         do_syntax_check[i] = default_syntax_check[i] == yes ? yes : no;
2825
2826       mp->do_syntax_check[i] = do_syntax_check[i];
2827     }
2828
2829   /* Warn about the use of non-reorderable format strings when the programming
2830      language also provides reorderable format strings.  */
2831   warn_format_string (is_format, mp->msgid, pos, "msgid");
2832
2833   /* Remember where we saw this msgid.  */
2834   message_comment_filepos (mp, pos->file_name, pos->line_number);
2835
2836   /* Tell the lexer to reset its comment buffer, so that the next
2837      message gets the correct comments.  */
2838   xgettext_comment_reset ();
2839   savable_comment_reset ();
2840
2841   return mp;
2842 }
2843
2844
2845 void
2846 remember_a_message_plural (message_ty *mp, char *string,
2847                            flag_context_ty context, lex_pos_ty *pos,
2848                            refcounted_string_list_ty *comment)
2849 {
2850   char *msgid_plural;
2851   char *msgstr1;
2852   size_t msgstr1_len;
2853   char *msgstr;
2854   size_t i;
2855
2856   msgid_plural = string;
2857
2858   savable_comment_to_xgettext_comment (comment);
2859
2860   CONVERT_STRING (msgid_plural, lc_string);
2861
2862   /* See if the message is already a plural message.  */
2863   if (mp->msgid_plural == NULL)
2864     {
2865       mp->msgid_plural = msgid_plural;
2866
2867       /* Construct the first plural form from the prefix and suffix,
2868          otherwise use the empty string.  The translator will have to
2869          provide additional plural forms.  */
2870       if (msgstr_prefix)
2871         msgstr1 =
2872           xasprintf ("%s%s%s", msgstr_prefix, msgid_plural, msgstr_suffix);
2873       else
2874         msgstr1 = "";
2875       msgstr1_len = strlen (msgstr1) + 1;
2876       msgstr = XNMALLOC (mp->msgstr_len + msgstr1_len, char);
2877       memcpy (msgstr, mp->msgstr, mp->msgstr_len);
2878       memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len);
2879       mp->msgstr = msgstr;
2880       mp->msgstr_len = mp->msgstr_len + msgstr1_len;
2881       if (msgstr_prefix)
2882         free (msgstr1);
2883
2884       /* Determine whether the context specifies that the msgid_plural is a
2885          format string.  */
2886       set_format_flags_from_context (mp->is_format, context, mp->msgid_plural,
2887                                      pos, "msgid_plural");
2888
2889       /* If it is not already decided, through programmer comments or
2890          the msgid, whether the msgid is a format string, examine the
2891          msgid_plural.  This is a heuristic.  */
2892       for (i = 0; i < NFORMATS; i++)
2893         if ((formatstring_parsers[i] == current_formatstring_parser1
2894              || formatstring_parsers[i] == current_formatstring_parser2
2895              || formatstring_parsers[i] == current_formatstring_parser3)
2896             && (mp->is_format[i] == undecided || mp->is_format[i] == possible)
2897             /* But avoid redundancy: objc-format is stronger than c-format.  */
2898             && !(i == format_c
2899                  && possible_format_p (mp->is_format[format_objc]))
2900             && !(i == format_objc
2901                  && possible_format_p (mp->is_format[format_c]))
2902             /* Avoid flagging a string as c-format when it's known to be a
2903                qt-format or qt-plural-format or boost-format string.  */
2904             && !(i == format_c
2905                  && (possible_format_p (mp->is_format[format_qt])
2906                      || possible_format_p (mp->is_format[format_qt_plural])
2907                      || possible_format_p (mp->is_format[format_kde])
2908                      || possible_format_p (mp->is_format[format_kde_kuit])
2909                      || possible_format_p (mp->is_format[format_boost])))
2910             /* Avoid flagging a string as kde-format when it's known
2911                to be a kde-kuit-format string.  */
2912             && !(i == format_kde
2913                  && possible_format_p (mp->is_format[format_kde_kuit]))
2914             /* Avoid flagging a string as kde-kuit-format when it's
2915                known to be a kde-format string.  Note that this relies
2916                on the fact that format_kde < format_kde_kuit, so a
2917                string will be marked as kde-format if both are
2918                undecided.  */
2919             && !(i == format_kde_kuit
2920                  && possible_format_p (mp->is_format[format_kde])))
2921           {
2922             struct formatstring_parser *parser = formatstring_parsers[i];
2923             char *invalid_reason = NULL;
2924             void *descr =
2925               parser->parse (mp->msgid_plural, false, NULL, &invalid_reason);
2926
2927             if (descr != NULL)
2928               {
2929                 /* Same heuristic as in remember_a_message.  */
2930                 if (parser->get_number_of_directives (descr) > 0
2931                     && !(parser->is_unlikely_intentional != NULL
2932                          && parser->is_unlikely_intentional (descr)))
2933                   mp->is_format[i] = possible;
2934
2935                 parser->free (descr);
2936               }
2937             else
2938               {
2939                 /* msgid_plural is not a valid format string.  */
2940                 mp->is_format[i] = impossible;
2941                 free (invalid_reason);
2942               }
2943           }
2944
2945       /* Warn about the use of non-reorderable format strings when the programming
2946          language also provides reorderable format strings.  */
2947       warn_format_string (mp->is_format, mp->msgid_plural, pos, "msgid_plural");
2948     }
2949   else
2950     free (msgid_plural);
2951
2952   /* Tell the lexer to reset its comment buffer, so that the next
2953      message gets the correct comments.  */
2954   xgettext_comment_reset ();
2955   savable_comment_reset ();
2956 }
2957
2958
2959 struct arglist_parser *
2960 arglist_parser_alloc (message_list_ty *mlp, const struct callshapes *shapes)
2961 {
2962   if (shapes == NULL || shapes->nshapes == 0)
2963     {
2964       struct arglist_parser *ap =
2965         (struct arglist_parser *)
2966         xmalloc (offsetof (struct arglist_parser, alternative[0]));
2967
2968       ap->mlp = mlp;
2969       ap->keyword = NULL;
2970       ap->keyword_len = 0;
2971       ap->nalternatives = 0;
2972
2973       return ap;
2974     }
2975   else
2976     {
2977       struct arglist_parser *ap =
2978         (struct arglist_parser *)
2979         xmalloc (xsum (sizeof (struct arglist_parser),
2980                        xtimes (shapes->nshapes - 1,
2981                                sizeof (struct partial_call))));
2982       size_t i;
2983
2984       ap->mlp = mlp;
2985       ap->keyword = shapes->keyword;
2986       ap->keyword_len = shapes->keyword_len;
2987       ap->nalternatives = shapes->nshapes;
2988       for (i = 0; i < shapes->nshapes; i++)
2989         {
2990           ap->alternative[i].argnumc = shapes->shapes[i].argnumc;
2991           ap->alternative[i].argnum1 = shapes->shapes[i].argnum1;
2992           ap->alternative[i].argnum2 = shapes->shapes[i].argnum2;
2993           ap->alternative[i].argnum1_glib_context =
2994             shapes->shapes[i].argnum1_glib_context;
2995           ap->alternative[i].argnum2_glib_context =
2996             shapes->shapes[i].argnum2_glib_context;
2997           ap->alternative[i].argtotal = shapes->shapes[i].argtotal;
2998           ap->alternative[i].xcomments = shapes->shapes[i].xcomments;
2999           ap->alternative[i].msgctxt = NULL;
3000           ap->alternative[i].msgctxt_escape = LET_NONE;
3001           ap->alternative[i].msgctxt_pos.file_name = NULL;
3002           ap->alternative[i].msgctxt_pos.line_number = (size_t)(-1);
3003           ap->alternative[i].msgid = NULL;
3004           ap->alternative[i].msgid_escape = LET_NONE;
3005           ap->alternative[i].msgid_context = null_context;
3006           ap->alternative[i].msgid_pos.file_name = NULL;
3007           ap->alternative[i].msgid_pos.line_number = (size_t)(-1);
3008           ap->alternative[i].msgid_comment = NULL;
3009           ap->alternative[i].msgid_plural = NULL;
3010           ap->alternative[i].msgid_plural_escape = LET_NONE;
3011           ap->alternative[i].msgid_plural_context = null_context;
3012           ap->alternative[i].msgid_plural_pos.file_name = NULL;
3013           ap->alternative[i].msgid_plural_pos.line_number = (size_t)(-1);
3014         }
3015
3016       return ap;
3017     }
3018 }
3019
3020
3021 struct arglist_parser *
3022 arglist_parser_clone (struct arglist_parser *ap)
3023 {
3024   struct arglist_parser *copy =
3025     (struct arglist_parser *)
3026     xmalloc (xsum (sizeof (struct arglist_parser) - sizeof (struct partial_call),
3027                    xtimes (ap->nalternatives, sizeof (struct partial_call))));
3028   size_t i;
3029
3030   copy->mlp = ap->mlp;
3031   copy->keyword = ap->keyword;
3032   copy->keyword_len = ap->keyword_len;
3033   copy->nalternatives = ap->nalternatives;
3034   for (i = 0; i < ap->nalternatives; i++)
3035     {
3036       const struct partial_call *cp = &ap->alternative[i];
3037       struct partial_call *ccp = &copy->alternative[i];
3038
3039       ccp->argnumc = cp->argnumc;
3040       ccp->argnum1 = cp->argnum1;
3041       ccp->argnum2 = cp->argnum2;
3042       ccp->argnum1_glib_context = cp->argnum1_glib_context;
3043       ccp->argnum2_glib_context = cp->argnum2_glib_context;
3044       ccp->argtotal = cp->argtotal;
3045       ccp->xcomments = cp->xcomments;
3046       ccp->msgctxt = (cp->msgctxt != NULL ? xstrdup (cp->msgctxt) : NULL);
3047       ccp->msgctxt_escape = cp->msgctxt_escape;
3048       ccp->msgctxt_pos = cp->msgctxt_pos;
3049       ccp->msgid = (cp->msgid != NULL ? xstrdup (cp->msgid) : NULL);
3050       ccp->msgid_escape = cp->msgid_escape;
3051       ccp->msgid_context = cp->msgid_context;
3052       ccp->msgid_pos = cp->msgctxt_pos;
3053       ccp->msgid_comment = add_reference (cp->msgid_comment);
3054       ccp->msgid_plural =
3055         (cp->msgid_plural != NULL ? xstrdup (cp->msgid_plural) : NULL);
3056       ccp->msgid_plural_escape = cp->msgid_plural_escape;
3057       ccp->msgid_plural_context = cp->msgid_plural_context;
3058       ccp->msgid_plural_pos = cp->msgid_plural_pos;
3059     }
3060
3061   return copy;
3062 }
3063
3064
3065 void
3066 arglist_parser_remember_literal (struct arglist_parser *ap,
3067                                  int argnum, char *string,
3068                                  flag_context_ty context,
3069                                  char *file_name, size_t line_number,
3070                                  refcounted_string_list_ty *comment,
3071                                  enum literalstring_escape_type type)
3072 {
3073   bool stored_string = false;
3074   size_t nalternatives = ap->nalternatives;
3075   size_t i;
3076
3077   if (!(argnum > 0))
3078     abort ();
3079   for (i = 0; i < nalternatives; i++)
3080     {
3081       struct partial_call *cp = &ap->alternative[i];
3082
3083       if (argnum == cp->argnumc)
3084         {
3085           cp->msgctxt = string;
3086           cp->msgctxt_escape = type;
3087           cp->msgctxt_pos.file_name = file_name;
3088           cp->msgctxt_pos.line_number = line_number;
3089           stored_string = true;
3090           /* Mark msgctxt as done.  */
3091           cp->argnumc = 0;
3092         }
3093       else
3094         {
3095           if (argnum == cp->argnum1)
3096             {
3097               cp->msgid = string;
3098               cp->msgid_escape = type;
3099               cp->msgid_context = context;
3100               cp->msgid_pos.file_name = file_name;
3101               cp->msgid_pos.line_number = line_number;
3102               cp->msgid_comment = add_reference (comment);
3103               stored_string = true;
3104               /* Mark msgid as done.  */
3105               cp->argnum1 = 0;
3106             }
3107           if (argnum == cp->argnum2)
3108             {
3109               cp->msgid_plural = string;
3110               cp->msgid_plural_escape = type;
3111               cp->msgid_plural_context = context;
3112               cp->msgid_plural_pos.file_name = file_name;
3113               cp->msgid_plural_pos.line_number = line_number;
3114               stored_string = true;
3115               /* Mark msgid_plural as done.  */
3116               cp->argnum2 = 0;
3117             }
3118         }
3119     }
3120   /* Note: There is a memory leak here: When string was stored but is later
3121      not used by arglist_parser_done, we don't free it.  */
3122   if (!stored_string)
3123     free (string);
3124 }
3125
3126 void
3127 arglist_parser_remember (struct arglist_parser *ap,
3128                          int argnum, char *string,
3129                          flag_context_ty context,
3130                          char *file_name, size_t line_number,
3131                          refcounted_string_list_ty *comment)
3132 {
3133   arglist_parser_remember_literal (ap, argnum, string, context,
3134                                    file_name, line_number,
3135                                    comment, LET_NONE);
3136 }
3137
3138 bool
3139 arglist_parser_decidedp (struct arglist_parser *ap, int argnum)
3140 {
3141   size_t i;
3142
3143   /* Test whether all alternatives are decided.
3144      Note: A decided alternative can be complete
3145        cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3146        && cp->argtotal == 0
3147      or it can be failed if no literal strings were found at the specified
3148      argument positions:
3149        cp->argnumc <= argnum && cp->argnum1 <= argnum && cp->argnum2 <= argnum
3150      or it can be failed if the number of arguments is exceeded:
3151        cp->argtotal > 0 && cp->argtotal < argnum
3152    */
3153   for (i = 0; i < ap->nalternatives; i++)
3154     {
3155       struct partial_call *cp = &ap->alternative[i];
3156
3157       if (!((cp->argnumc <= argnum
3158              && cp->argnum1 <= argnum
3159              && cp->argnum2 <= argnum)
3160             || (cp->argtotal > 0 && cp->argtotal < argnum)))
3161         /* cp is still undecided.  */
3162         return false;
3163     }
3164   return true;
3165 }
3166
3167
3168 void
3169 arglist_parser_done (struct arglist_parser *ap, int argnum)
3170 {
3171   size_t ncomplete;
3172   size_t i;
3173
3174   /* Determine the number of complete calls.  */
3175   ncomplete = 0;
3176   for (i = 0; i < ap->nalternatives; i++)
3177     {
3178       struct partial_call *cp = &ap->alternative[i];
3179
3180       if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3181           && (cp->argtotal == 0 || cp->argtotal == argnum))
3182         ncomplete++;
3183     }
3184
3185   if (ncomplete > 0)
3186     {
3187       struct partial_call *best_cp = NULL;
3188       bool ambiguous = false;
3189
3190       /* Find complete calls where msgctxt, msgid, msgid_plural are all
3191          provided.  */
3192       for (i = 0; i < ap->nalternatives; i++)
3193         {
3194           struct partial_call *cp = &ap->alternative[i];
3195
3196           if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3197               && (cp->argtotal == 0 || cp->argtotal == argnum)
3198               && cp->msgctxt != NULL
3199               && cp->msgid != NULL
3200               && cp->msgid_plural != NULL)
3201             {
3202               if (best_cp != NULL)
3203                 {
3204                   ambiguous = true;
3205                   break;
3206                 }
3207               best_cp = cp;
3208             }
3209         }
3210
3211       if (best_cp == NULL)
3212         {
3213           struct partial_call *best_cp1 = NULL;
3214           struct partial_call *best_cp2 = NULL;
3215
3216           /* Find complete calls where msgctxt, msgid are provided.  */
3217           for (i = 0; i < ap->nalternatives; i++)
3218             {
3219               struct partial_call *cp = &ap->alternative[i];
3220
3221               if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3222                   && (cp->argtotal == 0 || cp->argtotal == argnum)
3223                   && cp->msgctxt != NULL
3224                   && cp->msgid != NULL)
3225                 {
3226                   if (best_cp1 != NULL)
3227                     {
3228                       ambiguous = true;
3229                       break;
3230                     }
3231                   best_cp1 = cp;
3232                 }
3233             }
3234
3235           /* Find complete calls where msgid, msgid_plural are provided.  */
3236           for (i = 0; i < ap->nalternatives; i++)
3237             {
3238               struct partial_call *cp = &ap->alternative[i];
3239
3240               if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3241                   && (cp->argtotal == 0 || cp->argtotal == argnum)
3242                   && cp->msgid != NULL
3243                   && cp->msgid_plural != NULL)
3244                 {
3245                   if (best_cp2 != NULL)
3246                     {
3247                       ambiguous = true;
3248                       break;
3249                     }
3250                   best_cp2 = cp;
3251                 }
3252             }
3253
3254           if (best_cp1 != NULL)
3255             best_cp = best_cp1;
3256           if (best_cp2 != NULL)
3257             {
3258               if (best_cp != NULL)
3259                 ambiguous = true;
3260               else
3261                 best_cp = best_cp2;
3262             }
3263         }
3264
3265       if (best_cp == NULL)
3266         {
3267           /* Find complete calls where msgid is provided.  */
3268           for (i = 0; i < ap->nalternatives; i++)
3269             {
3270               struct partial_call *cp = &ap->alternative[i];
3271
3272               if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
3273                   && (cp->argtotal == 0 || cp->argtotal == argnum)
3274                   && cp->msgid != NULL)
3275                 {
3276                   if (best_cp != NULL)
3277                     {
3278                       ambiguous = true;
3279                       break;
3280                     }
3281                   best_cp = cp;
3282                 }
3283             }
3284         }
3285
3286       if (ambiguous)
3287         {
3288           error_with_progname = false;
3289           error_at_line (0, 0,
3290                          best_cp->msgid_pos.file_name,
3291                          best_cp->msgid_pos.line_number,
3292                          _("ambiguous argument specification for keyword '%.*s'"),
3293                          (int) ap->keyword_len, ap->keyword);
3294           error_with_progname = true;
3295         }
3296
3297       if (best_cp != NULL)
3298         {
3299           /* best_cp indicates the best found complete call.
3300              Now call remember_a_message.  */
3301           message_ty *mp;
3302
3303           /* Split strings in the GNOME glib syntax "msgctxt|msgid".  */
3304           if (best_cp->argnum1_glib_context || best_cp->argnum2_glib_context)
3305             /* split_keywordspec should not allow the context to be specified
3306                in two different ways.  */
3307             if (best_cp->msgctxt != NULL)
3308               abort ();
3309           if (best_cp->argnum1_glib_context)
3310             {
3311               const char *separator = strchr (best_cp->msgid, '|');
3312
3313               if (separator == NULL)
3314                 {
3315                   error_with_progname = false;
3316                   error_at_line (0, 0,
3317                                  best_cp->msgid_pos.file_name,
3318                                  best_cp->msgid_pos.line_number,
3319                                  _("warning: missing context for keyword '%.*s'"),
3320                                  (int) ap->keyword_len, ap->keyword);
3321                   error_with_progname = true;
3322                 }
3323               else
3324                 {
3325                   size_t ctxt_len = separator - best_cp->msgid;
3326                   char *ctxt = XNMALLOC (ctxt_len + 1, char);
3327
3328                   memcpy (ctxt, best_cp->msgid, ctxt_len);
3329                   ctxt[ctxt_len] = '\0';
3330                   best_cp->msgctxt = ctxt;
3331                   best_cp->msgid = xstrdup (separator + 1);
3332                 }
3333             }
3334           if (best_cp->msgid_plural != NULL && best_cp->argnum2_glib_context)
3335             {
3336               const char *separator = strchr (best_cp->msgid_plural, '|');
3337
3338               if (separator == NULL)
3339                 {
3340                   error_with_progname = false;
3341                   error_at_line (0, 0,
3342                                  best_cp->msgid_plural_pos.file_name,
3343                                  best_cp->msgid_plural_pos.line_number,
3344                                  _("warning: missing context for plural argument of keyword '%.*s'"),
3345                                  (int) ap->keyword_len, ap->keyword);
3346                   error_with_progname = true;
3347                 }
3348               else
3349                 {
3350                   size_t ctxt_len = separator - best_cp->msgid_plural;
3351                   char *ctxt = XNMALLOC (ctxt_len + 1, char);
3352
3353                   memcpy (ctxt, best_cp->msgid_plural, ctxt_len);
3354                   ctxt[ctxt_len] = '\0';
3355                   if (best_cp->msgctxt == NULL)
3356                     best_cp->msgctxt = ctxt;
3357                   else
3358                     {
3359                       if (strcmp (ctxt, best_cp->msgctxt) != 0)
3360                         {
3361                           error_with_progname = false;
3362                           error_at_line (0, 0,
3363                                          best_cp->msgid_plural_pos.file_name,
3364                                          best_cp->msgid_plural_pos.line_number,
3365                                          _("context mismatch between singular and plural form"));
3366                           error_with_progname = true;
3367                         }
3368                       free (ctxt);
3369                     }
3370                   best_cp->msgid_plural = xstrdup (separator + 1);
3371                 }
3372             }
3373
3374           {
3375             flag_context_ty msgid_context = best_cp->msgid_context;
3376             flag_context_ty msgid_plural_context = best_cp->msgid_plural_context;
3377             struct literalstring_parser *parser = current_literalstring_parser;
3378             const char *encoding;
3379
3380             /* Special support for the 3-argument tr operator in Qt:
3381                When --qt and --keyword=tr:1,1,2c,3t are specified, add to the
3382                context the information that the argument is expected to be a
3383                qt-plural-format.  */
3384             if (recognize_format_qt
3385                 && current_formatstring_parser3 == &formatstring_qt_plural
3386                 && best_cp->msgid_plural == best_cp->msgid)
3387               {
3388                 msgid_context.is_format3 = yes_according_to_context;
3389                 msgid_plural_context.is_format3 = yes_according_to_context;
3390               }
3391
3392             if (best_cp->msgctxt != NULL)
3393               {
3394                 if (parser != NULL && best_cp->msgctxt_escape != 0)
3395                   {
3396                     char *msgctxt =
3397                       parser->parse (best_cp->msgctxt,
3398                                      &best_cp->msgctxt_pos,
3399                                      best_cp->msgctxt_escape);
3400                     free (best_cp->msgctxt);
3401                     best_cp->msgctxt = msgctxt;
3402                   }
3403                 else
3404                   {
3405                     lex_pos_ty *pos = &best_cp->msgctxt_pos;
3406                     CONVERT_STRING (best_cp->msgctxt, lc_string);
3407                   }
3408               }
3409
3410             if (parser != NULL && best_cp->msgid_escape != 0)
3411               {
3412                 char *msgid = parser->parse (best_cp->msgid,
3413                                              &best_cp->msgid_pos,
3414                                              best_cp->msgid_escape);
3415                 if (best_cp->msgid_plural == best_cp->msgid)
3416                   best_cp->msgid_plural = msgid;
3417                 free (best_cp->msgid);
3418                 best_cp->msgid = msgid;
3419               }
3420             else
3421               {
3422                 lex_pos_ty *pos = &best_cp->msgid_pos;
3423                 CONVERT_STRING (best_cp->msgid, lc_string);
3424               }
3425
3426             if (best_cp->msgid_plural)
3427               {
3428                 /* best_cp->msgid_plural may point to best_cp->msgid.
3429                    In that case, it is already interpreted and converted.  */
3430                 if (best_cp->msgid_plural != best_cp->msgid)
3431                   {
3432                     if (parser != NULL
3433                         && best_cp->msgid_plural_escape != 0)
3434                       {
3435                         char *msgid_plural =
3436                           parser->parse (best_cp->msgid_plural,
3437                                          &best_cp->msgid_plural_pos,
3438                                          best_cp->msgid_plural_escape);
3439                         free (best_cp->msgid_plural);
3440                         best_cp->msgid_plural = msgid_plural;
3441                       }
3442                     else
3443                       {
3444                         lex_pos_ty *pos = &best_cp->msgid_plural_pos;
3445                         CONVERT_STRING (best_cp->msgid_plural, lc_string);
3446                       }
3447                   }
3448
3449                 /* If best_cp->msgid_plural equals to best_cp->msgid,
3450                    the ownership will be transferred to
3451                    remember_a_message before it is passed to
3452                    remember_a_message_plural.
3453
3454                    Make a copy of the string in that case.  */
3455                 if (best_cp->msgid_plural == best_cp->msgid)
3456                   best_cp->msgid_plural = xstrdup (best_cp->msgid);
3457               }
3458
3459             if (best_cp->msgid_comment != NULL)
3460               {
3461                 refcounted_string_list_ty *msgid_comment =
3462                   savable_comment_convert_encoding (best_cp->msgid_comment,
3463                                                     &best_cp->msgid_pos);
3464                 drop_reference (best_cp->msgid_comment);
3465                 best_cp->msgid_comment = msgid_comment;
3466               }
3467
3468             /* best_cp->msgctxt, best_cp->msgid, and best_cp->msgid_plural
3469                are already in UTF-8.  Prevent further conversion in
3470                remember_a_message.  */
3471             encoding = xgettext_current_source_encoding;
3472             xgettext_current_source_encoding = po_charset_utf8;
3473             mp = remember_a_message (ap->mlp, best_cp->msgctxt, best_cp->msgid,
3474                                      msgid_context,
3475                                      &best_cp->msgid_pos,
3476                                      NULL, best_cp->msgid_comment);
3477             if (mp != NULL && best_cp->msgid_plural != NULL)
3478               remember_a_message_plural (mp,
3479                                          best_cp->msgid_plural,
3480                                          msgid_plural_context,
3481                                          &best_cp->msgid_plural_pos,
3482                                          NULL);
3483             xgettext_current_source_encoding = encoding;
3484           }
3485
3486           if (best_cp->xcomments.nitems > 0)
3487             {
3488               /* Add best_cp->xcomments to mp->comment_dot, unless already
3489                  present.  */
3490               size_t i;
3491
3492               for (i = 0; i < best_cp->xcomments.nitems; i++)
3493                 {
3494                   const char *xcomment = best_cp->xcomments.item[i];
3495                   bool found = false;
3496
3497                   if (mp != NULL && mp->comment_dot != NULL)
3498                     {
3499                       size_t j;
3500
3501                       for (j = 0; j < mp->comment_dot->nitems; j++)
3502                         if (strcmp (xcomment, mp->comment_dot->item[j]) == 0)
3503                           {
3504                             found = true;
3505                             break;
3506                           }
3507                     }
3508                   if (!found)
3509                     message_comment_dot_append (mp, xcomment);
3510                 }
3511             }
3512         }
3513     }
3514   else
3515     {
3516       /* No complete call was parsed.  */
3517       /* Note: There is a memory leak here: When there is more than one
3518          alternative, the same string can be stored in multiple alternatives,
3519          and it's not easy to free all strings reliably.  */
3520       if (ap->nalternatives == 1)
3521         {
3522           if (ap->alternative[0].msgctxt != NULL)
3523             free (ap->alternative[0].msgctxt);
3524           if (ap->alternative[0].msgid != NULL)
3525             free (ap->alternative[0].msgid);
3526           if (ap->alternative[0].msgid_plural != NULL)
3527             free (ap->alternative[0].msgid_plural);
3528         }
3529     }
3530
3531   for (i = 0; i < ap->nalternatives; i++)
3532     drop_reference (ap->alternative[i].msgid_comment);
3533   free (ap);
3534 }
3535
3536
3537 struct mixed_string_buffer *
3538 mixed_string_buffer_alloc (lexical_context_ty lcontext,
3539                            const char *logical_file_name,
3540                            int line_number)
3541 {
3542   struct mixed_string_buffer *bp = XMALLOC (struct mixed_string_buffer);
3543   bp->utf8_buffer = NULL;
3544   bp->utf8_buflen = 0;
3545   bp->utf8_allocated = 0;
3546   bp->utf16_surr = 0;
3547   bp->curr_buffer = NULL;
3548   bp->curr_buflen = 0;
3549   bp->curr_allocated = 0;
3550   bp->lcontext = lcontext;
3551   bp->logical_file_name = logical_file_name;
3552   bp->line_number = line_number;
3553   return bp;
3554 }
3555
3556 /* Auxiliary function: Append a byte to bp->curr.  */
3557 static inline void
3558 mixed_string_buffer_append_to_curr_buffer (struct mixed_string_buffer *bp,
3559                                            unsigned char c)
3560 {
3561   if (bp->curr_buflen == bp->curr_allocated)
3562     {
3563       bp->curr_allocated = 2 * bp->curr_allocated + 10;
3564       bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
3565     }
3566   bp->curr_buffer[bp->curr_buflen++] = c;
3567 }
3568
3569 /* Auxiliary function: Ensure count more bytes are available in bp->utf8.  */
3570 static inline void
3571 mixed_string_buffer_grow_utf8_buffer (struct mixed_string_buffer *bp,
3572                                          size_t count)
3573 {
3574   if (bp->utf8_buflen + count > bp->utf8_allocated)
3575     {
3576       size_t new_allocated = 2 * bp->utf8_allocated + 10;
3577       if (new_allocated < bp->utf8_buflen + count)
3578         new_allocated = bp->utf8_buflen + count;
3579       bp->utf8_allocated = new_allocated;
3580       bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
3581     }
3582 }
3583
3584 /* Auxiliary function: Append a Unicode character to bp->utf8.
3585    uc must be < 0x110000.  */
3586 static inline void
3587 mixed_string_buffer_append_to_utf8_buffer (struct mixed_string_buffer *bp,
3588                                            ucs4_t uc)
3589 {
3590   unsigned char utf8buf[6];
3591   int count = u8_uctomb (utf8buf, uc, 6);
3592
3593   if (count < 0)
3594     /* The caller should have ensured that uc is not out-of-range.  */
3595     abort ();
3596
3597   mixed_string_buffer_grow_utf8_buffer (bp, count);
3598   memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
3599   bp->utf8_buflen += count;
3600 }
3601
3602 /* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer.  */
3603 static inline void
3604 mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
3605 {
3606   if (bp->utf16_surr != 0)
3607     {
3608       /* A half surrogate is invalid, therefore use U+FFFD instead.  */
3609       mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
3610       bp->utf16_surr = 0;
3611     }
3612 }
3613
3614 /* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer.  */
3615 static inline void
3616 mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp,
3617                                        int line_number)
3618 {
3619   if (bp->curr_buflen > 0)
3620     {
3621       char *curr;
3622       size_t count;
3623
3624       mixed_string_buffer_append_to_curr_buffer (bp, '\0');
3625
3626       /* Convert from the source encoding to UTF-8.  */
3627       curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
3628                                            bp->logical_file_name,
3629                                            line_number);
3630
3631       /* Append it to bp->utf8_buffer.  */
3632       count = strlen (curr);
3633       mixed_string_buffer_grow_utf8_buffer (bp, count);
3634       memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
3635       bp->utf8_buflen += count;
3636
3637       if (curr != bp->curr_buffer)
3638         free (curr);
3639       bp->curr_buflen = 0;
3640     }
3641 }
3642
3643 void
3644 mixed_string_buffer_append_char (struct mixed_string_buffer *bp, int c)
3645 {
3646   /* Switch from Unicode character mode to multibyte character mode.  */
3647   mixed_string_buffer_flush_utf16_surr (bp);
3648
3649   /* When a newline is seen, convert the accumulated multibyte sequence.
3650      This ensures a correct line number in the error message in case of
3651      a conversion error.  The "- 1" is to account for the newline.  */
3652   if (c == '\n')
3653     mixed_string_buffer_flush_curr_buffer (bp, bp->line_number - 1);
3654
3655   mixed_string_buffer_append_to_curr_buffer (bp, (unsigned char) c);
3656 }
3657
3658 void
3659 mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, int c)
3660 {
3661   /* Switch from multibyte character mode to Unicode character mode.  */
3662   mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
3663
3664   /* Test whether this character and the previous one form a Unicode
3665      surrogate character pair.  */
3666   if (bp->utf16_surr != 0 && (c >= 0xdc00 && c < 0xe000))
3667     {
3668       unsigned short utf16buf[2];
3669       ucs4_t uc;
3670
3671       utf16buf[0] = bp->utf16_surr;
3672       utf16buf[1] = c;
3673       if (u16_mbtouc (&uc, utf16buf, 2) != 2)
3674         abort ();
3675
3676       mixed_string_buffer_append_to_utf8_buffer (bp, uc);
3677       bp->utf16_surr = 0;
3678     }
3679   else
3680     {
3681       mixed_string_buffer_flush_utf16_surr (bp);
3682
3683       if (c >= 0xd800 && c < 0xdc00)
3684         bp->utf16_surr = c;
3685       else if (c >= 0xdc00 && c < 0xe000)
3686         {
3687           /* A half surrogate is invalid, therefore use U+FFFD instead.  */
3688           mixed_string_buffer_append_to_utf8_buffer (bp, 0xfffd);
3689         }
3690       else
3691         mixed_string_buffer_append_to_utf8_buffer (bp, c);
3692     }
3693 }
3694
3695 char *
3696 mixed_string_buffer_done (struct mixed_string_buffer *bp)
3697 {
3698   char *utf8_buffer;
3699
3700   /* Flush all into bp->utf8_buffer.  */
3701   mixed_string_buffer_flush_utf16_surr (bp);
3702   mixed_string_buffer_flush_curr_buffer (bp, bp->line_number);
3703   /* NUL-terminate it.  */
3704   mixed_string_buffer_grow_utf8_buffer (bp, 1);
3705   bp->utf8_buffer[bp->utf8_buflen] = '\0';
3706
3707   /* Free curr_buffer and bp itself.  */
3708   utf8_buffer = bp->utf8_buffer;
3709   free (bp->curr_buffer);
3710   free (bp);
3711
3712   /* Return it.  */
3713   return utf8_buffer;
3714 }
3715
3716
3717 static message_ty *
3718 construct_header ()
3719 {
3720   char *project_id_version;
3721   time_t now;
3722   char *timestring;
3723   message_ty *mp;
3724   char *msgstr;
3725   char *comment;
3726   static lex_pos_ty pos = { __FILE__, __LINE__ };
3727
3728   if (package_name != NULL)
3729     {
3730       if (package_version != NULL)
3731         project_id_version = xasprintf ("%s %s", package_name, package_version);
3732       else
3733         project_id_version = xasprintf ("%s", package_name);
3734     }
3735   else
3736     project_id_version = xstrdup ("PACKAGE VERSION");
3737
3738   if (msgid_bugs_address != NULL && msgid_bugs_address[0] == '\0')
3739     multiline_warning (xasprintf (_("warning: ")),
3740                        xstrdup (_("\
3741 The option --msgid-bugs-address was not specified.\n\
3742 If you are using a 'Makevars' file, please specify\n\
3743 the MSGID_BUGS_ADDRESS variable there; otherwise please\n\
3744 specify an --msgid-bugs-address command line option.\n\
3745 ")));
3746
3747   time (&now);
3748   timestring = po_strftime (&now);
3749
3750   msgstr = xasprintf ("\
3751 Project-Id-Version: %s\n\
3752 Report-Msgid-Bugs-To: %s\n\
3753 POT-Creation-Date: %s\n\
3754 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n\
3755 Last-Translator: FULL NAME <EMAIL@ADDRESS>\n\
3756 Language-Team: LANGUAGE <LL@li.org>\n\
3757 Language: \n\
3758 MIME-Version: 1.0\n\
3759 Content-Type: text/plain; charset=CHARSET\n\
3760 Content-Transfer-Encoding: 8bit\n",
3761                       project_id_version,
3762                       msgid_bugs_address != NULL ? msgid_bugs_address : "",
3763                       timestring);
3764   free (timestring);
3765   free (project_id_version);
3766
3767   mp = message_alloc (NULL, "", NULL, msgstr, strlen (msgstr) + 1, &pos);
3768
3769   if (copyright_holder[0] != '\0')
3770     comment = xasprintf ("\
3771 SOME DESCRIPTIVE TITLE.\n\
3772 Copyright (C) YEAR %s\n\
3773 This file is distributed under the same license as the %s package.\n\
3774 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n",
3775                            copyright_holder,
3776                            package_name != NULL ? package_name : "PACKAGE");
3777   else
3778     comment = xstrdup ("\
3779 SOME DESCRIPTIVE TITLE.\n\
3780 This file is put in the public domain.\n\
3781 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n");
3782   message_comment_append (mp, comment);
3783   free (comment);
3784
3785   mp->is_fuzzy = true;
3786
3787   return mp;
3788 }
3789
3790 static void
3791 finalize_header (msgdomain_list_ty *mdlp)
3792 {
3793   /* If the generated PO file has plural forms, add a Plural-Forms template
3794      to the constructed header.  */
3795   {
3796     bool has_plural;
3797     size_t i, j;
3798
3799     has_plural = false;
3800     for (i = 0; i < mdlp->nitems; i++)
3801       {
3802         message_list_ty *mlp = mdlp->item[i]->messages;
3803
3804         for (j = 0; j < mlp->nitems; j++)
3805           {
3806             message_ty *mp = mlp->item[j];
3807
3808             if (mp->msgid_plural != NULL)
3809               {
3810                 has_plural = true;
3811                 break;
3812               }
3813           }
3814         if (has_plural)
3815           break;
3816       }
3817
3818     if (has_plural)
3819       {
3820         message_ty *header =
3821           message_list_search (mdlp->item[0]->messages, NULL, "");
3822         if (header != NULL
3823             && c_strstr (header->msgstr, "Plural-Forms:") == NULL)
3824           {
3825             size_t insertpos = strlen (header->msgstr);
3826             const char *suffix;
3827             size_t suffix_len;
3828             char *new_msgstr;
3829
3830             suffix = "\nPlural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n";
3831             if (insertpos == 0 || header->msgstr[insertpos-1] == '\n')
3832               suffix++;
3833             suffix_len = strlen (suffix);
3834             new_msgstr = XNMALLOC (header->msgstr_len + suffix_len, char);
3835             memcpy (new_msgstr, header->msgstr, insertpos);
3836             memcpy (new_msgstr + insertpos, suffix, suffix_len);
3837             memcpy (new_msgstr + insertpos + suffix_len,
3838                     header->msgstr + insertpos,
3839                     header->msgstr_len - insertpos);
3840             header->msgstr = new_msgstr;
3841             header->msgstr_len = header->msgstr_len + suffix_len;
3842           }
3843       }
3844   }
3845
3846   /* If not all the strings were plain ASCII, or if the output syntax
3847      requires a charset conversion, set the charset in the header to UTF-8.
3848      All messages have already been converted to UTF-8 in remember_a_message
3849      and remember_a_message_plural.  */
3850   {
3851     bool has_nonascii = false;
3852     size_t i;
3853
3854     for (i = 0; i < mdlp->nitems; i++)
3855       {
3856         message_list_ty *mlp = mdlp->item[i]->messages;
3857
3858         if (!is_ascii_message_list (mlp))
3859           has_nonascii = true;
3860       }
3861
3862     if (has_nonascii || output_syntax->requires_utf8)
3863       {
3864         message_list_ty *mlp = mdlp->item[0]->messages;
3865
3866         iconv_message_list (mlp, po_charset_utf8, po_charset_utf8, NULL);
3867       }
3868   }
3869 }
3870
3871
3872 static extractor_ty
3873 language_to_extractor (const char *name)
3874 {
3875   struct table_ty
3876   {
3877     const char *name;
3878     extractor_func func;
3879     flag_context_list_table_ty *flag_table;
3880     struct formatstring_parser *formatstring_parser1;
3881     struct formatstring_parser *formatstring_parser2;
3882     struct literalstring_parser *literalstring_parser;
3883   };
3884   typedef struct table_ty table_ty;
3885
3886   static table_ty table[] =
3887   {
3888     SCANNERS_C
3889     SCANNERS_PO
3890     SCANNERS_SH
3891     SCANNERS_PYTHON
3892     SCANNERS_LISP
3893     SCANNERS_ELISP
3894     SCANNERS_LIBREP
3895     SCANNERS_SCHEME
3896     SCANNERS_SMALLTALK
3897     SCANNERS_JAVA
3898     SCANNERS_PROPERTIES
3899     SCANNERS_CSHARP
3900     SCANNERS_AWK
3901     SCANNERS_YCP
3902     SCANNERS_TCL
3903     SCANNERS_PERL
3904     SCANNERS_PHP
3905     SCANNERS_STRINGTABLE
3906     SCANNERS_RST
3907     SCANNERS_GLADE
3908     SCANNERS_LUA
3909     SCANNERS_JAVASCRIPT
3910     SCANNERS_VALA
3911     SCANNERS_GSETTINGS
3912     SCANNERS_DESKTOP
3913     SCANNERS_APPDATA
3914     /* Here may follow more languages and their scanners: pike, etc...
3915        Make sure new scanners honor the --exclude-file option.  */
3916   };
3917
3918   table_ty *tp;
3919
3920   for (tp = table; tp < ENDOF(table); ++tp)
3921     if (c_strcasecmp (name, tp->name) == 0)
3922       {
3923         extractor_ty result;
3924
3925         result.func = tp->func;
3926         result.flag_table = tp->flag_table;
3927         result.formatstring_parser1 = tp->formatstring_parser1;
3928         result.formatstring_parser2 = tp->formatstring_parser2;
3929         result.formatstring_parser3 = NULL;
3930         result.literalstring_parser = tp->literalstring_parser;
3931
3932         /* Handle --qt.  It's preferrable to handle this facility here rather
3933            than through an option --language=C++/Qt because the latter would
3934            conflict with the language "C++" regarding the file extensions.  */
3935         if (recognize_format_qt && strcmp (tp->name, "C++") == 0)
3936           {
3937             result.flag_table = &flag_table_cxx_qt;
3938             result.formatstring_parser2 = &formatstring_qt;
3939             result.formatstring_parser3 = &formatstring_qt_plural;
3940           }
3941         /* Likewise for --kde.  */
3942         if (recognize_format_kde && strcmp (tp->name, "C++") == 0)
3943           {
3944             result.flag_table = &flag_table_cxx_kde;
3945             result.formatstring_parser2 = &formatstring_kde;
3946             result.formatstring_parser3 = &formatstring_kde_kuit;
3947           }
3948         /* Likewise for --boost.  */
3949         if (recognize_format_boost && strcmp (tp->name, "C++") == 0)
3950           {
3951             result.flag_table = &flag_table_cxx_boost;
3952             result.formatstring_parser2 = &formatstring_boost;
3953           }
3954
3955         return result;
3956       }
3957
3958   error (EXIT_FAILURE, 0, _("language '%s' unknown"), name);
3959   /* NOTREACHED */
3960   {
3961     extractor_ty result = { NULL, NULL, NULL, NULL };
3962     return result;
3963   }
3964 }
3965
3966
3967 static const char *
3968 extension_to_language (const char *extension)
3969 {
3970   struct table_ty
3971   {
3972     const char *extension;
3973     const char *language;
3974   };
3975   typedef struct table_ty table_ty;
3976
3977   static table_ty table[] =
3978   {
3979     EXTENSIONS_C
3980     EXTENSIONS_PO
3981     EXTENSIONS_SH
3982     EXTENSIONS_PYTHON
3983     EXTENSIONS_LISP
3984     EXTENSIONS_ELISP
3985     EXTENSIONS_LIBREP
3986     EXTENSIONS_SCHEME
3987     EXTENSIONS_SMALLTALK
3988     EXTENSIONS_JAVA
3989     EXTENSIONS_PROPERTIES
3990     EXTENSIONS_CSHARP
3991     EXTENSIONS_AWK
3992     EXTENSIONS_YCP
3993     EXTENSIONS_TCL
3994     EXTENSIONS_PERL
3995     EXTENSIONS_PHP
3996     EXTENSIONS_STRINGTABLE
3997     EXTENSIONS_RST
3998     EXTENSIONS_GLADE
3999     EXTENSIONS_LUA
4000     EXTENSIONS_JAVASCRIPT
4001     EXTENSIONS_VALA
4002     EXTENSIONS_GSETTINGS
4003     EXTENSIONS_DESKTOP
4004     EXTENSIONS_APPDATA
4005     /* Here may follow more file extensions... */
4006   };
4007
4008   table_ty *tp;
4009
4010   for (tp = table; tp < ENDOF(table); ++tp)
4011     if (strcmp (extension, tp->extension) == 0)
4012       return tp->language;
4013   return NULL;
4014 }