gettext-tools/src/xgettext.c

   1 /* Extracts strings from C source file to Uniforum style .po file.
   2    Copyright (C) 1995-1998, 2000-2012 Free Software Foundation, Inc.
   3    Written by Ulrich Drepper <drepper@gnu.ai.mit.edu>, April 1995.
   4
   5    This program is free software: you can redistribute it and/or modify
   6    it under the terms of the GNU General Public License as published by
   7    the Free Software Foundation; either version 3 of the License, or
   8    (at your option) any later version.
   9
  10    This program is distributed in the hope that it will be useful,
  11    but WITHOUT ANY WARRANTY; without even the implied warranty of
  12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  13    GNU General Public License for more details.
  14
  15    You should have received a copy of the GNU General Public License
  16    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  17
  18 #ifdef HAVE_CONFIG_H
  19 # include <config.h>
  20 #endif
  21 #include <alloca.h>
  22
  23 #include <ctype.h>
  24 #include <errno.h>
  25 #include <getopt.h>
  26 #include <stdio.h>
  27 #include <time.h>
  28 #include <stdlib.h>
  29 #include <stdbool.h>
  30 #include <string.h>
  31 #include <locale.h>
  32 #include <limits.h>
  33
  34 #include "xgettext.h"
  35 #include "closeout.h"
  36 #include "dir-list.h"
  37 #include "file-list.h"
  38 #include "str-list.h"
  39 #include "error.h"
  40 #include "error-progname.h"
  41 #include "progname.h"
  42 #include "relocatable.h"
  43 #include "basename.h"
  44 #include "xerror.h"
  45 #include "xvasprintf.h"
  46 #include "xsize.h"
  47 #include "xalloc.h"
  48 #include "xmalloca.h"
  49 #include "c-strstr.h"
  50 #include "xerror.h"
  51 #include "filename.h"
  52 #include "concat-filename.h"
  53 #include "c-strcase.h"
  54 #include "open-catalog.h"
  55 #include "read-catalog-abstract.h"
  56 #include "read-po.h"
  57 #include "message.h"
  58 #include "po-charset.h"
  59 #include "msgl-iconv.h"
  60 #include "msgl-ascii.h"
  61 #include "po-time.h"
  62 #include "write-catalog.h"
  63 #include "write-po.h"
  64 #include "write-properties.h"
  65 #include "write-stringtable.h"
  66 #include "color.h"
  67 #include "format.h"
  68 #include "propername.h"
  69 #include "gettext.h"
  70
  71 /* A convenience macro.  I don't like writing gettext() every time.  */
  72 #define _(str) gettext (str)
  73
  74
  75 #include "x-c.h"
  76 #include "x-po.h"
  77 #include "x-sh.h"
  78 #include "x-python.h"
  79 #include "x-lisp.h"
  80 #include "x-elisp.h"
  81 #include "x-librep.h"
  82 #include "x-scheme.h"
  83 #include "x-smalltalk.h"
  84 #include "x-java.h"
  85 #include "x-properties.h"
  86 #include "x-csharp.h"
  87 #include "x-awk.h"
  88 #include "x-ycp.h"
  89 #include "x-tcl.h"
  90 #include "x-perl.h"
  91 #include "x-php.h"
  92 #include "x-stringtable.h"
  93 #include "x-rst.h"
  94 #include "x-glade.h"
  95 #include "x-lua.h"
  96 #include "x-javascript.h"
  97 #include "x-vala.h"
  98
  99
  100 /* If true, add all comments immediately preceding one of the keywords.
         Set by -c / --add-comments when no TAG is given.  */
  101 static bool add_all_comments = false;
  102
  103 /* Tag used in comment of prevailing domain.  Set by -cTAG, with leading
         white space skipped; NULL if no TAG was given.  */
  104 static char *comment_tag;
  105
  106 /* Name of default domain file.  If not set defaults to messages.po.
         Set by -d; main() treats the special name "-" as standard output.  */
  107 static const char *default_domain;
  108
  109 /* If called with --debug option the output reflects whether format
  110    string recognition is done automatically or forced by the user.
         This is an int because getopt_long stores into it directly.  */
  111 static int do_debug;
  112
  113 /* Content of .po files with symbols to be excluded.  The -x option hands
         its argument to read_exclusion_file.
         NOTE(review): has external linkage, presumably because code outside
         this file consults it -- confirm.  */
  114 message_list_ty *exclude;
  115
  116 /* Force output of PO file even if empty.  Set through the --force-po long
         option; an int because getopt_long stores into it directly.  */
  117 static int force_po;
  118
  119 /* Copyright holder of the output file and the translations.  Replaced by
         --copyright-holder; --foreign-user sets it to the empty string.  */
  120 static const char *copyright_holder = "THE PACKAGE'S COPYRIGHT HOLDER";
  121
  122 /* Package name.  Set by --package-name.  */
  123 static const char *package_name = NULL;
  124
  125 /* Package version.  Set by --package-version.  */
  126 static const char *package_version = NULL;
  127
  128 /* Email address or URL for reports of bugs in msgids.  Set by
         --msgid-bugs-address.  */
  129 static const char *msgid_bugs_address = NULL;
  130
  131 /* String used as prefix for msgstr.  Set by -m.  If only one of prefix
         and suffix is given, main() sets the other to the empty string.  */
  132 static const char *msgstr_prefix;
  133
  134 /* String used as suffix for msgstr.  Set by -M.  */
  135 static const char *msgstr_suffix;
  136
  137 /* Directory in which output files are created.  The -p option stores a
         heap copy that ends in '/'; main() uses "." when -p is absent.  */
  138 static char *output_dir;
  139
  140 /* The output syntax: .pot or .properties or .strings.  Switched by
         --properties-output and --stringtable-output.  */
  141 static catalog_output_format_ty output_syntax = &output_format_po;
  142
  143 /* If nonzero omit header with information about this run.  Set through
         the --omit-header long option.  */
  144 int xgettext_omit_header;
 145
  146 /* Table of flag_context_list_ty tables, one per supported language or
         C++ dialect.  main() calls an init_flag_table_* function for most of
         these names before it parses the options.
         NOTE(review): no init call for the cxx_qt, cxx_kde and cxx_boost
         tables is visible in main(); presumably they are set up elsewhere --
         confirm.  */
  147 static flag_context_list_table_ty flag_table_c;
  148 static flag_context_list_table_ty flag_table_cxx_qt;
  149 static flag_context_list_table_ty flag_table_cxx_kde;
  150 static flag_context_list_table_ty flag_table_cxx_boost;
  151 static flag_context_list_table_ty flag_table_objc;
  152 static flag_context_list_table_ty flag_table_gcc_internal;
  153 static flag_context_list_table_ty flag_table_sh;
  154 static flag_context_list_table_ty flag_table_python;
  155 static flag_context_list_table_ty flag_table_lisp;
  156 static flag_context_list_table_ty flag_table_elisp;
  157 static flag_context_list_table_ty flag_table_librep;
  158 static flag_context_list_table_ty flag_table_scheme;
  159 static flag_context_list_table_ty flag_table_java;
  160 static flag_context_list_table_ty flag_table_csharp;
  161 static flag_context_list_table_ty flag_table_awk;
  162 static flag_context_list_table_ty flag_table_ycp;
  163 static flag_context_list_table_ty flag_table_tcl;
  164 static flag_context_list_table_ty flag_table_perl;
  165 static flag_context_list_table_ty flag_table_php;
  166 static flag_context_list_table_ty flag_table_lua;
  167 static flag_context_list_table_ty flag_table_javascript;
  168 static flag_context_list_table_ty flag_table_vala;
 169
  170 /* If true, recognize Qt format strings.  Set by --qt.  main() rejects
         any combination of --qt, --kde and --boost.  */
  171 static bool recognize_format_qt;
  172
  173 /* If true, recognize KDE format strings.  Set by --kde.  */
  174 static bool recognize_format_kde;
  175
  176 /* If true, recognize Boost format strings.  Set by --boost.  */
  177 static bool recognize_format_boost;
  178
  179 /* Canonicalized encoding name for all input files.  main() starts with
         po_charset_ascii and replaces it when --from-code is given.  */
  180 const char *xgettext_global_source_encoding;
  181
  182 #if HAVE_ICONV
  183 /* Converter from xgettext_global_source_encoding to UTF-8 (except from
  184    ASCII or UTF-8, when this conversion is a no-op).  main() opens it
         after option parsing and closes it once all input files have been
         processed.  */
  185 iconv_t xgettext_global_source_iconv;
  186 #endif
  187
  188 /* Canonicalized encoding name for the current input file.  */
  189 const char *xgettext_current_source_encoding;
  190
  191 #if HAVE_ICONV
  192 /* Converter from xgettext_current_source_encoding to UTF-8 (except from
  193    ASCII or UTF-8, when this conversion is a no-op).  */
  194 iconv_t xgettext_current_source_iconv;
  195 #endif
 196
 197 /* Long options.  */
 198 static const struct option long_options[] =
 199 {
 200   { "add-comments", optional_argument, NULL, 'c' },
 201   { "add-location", no_argument, &line_comment, 1 },
 202   { "boost", no_argument, NULL, CHAR_MAX + 11 },
 203   { "c++", no_argument, NULL, 'C' },
 204   { "color", optional_argument, NULL, CHAR_MAX + 14 },
 205   { "copyright-holder", required_argument, NULL, CHAR_MAX + 1 },
 206   { "debug", no_argument, &do_debug, 1 },
 207   { "default-domain", required_argument, NULL, 'd' },
 208   { "directory", required_argument, NULL, 'D' },
 209   { "escape", no_argument, NULL, 'E' },
 210   { "exclude-file", required_argument, NULL, 'x' },
 211   { "extract-all", no_argument, NULL, 'a' },
 212   { "files-from", required_argument, NULL, 'f' },
 213   { "flag", required_argument, NULL, CHAR_MAX + 8 },
 214   { "force-po", no_argument, &force_po, 1 },
 215   { "foreign-user", no_argument, NULL, CHAR_MAX + 2 },
 216   { "from-code", required_argument, NULL, CHAR_MAX + 3 },
 217   { "help", no_argument, NULL, 'h' },
 218   { "indent", no_argument, NULL, 'i' },
 219   { "join-existing", no_argument, NULL, 'j' },
 220   { "kde", no_argument, NULL, CHAR_MAX + 10 },
 221   { "keyword", optional_argument, NULL, 'k' },
 222   { "language", required_argument, NULL, 'L' },
 223   { "msgid-bugs-address", required_argument, NULL, CHAR_MAX + 5 },
 224   { "msgstr-prefix", optional_argument, NULL, 'm' },
 225   { "msgstr-suffix", optional_argument, NULL, 'M' },
 226   { "no-escape", no_argument, NULL, 'e' },
 227   { "no-location", no_argument, &line_comment, 0 },
 228   { "no-wrap", no_argument, NULL, CHAR_MAX + 4 },
 229   { "omit-header", no_argument, &xgettext_omit_header, 1 },
 230   { "output", required_argument, NULL, 'o' },
 231   { "output-dir", required_argument, NULL, 'p' },
 232   { "package-name", required_argument, NULL, CHAR_MAX + 12 },
 233   { "package-version", required_argument, NULL, CHAR_MAX + 13 },
 234   { "properties-output", no_argument, NULL, CHAR_MAX + 6 },
 235   { "qt", no_argument, NULL, CHAR_MAX + 9 },
 236   { "sort-by-file", no_argument, NULL, 'F' },
 237   { "sort-output", no_argument, NULL, 's' },
 238   { "strict", no_argument, NULL, 'S' },
 239   { "string-limit", required_argument, NULL, 'l' },
 240   { "stringtable-output", no_argument, NULL, CHAR_MAX + 7 },
 241   { "style", required_argument, NULL, CHAR_MAX + 15 },
 242   { "trigraphs", no_argument, NULL, 'T' },
 243   { "version", no_argument, NULL, 'V' },
 244   { "width", required_argument, NULL, 'w', },
 245   { NULL, 0, NULL, 0 }
 246 };
 247
 248
  249 /* The extractors must all be functions returning void and taking three
  250    arguments designating the input stream (the open stream, its real file
  251    name and its logical file name), one flag context table argument, and
         one message domain list argument in which to add the messages.  */
  252 typedef void (*extractor_func) (FILE *fp, const char *real_filename,
  253                                 const char *logical_filename,
  254                                 flag_context_list_table_ty *flag_table,
  255                                 msgdomain_list_ty *mdlp);
 256
  257 typedef struct extractor_ty extractor_ty;
      /* Describes how input written in one language is processed: the
         extraction function together with the table and parsers that go
         with it.  */
  258 struct extractor_ty
  259 {
        /* Function that scans one input file.  main() treats a NULL func as
           "no extractor chosen yet".  */
  260   extractor_func func;
        /* NOTE(review): presumably handed to func as its flag_table
           argument -- confirm in extract_from_file.  */
  261   flag_context_list_table_ty *flag_table;
        /* Format string parsers for this language.  main() leaves them NULL
           for the PO extractor.  NOTE(review): how the three slots are used
           is not visible here.  */
  262   struct formatstring_parser *formatstring_parser1;
  263   struct formatstring_parser *formatstring_parser2;
  264   struct formatstring_parser *formatstring_parser3;
  265 };
 266
 267
  268 /* Forward declaration of local functions.  */
  269 static void usage (int status)
      /* usage () displays the usage information and exits.  The attribute is
         only emitted for GCC 2.5 and newer, as selected by the version test
         below.  */
  270 #if defined __GNUC__ && ((__GNUC__ == 2 && __GNUC_MINOR__ > 4) || __GNUC__ > 2)
  271         __attribute__ ((noreturn))
  272 #endif
  273 ;
      /* Called by main() for every -x / --exclude-file argument.  */
  274 static void read_exclusion_file (char *file_name);
      /* Called by main() once per input file, and once for the existing
         output file when --join-existing is given.  */
  275 static void extract_from_file (const char *file_name, extractor_ty extractor,
  276                                msgdomain_list_ty *mdlp);
      /* The header entry is created before extraction and finalized after
         it, unless --omit-header is in effect.  */
  277 static message_ty *construct_header (void);
  278 static void finalize_header (msgdomain_list_ty *mdlp);
      /* Map a language name to its extractor, and a file name extension to a
         language name.  main() treats a NULL result of the latter as "unknown
         extension".  */
  279 static extractor_ty language_to_extractor (const char *name);
  280 static const char *extension_to_language (const char *extension);
 281
 282
 283 int
 284 main (int argc, char *argv[])
 285 {
 286   int optchar;
 287   bool do_help = false;
 288   bool do_version = false;
 289   msgdomain_list_ty *mdlp;
 290   bool join_existing = false;
 291   bool no_default_keywords = false;
 292   bool some_additional_keywords = false;
 293   bool sort_by_msgid = false;
 294   bool sort_by_filepos = false;
 295   const char *file_name;
 296   const char *files_from = NULL;
 297   string_list_ty *file_list;
 298   char *output_file = NULL;
 299   const char *language = NULL;
 300   extractor_ty extractor = { NULL, NULL, NULL, NULL };
 301   int cnt;
 302   size_t i;
 303
 304   /* Set program name for messages.  */
 305   set_program_name (argv[0]);
 306   error_print_progname = maybe_print_progname;
 307
 308 #ifdef HAVE_SETLOCALE
 309   /* Set locale via LC_ALL.  */
 310   setlocale (LC_ALL, "");
 311 #endif
 312
 313   /* Set the text message domain.  */
 314   bindtextdomain (PACKAGE, relocate (LOCALEDIR));
 315   bindtextdomain ("bison-runtime", relocate (BISON_LOCALEDIR));
 316   textdomain (PACKAGE);
 317
 318   /* Ensure that write errors on stdout are detected.  */
 319   atexit (close_stdout);
 320
 321   /* Set initial value of variables.  */
 322   default_domain = MESSAGE_DOMAIN_DEFAULT;
 323   xgettext_global_source_encoding = po_charset_ascii;
 324   init_flag_table_c ();
 325   init_flag_table_objc ();
 326   init_flag_table_gcc_internal ();
 327   init_flag_table_sh ();
 328   init_flag_table_python ();
 329   init_flag_table_lisp ();
 330   init_flag_table_elisp ();
 331   init_flag_table_librep ();
 332   init_flag_table_scheme ();
 333   init_flag_table_java ();
 334   init_flag_table_csharp ();
 335   init_flag_table_awk ();
 336   init_flag_table_ycp ();
 337   init_flag_table_tcl ();
 338   init_flag_table_perl ();
 339   init_flag_table_php ();
 340   init_flag_table_lua ();
 341   init_flag_table_javascript ();
 342   init_flag_table_vala ();
 343
 344   while ((optchar = getopt_long (argc, argv,
 345                                  "ac::Cd:D:eEf:Fhijk::l:L:m::M::no:p:sTVw:x:",
 346                                  long_options, NULL)) != EOF)
 347     switch (optchar)
 348       {
 349       case '\0':                /* Long option.  */
 350         break;
 351
 352       case 'a':
 353         x_c_extract_all ();
 354         x_sh_extract_all ();
 355         x_python_extract_all ();
 356         x_lisp_extract_all ();
 357         x_elisp_extract_all ();
 358         x_librep_extract_all ();
 359         x_scheme_extract_all ();
 360         x_java_extract_all ();
 361         x_csharp_extract_all ();
 362         x_awk_extract_all ();
 363         x_tcl_extract_all ();
 364         x_perl_extract_all ();
 365         x_php_extract_all ();
 366         x_glade_extract_all ();
 367         x_lua_extract_all ();
 368         x_javascript_extract_all ();
 369         x_vala_extract_all ();
 370         break;
 371
 372       case 'c':
 373         if (optarg == NULL)
 374           {
 375             add_all_comments = true;
 376             comment_tag = NULL;
 377           }
 378         else
 379           {
 380             add_all_comments = false;
 381             comment_tag = optarg;
 382             /* We ignore leading white space.  */
 383             while (isspace ((unsigned char) *comment_tag))
 384               ++comment_tag;
 385           }
 386         break;
 387
 388       case 'C':
 389         language = "C++";
 390         break;
 391
 392       case 'd':
 393         default_domain = optarg;
 394         break;
 395
 396       case 'D':
 397         dir_list_append (optarg);
 398         break;
 399
 400       case 'e':
 401         message_print_style_escape (false);
 402         break;
 403
 404       case 'E':
 405         message_print_style_escape (true);
 406         break;
 407
 408       case 'f':
 409         files_from = optarg;
 410         break;
 411
 412       case 'F':
 413         sort_by_filepos = true;
 414         break;
 415
 416       case 'h':
 417         do_help = true;
 418         break;
 419
 420       case 'i':
 421         message_print_style_indent ();
 422         break;
 423
 424       case 'j':
 425         join_existing = true;
 426         break;
 427
 428       case 'k':
 429         if (optarg != NULL && *optarg == '\0')
 430           /* Make "--keyword=" work like "--keyword" and "-k".  */
 431           optarg = NULL;
 432         x_c_keyword (optarg);
 433         x_objc_keyword (optarg);
 434         x_sh_keyword (optarg);
 435         x_python_keyword (optarg);
 436         x_lisp_keyword (optarg);
 437         x_elisp_keyword (optarg);
 438         x_librep_keyword (optarg);
 439         x_scheme_keyword (optarg);
 440         x_java_keyword (optarg);
 441         x_csharp_keyword (optarg);
 442         x_awk_keyword (optarg);
 443         x_tcl_keyword (optarg);
 444         x_perl_keyword (optarg);
 445         x_php_keyword (optarg);
 446         x_glade_keyword (optarg);
 447         x_lua_keyword (optarg);
 448         x_javascript_keyword (optarg);
 449         x_vala_keyword (optarg);
 450         if (optarg == NULL)
 451           no_default_keywords = true;
 452         else
 453           some_additional_keywords = true;
 454         break;
 455
 456       case 'l':
 457         /* Accepted for backward compatibility with 0.10.35.  */
 458         break;
 459
 460       case 'L':
 461         language = optarg;
 462         break;
 463
 464       case 'm':
 465         /* -m takes an optional argument.  If none is given "" is assumed. */
 466         msgstr_prefix = optarg == NULL ? "" : optarg;
 467         break;
 468
 469       case 'M':
 470         /* -M takes an optional argument.  If none is given "" is assumed. */
 471         msgstr_suffix = optarg == NULL ? "" : optarg;
 472         break;
 473
 474       case 'n':
 475         line_comment = 1;
 476         break;
 477
 478       case 'o':
 479         output_file = optarg;
 480         break;
 481
 482       case 'p':
 483         {
 484           size_t len = strlen (optarg);
 485
 486           if (output_dir != NULL)
 487             free (output_dir);
 488
 489           if (optarg[len - 1] == '/')
 490             output_dir = xstrdup (optarg);
 491           else
 492             output_dir = xasprintf ("%s/", optarg);
 493         }
 494         break;
 495
 496       case 's':
 497         sort_by_msgid = true;
 498         break;
 499
 500       case 'S':
 501         message_print_style_uniforum ();
 502         break;
 503
 504       case 'T':
 505         x_c_trigraphs ();
 506         break;
 507
 508       case 'V':
 509         do_version = true;
 510         break;
 511
 512       case 'w':
 513         {
 514           int value;
 515           char *endp;
 516           value = strtol (optarg, &endp, 10);
 517           if (endp != optarg)
 518             message_page_width_set (value);
 519         }
 520         break;
 521
 522       case 'x':
 523         read_exclusion_file (optarg);
 524         break;
 525
 526       case CHAR_MAX + 1:        /* --copyright-holder */
 527         copyright_holder = optarg;
 528         break;
 529
 530       case CHAR_MAX + 2:        /* --foreign-user */
 531         copyright_holder = "";
 532         break;
 533
 534       case CHAR_MAX + 3:        /* --from-code */
 535         xgettext_global_source_encoding = po_charset_canonicalize (optarg);
 536         if (xgettext_global_source_encoding == NULL)
 537           xgettext_global_source_encoding = po_charset_ascii;
 538         break;
 539
 540       case CHAR_MAX + 4:        /* --no-wrap */
 541         message_page_width_ignore ();
 542         break;
 543
 544       case CHAR_MAX + 5:        /* --msgid-bugs-address */
 545         msgid_bugs_address = optarg;
 546         break;
 547
 548       case CHAR_MAX + 6:        /* --properties-output */
 549         output_syntax = &output_format_properties;
 550         break;
 551
 552       case CHAR_MAX + 7:        /* --stringtable-output */
 553         output_syntax = &output_format_stringtable;
 554         break;
 555
 556       case CHAR_MAX + 8:        /* --flag */
 557         xgettext_record_flag (optarg);
 558         break;
 559
 560       case CHAR_MAX + 9:        /* --qt */
 561         recognize_format_qt = true;
 562         break;
 563
 564       case CHAR_MAX + 10:       /* --kde */
 565         recognize_format_kde = true;
 566         break;
 567
 568       case CHAR_MAX + 11:       /* --boost */
 569         recognize_format_boost = true;
 570         break;
 571
 572       case CHAR_MAX + 12:       /* --package-name */
 573         package_name = optarg;
 574         break;
 575
 576       case CHAR_MAX + 13:       /* --package-version */
 577         package_version = optarg;
 578         break;
 579
 580       case CHAR_MAX + 14: /* --color */
 581         if (handle_color_option (optarg) || color_test_mode)
 582           usage (EXIT_FAILURE);
 583         break;
 584
 585       case CHAR_MAX + 15: /* --style */
 586         handle_style_option (optarg);
 587         break;
 588
 589       default:
 590         usage (EXIT_FAILURE);
 591         /* NOTREACHED */
 592       }
 593
 594   /* Version information requested.  */
 595   if (do_version)
 596     {
 597       printf ("%s (GNU %s) %s\n", basename (program_name), PACKAGE, VERSION);
 598       /* xgettext: no-wrap */
 599       printf (_("Copyright (C) %s Free Software Foundation, Inc.\n\
 600 License GPLv3+: GNU GPL version 3 or later <http://gnu.org/licenses/gpl.html>\n\
 601 This is free software: you are free to change and redistribute it.\n\
 602 There is NO WARRANTY, to the extent permitted by law.\n\
 603 "),
 604               "1995-1998, 2000-2013");
 605       printf (_("Written by %s.\n"), proper_name ("Ulrich Drepper"));
 606       exit (EXIT_SUCCESS);
 607     }
 608
 609   /* Help is requested.  */
 610   if (do_help)
 611     usage (EXIT_SUCCESS);
 612
 613   /* Verify selected options.  */
 614   if (!line_comment && sort_by_filepos)
 615     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 616            "--no-location", "--sort-by-file");
 617
 618   if (sort_by_msgid && sort_by_filepos)
 619     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 620            "--sort-output", "--sort-by-file");
 621
 622   /* We cannot support both Qt and KDE, or Qt and Boost, or KDE and Boost
 623      format strings, because there are only two formatstring parsers per
 624      language, and formatstring_c is the first one for C++.  */
 625   if (recognize_format_qt && recognize_format_kde)
 626     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 627            "--qt", "--kde");
 628   if (recognize_format_qt && recognize_format_boost)
 629     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 630            "--qt", "--boost");
 631   if (recognize_format_kde && recognize_format_boost)
 632     error (EXIT_FAILURE, 0, _("%s and %s are mutually exclusive"),
 633            "--kde", "--boost");
 634
 635   if (join_existing && strcmp (default_domain, "-") == 0)
 636     error (EXIT_FAILURE, 0, _("\
 637 --join-existing cannot be used when output is written to stdout"));
 638
 639   if (no_default_keywords && !some_additional_keywords)
 640     {
 641       error (0, 0, _("\
 642 xgettext cannot work without keywords to look for"));
 643       usage (EXIT_FAILURE);
 644     }
 645
 646   /* Test whether we have some input files given.  */
 647   if (files_from == NULL && optind >= argc)
 648     {
 649       error (EXIT_SUCCESS, 0, _("no input file given"));
 650       usage (EXIT_FAILURE);
 651     }
 652
 653   /* Determine extractor from language.  */
 654   if (language != NULL)
 655     extractor = language_to_extractor (language);
 656
 657   /* Canonize msgstr prefix/suffix.  */
 658   if (msgstr_prefix != NULL && msgstr_suffix == NULL)
 659     msgstr_suffix = "";
 660   else if (msgstr_prefix == NULL && msgstr_suffix != NULL)
 661     msgstr_prefix = "";
 662
 663   /* Default output directory is the current directory.  */
 664   if (output_dir == NULL)
 665     output_dir = ".";
 666
 667   /* Construct the name of the output file.  If the default domain has
 668      the special name "-" we write to stdout.  */
 669   if (output_file)
 670     {
 671       if (IS_ABSOLUTE_PATH (output_file) || strcmp (output_file, "-") == 0)
 672         file_name = xstrdup (output_file);
 673       else
 674         /* Please do NOT add a .po suffix! */
 675         file_name = xconcatenated_filename (output_dir, output_file, NULL);
 676     }
 677   else if (strcmp (default_domain, "-") == 0)
 678     file_name = "-";
 679   else
 680     file_name = xconcatenated_filename (output_dir, default_domain, ".po");
 681
 682   /* Determine list of files we have to process.  */
 683   if (files_from != NULL)
 684     file_list = read_names_from_file (files_from);
 685   else
 686     file_list = string_list_alloc ();
 687   /* Append names from command line.  */
 688   for (cnt = optind; cnt < argc; ++cnt)
 689     string_list_append_unique (file_list, argv[cnt]);
 690
 691   /* Allocate converter from xgettext_global_source_encoding to UTF-8 (except
 692      from ASCII or UTF-8, when this conversion is a no-op).  */
 693   if (xgettext_global_source_encoding != po_charset_ascii
 694       && xgettext_global_source_encoding != po_charset_utf8)
 695     {
 696 #if HAVE_ICONV
 697       iconv_t cd;
 698
 699       /* Avoid glibc-2.1 bug with EUC-KR.  */
 700 # if ((__GLIBC__ == 2 && __GLIBC_MINOR__ <= 1) && !defined __UCLIBC__) \
 701      && !defined _LIBICONV_VERSION
 702       if (strcmp (xgettext_global_source_encoding, "EUC-KR") == 0)
 703         cd = (iconv_t)(-1);
 704       else
 705 # endif
 706       cd = iconv_open (po_charset_utf8, xgettext_global_source_encoding);
 707       if (cd == (iconv_t)(-1))
 708         error (EXIT_FAILURE, 0, _("\
 709 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(), \
 710 and iconv() does not support this conversion."),
 711                xgettext_global_source_encoding, po_charset_utf8,
 712                basename (program_name));
 713       xgettext_global_source_iconv = cd;
 714 #else
 715       error (EXIT_FAILURE, 0, _("\
 716 Cannot convert from \"%s\" to \"%s\". %s relies on iconv(). \
 717 This version was built without iconv()."),
 718              xgettext_global_source_encoding, po_charset_utf8,
 719              basename (program_name));
 720 #endif
 721     }
 722
 723   /* Allocate a message list to remember all the messages.  */
 724   mdlp = msgdomain_list_alloc (true);
 725
 726   /* Generate a header, so that we know how and when this PO file was
 727      created.  */
 728   if (!xgettext_omit_header)
 729     message_list_append (mdlp->item[0]->messages, construct_header ());
 730
 731   /* Read in the old messages, so that we can add to them.  */
 732   if (join_existing)
 733     {
 734       /* Temporarily reset the directory list to empty, because file_name
 735          is an output file and therefore should not be searched for.  */
 736       void *saved_directory_list = dir_list_save_reset ();
 737       extractor_ty po_extractor = { extract_po, NULL, NULL, NULL };
 738
 739       extract_from_file (file_name, po_extractor, mdlp);
 740       if (!is_ascii_msgdomain_list (mdlp))
 741         mdlp = iconv_msgdomain_list (mdlp, "UTF-8", true, file_name);
 742
 743       dir_list_restore (saved_directory_list);
 744     }
 745
 746   /* Process all input files.  */
 747   for (i = 0; i < file_list->nitems; i++)
 748     {
 749       const char *filename;
 750       extractor_ty this_file_extractor;
 751
 752       filename = file_list->item[i];
 753
 754       if (extractor.func)
 755         this_file_extractor = extractor;
 756       else
 757         {
 758           const char *base;
 759           char *reduced;
 760           const char *extension;
 761           const char *language;
 762
 763           base = strrchr (filename, '/');
 764           if (!base)
 765             base = filename;
 766
 767           reduced = xstrdup (base);
 768           /* Remove a trailing ".in" - it's a generic suffix.  */
 769           if (strlen (reduced) >= 3
 770               && memcmp (reduced + strlen (reduced) - 3, ".in", 3) == 0)
 771             reduced[strlen (reduced) - 3] = '\0';
 772
 773           /* Work out what the file extension is.  */
 774           extension = strrchr (reduced, '.');
 775           if (extension)
 776             ++extension;
 777           else
 778             extension = "";
 779
 780           /* Derive the language from the extension, and the extractor
 781              function from the language.  */
 782           language = extension_to_language (extension);
 783           if (language == NULL)
 784             {
 785               error (0, 0, _("\
 786 warning: file '%s' extension '%s' is unknown; will try C"), filename, extension);
 787               language = "C";
 788             }
 789           this_file_extractor = language_to_extractor (language);
 790
 791           free (reduced);
 792         }
 793
 794       /* Extract the strings from the file.  */
 795       extract_from_file (filename, this_file_extractor, mdlp);
 796     }
 797   string_list_free (file_list);
 798
 799   /* Finalize the constructed header.  */
 800   if (!xgettext_omit_header)
 801     finalize_header (mdlp);
 802
 803   /* Free the allocated converter.  */
 804 #if HAVE_ICONV
 805   if (xgettext_global_source_encoding != po_charset_ascii
 806       && xgettext_global_source_encoding != po_charset_utf8)
 807     iconv_close (xgettext_global_source_iconv);
 808 #endif
 809
 810   /* Sorting the list of messages.  */
 811   if (sort_by_filepos)
 812     msgdomain_list_sort_by_filepos (mdlp);
 813   else if (sort_by_msgid)
 814     msgdomain_list_sort_by_msgid (mdlp);
 815
 816   /* Write the PO file.  */
 817   msgdomain_list_print (mdlp, file_name, output_syntax, force_po, do_debug);
 818
 819   exit (EXIT_SUCCESS);
 820 }
 821
 822
 823 /* Display usage information and exit.  */
 824 static void
 825 usage (int status)
 826 {
 827   if (status != EXIT_SUCCESS)
 828     fprintf (stderr, _("Try '%s --help' for more information.\n"),
 829              program_name);
 830   else
 831     {
 832       printf (_("\
 833 Usage: %s [OPTION] [INPUTFILE]...\n\
 834 "), program_name);
 835       printf ("\n");
 836       printf (_("\
 837 Extract translatable strings from given input files.\n\
 838 "));
 839       printf ("\n");
 840       /* xgettext: no-wrap */
 841       printf (_("\
 842 Mandatory arguments to long options are mandatory for short options too.\n\
 843 Similarly for optional arguments.\n\
 844 "));
 845       printf ("\n");
 846       printf (_("\
 847 Input file location:\n"));
 848       printf (_("\
 849   INPUTFILE ...               input files\n"));
 850       printf (_("\
 851   -f, --files-from=FILE       get list of input files from FILE\n"));
 852       printf (_("\
 853   -D, --directory=DIRECTORY   add DIRECTORY to list for input files search\n"));
 854       printf (_("\
 855 If input file is -, standard input is read.\n"));
 856       printf ("\n");
 857       printf (_("\
 858 Output file location:\n"));
 859       printf (_("\
 860   -d, --default-domain=NAME   use NAME.po for output (instead of messages.po)\n"));
 861       printf (_("\
 862   -o, --output=FILE           write output to specified file\n"));
 863       printf (_("\
 864   -p, --output-dir=DIR        output files will be placed in directory DIR\n"));
 865       printf (_("\
 866 If output file is -, output is written to standard output.\n"));
 867       printf ("\n");
 868       printf (_("\
 869 Choice of input file language:\n"));
 870       printf (_("\
 871   -L, --language=NAME         recognise the specified language\n\
 872                                 (C, C++, ObjectiveC, PO, Shell, Python, Lisp,\n\
 873                                 EmacsLisp, librep, Scheme, Smalltalk, Java,\n\
 874                                 JavaProperties, C#, awk, YCP, Tcl, Perl, PHP,\n\
 875                                 GCC-source, NXStringTable, RST, Glade, Lua,\n\
 876                                 JavaScript, Vala)\n"));
 877       printf (_("\
 878   -C, --c++                   shorthand for --language=C++\n"));
 879       printf (_("\
 880 By default the language is guessed depending on the input file name extension.\n"));
 881       printf ("\n");
 882       printf (_("\
 883 Input file interpretation:\n"));
 884       printf (_("\
 885       --from-code=NAME        encoding of input files\n\
 886                                 (except for Python, Tcl, Glade)\n"));
 887       printf (_("\
 888 By default the input files are assumed to be in ASCII.\n"));
 889       printf ("\n");
 890       printf (_("\
 891 Operation mode:\n"));
 892       printf (_("\
 893   -j, --join-existing         join messages with existing file\n"));
 894       printf (_("\
 895   -x, --exclude-file=FILE.po  entries from FILE.po are not extracted\n"));
 896       printf (_("\
 897   -cTAG, --add-comments=TAG   place comment blocks starting with TAG and\n\
 898                                 preceding keyword lines in output file\n\
 899   -c, --add-comments          place all comment blocks preceding keyword lines\n\
 900                                 in output file\n"));
 901       printf ("\n");
 902       printf (_("\
 903 Language specific options:\n"));
 904       printf (_("\
 905   -a, --extract-all           extract all strings\n"));
 906       printf (_("\
 907                                 (only languages C, C++, ObjectiveC, Shell,\n\
 908                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
 909                                 C#, awk, Tcl, Perl, PHP, GCC-source, Glade,\n\
 910                                 Lua, JavaScript, Vala)\n"));
 911       printf (_("\
 912   -kWORD, --keyword=WORD      look for WORD as an additional keyword\n\
 913   -k, --keyword               do not to use default keywords\n"));
 914       printf (_("\
 915                                 (only languages C, C++, ObjectiveC, Shell,\n\
 916                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
 917                                 C#, awk, Tcl, Perl, PHP, GCC-source, Glade,\n\
 918                                 Lua, JavaScript, Vala)\n"));
 919       printf (_("\
 920       --flag=WORD:ARG:FLAG    additional flag for strings inside the argument\n\
 921                               number ARG of keyword WORD\n"));
 922       printf (_("\
 923                                 (only languages C, C++, ObjectiveC, Shell,\n\
 924                                 Python, Lisp, EmacsLisp, librep, Scheme, Java,\n\
 925                                 C#, awk, YCP, Tcl, Perl, PHP, GCC-source,\n\
 926                                 Lua, JavaScript, Vala)\n"));
 927       printf (_("\
 928   -T, --trigraphs             understand ANSI C trigraphs for input\n"));
 929       printf (_("\
 930                                 (only languages C, C++, ObjectiveC)\n"));
 931       printf (_("\
 932       --qt                    recognize Qt format strings\n"));
 933       printf (_("\
 934                                 (only language C++)\n"));
 935       printf (_("\
 936       --kde                   recognize KDE 4 format strings\n"));
 937       printf (_("\
 938                                 (only language C++)\n"));
 939       printf (_("\
 940       --boost                 recognize Boost format strings\n"));
 941       printf (_("\
 942                                 (only language C++)\n"));
 943       printf (_("\
 944       --debug                 more detailed formatstring recognition result\n"));
 945       printf ("\n");
 946       printf (_("\
 947 Output details:\n"));
 948       printf (_("\
 949       --color                 use colors and other text attributes always\n\
 950       --color=WHEN            use colors and other text attributes if WHEN.\n\
 951                               WHEN may be 'always', 'never', 'auto', or 'html'.\n"));
 952       printf (_("\
 953       --style=STYLEFILE       specify CSS style rule file for --color\n"));
 954       printf (_("\
 955   -e, --no-escape             do not use C escapes in output (default)\n"));
 956       printf (_("\
 957   -E, --escape                use C escapes in output, no extended chars\n"));
 958       printf (_("\
 959       --force-po              write PO file even if empty\n"));
 960       printf (_("\
 961   -i, --indent                write the .po file using indented style\n"));
 962       printf (_("\
 963       --no-location           do not write '#: filename:line' lines\n"));
 964       printf (_("\
 965   -n, --add-location          generate '#: filename:line' lines (default)\n"));
 966       printf (_("\
 967       --strict                write out strict Uniforum conforming .po file\n"));
 968       printf (_("\
 969       --properties-output     write out a Java .properties file\n"));
 970       printf (_("\
 971       --stringtable-output    write out a NeXTstep/GNUstep .strings file\n"));
 972       printf (_("\
 973   -w, --width=NUMBER          set output page width\n"));
 974       printf (_("\
 975       --no-wrap               do not break long message lines, longer than\n\
 976                               the output page width, into several lines\n"));
 977       printf (_("\
 978   -s, --sort-output           generate sorted output\n"));
 979       printf (_("\
 980   -F, --sort-by-file          sort output by file location\n"));
 981       printf (_("\
 982       --omit-header           don't write header with 'msgid \"\"' entry\n"));
 983       printf (_("\
 984       --copyright-holder=STRING  set copyright holder in output\n"));
 985       printf (_("\
 986       --foreign-user          omit FSF copyright in output for foreign user\n"));
 987       printf (_("\
 988       --package-name=PACKAGE  set package name in output\n"));
 989       printf (_("\
 990       --package-version=VERSION  set package version in output\n"));
 991       printf (_("\
 992       --msgid-bugs-address=EMAIL@ADDRESS  set report address for msgid bugs\n"));
 993       printf (_("\
 994   -m[STRING], --msgstr-prefix[=STRING]  use STRING or \"\" as prefix for msgstr\n\
 995                                 values\n"));
 996       printf (_("\
 997   -M[STRING], --msgstr-suffix[=STRING]  use STRING or \"\" as suffix for msgstr\n\
 998                                 values\n"));
 999       printf ("\n");
1000       printf (_("\
1001 Informative output:\n"));
1002       printf (_("\
1003   -h, --help                  display this help and exit\n"));
1004       printf (_("\
1005   -V, --version               output version information and exit\n"));
1006       printf ("\n");
1007       /* TRANSLATORS: The placeholder indicates the bug-reporting address
1008          for this package.  Please add _another line_ saying
1009          "Report translation bugs to <...>\n" with the address for translation
1010          bugs (typically your translation team's web or email address).  */
1011       fputs (_("Report bugs to <bug-gnu-gettext@gnu.org>.\n"),
1012              stdout);
1013     }
1014
1015   exit (status);
1016 }
1017
1018
1019 static void
1020 exclude_directive_domain (abstract_catalog_reader_ty *pop, char *name)
1021 {
1022   po_gram_error_at_line (&gram_pos,
1023                          _("this file may not contain domain directives"));
1024 }
1025
1026
1027 static void
1028 exclude_directive_message (abstract_catalog_reader_ty *pop,
1029                            char *msgctxt,
1030                            char *msgid,
1031                            lex_pos_ty *msgid_pos,
1032                            char *msgid_plural,
1033                            char *msgstr, size_t msgstr_len,
1034                            lex_pos_ty *msgstr_pos,
1035                            char *prev_msgctxt,
1036                            char *prev_msgid,
1037                            char *prev_msgid_plural,
1038                            bool force_fuzzy, bool obsolete)
1039 {
1040   message_ty *mp;
1041
1042   /* See if this message ID has been seen before.  */
1043   if (exclude == NULL)
1044     exclude = message_list_alloc (true);
1045   mp = message_list_search (exclude, msgctxt, msgid);
1046   if (mp != NULL)
1047     free (msgid);
1048   else
1049     {
1050       mp = message_alloc (msgctxt, msgid, msgid_plural, "", 1, msgstr_pos);
1051       /* Do not free msgid.  */
1052       message_list_append (exclude, mp);
1053     }
1054
1055   /* All we care about is the msgid.  Throw the msgstr away.
1056      Don't even check for duplicate msgids.  */
1057   free (msgstr);
1058 }
1059
1060
1061 /* So that the one parser can be used for multiple programs, and also
1062    use good data hiding and encapsulation practices, an object
1063    oriented approach has been taken.  An object instance is allocated,
1064    and all actions resulting from the parse will be through
1065    invocations of method functions of that object.  */
1066
/* Method table used by read_exclusion_file() when it allocates its catalog
   reader.  Only the two directive callbacks defined above are provided; every
   other slot is left NULL.  */
static abstract_catalog_reader_class_ty exclude_methods =
{
  /* Size of the reader object to allocate.  */
  sizeof (abstract_catalog_reader_ty),
  NULL, /* constructor */
  NULL, /* destructor */
  NULL, /* parse_brief */
  NULL, /* parse_debrief */
  exclude_directive_domain,   /* rejects domain directives */
  exclude_directive_message,  /* records each message in 'exclude' */
  NULL, /* comment */
  NULL, /* comment_dot */
  NULL, /* comment_filepos */
  NULL, /* comment_special */
};
1081
1082
1083 static void
1084 read_exclusion_file (char *filename)
1085 {
1086   char *real_filename;
1087   FILE *fp = open_catalog_file (filename, &real_filename, true);
1088   abstract_catalog_reader_ty *pop;
1089
1090   pop = catalog_reader_alloc (&exclude_methods);
1091   catalog_reader_parse (pop, fp, real_filename, filename, &input_format_po);
1092   catalog_reader_free (pop);
1093
1094   if (fp != stdin)
1095     fclose (fp);
1096 }
1097
1098
1099 void
1100 split_keywordspec (const char *spec,
1101                    const char **endp, struct callshape *shapep)
1102 {
1103   const char *p;
1104   int argnum1 = 0;
1105   int argnum2 = 0;
1106   int argnumc = 0;
1107   bool argnum1_glib_context = false;
1108   bool argnum2_glib_context = false;
1109   int argtotal = 0;
1110   string_list_ty xcomments;
1111
1112   string_list_init (&xcomments);
1113
1114   /* Start parsing from the end.  */
1115   p = spec + strlen (spec);
1116   while (p > spec)
1117     {
1118       if (isdigit ((unsigned char) p[-1])
1119           || ((p[-1] == 'c' || p[-1] == 'g' || p[-1] == 't')
1120               && p - 1 > spec && isdigit ((unsigned char) p[-2])))
1121         {
1122           bool contextp = (p[-1] == 'c');
1123           bool glibp = (p[-1] == 'g');
1124           bool totalp = (p[-1] == 't');
1125
1126           do
1127             p--;
1128           while (p > spec && isdigit ((unsigned char) p[-1]));
1129
1130           if (p > spec && (p[-1] == ',' || p[-1] == ':'))
1131             {
1132               char *dummy;
1133               int arg = strtol (p, &dummy, 10);
1134
1135               if (contextp)
1136                 {
1137                   if (argnumc != 0)
1138                     /* Only one context argument can be given.  */
1139                     break;
1140                   argnumc = arg;
1141                 }
1142               else if (totalp)
1143                 {
1144                   if (argtotal != 0)
1145                     /* Only one total number of arguments can be given.  */
1146                     break;
1147                   argtotal = arg;
1148                 }
1149               else
1150                 {
1151                   if (argnum2 != 0)
1152                     /* At most two normal arguments can be given.  */
1153                     break;
1154                   argnum2 = argnum1;
1155                   argnum2_glib_context = argnum1_glib_context;
1156                   argnum1 = arg;
1157                   argnum1_glib_context = glibp;
1158                 }
1159             }
1160           else
1161             break;
1162         }
1163       else if (p[-1] == '"')
1164         {
1165           const char *xcomment_end;
1166
1167           p--;
1168           xcomment_end = p;
1169
1170           while (p > spec && p[-1] != '"')
1171             p--;
1172
1173           if (p > spec /* && p[-1] == '"' */)
1174             {
1175               const char *xcomment_start;
1176
1177               xcomment_start = p;
1178               p--;
1179               if (p > spec && (p[-1] == ',' || p[-1] == ':'))
1180                 {
1181                   size_t xcomment_len = xcomment_end - xcomment_start;
1182                   char *xcomment = XNMALLOC (xcomment_len + 1, char);
1183
1184                   memcpy (xcomment, xcomment_start, xcomment_len);
1185                   xcomment[xcomment_len] = '\0';
1186                   string_list_append (&xcomments, xcomment);
1187                 }
1188               else
1189                 break;
1190             }
1191           else
1192             break;
1193         }
1194       else
1195         break;
1196
1197       /* Here an element of the comma-separated list has been parsed.  */
1198       if (!(p > spec && (p[-1] == ',' || p[-1] == ':')))
1199         abort ();
1200       p--;
1201       if (*p == ':')
1202         {
1203           size_t i;
1204
1205           if (argnum1 == 0 && argnum2 == 0)
1206             /* At least one non-context argument must be given.  */
1207             break;
1208           if (argnumc != 0
1209               && (argnum1_glib_context || argnum2_glib_context))
1210             /* Incompatible ways to specify the context.  */
1211             break;
1212           *endp = p;
1213           shapep->argnum1 = (argnum1 > 0 ? argnum1 : 1);
1214           shapep->argnum2 = argnum2;
1215           shapep->argnumc = argnumc;
1216           shapep->argnum1_glib_context = argnum1_glib_context;
1217           shapep->argnum2_glib_context = argnum2_glib_context;
1218           shapep->argtotal = argtotal;
1219           /* Reverse the order of the xcomments.  */
1220           string_list_init (&shapep->xcomments);
1221           for (i = xcomments.nitems; i > 0; )
1222             string_list_append (&shapep->xcomments, xcomments.item[--i]);
1223           string_list_destroy (&xcomments);
1224           return;
1225         }
1226     }
1227
1228   /* Couldn't parse the desired syntax.  */
1229   *endp = spec + strlen (spec);
1230   shapep->argnum1 = 1;
1231   shapep->argnum2 = 0;
1232   shapep->argnumc = 0;
1233   shapep->argnum1_glib_context = false;
1234   shapep->argnum2_glib_context = false;
1235   shapep->argtotal = 0;
1236   string_list_init (&shapep->xcomments);
1237   string_list_destroy (&xcomments);
1238 }
1239
1240
1241 void
1242 insert_keyword_callshape (hash_table *table,
1243                           const char *keyword, size_t keyword_len,
1244                           const struct callshape *shape)
1245 {
1246   void *old_value;
1247
1248   if (hash_find_entry (table, keyword, keyword_len, &old_value))
1249     {
1250       /* Create a one-element 'struct callshapes'.  */
1251       struct callshapes *shapes = XMALLOC (struct callshapes);
1252       shapes->nshapes = 1;
1253       shapes->shapes[0] = *shape;
1254       keyword =
1255         (const char *) hash_insert_entry (table, keyword, keyword_len, shapes);
1256       if (keyword == NULL)
1257         abort ();
1258       shapes->keyword = keyword;
1259       shapes->keyword_len = keyword_len;
1260     }
1261   else
1262     {
1263       /* Found a 'struct callshapes'.  See whether it already contains the
1264          desired shape.  */
1265       struct callshapes *old_shapes = (struct callshapes *) old_value;
1266       bool found;
1267       size_t i;
1268
1269       found = false;
1270       for (i = 0; i < old_shapes->nshapes; i++)
1271         if (old_shapes->shapes[i].argnum1 == shape->argnum1
1272             && old_shapes->shapes[i].argnum2 == shape->argnum2
1273             && old_shapes->shapes[i].argnumc == shape->argnumc
1274             && old_shapes->shapes[i].argnum1_glib_context
1275                == shape->argnum1_glib_context
1276             && old_shapes->shapes[i].argnum2_glib_context
1277                == shape->argnum2_glib_context
1278             && old_shapes->shapes[i].argtotal == shape->argtotal)
1279           {
1280             old_shapes->shapes[i].xcomments = shape->xcomments;
1281             found = true;
1282             break;
1283           }
1284
1285       if (!found)
1286         {
1287           /* Replace the existing 'struct callshapes' with a new one.  */
1288           struct callshapes *shapes =
1289             (struct callshapes *)
1290             xmalloc (xsum (sizeof (struct callshapes),
1291                            xtimes (old_shapes->nshapes,
1292                                    sizeof (struct callshape))));
1293
1294           shapes->keyword = old_shapes->keyword;
1295           shapes->keyword_len = old_shapes->keyword_len;
1296           shapes->nshapes = old_shapes->nshapes + 1;
1297           for (i = 0; i < old_shapes->nshapes; i++)
1298             shapes->shapes[i] = old_shapes->shapes[i];
1299           shapes->shapes[i] = *shape;
1300           if (hash_set_value (table, keyword, keyword_len, shapes))
1301             abort ();
1302           free (old_shapes);
1303         }
1304     }
1305 }
1306
1307
1308 /* Null context.  */
1309 flag_context_ty null_context = { undecided, false, undecided, false };
1310
1311 /* Transparent context.  */
1312 flag_context_ty passthrough_context = { undecided, true, undecided, true };
1313
1314
1315 flag_context_ty
1316 inherited_context (flag_context_ty outer_context,
1317                    flag_context_ty modifier_context)
1318 {
1319   flag_context_ty result = modifier_context;
1320
1321   if (result.pass_format1)
1322     {
1323       result.is_format1 = outer_context.is_format1;
1324       result.pass_format1 = false;
1325     }
1326   if (result.pass_format2)
1327     {
1328       result.is_format2 = outer_context.is_format2;
1329       result.pass_format2 = false;
1330     }
1331   if (result.pass_format3)
1332     {
1333       result.is_format3 = outer_context.is_format3;
1334       result.pass_format3 = false;
1335     }
1336   return result;
1337 }
1338
1339
1340 /* Null context list iterator.  */
1341 flag_context_list_iterator_ty null_context_list_iterator = { 1, NULL };
1342
1343 /* Transparent context list iterator.  */
1344 static flag_context_list_ty passthrough_context_circular_list =
1345   {
1346     1,
1347     { undecided, true, undecided, true },
1348     &passthrough_context_circular_list
1349   };
1350 flag_context_list_iterator_ty passthrough_context_list_iterator =
1351   {
1352     1,
1353     &passthrough_context_circular_list
1354   };
1355
1356
1357 flag_context_list_iterator_ty
1358 flag_context_list_iterator (flag_context_list_ty *list)
1359 {
1360   flag_context_list_iterator_ty result;
1361
1362   result.argnum = 1;
1363   result.head = list;
1364   return result;
1365 }
1366
1367
1368 flag_context_ty
1369 flag_context_list_iterator_advance (flag_context_list_iterator_ty *iter)
1370 {
1371   if (iter->head == NULL)
1372     return null_context;
1373   if (iter->argnum == iter->head->argnum)
1374     {
1375       flag_context_ty result = iter->head->flags;
1376
1377       /* Special casing of circular list.  */
1378       if (iter->head != iter->head->next)
1379         {
1380           iter->head = iter->head->next;
1381           iter->argnum++;
1382         }
1383
1384       return result;
1385     }
1386   else
1387     {
1388       iter->argnum++;
1389       return null_context;
1390     }
1391 }
1392
1393
1394 flag_context_list_ty *
1395 flag_context_list_table_lookup (flag_context_list_table_ty *flag_table,
1396                                 const void *key, size_t keylen)
1397 {
1398   void *entry;
1399
1400   if (flag_table->table != NULL
1401       && hash_find_entry (flag_table, key, keylen, &entry) == 0)
1402     return (flag_context_list_ty *) entry;
1403   else
1404     return NULL;
1405 }
1406
1407
1408 static void
1409 flag_context_list_table_insert (flag_context_list_table_ty *table,
1410                                 unsigned int index,
1411                                 const char *name_start, const char *name_end,
1412                                 int argnum, enum is_format value, bool pass)
1413 {
1414   char *allocated_name = NULL;
1415
1416   if (table == &flag_table_lisp)
1417     {
1418       /* Convert NAME to upper case.  */
1419       size_t name_len = name_end - name_start;
1420       char *name = allocated_name = (char *) xmalloca (name_len);
1421       size_t i;
1422
1423       for (i = 0; i < name_len; i++)
1424         name[i] = (name_start[i] >= 'a' && name_start[i] <= 'z'
1425                    ? name_start[i] - 'a' + 'A'
1426                    : name_start[i]);
1427       name_start = name;
1428       name_end = name + name_len;
1429     }
1430   else if (table == &flag_table_tcl)
1431     {
1432       /* Remove redundant "::" prefix.  */
1433       if (name_end - name_start > 2
1434           && name_start[0] == ':' && name_start[1] == ':')
1435         name_start += 2;
1436     }
1437
1438   /* Insert the pair (VALUE, PASS) at INDEX in the element numbered ARGNUM
1439      of the list corresponding to NAME in the TABLE.  */
1440   if (table->table == NULL)
1441     hash_init (table, 100);
1442   {
1443     void *entry;
1444
1445     if (hash_find_entry (table, name_start, name_end - name_start, &entry) != 0)
1446       {
1447         /* Create new hash table entry.  */
1448         flag_context_list_ty *list = XMALLOC (flag_context_list_ty);
1449         list->argnum = argnum;
1450         memset (&list->flags, '\0', sizeof (list->flags));
1451         switch (index)
1452           {
1453           case 0:
1454             list->flags.is_format1 = value;
1455             list->flags.pass_format1 = pass;
1456             break;
1457           case 1:
1458             list->flags.is_format2 = value;
1459             list->flags.pass_format2 = pass;
1460             break;
1461           case 2:
1462             list->flags.is_format3 = value;
1463             list->flags.pass_format3 = pass;
1464             break;
1465           default:
1466             abort ();
1467           }
1468         list->next = NULL;
1469         hash_insert_entry (table, name_start, name_end - name_start, list);
1470       }
1471     else
1472       {
1473         flag_context_list_ty *list = (flag_context_list_ty *)entry;
1474         flag_context_list_ty **lastp = NULL;
1475         /* Invariant: list == (lastp != NULL ? *lastp : entry).  */
1476
1477         while (list != NULL && list->argnum < argnum)
1478           {
1479             lastp = &list->next;
1480             list = *lastp;
1481           }
1482         if (list != NULL && list->argnum == argnum)
1483           {
1484             /* Add this flag to the current argument number.  */
1485             switch (index)
1486               {
1487               case 0:
1488                 list->flags.is_format1 = value;
1489                 list->flags.pass_format1 = pass;
1490                 break;
1491               case 1:
1492                 list->flags.is_format2 = value;
1493                 list->flags.pass_format2 = pass;
1494                 break;
1495               case 2:
1496                 list->flags.is_format3 = value;
1497                 list->flags.pass_format3 = pass;
1498                 break;
1499               default:
1500                 abort ();
1501               }
1502           }
1503         else if (lastp != NULL)
1504           {
1505             /* Add a new list entry for this argument number.  */
1506             list = XMALLOC (flag_context_list_ty);
1507             list->argnum = argnum;
1508             memset (&list->flags, '\0', sizeof (list->flags));
1509             switch (index)
1510               {
1511               case 0:
1512                 list->flags.is_format1 = value;
1513                 list->flags.pass_format1 = pass;
1514                 break;
1515               case 1:
1516                 list->flags.is_format2 = value;
1517                 list->flags.pass_format2 = pass;
1518                 break;
1519               case 2:
1520                 list->flags.is_format3 = value;
1521                 list->flags.pass_format3 = pass;
1522                 break;
1523               default:
1524                 abort ();
1525               }
1526             list->next = *lastp;
1527             *lastp = list;
1528           }
1529         else
1530           {
1531             /* Add a new list entry for this argument number, at the beginning
1532                of the list.  Since we don't have an API for replacing the
1533                value of a key in the hash table, we have to copy the first
1534                list element.  */
1535             flag_context_list_ty *copy = XMALLOC (flag_context_list_ty);
1536             *copy = *list;
1537
1538             list->argnum = argnum;
1539             memset (&list->flags, '\0', sizeof (list->flags));
1540             switch (index)
1541               {
1542               case 0:
1543                 list->flags.is_format1 = value;
1544                 list->flags.pass_format1 = pass;
1545                 break;
1546               case 1:
1547                 list->flags.is_format2 = value;
1548                 list->flags.pass_format2 = pass;
1549                 break;
1550               case 2:
1551                 list->flags.is_format3 = value;
1552                 list->flags.pass_format3 = pass;
1553                 break;
1554               default:
1555                 abort ();
1556               }
1557             list->next = copy;
1558           }
1559       }
1560   }
1561
1562   if (allocated_name != NULL)
1563     freea (allocated_name);
1564 }
1565
1566
1567 void
1568 xgettext_record_flag (const char *optionstring)
1569 {
1570   /* Check the string has at least two colons.  (Colons in the name are
1571      allowed, needed for the Lisp and the Tcl backends.)  */
1572   const char *colon1;
1573   const char *colon2;
1574
1575   for (colon2 = optionstring + strlen (optionstring); ; )
1576     {
1577       if (colon2 == optionstring)
1578         goto err;
1579       colon2--;
1580       if (*colon2 == ':')
1581         break;
1582     }
1583   for (colon1 = colon2; ; )
1584     {
1585       if (colon1 == optionstring)
1586         goto err;
1587       colon1--;
1588       if (*colon1 == ':')
1589         break;
1590     }
1591   {
1592     const char *name_start = optionstring;
1593     const char *name_end = colon1;
1594     const char *argnum_start = colon1 + 1;
1595     const char *argnum_end = colon2;
1596     const char *flag = colon2 + 1;
1597     int argnum;
1598
1599     /* Check the parts' syntax.  */
1600     if (name_end == name_start)
1601       goto err;
1602     if (argnum_end == argnum_start)
1603       goto err;
1604     {
1605       char *endp;
1606       argnum = strtol (argnum_start, &endp, 10);
1607       if (endp != argnum_end)
1608         goto err;
1609     }
1610     if (argnum <= 0)
1611       goto err;
1612
1613     /* Analyze the flag part.  */
1614     {
1615       bool pass;
1616
1617       pass = false;
1618       if (strlen (flag) >= 5 && memcmp (flag, "pass-", 5) == 0)
1619         {
1620           pass = true;
1621           flag += 5;
1622         }
1623
1624       /* Unlike po_parse_comment_special(), we don't accept "fuzzy" or "wrap"
1625          here - it has no sense.  */
1626       if (strlen (flag) >= 7
1627           && memcmp (flag + strlen (flag) - 7, "-format", 7) == 0)
1628         {
1629           const char *p;
1630           size_t n;
1631           enum is_format value;
1632           size_t type;
1633
1634           p = flag;
1635           n = strlen (flag) - 7;
1636
1637           if (n >= 3 && memcmp (p, "no-", 3) == 0)
1638             {
1639               p += 3;
1640               n -= 3;
1641               value = no;
1642             }
1643           else if (n >= 9 && memcmp (p, "possible-", 9) == 0)
1644             {
1645               p += 9;
1646               n -= 9;
1647               value = possible;
1648             }
1649           else if (n >= 11 && memcmp (p, "impossible-", 11) == 0)
1650             {
1651               p += 11;
1652               n -= 11;
1653               value = impossible;
1654             }
1655           else
1656             value = yes_according_to_context;
1657
1658           for (type = 0; type < NFORMATS; type++)
1659             if (strlen (format_language[type]) == n
1660                 && memcmp (format_language[type], p, n) == 0)
1661               {
1662                 switch (type)
1663                   {
1664                   case format_c:
1665                     flag_context_list_table_insert (&flag_table_c, 0,
1666                                                     name_start, name_end,
1667                                                     argnum, value, pass);
1668                     flag_context_list_table_insert (&flag_table_cxx_qt, 0,
1669                                                     name_start, name_end,
1670                                                     argnum, value, pass);
1671                     flag_context_list_table_insert (&flag_table_cxx_kde, 0,
1672                                                     name_start, name_end,
1673                                                     argnum, value, pass);
1674                     flag_context_list_table_insert (&flag_table_cxx_boost, 0,
1675                                                     name_start, name_end,
1676                                                     argnum, value, pass);
1677                     flag_context_list_table_insert (&flag_table_objc, 0,
1678                                                     name_start, name_end,
1679                                                     argnum, value, pass);
1680                     break;
1681                   case format_objc:
1682                     flag_context_list_table_insert (&flag_table_objc, 1,
1683                                                     name_start, name_end,
1684                                                     argnum, value, pass);
1685                     break;
1686                   case format_sh:
1687                     flag_context_list_table_insert (&flag_table_sh, 0,
1688                                                     name_start, name_end,
1689                                                     argnum, value, pass);
1690                     break;
1691                   case format_python:
1692                     flag_context_list_table_insert (&flag_table_python, 0,
1693                                                     name_start, name_end,
1694                                                     argnum, value, pass);
1695                     break;
1696                   case format_python_brace:
1697                     flag_context_list_table_insert (&flag_table_python, 0,
1698                                                     name_start, name_end,
1699                                                     argnum, value, pass);
1700                     break;
1701                   case format_lisp:
1702                     flag_context_list_table_insert (&flag_table_lisp, 0,
1703                                                     name_start, name_end,
1704                                                     argnum, value, pass);
1705                     break;
1706                   case format_elisp:
1707                     flag_context_list_table_insert (&flag_table_elisp, 0,
1708                                                     name_start, name_end,
1709                                                     argnum, value, pass);
1710                     break;
1711                   case format_librep:
1712                     flag_context_list_table_insert (&flag_table_librep, 0,
1713                                                     name_start, name_end,
1714                                                     argnum, value, pass);
1715                     break;
1716                   case format_scheme:
1717                     flag_context_list_table_insert (&flag_table_scheme, 0,
1718                                                     name_start, name_end,
1719                                                     argnum, value, pass);
1720                     break;
1721                   case format_smalltalk:
1722                     break;
1723                   case format_java:
1724                     flag_context_list_table_insert (&flag_table_java, 0,
1725                                                     name_start, name_end,
1726                                                     argnum, value, pass);
1727                     break;
1728                   case format_csharp:
1729                     flag_context_list_table_insert (&flag_table_csharp, 0,
1730                                                     name_start, name_end,
1731                                                     argnum, value, pass);
1732                     break;
1733                   case format_awk:
1734                     flag_context_list_table_insert (&flag_table_awk, 0,
1735                                                     name_start, name_end,
1736                                                     argnum, value, pass);
1737                     break;
1738                   case format_pascal:
1739                     break;
1740                   case format_ycp:
1741                     flag_context_list_table_insert (&flag_table_ycp, 0,
1742                                                     name_start, name_end,
1743                                                     argnum, value, pass);
1744                     break;
1745                   case format_tcl:
1746                     flag_context_list_table_insert (&flag_table_tcl, 0,
1747                                                     name_start, name_end,
1748                                                     argnum, value, pass);
1749                     break;
1750                   case format_perl:
1751                     flag_context_list_table_insert (&flag_table_perl, 0,
1752                                                     name_start, name_end,
1753                                                     argnum, value, pass);
1754                     break;
1755                   case format_perl_brace:
1756                     flag_context_list_table_insert (&flag_table_perl, 1,
1757                                                     name_start, name_end,
1758                                                     argnum, value, pass);
1759                     break;
1760                   case format_php:
1761                     flag_context_list_table_insert (&flag_table_php, 0,
1762                                                     name_start, name_end,
1763                                                     argnum, value, pass);
1764                     break;
1765                   case format_gcc_internal:
1766                     flag_context_list_table_insert (&flag_table_gcc_internal, 0,
1767                                                     name_start, name_end,
1768                                                     argnum, value, pass);
1769                     break;
1770                   case format_gfc_internal:
1771                     flag_context_list_table_insert (&flag_table_gcc_internal, 1,
1772                                                     name_start, name_end,
1773                                                     argnum, value, pass);
1774                     break;
1775                   case format_qt:
1776                     flag_context_list_table_insert (&flag_table_cxx_qt, 1,
1777                                                     name_start, name_end,
1778                                                     argnum, value, pass);
1779                     break;
1780                   case format_qt_plural:
1781                     flag_context_list_table_insert (&flag_table_cxx_qt, 2,
1782                                                     name_start, name_end,
1783                                                     argnum, value, pass);
1784                     break;
1785                   case format_kde:
1786                     flag_context_list_table_insert (&flag_table_cxx_kde, 1,
1787                                                     name_start, name_end,
1788                                                     argnum, value, pass);
1789                     break;
1790                   case format_boost:
1791                     flag_context_list_table_insert (&flag_table_cxx_boost, 1,
1792                                                     name_start, name_end,
1793                                                     argnum, value, pass);
1794                     break;
1795                   case format_lua:
1796                     flag_context_list_table_insert (&flag_table_lua, 0,
1797                                                     name_start, name_end,
1798                                                     argnum, value, pass);
1799                     break;
1800                   case format_javascript:
1801                     flag_context_list_table_insert (&flag_table_javascript, 0,
1802                                                     name_start, name_end,
1803                                                     argnum, value, pass);
1804                     break;
1805                   default:
1806                     abort ();
1807                   }
1808                 return;
1809               }
1810           /* If the flag is not among the valid values, the optionstring is
1811              invalid.  */
1812         }
1813     }
1814   }
1815
1816 err:
1817   error (EXIT_FAILURE, 0, _("\
1818 A --flag argument doesn't have the <keyword>:<argnum>:[pass-]<flag> syntax: %s"),
1819          optionstring);
1820 }
1821
1822
1823 /* Comment handling: There is a list of automatic comments that may be appended
1824    to the next message.  Used by remember_a_message().  */
1825
1826 static string_list_ty *comment;
1827
1828 static void
1829 xgettext_comment_add (const char *str)
1830 {
1831   if (comment == NULL)
1832     comment = string_list_alloc ();
1833   string_list_append (comment, str);
1834 }
1835
1836 static const char *
1837 xgettext_comment (size_t n)
1838 {
1839   if (comment == NULL || n >= comment->nitems)
1840     return NULL;
1841   return comment->item[n];
1842 }
1843
1844 static void
1845 xgettext_comment_reset ()
1846 {
1847   if (comment != NULL)
1848     {
1849       string_list_free (comment);
1850       comment = NULL;
1851     }
1852 }
1853
1854
1855 refcounted_string_list_ty *savable_comment;
1856
1857 void
1858 savable_comment_add (const char *str)
1859 {
1860   if (savable_comment == NULL)
1861     {
1862       savable_comment = XMALLOC (refcounted_string_list_ty);
1863       savable_comment->refcount = 1;
1864       string_list_init (&savable_comment->contents);
1865     }
1866   else if (savable_comment->refcount > 1)
1867     {
1868       /* Unshare the list by making copies.  */
1869       struct string_list_ty *oldcontents;
1870       size_t i;
1871
1872       savable_comment->refcount--;
1873       oldcontents = &savable_comment->contents;
1874
1875       savable_comment = XMALLOC (refcounted_string_list_ty);
1876       savable_comment->refcount = 1;
1877       string_list_init (&savable_comment->contents);
1878       for (i = 0; i < oldcontents->nitems; i++)
1879         string_list_append (&savable_comment->contents, oldcontents->item[i]);
1880     }
1881   string_list_append (&savable_comment->contents, str);
1882 }
1883
1884 void
1885 savable_comment_reset ()
1886 {
1887   drop_reference (savable_comment);
1888   savable_comment = NULL;
1889 }
1890
1891 static void
1892 savable_comment_to_xgettext_comment (refcounted_string_list_ty *rslp)
1893 {
1894   xgettext_comment_reset ();
1895   if (rslp != NULL)
1896     {
1897       size_t i;
1898
1899       for (i = 0; i < rslp->contents.nitems; i++)
1900         xgettext_comment_add (rslp->contents.item[i]);
1901     }
1902 }
1903
1904
1905
/* Open the input file FN for reading and return the stream.
   - "-" stands for standard input.
   - An absolute file name is opened as it is.
   - Any other file name is tried in each directory returned by
     dir_list_nth(), in order; the first one that can be opened is used.
   A file that cannot be opened is a fatal error.
   Two freshly allocated strings are stored through the pointer arguments:
   *REAL_FILE_NAME_P is the name of the file that was actually opened, and
   *LOGICAL_FILE_NAME_P is the name that is reported by xgettext.  */
static FILE *
xgettext_open (const char *fn,
               char **logical_file_name_p, char **real_file_name_p)
{
  FILE *stream;
  char *real_name;
  char *reported_name;

  if (strcmp (fn, "-") == 0)
    {
      stream = stdin;
      real_name = xstrdup (_("standard input"));
      reported_name = xstrdup (real_name);
    }
  else if (IS_ABSOLUTE_PATH (fn))
    {
      real_name = xstrdup (fn);
      stream = fopen (fn, "r");
      if (stream == NULL)
        error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), fn);
      reported_name = xstrdup (real_name);
    }
  else
    {
      int idx = 0;

      for (;;)
        {
          const char *dir = dir_list_nth (idx);

          /* The search path is exhausted: the file does not exist.  */
          if (dir == NULL)
            error (EXIT_FAILURE, ENOENT, _("\
error while opening \"%s\" for reading"), fn);

          real_name = xconcatenated_filename (dir, fn, NULL);

          stream = fopen (real_name, "r");
          if (stream != NULL)
            break;

          /* Only "no such file" lets the search continue.  */
          if (errno != ENOENT)
            error (EXIT_FAILURE, errno, _("\
error while opening \"%s\" for reading"), real_name);
          free (real_name);

          idx++;
        }

      /* Here REAL_NAME is the file which was found along the directory
         search path, whereas the logical file name is set to the file
         name which was searched for.  */
      reported_name = xstrdup (fn);
    }

  *real_file_name_p = real_name;
  *logical_file_name_p = reported_name;
  return stream;
}
1965
1966
1967 /* Language dependent format string parser.
1968    NULL if the language has no notion of format strings.  */
1969 static struct formatstring_parser *current_formatstring_parser1;
1970 static struct formatstring_parser *current_formatstring_parser2;
1971 static struct formatstring_parser *current_formatstring_parser3;
1972
1973
1974 static void
1975 extract_from_file (const char *file_name, extractor_ty extractor,
1976                    msgdomain_list_ty *mdlp)
1977 {
1978   char *logical_file_name;
1979   char *real_file_name;
1980   FILE *fp = xgettext_open (file_name, &logical_file_name, &real_file_name);
1981
1982   /* Set the default for the source file encoding.  May be overridden by
1983      the extractor function.  */
1984   xgettext_current_source_encoding = xgettext_global_source_encoding;
1985 #if HAVE_ICONV
1986   xgettext_current_source_iconv = xgettext_global_source_iconv;
1987 #endif
1988
1989   current_formatstring_parser1 = extractor.formatstring_parser1;
1990   current_formatstring_parser2 = extractor.formatstring_parser2;
1991   current_formatstring_parser3 = extractor.formatstring_parser3;
1992   extractor.func (fp, real_file_name, logical_file_name, extractor.flag_table,
1993                   mdlp);
1994
1995   if (fp != stdin)
1996     fclose (fp);
1997   free (logical_file_name);
1998   free (real_file_name);
1999 }
2000
2001
2002
2003 /* Error message about non-ASCII character in a specific lexical context.  */
2004 char *
2005 non_ascii_error_message (lexical_context_ty lcontext,
2006                          const char *file_name, size_t line_number)
2007 {
2008   char buffer[21];
2009   char *errmsg;
2010
2011   if (line_number == (size_t)(-1))
2012     buffer[0] = '\0';
2013   else
2014     sprintf (buffer, ":%ld", (long) line_number);
2015
2016   switch (lcontext)
2017     {
2018     case lc_outside:
2019       errmsg =
2020         xasprintf (_("Non-ASCII character at %s%s."), file_name, buffer);
2021       break;
2022     case lc_comment:
2023       errmsg =
2024         xasprintf (_("Non-ASCII comment at or before %s%s."),
2025                    file_name, buffer);
2026       break;
2027     case lc_string:
2028       errmsg =
2029         xasprintf (_("Non-ASCII string at %s%s."), file_name, buffer);
2030       break;
2031     default:
2032       abort ();
2033     }
2034   return errmsg;
2035 }
2036
2037 /* Convert the given string from xgettext_current_source_encoding to
2038    the output file encoding (i.e. ASCII or UTF-8).
2039    The resulting string is either the argument string, or freshly allocated.
2040    The file_name and line_number are only used for error message purposes.  */
2041 char *
2042 from_current_source_encoding (const char *string,
2043                               lexical_context_ty lcontext,
2044                               const char *file_name, size_t line_number)
2045 {
2046   if (xgettext_current_source_encoding == po_charset_ascii)
2047     {
2048       if (!is_ascii_string (string))
2049         {
2050           multiline_error (xstrdup (""),
2051                            xasprintf ("%s\n%s\n",
2052                                       non_ascii_error_message (lcontext,
2053                                                                file_name,
2054                                                                line_number),
2055                                       _("\
2056 Please specify the source encoding through --from-code.")));
2057           exit (EXIT_FAILURE);
2058         }
2059     }
2060   else if (xgettext_current_source_encoding != po_charset_utf8)
2061     {
2062 #if HAVE_ICONV
2063       struct conversion_context context;
2064
2065       context.from_code = xgettext_current_source_encoding;
2066       context.to_code = po_charset_utf8;
2067       context.from_filename = file_name;
2068       context.message = NULL;
2069
2070       string = convert_string_directly (xgettext_current_source_iconv, string,
2071                                         &context);
2072 #else
2073       /* If we don't have iconv(), the only supported values for
2074          xgettext_global_source_encoding and thus also for
2075          xgettext_current_source_encoding are ASCII and UTF-8.
2076          convert_string_directly() should not be called in this case.  */
2077       abort ();
2078 #endif
2079     }
2080
2081   return (char *) string;
2082 }
2083
2084 #define CONVERT_STRING(string, lcontext) \
2085   string = from_current_source_encoding (string, lcontext, pos->file_name, \
2086                                          pos->line_number);
2087
2088
2089 /* Update the is_format[] flags depending on the information given in the
2090    context.  */
2091 static void
2092 set_format_flags_from_context (enum is_format is_format[NFORMATS],
2093                                flag_context_ty context, const char *string,
2094                                lex_pos_ty *pos, const char *pretty_msgstr)
2095 {
2096   size_t i;
2097
2098   if (context.is_format1 != undecided
2099       || context.is_format2 != undecided
2100       || context.is_format3 != undecided)
2101     for (i = 0; i < NFORMATS; i++)
2102       {
2103         if (is_format[i] == undecided)
2104           {
2105             if (formatstring_parsers[i] == current_formatstring_parser1
2106                 && context.is_format1 != undecided)
2107               is_format[i] = (enum is_format) context.is_format1;
2108             if (formatstring_parsers[i] == current_formatstring_parser2
2109                 && context.is_format2 != undecided)
2110               is_format[i] = (enum is_format) context.is_format2;
2111             if (formatstring_parsers[i] == current_formatstring_parser3
2112                 && context.is_format3 != undecided)
2113               is_format[i] = (enum is_format) context.is_format3;
2114           }
2115         if (possible_format_p (is_format[i]))
2116           {
2117             struct formatstring_parser *parser = formatstring_parsers[i];
2118             char *invalid_reason = NULL;
2119             void *descr = parser->parse (string, false, NULL, &invalid_reason);
2120
2121             if (descr != NULL)
2122               parser->free (descr);
2123             else
2124               {
2125                 /* The string is not a valid format string.  */
2126                 if (is_format[i] != possible)
2127                   {
2128                     char buffer[21];
2129
2130                     error_with_progname = false;
2131                     if (pos->line_number == (size_t)(-1))
2132                       buffer[0] = '\0';
2133                     else
2134                       sprintf (buffer, ":%ld", (long) pos->line_number);
2135                     multiline_warning (xasprintf (_("%s%s: warning: "),
2136                                                   pos->file_name, buffer),
2137                                        xasprintf (is_format[i] == yes_according_to_context
2138                                                   ? _("Although being used in a format string position, the %s is not a valid %s format string. Reason: %s\n")
2139                                                   : _("Although declared as such, the %s is not a valid %s format string. Reason: %s\n"),
2140                                                   pretty_msgstr,
2141                                                   format_language_pretty[i],
2142                                                   invalid_reason));
2143                     error_with_progname = true;
2144                   }
2145
2146                 is_format[i] = impossible;
2147                 free (invalid_reason);
2148               }
2149           }
2150       }
2151 }
2152
2153
2154 static void
2155 warn_format_string (enum is_format is_format[NFORMATS], const char *string,
2156                     lex_pos_ty *pos, const char *pretty_msgstr)
2157 {
2158   if (possible_format_p (is_format[format_python])
2159       && get_python_format_unnamed_arg_count (string) > 1)
2160     {
2161       char buffer[21];
2162
2163       error_with_progname = false;
2164       if (pos->line_number == (size_t)(-1))
2165         buffer[0] = '\0';
2166       else
2167         sprintf (buffer, ":%ld", (long) pos->line_number);
2168       multiline_warning (xasprintf (_("%s%s: warning: "),
2169                                     pos->file_name, buffer),
2170                          xasprintf (_("\
2171 '%s' format string with unnamed arguments cannot be properly localized:\n\
2172 The translator cannot reorder the arguments.\n\
2173 Please consider using a format string with named arguments,\n\
2174 and a mapping instead of a tuple for the arguments.\n"),
2175                                     pretty_msgstr));
2176       error_with_progname = true;
2177     }
2178 }
2179
2180
2181 message_ty *
2182 remember_a_message (message_list_ty *mlp, char *msgctxt, char *msgid,
2183                     flag_context_ty context, lex_pos_ty *pos,
2184                     const char *extracted_comment,
2185                     refcounted_string_list_ty *comment)
2186 {
2187   enum is_format is_format[NFORMATS];
2188   struct argument_range range;
2189   enum is_wrap do_wrap;
2190   message_ty *mp;
2191   char *msgstr;
2192   size_t i;
2193
2194   /* See whether we shall exclude this message.  */
2195   if (exclude != NULL && message_list_search (exclude, msgctxt, msgid) != NULL)
2196     {
2197       /* Tell the lexer to reset its comment buffer, so that the next
2198          message gets the correct comments.  */
2199       xgettext_comment_reset ();
2200       savable_comment_reset ();
2201
2202       if (msgctxt != NULL)
2203         free (msgctxt);
2204       free (msgid);
2205
2206       return NULL;
2207     }
2208
2209   savable_comment_to_xgettext_comment (comment);
2210
2211   for (i = 0; i < NFORMATS; i++)
2212     is_format[i] = undecided;
2213   range.min = -1;
2214   range.max = -1;
2215   do_wrap = undecided;
2216
2217   if (msgctxt != NULL)
2218     CONVERT_STRING (msgctxt, lc_string);
2219   CONVERT_STRING (msgid, lc_string);
2220
2221   if (msgctxt == NULL && msgid[0] == '\0' && !xgettext_omit_header)
2222     {
2223       char buffer[21];
2224
2225       error_with_progname = false;
2226       if (pos->line_number == (size_t)(-1))
2227         buffer[0] = '\0';
2228       else
2229         sprintf (buffer, ":%ld", (long) pos->line_number);
2230       multiline_warning (xasprintf (_("%s%s: warning: "), pos->file_name,
2231                                     buffer),
2232                          xstrdup (_("\
2233 Empty msgid.  It is reserved by GNU gettext:\n\
2234 gettext(\"\") returns the header entry with\n\
2235 meta information, not the empty string.\n")));
2236       error_with_progname = true;
2237     }
2238
2239   /* See if we have seen this message before.  */
2240   mp = message_list_search (mlp, msgctxt, msgid);
2241   if (mp != NULL)
2242     {
2243       if (msgctxt != NULL)
2244         free (msgctxt);
2245       free (msgid);
2246       for (i = 0; i < NFORMATS; i++)
2247         is_format[i] = mp->is_format[i];
2248       do_wrap = mp->do_wrap;
2249     }
2250   else
2251     {
2252       /* Construct the msgstr from the prefix and suffix, otherwise use the
2253          empty string.  */
2254       if (msgstr_prefix)
2255         msgstr = xasprintf ("%s%s%s", msgstr_prefix, msgid, msgstr_suffix);
2256       else
2257         msgstr = "";
2258
2259       /* Allocate a new message and append the message to the list.  */
2260       mp = message_alloc (msgctxt, msgid, NULL, msgstr, strlen (msgstr) + 1,
2261                           pos);
2262       /* Do not free msgctxt and msgid.  */
2263       message_list_append (mlp, mp);
2264     }
2265
2266   /* Determine whether the context specifies that the msgid is a format
2267      string.  */
2268   set_format_flags_from_context (is_format, context, mp->msgid, pos, "msgid");
2269
2270   /* Ask the lexer for the comments it has seen.  */
2271   {
2272     size_t nitems_before;
2273     size_t nitems_after;
2274     int j;
2275     bool add_all_remaining_comments;
2276
2277     nitems_before = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
2278
2279     if (extracted_comment != NULL)
2280       {
2281         char *copy = xstrdup (extracted_comment);
2282         char *rest;
2283
2284         rest = copy;
2285         while (*rest != '\0')
2286           {
2287             char *newline = strchr (rest, '\n');
2288
2289             if (newline != NULL)
2290               {
2291                 *newline = '\0';
2292                 message_comment_dot_append (mp, rest);
2293                 rest = newline + 1;
2294               }
2295             else
2296               {
2297                 message_comment_dot_append (mp, rest);
2298                 break;
2299               }
2300           }
2301         free (copy);
2302       }
2303
2304     add_all_remaining_comments = add_all_comments;
2305     for (j = 0; ; ++j)
2306       {
2307         const char *s = xgettext_comment (j);
2308         const char *t;
2309         if (s == NULL)
2310           break;
2311
2312         CONVERT_STRING (s, lc_comment);
2313
2314         /* To reduce the possibility of unwanted matches we do a two
2315            step match: the line must contain 'xgettext:' and one of
2316            the possible format description strings.  */
2317         if ((t = c_strstr (s, "xgettext:")) != NULL)
2318           {
2319             bool tmp_fuzzy;
2320             enum is_format tmp_format[NFORMATS];
2321             struct argument_range tmp_range;
2322             enum is_wrap tmp_wrap;
2323             bool interesting;
2324
2325             t += strlen ("xgettext:");
2326
2327             po_parse_comment_special (t, &tmp_fuzzy, tmp_format, &tmp_range,
2328                                       &tmp_wrap);
2329
2330             interesting = false;
2331             for (i = 0; i < NFORMATS; i++)
2332               if (tmp_format[i] != undecided)
2333                 {
2334                   is_format[i] = tmp_format[i];
2335                   interesting = true;
2336                 }
2337             if (has_range_p (tmp_range))
2338               {
2339                 range = tmp_range;
2340                 interesting = true;
2341               }
2342             if (tmp_wrap != undecided)
2343               {
2344                 do_wrap = tmp_wrap;
2345                 interesting = true;
2346               }
2347
2348             /* If the "xgettext:" marker was followed by an interesting
2349                keyword, and we updated our is_format/do_wrap variables,
2350                we don't print the comment as a #. comment.  */
2351             if (interesting)
2352               continue;
2353           }
2354         /* When the comment tag is seen, it drags in not only the line
2355            which it starts, but all remaining comment lines.  */
2356         if (add_all_remaining_comments
2357             || (add_all_remaining_comments =
2358                   (comment_tag != NULL
2359                    && strncmp (s, comment_tag, strlen (comment_tag)) == 0)))
2360           message_comment_dot_append (mp, s);
2361       }
2362
2363     nitems_after = (mp->comment_dot != NULL ? mp->comment_dot->nitems : 0);
2364
2365     /* Don't add the comments if they are a repetition of the tail of the
2366        already present comments.  This avoids unneeded duplication if the
2367        same message appears several times, each time with the same comment.  */
2368     if (nitems_before < nitems_after)
2369       {
2370         size_t added = nitems_after - nitems_before;
2371
2372         if (added <= nitems_before)
2373           {
2374             bool repeated = true;
2375
2376             for (i = 0; i < added; i++)
2377               if (strcmp (mp->comment_dot->item[nitems_before - added + i],
2378                           mp->comment_dot->item[nitems_before + i]) != 0)
2379                 {
2380                   repeated = false;
2381                   break;
2382                 }
2383
2384             if (repeated)
2385               {
2386                 for (i = 0; i < added; i++)
2387                   free ((char *) mp->comment_dot->item[nitems_before + i]);
2388                 mp->comment_dot->nitems = nitems_before;
2389               }
2390           }
2391       }
2392   }
2393
2394   /* If it is not already decided, through programmer comments, whether the
2395      msgid is a format string, examine the msgid.  This is a heuristic.  */
2396   for (i = 0; i < NFORMATS; i++)
2397     {
2398       if (is_format[i] == undecided
2399           && (formatstring_parsers[i] == current_formatstring_parser1
2400               || formatstring_parsers[i] == current_formatstring_parser2
2401               || formatstring_parsers[i] == current_formatstring_parser3)
2402           /* But avoid redundancy: objc-format is stronger than c-format.  */
2403           && !(i == format_c && possible_format_p (is_format[format_objc]))
2404           && !(i == format_objc && possible_format_p (is_format[format_c]))
2405           /* Avoid flagging a string as c-format when it's known to be a
2406              qt-format or qt-plural-format or kde-format or boost-format
2407              string.  */
2408           && !(i == format_c
2409                && (possible_format_p (is_format[format_qt])
2410                    || possible_format_p (is_format[format_qt_plural])
2411                    || possible_format_p (is_format[format_kde])
2412                    || possible_format_p (is_format[format_boost]))))
2413         {
2414           struct formatstring_parser *parser = formatstring_parsers[i];
2415           char *invalid_reason = NULL;
2416           void *descr = parser->parse (mp->msgid, false, NULL, &invalid_reason);
2417
2418           if (descr != NULL)
2419             {
2420               /* msgid is a valid format string.  We mark only those msgids
2421                  as format strings which contain at least one format directive
2422                  and thus are format strings with a high probability.  We
2423                  don't mark strings without directives as format strings,
2424                  because that would force the programmer to add
2425                  "xgettext: no-c-format" anywhere where a translator wishes
2426                  to use a percent sign.  So, the msgfmt checking will not be
2427                  perfect.  Oh well.  */
2428               if (parser->get_number_of_directives (descr) > 0
2429                   && !(parser->is_unlikely_intentional != NULL
2430                        && parser->is_unlikely_intentional (descr)))
2431                 is_format[i] = possible;
2432
2433               parser->free (descr);
2434             }
2435           else
2436             {
2437               /* msgid is not a valid format string.  */
2438               is_format[i] = impossible;
2439               free (invalid_reason);
2440             }
2441         }
2442       mp->is_format[i] = is_format[i];
2443     }
2444
2445   if (has_range_p (range))
2446     {
2447       if (has_range_p (mp->range))
2448         {
2449           if (range.min < mp->range.min)
2450             mp->range.min = range.min;
2451           if (range.max > mp->range.max)
2452             mp->range.max = range.max;
2453         }
2454       else
2455         mp->range = range;
2456     }
2457
2458   mp->do_wrap = do_wrap == no ? no : yes;       /* By default we wrap.  */
2459
2460   /* Warn about the use of non-reorderable format strings when the programming
2461      language also provides reorderable format strings.  */
2462   warn_format_string (is_format, mp->msgid, pos, "msgid");
2463
2464   /* Remember where we saw this msgid.  */
2465   if (line_comment)
2466     message_comment_filepos (mp, pos->file_name, pos->line_number);
2467
2468   /* Tell the lexer to reset its comment buffer, so that the next
2469      message gets the correct comments.  */
2470   xgettext_comment_reset ();
2471   savable_comment_reset ();
2472
2473   return mp;
2474 }
2475
2476
2477 void
2478 remember_a_message_plural (message_ty *mp, char *string,
2479                            flag_context_ty context, lex_pos_ty *pos,
2480                            refcounted_string_list_ty *comment)
2481 {
2482   char *msgid_plural;
2483   char *msgstr1;
2484   size_t msgstr1_len;
2485   char *msgstr;
2486   size_t i;
2487
2488   msgid_plural = string;
2489
2490   savable_comment_to_xgettext_comment (comment);
2491
2492   CONVERT_STRING (msgid_plural, lc_string);
2493
2494   /* See if the message is already a plural message.  */
2495   if (mp->msgid_plural == NULL)
2496     {
2497       mp->msgid_plural = msgid_plural;
2498
2499       /* Construct the first plural form from the prefix and suffix,
2500          otherwise use the empty string.  The translator will have to
2501          provide additional plural forms.  */
2502       if (msgstr_prefix)
2503         msgstr1 =
2504           xasprintf ("%s%s%s", msgstr_prefix, msgid_plural, msgstr_suffix);
2505       else
2506         msgstr1 = "";
2507       msgstr1_len = strlen (msgstr1) + 1;
2508       msgstr = XNMALLOC (mp->msgstr_len + msgstr1_len, char);
2509       memcpy (msgstr, mp->msgstr, mp->msgstr_len);
2510       memcpy (msgstr + mp->msgstr_len, msgstr1, msgstr1_len);
2511       mp->msgstr = msgstr;
2512       mp->msgstr_len = mp->msgstr_len + msgstr1_len;
2513       if (msgstr_prefix)
2514         free (msgstr1);
2515
2516       /* Determine whether the context specifies that the msgid_plural is a
2517          format string.  */
2518       set_format_flags_from_context (mp->is_format, context, mp->msgid_plural,
2519                                      pos, "msgid_plural");
2520
2521       /* If it is not already decided, through programmer comments or
2522          the msgid, whether the msgid is a format string, examine the
2523          msgid_plural.  This is a heuristic.  */
2524       for (i = 0; i < NFORMATS; i++)
2525         if ((formatstring_parsers[i] == current_formatstring_parser1
2526              || formatstring_parsers[i] == current_formatstring_parser2
2527              || formatstring_parsers[i] == current_formatstring_parser3)
2528             && (mp->is_format[i] == undecided || mp->is_format[i] == possible)
2529             /* But avoid redundancy: objc-format is stronger than c-format.  */
2530             && !(i == format_c
2531                  && possible_format_p (mp->is_format[format_objc]))
2532             && !(i == format_objc
2533                  && possible_format_p (mp->is_format[format_c]))
2534             /* Avoid flagging a string as c-format when it's known to be a
2535                qt-format or qt-plural-format or boost-format string.  */
2536             && !(i == format_c
2537                  && (possible_format_p (mp->is_format[format_qt])
2538                      || possible_format_p (mp->is_format[format_qt_plural])
2539                      || possible_format_p (mp->is_format[format_kde])
2540                      || possible_format_p (mp->is_format[format_boost]))))
2541           {
2542             struct formatstring_parser *parser = formatstring_parsers[i];
2543             char *invalid_reason = NULL;
2544             void *descr =
2545               parser->parse (mp->msgid_plural, false, NULL, &invalid_reason);
2546
2547             if (descr != NULL)
2548               {
2549                 /* Same heuristic as in remember_a_message.  */
2550                 if (parser->get_number_of_directives (descr) > 0
2551                     && !(parser->is_unlikely_intentional != NULL
2552                          && parser->is_unlikely_intentional (descr)))
2553                   mp->is_format[i] = possible;
2554
2555                 parser->free (descr);
2556               }
2557             else
2558               {
2559                 /* msgid_plural is not a valid format string.  */
2560                 mp->is_format[i] = impossible;
2561                 free (invalid_reason);
2562               }
2563           }
2564
2565       /* Warn about the use of non-reorderable format strings when the programming
2566          language also provides reorderable format strings.  */
2567       warn_format_string (mp->is_format, mp->msgid_plural, pos, "msgid_plural");
2568     }
2569   else
2570     free (msgid_plural);
2571
2572   /* Tell the lexer to reset its comment buffer, so that the next
2573      message gets the correct comments.  */
2574   xgettext_comment_reset ();
2575   savable_comment_reset ();
2576 }
2577
2578
2579 struct arglist_parser *
2580 arglist_parser_alloc (message_list_ty *mlp, const struct callshapes *shapes)
2581 {
2582   if (shapes == NULL || shapes->nshapes == 0)
2583     {
2584       struct arglist_parser *ap =
2585         (struct arglist_parser *)
2586         xmalloc (offsetof (struct arglist_parser, alternative[0]));
2587
2588       ap->mlp = mlp;
2589       ap->keyword = NULL;
2590       ap->keyword_len = 0;
2591       ap->nalternatives = 0;
2592
2593       return ap;
2594     }
2595   else
2596     {
2597       struct arglist_parser *ap =
2598         (struct arglist_parser *)
2599         xmalloc (xsum (sizeof (struct arglist_parser),
2600                        xtimes (shapes->nshapes - 1,
2601                                sizeof (struct partial_call))));
2602       size_t i;
2603
2604       ap->mlp = mlp;
2605       ap->keyword = shapes->keyword;
2606       ap->keyword_len = shapes->keyword_len;
2607       ap->nalternatives = shapes->nshapes;
2608       for (i = 0; i < shapes->nshapes; i++)
2609         {
2610           ap->alternative[i].argnumc = shapes->shapes[i].argnumc;
2611           ap->alternative[i].argnum1 = shapes->shapes[i].argnum1;
2612           ap->alternative[i].argnum2 = shapes->shapes[i].argnum2;
2613           ap->alternative[i].argnum1_glib_context =
2614             shapes->shapes[i].argnum1_glib_context;
2615           ap->alternative[i].argnum2_glib_context =
2616             shapes->shapes[i].argnum2_glib_context;
2617           ap->alternative[i].argtotal = shapes->shapes[i].argtotal;
2618           ap->alternative[i].xcomments = shapes->shapes[i].xcomments;
2619           ap->alternative[i].msgctxt = NULL;
2620           ap->alternative[i].msgctxt_pos.file_name = NULL;
2621           ap->alternative[i].msgctxt_pos.line_number = (size_t)(-1);
2622           ap->alternative[i].msgid = NULL;
2623           ap->alternative[i].msgid_context = null_context;
2624           ap->alternative[i].msgid_pos.file_name = NULL;
2625           ap->alternative[i].msgid_pos.line_number = (size_t)(-1);
2626           ap->alternative[i].msgid_comment = NULL;
2627           ap->alternative[i].msgid_plural = NULL;
2628           ap->alternative[i].msgid_plural_context = null_context;
2629           ap->alternative[i].msgid_plural_pos.file_name = NULL;
2630           ap->alternative[i].msgid_plural_pos.line_number = (size_t)(-1);
2631         }
2632
2633       return ap;
2634     }
2635 }
2636
2637
2638 struct arglist_parser *
2639 arglist_parser_clone (struct arglist_parser *ap)
2640 {
2641   struct arglist_parser *copy =
2642     (struct arglist_parser *)
2643     xmalloc (xsum (sizeof (struct arglist_parser) - sizeof (struct partial_call),
2644                    xtimes (ap->nalternatives, sizeof (struct partial_call))));
2645   size_t i;
2646
2647   copy->mlp = ap->mlp;
2648   copy->keyword = ap->keyword;
2649   copy->keyword_len = ap->keyword_len;
2650   copy->nalternatives = ap->nalternatives;
2651   for (i = 0; i < ap->nalternatives; i++)
2652     {
2653       const struct partial_call *cp = &ap->alternative[i];
2654       struct partial_call *ccp = &copy->alternative[i];
2655
2656       ccp->argnumc = cp->argnumc;
2657       ccp->argnum1 = cp->argnum1;
2658       ccp->argnum2 = cp->argnum2;
2659       ccp->argnum1_glib_context = cp->argnum1_glib_context;
2660       ccp->argnum2_glib_context = cp->argnum2_glib_context;
2661       ccp->argtotal = cp->argtotal;
2662       ccp->xcomments = cp->xcomments;
2663       ccp->msgctxt = (cp->msgctxt != NULL ? xstrdup (cp->msgctxt) : NULL);
2664       ccp->msgctxt_pos = cp->msgctxt_pos;
2665       ccp->msgid = (cp->msgid != NULL ? xstrdup (cp->msgid) : NULL);
2666       ccp->msgid_context = cp->msgid_context;
2667       ccp->msgid_pos = cp->msgctxt_pos;
2668       ccp->msgid_comment = add_reference (cp->msgid_comment);
2669       ccp->msgid_plural =
2670         (cp->msgid_plural != NULL ? xstrdup (cp->msgid_plural) : NULL);
2671       ccp->msgid_plural_context = cp->msgid_plural_context;
2672       ccp->msgid_plural_pos = cp->msgid_plural_pos;
2673     }
2674
2675   return copy;
2676 }
2677
2678
2679 void
2680 arglist_parser_remember (struct arglist_parser *ap,
2681                          int argnum, char *string,
2682                          flag_context_ty context,
2683                          char *file_name, size_t line_number,
2684                          refcounted_string_list_ty *comment)
2685 {
2686   bool stored_string = false;
2687   size_t nalternatives = ap->nalternatives;
2688   size_t i;
2689
2690   if (!(argnum > 0))
2691     abort ();
2692   for (i = 0; i < nalternatives; i++)
2693     {
2694       struct partial_call *cp = &ap->alternative[i];
2695
2696       if (argnum == cp->argnumc)
2697         {
2698           cp->msgctxt = string;
2699           cp->msgctxt_pos.file_name = file_name;
2700           cp->msgctxt_pos.line_number = line_number;
2701           stored_string = true;
2702           /* Mark msgctxt as done.  */
2703           cp->argnumc = 0;
2704         }
2705       else
2706         {
2707           if (argnum == cp->argnum1)
2708             {
2709               cp->msgid = string;
2710               cp->msgid_context = context;
2711               cp->msgid_pos.file_name = file_name;
2712               cp->msgid_pos.line_number = line_number;
2713               cp->msgid_comment = add_reference (comment);
2714               stored_string = true;
2715               /* Mark msgid as done.  */
2716               cp->argnum1 = 0;
2717             }
2718           if (argnum == cp->argnum2)
2719             {
2720               cp->msgid_plural = string;
2721               cp->msgid_plural_context = context;
2722               cp->msgid_plural_pos.file_name = file_name;
2723               cp->msgid_plural_pos.line_number = line_number;
2724               stored_string = true;
2725               /* Mark msgid_plural as done.  */
2726               cp->argnum2 = 0;
2727             }
2728         }
2729     }
2730   /* Note: There is a memory leak here: When string was stored but is later
2731      not used by arglist_parser_done, we don't free it.  */
2732   if (!stored_string)
2733     free (string);
2734 }
2735
2736
2737 bool
2738 arglist_parser_decidedp (struct arglist_parser *ap, int argnum)
2739 {
2740   size_t i;
2741
2742   /* Test whether all alternatives are decided.
2743      Note: A decided alternative can be complete
2744        cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2745        && cp->argtotal == 0
2746      or it can be failed if no literal strings were found at the specified
2747      argument positions:
2748        cp->argnumc <= argnum && cp->argnum1 <= argnum && cp->argnum2 <= argnum
2749      or it can be failed if the number of arguments is exceeded:
2750        cp->argtotal > 0 && cp->argtotal < argnum
2751    */
2752   for (i = 0; i < ap->nalternatives; i++)
2753     {
2754       struct partial_call *cp = &ap->alternative[i];
2755
2756       if (!((cp->argnumc <= argnum
2757              && cp->argnum1 <= argnum
2758              && cp->argnum2 <= argnum)
2759             || (cp->argtotal > 0 && cp->argtotal < argnum)))
2760         /* cp is still undecided.  */
2761         return false;
2762     }
2763   return true;
2764 }
2765
2766
2767 void
2768 arglist_parser_done (struct arglist_parser *ap, int argnum)
2769 {
2770   size_t ncomplete;
2771   size_t i;
2772
2773   /* Determine the number of complete calls.  */
2774   ncomplete = 0;
2775   for (i = 0; i < ap->nalternatives; i++)
2776     {
2777       struct partial_call *cp = &ap->alternative[i];
2778
2779       if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2780           && (cp->argtotal == 0 || cp->argtotal == argnum))
2781         ncomplete++;
2782     }
2783
2784   if (ncomplete > 0)
2785     {
2786       struct partial_call *best_cp = NULL;
2787       bool ambiguous = false;
2788
2789       /* Find complete calls where msgctxt, msgid, msgid_plural are all
2790          provided.  */
2791       for (i = 0; i < ap->nalternatives; i++)
2792         {
2793           struct partial_call *cp = &ap->alternative[i];
2794
2795           if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2796               && (cp->argtotal == 0 || cp->argtotal == argnum)
2797               && cp->msgctxt != NULL
2798               && cp->msgid != NULL
2799               && cp->msgid_plural != NULL)
2800             {
2801               if (best_cp != NULL)
2802                 {
2803                   ambiguous = true;
2804                   break;
2805                 }
2806               best_cp = cp;
2807             }
2808         }
2809
2810       if (best_cp == NULL)
2811         {
2812           struct partial_call *best_cp1 = NULL;
2813           struct partial_call *best_cp2 = NULL;
2814
2815           /* Find complete calls where msgctxt, msgid are provided.  */
2816           for (i = 0; i < ap->nalternatives; i++)
2817             {
2818               struct partial_call *cp = &ap->alternative[i];
2819
2820               if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2821                   && (cp->argtotal == 0 || cp->argtotal == argnum)
2822                   && cp->msgctxt != NULL
2823                   && cp->msgid != NULL)
2824                 {
2825                   if (best_cp1 != NULL)
2826                     {
2827                       ambiguous = true;
2828                       break;
2829                     }
2830                   best_cp1 = cp;
2831                 }
2832             }
2833
2834           /* Find complete calls where msgid, msgid_plural are provided.  */
2835           for (i = 0; i < ap->nalternatives; i++)
2836             {
2837               struct partial_call *cp = &ap->alternative[i];
2838
2839               if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2840                   && (cp->argtotal == 0 || cp->argtotal == argnum)
2841                   && cp->msgid != NULL
2842                   && cp->msgid_plural != NULL)
2843                 {
2844                   if (best_cp2 != NULL)
2845                     {
2846                       ambiguous = true;
2847                       break;
2848                     }
2849                   best_cp2 = cp;
2850                 }
2851             }
2852
2853           if (best_cp1 != NULL)
2854             best_cp = best_cp1;
2855           if (best_cp2 != NULL)
2856             {
2857               if (best_cp != NULL)
2858                 ambiguous = true;
2859               else
2860                 best_cp = best_cp2;
2861             }
2862         }
2863
2864       if (best_cp == NULL)
2865         {
2866           /* Find complete calls where msgid is provided.  */
2867           for (i = 0; i < ap->nalternatives; i++)
2868             {
2869               struct partial_call *cp = &ap->alternative[i];
2870
2871               if (cp->argnumc == 0 && cp->argnum1 == 0 && cp->argnum2 == 0
2872                   && (cp->argtotal == 0 || cp->argtotal == argnum)
2873                   && cp->msgid != NULL)
2874                 {
2875                   if (best_cp != NULL)
2876                     {
2877                       ambiguous = true;
2878                       break;
2879                     }
2880                   best_cp = cp;
2881                 }
2882             }
2883         }
2884
2885       if (ambiguous)
2886         {
2887           error_with_progname = false;
2888           error_at_line (0, 0,
2889                          best_cp->msgid_pos.file_name,
2890                          best_cp->msgid_pos.line_number,
2891                          _("ambiguous argument specification for keyword '%.*s'"),
2892                          (int) ap->keyword_len, ap->keyword);
2893           error_with_progname = true;
2894         }
2895
2896       if (best_cp != NULL)
2897         {
2898           /* best_cp indicates the best found complete call.
2899              Now call remember_a_message.  */
2900           message_ty *mp;
2901
2902           /* Split strings in the GNOME glib syntax "msgctxt|msgid".  */
2903           if (best_cp->argnum1_glib_context || best_cp->argnum2_glib_context)
2904             /* split_keywordspec should not allow the context to be specified
2905                in two different ways.  */
2906             if (best_cp->msgctxt != NULL)
2907               abort ();
2908           if (best_cp->argnum1_glib_context)
2909             {
2910               const char *separator = strchr (best_cp->msgid, '|');
2911
2912               if (separator == NULL)
2913                 {
2914                   error_with_progname = false;
2915                   error_at_line (0, 0,
2916                                  best_cp->msgid_pos.file_name,
2917                                  best_cp->msgid_pos.line_number,
2918                                  _("warning: missing context for keyword '%.*s'"),
2919                                  (int) ap->keyword_len, ap->keyword);
2920                   error_with_progname = true;
2921                 }
2922               else
2923                 {
2924                   size_t ctxt_len = separator - best_cp->msgid;
2925                   char *ctxt = XNMALLOC (ctxt_len + 1, char);
2926
2927                   memcpy (ctxt, best_cp->msgid, ctxt_len);
2928                   ctxt[ctxt_len] = '\0';
2929                   best_cp->msgctxt = ctxt;
2930                   best_cp->msgid = xstrdup (separator + 1);
2931                 }
2932             }
2933           if (best_cp->msgid_plural != NULL && best_cp->argnum2_glib_context)
2934             {
2935               const char *separator = strchr (best_cp->msgid_plural, '|');
2936
2937               if (separator == NULL)
2938                 {
2939                   error_with_progname = false;
2940                   error_at_line (0, 0,
2941                                  best_cp->msgid_plural_pos.file_name,
2942                                  best_cp->msgid_plural_pos.line_number,
2943                                  _("warning: missing context for plural argument of keyword '%.*s'"),
2944                                  (int) ap->keyword_len, ap->keyword);
2945                   error_with_progname = true;
2946                 }
2947               else
2948                 {
2949                   size_t ctxt_len = separator - best_cp->msgid_plural;
2950                   char *ctxt = XNMALLOC (ctxt_len + 1, char);
2951
2952                   memcpy (ctxt, best_cp->msgid_plural, ctxt_len);
2953                   ctxt[ctxt_len] = '\0';
2954                   if (best_cp->msgctxt == NULL)
2955                     best_cp->msgctxt = ctxt;
2956                   else
2957                     {
2958                       if (strcmp (ctxt, best_cp->msgctxt) != 0)
2959                         {
2960                           error_with_progname = false;
2961                           error_at_line (0, 0,
2962                                          best_cp->msgid_plural_pos.file_name,
2963                                          best_cp->msgid_plural_pos.line_number,
2964                                          _("context mismatch between singular and plural form"));
2965                           error_with_progname = true;
2966                         }
2967                       free (ctxt);
2968                     }
2969                   best_cp->msgid_plural = xstrdup (separator + 1);
2970                 }
2971             }
2972
2973           {
2974             flag_context_ty msgid_context = best_cp->msgid_context;
2975             flag_context_ty msgid_plural_context = best_cp->msgid_plural_context;
2976
2977             /* Special support for the 3-argument tr operator in Qt:
2978                When --qt and --keyword=tr:1,1,2c,3t are specified, add to the
2979                context the information that the argument is expeected to be a
2980                qt-plural-format.  */
2981             if (recognize_format_qt
2982                 && current_formatstring_parser3 == &formatstring_qt_plural
2983                 && best_cp->msgid_plural == best_cp->msgid)
2984               {
2985                 msgid_context.is_format3 = yes_according_to_context;
2986                 msgid_plural_context.is_format3 = yes_according_to_context;
2987               }
2988
2989             mp = remember_a_message (ap->mlp, best_cp->msgctxt, best_cp->msgid,
2990                                      msgid_context,
2991                                      &best_cp->msgid_pos,
2992                                      NULL, best_cp->msgid_comment);
2993             if (mp != NULL && best_cp->msgid_plural != NULL)
2994               remember_a_message_plural (mp, best_cp->msgid_plural,
2995                                          msgid_plural_context,
2996                                          &best_cp->msgid_plural_pos,
2997                                          NULL);
2998           }
2999
3000           if (best_cp->xcomments.nitems > 0)
3001             {
3002               /* Add best_cp->xcomments to mp->comment_dot, unless already
3003                  present.  */
3004               size_t i;
3005
3006               for (i = 0; i < best_cp->xcomments.nitems; i++)
3007                 {
3008                   const char *xcomment = best_cp->xcomments.item[i];
3009                   bool found = false;
3010
3011                   if (mp->comment_dot != NULL)
3012                     {
3013                       size_t j;
3014
3015                       for (j = 0; j < mp->comment_dot->nitems; j++)
3016                         if (strcmp (xcomment, mp->comment_dot->item[j]) == 0)
3017                           {
3018                             found = true;
3019                             break;
3020                           }
3021                     }
3022                   if (!found)
3023                     message_comment_dot_append (mp, xcomment);
3024                 }
3025             }
3026         }
3027     }
3028   else
3029     {
3030       /* No complete call was parsed.  */
3031       /* Note: There is a memory leak here: When there is more than one
3032          alternative, the same string can be stored in multiple alternatives,
3033          and it's not easy to free all strings reliably.  */
3034       if (ap->nalternatives == 1)
3035         {
3036           if (ap->alternative[0].msgctxt != NULL)
3037             free (ap->alternative[0].msgctxt);
3038           if (ap->alternative[0].msgid != NULL)
3039             free (ap->alternative[0].msgid);
3040           if (ap->alternative[0].msgid_plural != NULL)
3041             free (ap->alternative[0].msgid_plural);
3042         }
3043     }
3044
3045   for (i = 0; i < ap->nalternatives; i++)
3046     drop_reference (ap->alternative[i].msgid_comment);
3047   free (ap);
3048 }
3049
3050
3051 static message_ty *
3052 construct_header ()
3053 {
3054   char *project_id_version;
3055   time_t now;
3056   char *timestring;
3057   message_ty *mp;
3058   char *msgstr;
3059   char *comment;
3060   static lex_pos_ty pos = { __FILE__, __LINE__ };
3061
3062   if (package_name != NULL)
3063     {
3064       if (package_version != NULL)
3065         project_id_version = xasprintf ("%s %s", package_name, package_version);
3066       else
3067         project_id_version = xasprintf ("%s", package_name);
3068     }
3069   else
3070     project_id_version = xstrdup ("PACKAGE VERSION");
3071
3072   if (msgid_bugs_address != NULL && msgid_bugs_address[0] == '\0')
3073     multiline_warning (xasprintf (_("warning: ")),
3074                        xstrdup (_("\
3075 The option --msgid-bugs-address was not specified.\n\
3076 If you are using a 'Makevars' file, please specify\n\
3077 the MSGID_BUGS_ADDRESS variable there; otherwise please\n\
3078 specify an --msgid-bugs-address command line option.\n\
3079 ")));
3080
3081   time (&now);
3082   timestring = po_strftime (&now);
3083
3084   msgstr = xasprintf ("\
3085 Project-Id-Version: %s\n\
3086 Report-Msgid-Bugs-To: %s\n\
3087 POT-Creation-Date: %s\n\
3088 PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n\
3089 Last-Translator: FULL NAME <EMAIL@ADDRESS>\n\
3090 Language-Team: LANGUAGE <LL@li.org>\n\
3091 Language: \n\
3092 MIME-Version: 1.0\n\
3093 Content-Type: text/plain; charset=CHARSET\n\
3094 Content-Transfer-Encoding: 8bit\n",
3095                       project_id_version,
3096                       msgid_bugs_address != NULL ? msgid_bugs_address : "",
3097                       timestring);
3098   free (timestring);
3099   free (project_id_version);
3100
3101   mp = message_alloc (NULL, "", NULL, msgstr, strlen (msgstr) + 1, &pos);
3102
3103   if (copyright_holder[0] != '\0')
3104     comment = xasprintf ("\
3105 SOME DESCRIPTIVE TITLE.\n\
3106 Copyright (C) YEAR %s\n\
3107 This file is distributed under the same license as the PACKAGE package.\n\
3108 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n",
3109                          copyright_holder);
3110   else
3111     comment = xstrdup ("\
3112 SOME DESCRIPTIVE TITLE.\n\
3113 This file is put in the public domain.\n\
3114 FIRST AUTHOR <EMAIL@ADDRESS>, YEAR.\n");
3115   message_comment_append (mp, comment);
3116   free (comment);
3117
3118   mp->is_fuzzy = true;
3119
3120   return mp;
3121 }
3122
3123 static void
3124 finalize_header (msgdomain_list_ty *mdlp)
3125 {
3126   /* If the generated PO file has plural forms, add a Plural-Forms template
3127      to the constructed header.  */
3128   {
3129     bool has_plural;
3130     size_t i, j;
3131
3132     has_plural = false;
3133     for (i = 0; i < mdlp->nitems; i++)
3134       {
3135         message_list_ty *mlp = mdlp->item[i]->messages;
3136
3137         for (j = 0; j < mlp->nitems; j++)
3138           {
3139             message_ty *mp = mlp->item[j];
3140
3141             if (mp->msgid_plural != NULL)
3142               {
3143                 has_plural = true;
3144                 break;
3145               }
3146           }
3147         if (has_plural)
3148           break;
3149       }
3150
3151     if (has_plural)
3152       {
3153         message_ty *header =
3154           message_list_search (mdlp->item[0]->messages, NULL, "");
3155         if (header != NULL
3156             && c_strstr (header->msgstr, "Plural-Forms:") == NULL)
3157           {
3158             size_t insertpos = strlen (header->msgstr);
3159             const char *suffix;
3160             size_t suffix_len;
3161             char *new_msgstr;
3162
3163             suffix = "\nPlural-Forms: nplurals=INTEGER; plural=EXPRESSION;\n";
3164             if (insertpos == 0 || header->msgstr[insertpos-1] == '\n')
3165               suffix++;
3166             suffix_len = strlen (suffix);
3167             new_msgstr = XNMALLOC (header->msgstr_len + suffix_len, char);
3168             memcpy (new_msgstr, header->msgstr, insertpos);
3169             memcpy (new_msgstr + insertpos, suffix, suffix_len);
3170             memcpy (new_msgstr + insertpos + suffix_len,
3171                     header->msgstr + insertpos,
3172                     header->msgstr_len - insertpos);
3173             header->msgstr = new_msgstr;
3174             header->msgstr_len = header->msgstr_len + suffix_len;
3175           }
3176       }
3177   }
3178
3179   /* If not all the strings were plain ASCII, or if the output syntax
3180      requires a charset conversion, set the charset in the header to UTF-8.
3181      All messages have already been converted to UTF-8 in remember_a_message
3182      and remember_a_message_plural.  */
3183   {
3184     bool has_nonascii = false;
3185     size_t i;
3186
3187     for (i = 0; i < mdlp->nitems; i++)
3188       {
3189         message_list_ty *mlp = mdlp->item[i]->messages;
3190
3191         if (!is_ascii_message_list (mlp))
3192           has_nonascii = true;
3193       }
3194
3195     if (has_nonascii || output_syntax->requires_utf8)
3196       {
3197         message_list_ty *mlp = mdlp->item[0]->messages;
3198
3199         iconv_message_list (mlp, po_charset_utf8, po_charset_utf8, NULL);
3200       }
3201   }
3202 }
3203
3204
/* Number of elements of the array A.  A must be an actual array, not a
   pointer.  The argument is parenthesized so that any expression of array
   type can be passed safely.  */
#define SIZEOF(a) (sizeof (a) / sizeof ((a)[0]))
/* Pointer just past the last element of the array A.  */
#define ENDOF(a) ((a) + SIZEOF (a))
3207
3208
3209 static extractor_ty
3210 language_to_extractor (const char *name)
3211 {
3212   struct table_ty
3213   {
3214     const char *name;
3215     extractor_func func;
3216     flag_context_list_table_ty *flag_table;
3217     struct formatstring_parser *formatstring_parser1;
3218     struct formatstring_parser *formatstring_parser2;
3219   };
3220   typedef struct table_ty table_ty;
3221
3222   static table_ty table[] =
3223   {
3224     SCANNERS_C
3225     SCANNERS_PO
3226     SCANNERS_SH
3227     SCANNERS_PYTHON
3228     SCANNERS_LISP
3229     SCANNERS_ELISP
3230     SCANNERS_LIBREP
3231     SCANNERS_SCHEME
3232     SCANNERS_SMALLTALK
3233     SCANNERS_JAVA
3234     SCANNERS_PROPERTIES
3235     SCANNERS_CSHARP
3236     SCANNERS_AWK
3237     SCANNERS_YCP
3238     SCANNERS_TCL
3239     SCANNERS_PERL
3240     SCANNERS_PHP
3241     SCANNERS_STRINGTABLE
3242     SCANNERS_RST
3243     SCANNERS_GLADE
3244     SCANNERS_LUA
3245     SCANNERS_JAVASCRIPT
3246     SCANNERS_VALA
3247     /* Here may follow more languages and their scanners: pike, etc...
3248        Make sure new scanners honor the --exclude-file option.  */
3249   };
3250
3251   table_ty *tp;
3252
3253   for (tp = table; tp < ENDOF(table); ++tp)
3254     if (c_strcasecmp (name, tp->name) == 0)
3255       {
3256         extractor_ty result;
3257
3258         result.func = tp->func;
3259         result.flag_table = tp->flag_table;
3260         result.formatstring_parser1 = tp->formatstring_parser1;
3261         result.formatstring_parser2 = tp->formatstring_parser2;
3262         result.formatstring_parser3 = NULL;
3263
3264         /* Handle --qt.  It's preferrable to handle this facility here rather
3265            than through an option --language=C++/Qt because the latter would
3266            conflict with the language "C++" regarding the file extensions.  */
3267         if (recognize_format_qt && strcmp (tp->name, "C++") == 0)
3268           {
3269             result.flag_table = &flag_table_cxx_qt;
3270             result.formatstring_parser2 = &formatstring_qt;
3271             result.formatstring_parser3 = &formatstring_qt_plural;
3272           }
3273         /* Likewise for --kde.  */
3274         if (recognize_format_kde && strcmp (tp->name, "C++") == 0)
3275           {
3276             result.flag_table = &flag_table_cxx_kde;
3277             result.formatstring_parser2 = &formatstring_kde;
3278           }
3279         /* Likewise for --boost.  */
3280         if (recognize_format_boost && strcmp (tp->name, "C++") == 0)
3281           {
3282             result.flag_table = &flag_table_cxx_boost;
3283             result.formatstring_parser2 = &formatstring_boost;
3284           }
3285
3286         return result;
3287       }
3288
3289   error (EXIT_FAILURE, 0, _("language '%s' unknown"), name);
3290   /* NOTREACHED */
3291   {
3292     extractor_ty result = { NULL, NULL, NULL, NULL };
3293     return result;
3294   }
3295 }
3296
3297
3298 static const char *
3299 extension_to_language (const char *extension)
3300 {
3301   struct table_ty
3302   {
3303     const char *extension;
3304     const char *language;
3305   };
3306   typedef struct table_ty table_ty;
3307
3308   static table_ty table[] =
3309   {
3310     EXTENSIONS_C
3311     EXTENSIONS_PO
3312     EXTENSIONS_SH
3313     EXTENSIONS_PYTHON
3314     EXTENSIONS_LISP
3315     EXTENSIONS_ELISP
3316     EXTENSIONS_LIBREP
3317     EXTENSIONS_SCHEME
3318     EXTENSIONS_SMALLTALK
3319     EXTENSIONS_JAVA
3320     EXTENSIONS_PROPERTIES
3321     EXTENSIONS_CSHARP
3322     EXTENSIONS_AWK
3323     EXTENSIONS_YCP
3324     EXTENSIONS_TCL
3325     EXTENSIONS_PERL
3326     EXTENSIONS_PHP
3327     EXTENSIONS_STRINGTABLE
3328     EXTENSIONS_RST
3329     EXTENSIONS_GLADE
3330     EXTENSIONS_LUA
3331     EXTENSIONS_JAVASCRIPT
3332     EXTENSIONS_VALA
3333     /* Here may follow more file extensions... */
3334   };
3335
3336   table_ty *tp;
3337
3338   for (tp = table; tp < ENDOF(table); ++tp)
3339     if (strcmp (extension, tp->extension) == 0)
3340       return tp->language;
3341   return NULL;
3342 }