gettext-tools/src/x-c.c

   1 /* xgettext C/C++/ObjectiveC backend.
   2    Copyright (C) 1995-1998, 2000-2009, 2012, 2015 Free Software
   3    Foundation, Inc.
   4
   5    This file was written by Peter Miller <millerp@canb.auug.org.au>
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 /* Specification.  */
  25 #include "x-c.h"
  26
  27 #include <assert.h>
  28 #include <errno.h>
  29 #include <stdbool.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33
  34 #include "message.h"
  35 #include "xgettext.h"
  36 #include "error.h"
  37 #include "error-progname.h"
  38 #include "xalloc.h"
  39 #include "xvasprintf.h"
  40 #include "hash.h"
  41 #include "po-charset.h"
  42 #include "gettext.h"
  43
  44 #define _(s) gettext(s)
  45
  46 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
  47
  48
  49 /* The ANSI C standard defines several phases of translation:
  50
  51    1. Terminate line by \n, regardless of the external representation
  52       of a text line.  Stdio does this for us.
  53
  54    2. Convert trigraphs to their single character equivalents.
  55
  56    3. Concatenate each line ending in backslash (\) with the following
  57       line.
  58
  59    4. Replace each comment with a space character.
  60
  61    5. Parse each resulting logical line as preprocessing tokens and
  62       white space.
  63
  64    6. Recognize and carry out directives (it also expands macros on
  65       non-directive lines, which we do not do here).
  66
  67    7. Replaces escape sequences within character strings with their
  68       single character equivalents (we do this in step 5, because we
  69       don't have to worry about the #include argument).
  70
  71    8. Concatenates adjacent string literals to form single string
  72       literals (because we don't expand macros, there are a few things
  73       we will miss).
  74
  75    9. Converts the remaining preprocessing tokens to C tokens and
  76       discards any white space from the translation unit.
  77
  78    This lexer implements the above, and presents the scanner (in
  79    xgettext.c) with a stream of C tokens.  The comments are
  80    accumulated in a buffer, and given to xgettext when asked for.  */
  81
  82
  83 /* ========================= Lexer customization.  ========================= */
  84
  85 static bool trigraphs = false;
  86
  87 void
  88 x_c_trigraphs ()
  89 {
  90   trigraphs = true;
  91 }
  92
  93
  94 /* ====================== Keyword set customization.  ====================== */
  95
  96 /* If true extract all strings.  */
  97 static bool extract_all = false;
  98
  99 static hash_table c_keywords;
 100 static hash_table objc_keywords;
 101 static bool default_keywords = true;
 102
 103
 104 void
 105 x_c_extract_all ()
 106 {
 107   extract_all = true;
 108 }
 109
 110
 111 static void
 112 add_keyword (const char *name, hash_table *keywords)
 113 {
 114   if (name == NULL)
 115     default_keywords = false;
 116   else
 117     {
 118       const char *end;
 119       struct callshape shape;
 120       const char *colon;
 121
 122       if (keywords->table == NULL)
 123         hash_init (keywords, 100);
 124
 125       split_keywordspec (name, &end, &shape);
 126
 127       /* The characters between name and end should form a valid C identifier.
 128          A colon means an invalid parse in split_keywordspec().  */
 129       colon = strchr (name, ':');
 130       if (colon == NULL || colon >= end)
 131         insert_keyword_callshape (keywords, name, end - name, &shape);
 132     }
 133 }
 134
 135 void
 136 x_c_keyword (const char *name)
 137 {
 138   add_keyword (name, &c_keywords);
 139 }
 140
 141 void
 142 x_objc_keyword (const char *name)
 143 {
 144   add_keyword (name, &objc_keywords);
 145 }
 146
 147 static bool additional_keywords_kde;
 148
 149 void
 150 activate_additional_keywords_kde ()
 151 {
 152   additional_keywords_kde = true;
 153 }
 154
 155 /* Finish initializing the keywords hash tables.
 156    Called after argument processing, before each file is processed.  */
 157 static void
 158 init_keywords ()
 159 {
 160   if (default_keywords)
 161     {
 162       /* When adding new keywords here, also update the documentation in
 163          xgettext.texi!  */
 164       x_c_keyword ("gettext");
 165       x_c_keyword ("dgettext:2");
 166       x_c_keyword ("dcgettext:2");
 167       x_c_keyword ("ngettext:1,2");
 168       x_c_keyword ("dngettext:2,3");
 169       x_c_keyword ("dcngettext:2,3");
 170       x_c_keyword ("gettext_noop");
 171       x_c_keyword ("pgettext:1c,2");
 172       x_c_keyword ("dpgettext:2c,3");
 173       x_c_keyword ("dcpgettext:2c,3");
 174       x_c_keyword ("npgettext:1c,2,3");
 175       x_c_keyword ("dnpgettext:2c,3,4");
 176       x_c_keyword ("dcnpgettext:2c,3,4");
 177
 178       if (additional_keywords_kde)
 179         {
 180           x_c_keyword ("i18n:1");
 181           x_c_keyword ("i18nc:1c,2");
 182           x_c_keyword ("i18np:1,2");
 183           x_c_keyword ("i18ncp:1c,2,3");
 184           x_c_keyword ("i18nd:2");
 185           x_c_keyword ("i18ndc:2c,3");
 186           x_c_keyword ("i18ndp:2,3");
 187           x_c_keyword ("i18ndcp:2c,3,4");
 188           x_c_keyword ("ki18n:1");
 189           x_c_keyword ("ki18nc:1c,2");
 190           x_c_keyword ("ki18np:1,2");
 191           x_c_keyword ("ki18ncp:1c,2,3");
 192           x_c_keyword ("ki18nd:2");
 193           x_c_keyword ("ki18ndc:2c,3");
 194           x_c_keyword ("ki18ndp:2,3");
 195           x_c_keyword ("ki18ndcp:2c,3,4");
 196           x_c_keyword ("I18N_NOOP:1");
 197           x_c_keyword ("I18NC_NOOP:1c,2");
 198           x_c_keyword ("I18N_NOOP2:1c,2");
 199           x_c_keyword ("I18N_NOOP2_NOSTRIP:1c,2");
 200           x_c_keyword ("xi18n:1");
 201           x_c_keyword ("xi18nc:1c,2");
 202           x_c_keyword ("xi18np:1,2");
 203           x_c_keyword ("xi18ncp:1c,2,3");
 204           x_c_keyword ("xi18nd:2");
 205           x_c_keyword ("xi18ndc:2c,3");
 206           x_c_keyword ("xi18ndp:2,3");
 207           x_c_keyword ("xi18ndcp:2c,3,4");
 208           x_c_keyword ("kxi18n:1");
 209           x_c_keyword ("kxi18nc:1c,2");
 210           x_c_keyword ("kxi18np:1,2");
 211           x_c_keyword ("kxi18ncp:1c,2,3");
 212           x_c_keyword ("kxi18nd:2");
 213           x_c_keyword ("kxi18ndc:2c,3");
 214           x_c_keyword ("kxi18ndp:2,3");
 215           x_c_keyword ("kxi18ndcp:2c,3,4");
 216           x_c_keyword ("XI18N_NOOP:1");
 217           x_c_keyword ("XI18NC_NOOP:1c,2");
 218           x_c_keyword ("XI18N_NOOP2:1c,2");
 219           x_c_keyword ("XI18N_NOOP2_NOSTRIP:1c,2");
 220         }
 221
 222       x_objc_keyword ("gettext");
 223       x_objc_keyword ("dgettext:2");
 224       x_objc_keyword ("dcgettext:2");
 225       x_objc_keyword ("ngettext:1,2");
 226       x_objc_keyword ("dngettext:2,3");
 227       x_objc_keyword ("dcngettext:2,3");
 228       x_objc_keyword ("gettext_noop");
 229       x_objc_keyword ("pgettext:1c,2");
 230       x_objc_keyword ("dpgettext:2c,3");
 231       x_objc_keyword ("dcpgettext:2c,3");
 232       x_objc_keyword ("npgettext:1c,2,3");
 233       x_objc_keyword ("dnpgettext:2c,3,4");
 234       x_objc_keyword ("dcnpgettext:2c,3,4");
 235       x_objc_keyword ("NSLocalizedString");       /* similar to gettext */
 236       x_objc_keyword ("_");                       /* similar to gettext */
 237       x_objc_keyword ("NSLocalizedStaticString"); /* similar to gettext_noop */
 238       x_objc_keyword ("__");                      /* similar to gettext_noop */
 239
 240       default_keywords = false;
 241     }
 242 }
 243
 244 void
 245 init_flag_table_c ()
 246 {
 247   xgettext_record_flag ("gettext:1:pass-c-format");
 248   xgettext_record_flag ("dgettext:2:pass-c-format");
 249   xgettext_record_flag ("dcgettext:2:pass-c-format");
 250   xgettext_record_flag ("ngettext:1:pass-c-format");
 251   xgettext_record_flag ("ngettext:2:pass-c-format");
 252   xgettext_record_flag ("dngettext:2:pass-c-format");
 253   xgettext_record_flag ("dngettext:3:pass-c-format");
 254   xgettext_record_flag ("dcngettext:2:pass-c-format");
 255   xgettext_record_flag ("dcngettext:3:pass-c-format");
 256   xgettext_record_flag ("gettext_noop:1:pass-c-format");
 257   xgettext_record_flag ("pgettext:2:pass-c-format");
 258   xgettext_record_flag ("dpgettext:3:pass-c-format");
 259   xgettext_record_flag ("dcpgettext:3:pass-c-format");
 260   xgettext_record_flag ("npgettext:2:pass-c-format");
 261   xgettext_record_flag ("npgettext:3:pass-c-format");
 262   xgettext_record_flag ("dnpgettext:3:pass-c-format");
 263   xgettext_record_flag ("dnpgettext:4:pass-c-format");
 264   xgettext_record_flag ("dcnpgettext:3:pass-c-format");
 265   xgettext_record_flag ("dcnpgettext:4:pass-c-format");
 266
 267   /* <stdio.h> */
 268   xgettext_record_flag ("fprintf:2:c-format");
 269   xgettext_record_flag ("vfprintf:2:c-format");
 270   xgettext_record_flag ("printf:1:c-format");
 271   xgettext_record_flag ("vprintf:1:c-format");
 272   xgettext_record_flag ("sprintf:2:c-format");
 273   xgettext_record_flag ("vsprintf:2:c-format");
 274   xgettext_record_flag ("snprintf:3:c-format");
 275   xgettext_record_flag ("vsnprintf:3:c-format");
 276 #if 0 /* These functions are not standard.  */
 277   /* <stdio.h> */
 278   xgettext_record_flag ("asprintf:2:c-format");
 279   xgettext_record_flag ("vasprintf:2:c-format");
 280   xgettext_record_flag ("dprintf:2:c-format");
 281   xgettext_record_flag ("vdprintf:2:c-format");
 282   xgettext_record_flag ("obstack_printf:2:c-format");
 283   xgettext_record_flag ("obstack_vprintf:2:c-format");
 284   /* <error.h> */
 285   xgettext_record_flag ("error:3:c-format");
 286   xgettext_record_flag ("error_at_line:5:c-format");
 287   /* <argp.h> */
 288   xgettext_record_flag ("argp_error:2:c-format");
 289   xgettext_record_flag ("argp_failure:2:c-format");
 290 #endif
 291
 292   xgettext_record_flag ("gettext:1:pass-qt-format");
 293   xgettext_record_flag ("dgettext:2:pass-qt-format");
 294   xgettext_record_flag ("dcgettext:2:pass-qt-format");
 295   xgettext_record_flag ("ngettext:1:pass-qt-format");
 296   xgettext_record_flag ("ngettext:2:pass-qt-format");
 297   xgettext_record_flag ("dngettext:2:pass-qt-format");
 298   xgettext_record_flag ("dngettext:3:pass-qt-format");
 299   xgettext_record_flag ("dcngettext:2:pass-qt-format");
 300   xgettext_record_flag ("dcngettext:3:pass-qt-format");
 301   xgettext_record_flag ("gettext_noop:1:pass-qt-format");
 302   xgettext_record_flag ("pgettext:2:pass-qt-format");
 303   xgettext_record_flag ("dpgettext:3:pass-qt-format");
 304   xgettext_record_flag ("dcpgettext:3:pass-qt-format");
 305   xgettext_record_flag ("npgettext:2:pass-qt-format");
 306   xgettext_record_flag ("npgettext:3:pass-qt-format");
 307   xgettext_record_flag ("dnpgettext:3:pass-qt-format");
 308   xgettext_record_flag ("dnpgettext:4:pass-qt-format");
 309   xgettext_record_flag ("dcnpgettext:3:pass-qt-format");
 310   xgettext_record_flag ("dcnpgettext:4:pass-qt-format");
 311
 312   xgettext_record_flag ("gettext:1:pass-kde-format");
 313   xgettext_record_flag ("dgettext:2:pass-kde-format");
 314   xgettext_record_flag ("dcgettext:2:pass-kde-format");
 315   xgettext_record_flag ("ngettext:1:pass-kde-format");
 316   xgettext_record_flag ("ngettext:2:pass-kde-format");
 317   xgettext_record_flag ("dngettext:2:pass-kde-format");
 318   xgettext_record_flag ("dngettext:3:pass-kde-format");
 319   xgettext_record_flag ("dcngettext:2:pass-kde-format");
 320   xgettext_record_flag ("dcngettext:3:pass-kde-format");
 321   xgettext_record_flag ("gettext_noop:1:pass-kde-format");
 322   xgettext_record_flag ("pgettext:2:pass-kde-format");
 323   xgettext_record_flag ("dpgettext:3:pass-kde-format");
 324   xgettext_record_flag ("dcpgettext:3:pass-kde-format");
 325   xgettext_record_flag ("npgettext:2:pass-kde-format");
 326   xgettext_record_flag ("npgettext:3:pass-kde-format");
 327   xgettext_record_flag ("dnpgettext:3:pass-kde-format");
 328   xgettext_record_flag ("dnpgettext:4:pass-kde-format");
 329   xgettext_record_flag ("dcnpgettext:3:pass-kde-format");
 330   xgettext_record_flag ("dcnpgettext:4:pass-kde-format");
 331
 332   xgettext_record_flag ("gettext:1:pass-boost-format");
 333   xgettext_record_flag ("dgettext:2:pass-boost-format");
 334   xgettext_record_flag ("dcgettext:2:pass-boost-format");
 335   xgettext_record_flag ("ngettext:1:pass-boost-format");
 336   xgettext_record_flag ("ngettext:2:pass-boost-format");
 337   xgettext_record_flag ("dngettext:2:pass-boost-format");
 338   xgettext_record_flag ("dngettext:3:pass-boost-format");
 339   xgettext_record_flag ("dcngettext:2:pass-boost-format");
 340   xgettext_record_flag ("dcngettext:3:pass-boost-format");
 341   xgettext_record_flag ("gettext_noop:1:pass-boost-format");
 342   xgettext_record_flag ("pgettext:2:pass-boost-format");
 343   xgettext_record_flag ("dpgettext:3:pass-boost-format");
 344   xgettext_record_flag ("dcpgettext:3:pass-boost-format");
 345   xgettext_record_flag ("npgettext:2:pass-boost-format");
 346   xgettext_record_flag ("npgettext:3:pass-boost-format");
 347   xgettext_record_flag ("dnpgettext:3:pass-boost-format");
 348   xgettext_record_flag ("dnpgettext:4:pass-boost-format");
 349   xgettext_record_flag ("dcnpgettext:3:pass-boost-format");
 350   xgettext_record_flag ("dcnpgettext:4:pass-boost-format");
 351
 352   /* <boost/format.hpp> */
 353   xgettext_record_flag ("format:1:boost-format");
 354 }
 355
 356 void
 357 init_flag_table_objc ()
 358 {
 359   /* Since the settings done in init_flag_table_c() also have an effect for
 360      the ObjectiveC parser, we don't have to repeat them here.  */
 361   xgettext_record_flag ("gettext:1:pass-objc-format");
 362   xgettext_record_flag ("dgettext:2:pass-objc-format");
 363   xgettext_record_flag ("dcgettext:2:pass-objc-format");
 364   xgettext_record_flag ("ngettext:1:pass-objc-format");
 365   xgettext_record_flag ("ngettext:2:pass-objc-format");
 366   xgettext_record_flag ("dngettext:2:pass-objc-format");
 367   xgettext_record_flag ("dngettext:3:pass-objc-format");
 368   xgettext_record_flag ("dcngettext:2:pass-objc-format");
 369   xgettext_record_flag ("dcngettext:3:pass-objc-format");
 370   xgettext_record_flag ("gettext_noop:1:pass-objc-format");
 371   xgettext_record_flag ("pgettext:2:pass-objc-format");
 372   xgettext_record_flag ("dpgettext:3:pass-objc-format");
 373   xgettext_record_flag ("dcpgettext:3:pass-objc-format");
 374   xgettext_record_flag ("npgettext:2:pass-objc-format");
 375   xgettext_record_flag ("npgettext:3:pass-objc-format");
 376   xgettext_record_flag ("dnpgettext:3:pass-objc-format");
 377   xgettext_record_flag ("dnpgettext:4:pass-objc-format");
 378   xgettext_record_flag ("dcnpgettext:3:pass-objc-format");
 379   xgettext_record_flag ("dcnpgettext:4:pass-objc-format");
 380   xgettext_record_flag ("NSLocalizedString:1:pass-c-format");
 381   xgettext_record_flag ("NSLocalizedString:1:pass-objc-format");
 382   xgettext_record_flag ("_:1:pass-c-format");
 383   xgettext_record_flag ("_:1:pass-objc-format");
 384   xgettext_record_flag ("stringWithFormat::1:objc-format");
 385   xgettext_record_flag ("initWithFormat::1:objc-format");
 386   xgettext_record_flag ("stringByAppendingFormat::1:objc-format");
 387   xgettext_record_flag ("localizedStringWithFormat::1:objc-format");
 388   xgettext_record_flag ("appendFormat::1:objc-format");
 389 }
 390
 391 void
 392 init_flag_table_gcc_internal ()
 393 {
 394   xgettext_record_flag ("gettext:1:pass-gcc-internal-format");
 395   xgettext_record_flag ("dgettext:2:pass-gcc-internal-format");
 396   xgettext_record_flag ("dcgettext:2:pass-gcc-internal-format");
 397   xgettext_record_flag ("ngettext:1:pass-gcc-internal-format");
 398   xgettext_record_flag ("ngettext:2:pass-gcc-internal-format");
 399   xgettext_record_flag ("dngettext:2:pass-gcc-internal-format");
 400   xgettext_record_flag ("dngettext:3:pass-gcc-internal-format");
 401   xgettext_record_flag ("dcngettext:2:pass-gcc-internal-format");
 402   xgettext_record_flag ("dcngettext:3:pass-gcc-internal-format");
 403   xgettext_record_flag ("gettext_noop:1:pass-gcc-internal-format");
 404   xgettext_record_flag ("pgettext:2:pass-gcc-internal-format");
 405   xgettext_record_flag ("dpgettext:3:pass-gcc-internal-format");
 406   xgettext_record_flag ("dcpgettext:3:pass-gcc-internal-format");
 407   xgettext_record_flag ("npgettext:2:pass-gcc-internal-format");
 408   xgettext_record_flag ("npgettext:3:pass-gcc-internal-format");
 409   xgettext_record_flag ("dnpgettext:3:pass-gcc-internal-format");
 410   xgettext_record_flag ("dnpgettext:4:pass-gcc-internal-format");
 411   xgettext_record_flag ("dcnpgettext:3:pass-gcc-internal-format");
 412   xgettext_record_flag ("dcnpgettext:4:pass-gcc-internal-format");
 413 #if 0 /* This should better be done inside GCC.  */
 414   /* grepping for ATTRIBUTE_PRINTF in gcc-3.3/gcc/?*.h */
 415   /* c-format.c */
 416   xgettext_record_flag ("status_warning:2:gcc-internal-format");
 417   /* c-tree.h */
 418   xgettext_record_flag ("pedwarn_c99:1:pass-gcc-internal-format");
 419   /* collect2.h */
 420   //xgettext_record_flag ("error:1:c-format"); // 3 different versions
 421   xgettext_record_flag ("notice:1:c-format");
 422   //xgettext_record_flag ("fatal:1:c-format"); // 2 different versions
 423   xgettext_record_flag ("fatal_perror:1:c-format");
 424   /* cpplib.h */
 425   xgettext_record_flag ("cpp_error:3:c-format");
 426   xgettext_record_flag ("cpp_error_with_line:5:c-format");
 427   /* diagnostic.h */
 428   xgettext_record_flag ("diagnostic_set_info:2:pass-gcc-internal-format");
 429   xgettext_record_flag ("output_printf:2:gcc-internal-format");
 430   xgettext_record_flag ("output_verbatim:2:pass-gcc-internal-format");
 431   xgettext_record_flag ("verbatim:1:gcc-internal-format");
 432   xgettext_record_flag ("inform:1:pass-gcc-internal-format");
 433   /* gcc.h */
 434   //xgettext_record_flag ("fatal:1:c-format"); // 2 different versions
 435   //xgettext_record_flag ("error:1:c-format"); // 3 different versions
 436   /* genattrtab.h */
 437   xgettext_record_flag ("attr_printf:2:pass-c-format");
 438   /* gengtype.h */
 439   xgettext_record_flag ("error_at_line:2:pass-c-format");
 440   xgettext_record_flag ("xvasprintf:2:pass-c-format");
 441   xgettext_record_flag ("xasprintf:1:pass-c-format");
 442   xgettext_record_flag ("oprintf:2:pass-c-format");
 443   /* gensupport.h */
 444   xgettext_record_flag ("message_with_line:2:pass-c-format");
 445   /* output.h */
 446   xgettext_record_flag ("output_operand_lossage:1:c-format");
 447   /* ra.h */
 448    xgettext_record_flag ("ra_debug_msg:2:pass-c-format");
 449   /* toplev.h */
 450   xgettext_record_flag ("fnotice:2:c-format");
 451   xgettext_record_flag ("fatal_io_error:2:gcc-internal-format");
 452   xgettext_record_flag ("error_for_asm:2:pass-gcc-internal-format");
 453   xgettext_record_flag ("warning_for_asm:2:pass-gcc-internal-format");
 454   xgettext_record_flag ("error_with_file_and_line:3:pass-gcc-internal-format");
 455   xgettext_record_flag ("error_with_decl:2:pass-gcc-internal-format");
 456   xgettext_record_flag ("pedwarn:1:gcc-internal-format");
 457   xgettext_record_flag ("pedwarn_with_file_and_line:3:gcc-internal-format");
 458   xgettext_record_flag ("pedwarn_with_decl:2:gcc-internal-format");
 459   xgettext_record_flag ("sorry:1:gcc-internal-format");
 460   xgettext_record_flag ("error:1:pass-gcc-internal-format");
 461   xgettext_record_flag ("fatal_error:1:pass-gcc-internal-format");
 462   xgettext_record_flag ("internal_error:1:pass-gcc-internal-format");
 463   xgettext_record_flag ("warning:1:pass-gcc-internal-format");
 464   xgettext_record_flag ("warning_with_file_and_line:3:pass-gcc-internal-format");
 465   xgettext_record_flag ("warning_with_decl:2:pass-gcc-internal-format");
 466   /* f/com.h */
 467   xgettext_record_flag ("ffecom_get_invented_identifier:1:pass-c-format");
 468   /* f/sts.h */
 469   xgettext_record_flag ("ffests_printf:2:pass-c-format");
 470   /* java/java-tree.h */
 471   xgettext_record_flag ("parse_error_context:2:pass-c-format");
 472 #endif
 473
 474   xgettext_record_flag ("gettext:1:pass-gfc-internal-format");
 475   xgettext_record_flag ("dgettext:2:pass-gfc-internal-format");
 476   xgettext_record_flag ("dcgettext:2:pass-gfc-internal-format");
 477   xgettext_record_flag ("ngettext:1:pass-gfc-internal-format");
 478   xgettext_record_flag ("ngettext:2:pass-gfc-internal-format");
 479   xgettext_record_flag ("dngettext:2:pass-gfc-internal-format");
 480   xgettext_record_flag ("dngettext:3:pass-gfc-internal-format");
 481   xgettext_record_flag ("dcngettext:2:pass-gfc-internal-format");
 482   xgettext_record_flag ("dcngettext:3:pass-gfc-internal-format");
 483   xgettext_record_flag ("gettext_noop:1:pass-gfc-internal-format");
 484   xgettext_record_flag ("pgettext:2:pass-gfc-internal-format");
 485   xgettext_record_flag ("dpgettext:3:pass-gfc-internal-format");
 486   xgettext_record_flag ("dcpgettext:3:pass-gfc-internal-format");
 487   xgettext_record_flag ("npgettext:2:pass-gfc-internal-format");
 488   xgettext_record_flag ("npgettext:3:pass-gfc-internal-format");
 489   xgettext_record_flag ("dnpgettext:3:pass-gfc-internal-format");
 490   xgettext_record_flag ("dnpgettext:4:pass-gfc-internal-format");
 491   xgettext_record_flag ("dcnpgettext:3:pass-gfc-internal-format");
 492   xgettext_record_flag ("dcnpgettext:4:pass-gfc-internal-format");
 493 #if 0 /* This should better be done inside GCC.  */
 494   /* fortran/error.c */
 495   xgettext_record_flag ("gfc_error:1:gfc-internal-format");
 496   xgettext_record_flag ("gfc_error_now:1:gfc-internal-format");
 497   xgettext_record_flag ("gfc_fatal_error:1:gfc-internal-format");
 498   xgettext_record_flag ("gfc_internal_error:1:gfc-internal-format");
 499   xgettext_record_flag ("gfc_notify_std:2:gfc-internal-format");
 500   xgettext_record_flag ("gfc_warning:1:gfc-internal-format");
 501   xgettext_record_flag ("gfc_warning_now:1:gfc-internal-format");
 502 #endif
 503 }
 504
 505 void
 506 init_flag_table_kde ()
 507 {
 508   xgettext_record_flag ("i18n:1:kde-format");
 509   xgettext_record_flag ("i18nc:2:kde-format");
 510   xgettext_record_flag ("i18np:1:kde-format");
 511   xgettext_record_flag ("i18ncp:2:kde-format");
 512   xgettext_record_flag ("i18nd:2:kde-format");
 513   xgettext_record_flag ("i18ndc:3:kde-format");
 514   xgettext_record_flag ("i18ndp:2:kde-format");
 515   xgettext_record_flag ("i18ndcp:3:kde-format");
 516   xgettext_record_flag ("ki18n:1:kde-format");
 517   xgettext_record_flag ("ki18nc:2:kde-format");
 518   xgettext_record_flag ("ki18np:1:kde-format");
 519   xgettext_record_flag ("ki18ncp:2:kde-format");
 520   xgettext_record_flag ("ki18nd:2:kde-format");
 521   xgettext_record_flag ("ki18ndc:3:kde-format");
 522   xgettext_record_flag ("ki18ndp:2:kde-format");
 523   xgettext_record_flag ("ki18ndcp:3:kde-format");
 524   xgettext_record_flag ("I18N_NOOP:1:kde-format");
 525   xgettext_record_flag ("I18NC_NOOP:2:kde-format");
 526   xgettext_record_flag ("I18N_NOOP2:2:kde-format");
 527   xgettext_record_flag ("I18N_NOOP2_NOSTRIP:2:kde-format");
 528   xgettext_record_flag ("xi18n:1:kde-kuit-format");
 529   xgettext_record_flag ("xi18nc:2:kde-kuit-format");
 530   xgettext_record_flag ("xi18np:1:kde-kuit-format");
 531   xgettext_record_flag ("xi18ncp:2:kde-kuit-format");
 532   xgettext_record_flag ("xi18nd:2:kde-kuit-format");
 533   xgettext_record_flag ("xi18ndc:3:kde-kuit-format");
 534   xgettext_record_flag ("xi18ndp:2:kde-kuit-format");
 535   xgettext_record_flag ("xi18ndcp:3:kde-kuit-format");
 536   xgettext_record_flag ("kxi18n:1:kde-kuit-format");
 537   xgettext_record_flag ("kxi18nc:2:kde-kuit-format");
 538   xgettext_record_flag ("kxi18np:1:kde-kuit-format");
 539   xgettext_record_flag ("kxi18ncp:2:kde-kuit-format");
 540   xgettext_record_flag ("kxi18nd:2:kde-kuit-format");
 541   xgettext_record_flag ("kxi18ndc:3:kde-kuit-format");
 542   xgettext_record_flag ("kxi18ndp:2:kde-kuit-format");
 543   xgettext_record_flag ("kxi18ndcp:3:kde-kuit-format");
 544   xgettext_record_flag ("XI18N_NOOP:1:kde-kuit-format");
 545   xgettext_record_flag ("XI18NC_NOOP:2:kde-kuit-format");
 546   xgettext_record_flag ("XI18N_NOOP2:2:kde-kuit-format");
 547   xgettext_record_flag ("XI18N_NOOP2_NOSTRIP:2:kde-kuit-format");
 548 }
 549
 550 /* ======================== Reading of characters.  ======================== */
 551
 552 /* Real filename, used in error messages about the input file.  */
 553 static const char *real_file_name;
 554
 555 /* Logical filename and line number, used to label the extracted messages.  */
 556 static char *logical_file_name;
 557 static int line_number;
 558
 559 /* The input file stream.  */
 560 static FILE *fp;
 561
 562
 563 /* 0. Terminate line by \n, regardless whether the external representation of
 564    a line terminator is LF (Unix), CR (Mac) or CR/LF (DOS/Windows).
 565    It is debatable whether supporting CR/LF line terminators in C sources
 566    on Unix is ISO C or POSIX compliant, but since GCC 3.3 now supports it
 567    unconditionally, it must be OK.
 568    The so-called "text mode" in stdio on DOS/Windows translates CR/LF to \n
 569    automatically, but here we also need this conversion on Unix.  As a side
 570    effect, on DOS/Windows we also parse CR/CR/LF into a single \n, but this
 571    is not a problem.  */
 572
 573
 574 static int
 575 phase0_getc ()
 576 {
 577   int c;
 578
 579   c = getc (fp);
 580   if (c == EOF)
 581     {
 582       if (ferror (fp))
 583         error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
 584                real_file_name);
 585       return EOF;
 586     }
 587
 588   if (c == '\r')
 589     {
 590       int c1 = getc (fp);
 591
 592       if (c1 != EOF && c1 != '\n')
 593         ungetc (c1, fp);
 594
 595       /* Seen line terminator CR or CR/LF.  */
 596       return '\n';
 597     }
 598
 599   return c;
 600 }
 601
 602
 603 /* Supports only one pushback character, and not '\n'.  */
 604 static inline void
 605 phase0_ungetc (int c)
 606 {
 607   if (c != EOF)
 608     ungetc (c, fp);
 609 }
 610
 611
 612 /* 1. line_number handling.  Combine backslash-newline to nothing.  */
 613
 614 static unsigned char phase1_pushback[2];
 615 static int phase1_pushback_length;
 616
 617
 618 static int
 619 phase1_getc ()
 620 {
 621   int c;
 622
 623   if (phase1_pushback_length)
 624     {
 625       c = phase1_pushback[--phase1_pushback_length];
 626       if (c == '\n')
 627         ++line_number;
 628       return c;
 629     }
 630   for (;;)
 631     {
 632       c = phase0_getc ();
 633       switch (c)
 634         {
 635         case '\n':
 636           ++line_number;
 637           return '\n';
 638
 639         case '\\':
 640           c = phase0_getc ();
 641           if (c != '\n')
 642             {
 643               phase0_ungetc (c);
 644               return '\\';
 645             }
 646           ++line_number;
 647           break;
 648
 649         default:
 650           return c;
 651         }
 652     }
 653 }
 654
 655
 656 /* Supports 2 characters of pushback.  */
 657 static void
 658 phase1_ungetc (int c)
 659 {
 660   switch (c)
 661     {
 662     case EOF:
 663       break;
 664
 665     case '\n':
 666       --line_number;
 667       /* FALLTHROUGH */
 668
 669     default:
 670       if (phase1_pushback_length == SIZEOF (phase1_pushback))
 671         abort ();
 672       phase1_pushback[phase1_pushback_length++] = c;
 673       break;
 674     }
 675 }
 676
 677
 678 /* 2. Convert trigraphs to their single character equivalents.  Most
 679    sane human beings vomit copiously at the mention of trigraphs, which
 680    is why they are an option.  */
 681
 682 static unsigned char phase2_pushback[1];
 683 static int phase2_pushback_length;
 684
 685
 686 static int
 687 phase2_getc ()
 688 {
 689   int c;
 690
 691   if (phase2_pushback_length)
 692     return phase2_pushback[--phase2_pushback_length];
 693   if (!trigraphs)
 694     return phase1_getc ();
 695
 696   c = phase1_getc ();
 697   if (c != '?')
 698     return c;
 699   c = phase1_getc ();
 700   if (c != '?')
 701     {
 702       phase1_ungetc (c);
 703       return '?';
 704     }
 705   c = phase1_getc ();
 706   switch (c)
 707     {
 708     case '(':
 709       return '[';
 710     case '/':
 711       return '\\';
 712     case ')':
 713       return ']';
 714     case '\'':
 715       return '^';
 716     case '<':
 717       return '{';
 718     case '!':
 719       return '|';
 720     case '>':
 721       return '}';
 722     case '-':
 723       return '~';
 724     case '#':
 725       return '=';
 726     }
 727   phase1_ungetc (c);
 728   phase1_ungetc ('?');
 729   return '?';
 730 }
 731
 732
 733 /* Supports only one pushback character.  */
 734 static void
 735 phase2_ungetc (int c)
 736 {
 737   if (c != EOF)
 738     {
 739       if (phase2_pushback_length == SIZEOF (phase2_pushback))
 740         abort ();
 741       phase2_pushback[phase2_pushback_length++] = c;
 742     }
 743 }
 744
 745
 746 /* 3. Concatenate each line ending in backslash (\) with the following
 747    line.  Basically, all you need to do is elide "\\\n" sequences from
 748    the input.  */
 749
 750 static unsigned char phase3_pushback[2];
 751 static int phase3_pushback_length;
 752
 753
 754 static int
 755 phase3_getc ()
 756 {
 757   if (phase3_pushback_length)
 758     return phase3_pushback[--phase3_pushback_length];
 759   for (;;)
 760     {
 761       int c = phase2_getc ();
 762       if (c != '\\')
 763         return c;
 764       c = phase2_getc ();
 765       if (c != '\n')
 766         {
 767           phase2_ungetc (c);
 768           return '\\';
 769         }
 770     }
 771 }
 772
 773
 774 /* Supports 2 characters of pushback.  */
 775 static void
 776 phase3_ungetc (int c)
 777 {
 778   if (c != EOF)
 779     {
 780       if (phase3_pushback_length == SIZEOF (phase3_pushback))
 781         abort ();
 782       phase3_pushback[phase3_pushback_length++] = c;
 783     }
 784 }
 785
 786
 787 /* Accumulating comments.  */
 788
 789 static char *buffer;
 790 static size_t bufmax;
 791 static size_t buflen;
 792
 793 static inline void
 794 comment_start ()
 795 {
 796   buflen = 0;
 797 }
 798
 799 static inline void
 800 comment_add (int c)
 801 {
 802   if (buflen >= bufmax)
 803     {
 804       bufmax = 2 * bufmax + 10;
 805       buffer = xrealloc (buffer, bufmax);
 806     }
 807   buffer[buflen++] = c;
 808 }
 809
 810 static inline void
 811 comment_line_end (size_t chars_to_remove)
 812 {
 813   buflen -= chars_to_remove;
 814   while (buflen >= 1
 815          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
 816     --buflen;
 817   if (chars_to_remove == 0 && buflen >= bufmax)
 818     {
 819       bufmax = 2 * bufmax + 10;
 820       buffer = xrealloc (buffer, bufmax);
 821     }
 822   buffer[buflen] = '\0';
 823   savable_comment_add (buffer);
 824 }
 825
 826
 827 /* These are for tracking whether comments count as immediately before
 828    keyword.  */
 829 static int last_comment_line;
 830 static int last_non_comment_line;
 831 static int newline_count;
 832
 833
 834 /* 4. Replace each comment that is not inside a character constant or
 835    string literal with a space character.  We need to remember the
 836    comment for later, because it may be attached to a keyword string.
 837    We also optionally understand C++ comments.  */
 838
 839 static int
 840 phase4_getc ()
 841 {
 842   int c;
 843   bool last_was_star;
 844
 845   c = phase3_getc ();
 846   if (c != '/')
 847     return c;
 848   c = phase3_getc ();
 849   switch (c)
 850     {
 851     default:
 852       phase3_ungetc (c);
 853       return '/';
 854
 855     case '*':
 856       /* C comment.  */
 857       comment_start ();
 858       last_was_star = false;
 859       for (;;)
 860         {
 861           c = phase3_getc ();
 862           if (c == EOF)
 863             break;
 864           /* We skip all leading white space, but not EOLs.  */
 865           if (!(buflen == 0 && (c == ' ' || c == '\t')))
 866             comment_add (c);
 867           switch (c)
 868             {
 869             case '\n':
 870               comment_line_end (1);
 871               comment_start ();
 872               last_was_star = false;
 873               continue;
 874
 875             case '*':
 876               last_was_star = true;
 877               continue;
 878
 879             case '/':
 880               if (last_was_star)
 881                 {
 882                   comment_line_end (2);
 883                   break;
 884                 }
 885               /* FALLTHROUGH */
 886
 887             default:
 888               last_was_star = false;
 889               continue;
 890             }
 891           break;
 892         }
 893       last_comment_line = newline_count;
 894       return ' ';
 895
 896     case '/':
 897       /* C++ or ISO C 99 comment.  */
 898       comment_start ();
 899       for (;;)
 900         {
 901           c = phase3_getc ();
 902           if (c == '\n' || c == EOF)
 903             break;
 904           /* We skip all leading white space, but not EOLs.  */
 905           if (!(buflen == 0 && (c == ' ' || c == '\t')))
 906             comment_add (c);
 907         }
 908       comment_line_end (0);
 909       last_comment_line = newline_count;
 910       return '\n';
 911     }
 912 }
 913
 914
 915 /* Supports only one pushback character.  */
 916 static void
 917 phase4_ungetc (int c)
 918 {
 919   phase3_ungetc (c);
 920 }
 921
 922
 923 /* ========================== Reading of tokens.  ========================== */
 924
 925
 926 /* True if ObjectiveC extensions are recognized.  */
 927 static bool objc_extensions;
 928
 929 /* True if C++ extensions are recognized.  */
 930 static bool cxx_extensions;
 931
 932 enum token_type_ty
 933 {
 934   token_type_character_constant,        /* 'x' */
 935   token_type_eof,
 936   token_type_eoln,
 937   token_type_hash,                      /* # */
 938   token_type_lparen,                    /* ( */
 939   token_type_rparen,                    /* ) */
 940   token_type_comma,                     /* , */
 941   token_type_colon,                     /* : */
 942   token_type_name,                      /* abc */
 943   token_type_number,                    /* 2.7 */
 944   token_type_string_literal,            /* "abc" */
 945   token_type_symbol,                    /* < > = etc. */
 946   token_type_objc_special,              /* @ */
 947   token_type_white_space
 948 };
 949 typedef enum token_type_ty token_type_ty;
 950
 951 typedef struct token_ty token_ty;
 952 struct token_ty
 953 {
 954   token_type_ty type;
 955   char *string;         /* for token_type_name, token_type_string_literal */
 956   refcounted_string_list_ty *comment;   /* for token_type_string_literal,
 957                                            token_type_objc_special */
 958   enum literalstring_escape_type escape; /* for token_type_string_literal */
 959   long number;
 960   int line_number;
 961 };
 962
 963
 964 /* Free the memory pointed to by a 'struct token_ty'.  */
 965 static inline void
 966 free_token (token_ty *tp)
 967 {
 968   if (tp->type == token_type_name || tp->type == token_type_string_literal)
 969     free (tp->string);
 970   if (tp->type == token_type_string_literal
 971       || tp->type == token_type_objc_special)
 972     drop_reference (tp->comment);
 973 }
 974
 975
 976 static char *
 977 literalstring_parse (const char *string, lex_pos_ty *pos,
 978                      enum literalstring_escape_type type)
 979 {
 980   struct mixed_string_buffer *bp;
 981   const char *p;
 982
 983   /* Start accumulating the string.  */
 984   bp = mixed_string_buffer_alloc (lc_string,
 985                                   logical_file_name,
 986                                   line_number);
 987
 988   for (p = string; ; )
 989     {
 990       int c = *p++;
 991
 992       if (c == '\0')
 993         break;
 994
 995       if (c != '\\')
 996         {
 997           mixed_string_buffer_append_char (bp, c);
 998           continue;
 999         }
1000
1001       if (!(type & LET_ANSI_C) && !(type & LET_UNICODE))
1002         {
1003           mixed_string_buffer_append_char (bp, '\\');
1004           continue;
1005         }
1006
1007       c = *p++;
1008       if (c == '\0')
1009         break;
1010
1011       if (type & LET_ANSI_C)
1012         switch (c)
1013           {
1014           case '"':
1015           case '\'':
1016           case '?':
1017           case '\\':
1018             mixed_string_buffer_append_char (bp, c);
1019             continue;
1020
1021           case 'a':
1022             mixed_string_buffer_append_char (bp, '\a');
1023             continue;
1024           case 'b':
1025             mixed_string_buffer_append_char (bp, '\b');
1026             continue;
1027
1028             /* The \e escape is preculiar to gcc, and assumes an ASCII
1029                character set (or superset).  We don't provide support for it
1030                here.  */
1031
1032           case 'f':
1033             mixed_string_buffer_append_char (bp, '\f');
1034             continue;
1035           case 'n':
1036             mixed_string_buffer_append_char (bp, '\n');
1037             continue;
1038           case 'r':
1039             mixed_string_buffer_append_char (bp, '\r');
1040             continue;
1041           case 't':
1042             mixed_string_buffer_append_char (bp, '\t');
1043             continue;
1044           case 'v':
1045             mixed_string_buffer_append_char (bp, '\v');
1046             continue;
1047
1048           case 'x':
1049             c = *p++;
1050             if (c == '\0')
1051               break;
1052             switch (c)
1053               {
1054               default:
1055                 mixed_string_buffer_append_char (bp, '\\');
1056                 mixed_string_buffer_append_char (bp, 'x');
1057                 mixed_string_buffer_append_char (bp, c);
1058                 break;
1059
1060               case '0': case '1': case '2': case '3': case '4':
1061               case '5': case '6': case '7': case '8': case '9':
1062               case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1063               case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1064                 {
1065                   int n;
1066
1067                   for (n = 0; ; c = *p++)
1068                     {
1069                       switch (c)
1070                         {
1071                         default:
1072                           break;
1073
1074                         case '0': case '1': case '2': case '3': case '4':
1075                         case '5': case '6': case '7': case '8': case '9':
1076                           n = n * 16 + c - '0';
1077                           continue;
1078
1079                         case 'A': case 'B': case 'C': case 'D': case 'E':
1080                         case 'F':
1081                           n = n * 16 + 10 + c - 'A';
1082                           continue;
1083
1084                         case 'a': case 'b': case 'c': case 'd': case 'e':
1085                         case 'f':
1086                           n = n * 16 + 10 + c - 'a';
1087                           continue;
1088                         }
1089                       break;
1090                     }
1091
1092                   mixed_string_buffer_append_char (bp, n);
1093                   --p;
1094                 }
1095                 break;
1096               }
1097             continue;
1098
1099           case '0': case '1': case '2': case '3':
1100           case '4': case '5': case '6': case '7':
1101             {
1102               int n, j;
1103
1104               for (n = 0, j = 0; j < 3; ++j)
1105                 {
1106                   n = n * 8 + c - '0';
1107                   c = *p++;
1108                   switch (c)
1109                     {
1110                     default:
1111                       break;
1112
1113                     case '0': case '1': case '2': case '3':
1114                     case '4': case '5': case '6': case '7':
1115                       continue;
1116                     }
1117                   break;
1118                 }
1119
1120               mixed_string_buffer_append_char (bp, n);
1121               --p;
1122             }
1123             continue;
1124           }
1125
1126       if (type & LET_UNICODE)
1127         switch (c)
1128           {
1129           case 'U': case 'u':
1130             {
1131               unsigned char buf[8];
1132               int prefix = c;
1133               int length = prefix == 'u' ? 4 : 8;
1134               int n, j;
1135
1136               for (n = 0, j = 0; j < length; j++)
1137                 {
1138                   c = *p++;
1139
1140                   if (c >= '0' && c <= '9')
1141                     n = (n << 4) + (c - '0');
1142                   else if (c >= 'A' && c <= 'F')
1143                     n = (n << 4) + (c - 'A' + 10);
1144                   else if (c >= 'a' && c <= 'f')
1145                     n = (n << 4) + (c - 'a' + 10);
1146                   else
1147                     break;
1148
1149                   buf[j] = c;
1150                 }
1151
1152               if (j == length)
1153                 {
1154                   if (n < 0x110000)
1155                     mixed_string_buffer_append_unicode (bp, n);
1156                   else
1157                     {
1158                       error_with_progname = false;
1159                       error_at_line (0, 0,
1160                                      pos->file_name, pos->line_number,
1161                                      _("\
1162 warning: invalid Unicode character"));
1163                       error_with_progname = true;
1164                     }
1165                 }
1166               else
1167                 {
1168                   int i;
1169
1170                   mixed_string_buffer_append_char (bp, '\\');
1171                   mixed_string_buffer_append_char (bp, prefix);
1172
1173                   for (i = 0; i < j; i++)
1174                     mixed_string_buffer_append_char (bp, buf[i]);
1175
1176                   --p;
1177                 }
1178             }
1179             continue;
1180           }
1181
1182       if (c == '\0')
1183         break;
1184
1185       mixed_string_buffer_append_char (bp, c);
1186     }
1187
1188   return mixed_string_buffer_done (bp);
1189 }
1190
1191 struct literalstring_parser literalstring_c =
1192   {
1193     literalstring_parse
1194   };
1195
1196
1197 /* 5. Parse each resulting logical line as preprocessing tokens and
1198    white space.  Preprocessing tokens and C tokens don't always match.  */
1199
1200 static token_ty phase5_pushback[1];
1201 static int phase5_pushback_length;
1202
1203
1204 static void
1205 phase5_get (token_ty *tp)
1206 {
1207   static char *buffer;
1208   static int bufmax;
1209   int bufpos;
1210   int c;
1211   int last_was_backslash;
1212   bool raw_expected = false;
1213   int delimiter_left_end;
1214   int delimiter_right_start;
1215   int last_rparen;
1216
1217   if (phase5_pushback_length)
1218     {
1219       *tp = phase5_pushback[--phase5_pushback_length];
1220       return;
1221     }
1222   tp->string = NULL;
1223   tp->number = 0;
1224   tp->line_number = line_number;
1225   c = phase4_getc ();
1226   switch (c)
1227     {
1228     case EOF:
1229       tp->type = token_type_eof;
1230       return;
1231
1232     case '\n':
1233       tp->type = token_type_eoln;
1234       return;
1235
1236     case ' ':
1237     case '\f':
1238     case '\t':
1239       for (;;)
1240         {
1241           c = phase4_getc ();
1242           switch (c)
1243             {
1244             case ' ':
1245             case '\f':
1246             case '\t':
1247               continue;
1248
1249             default:
1250               phase4_ungetc (c);
1251               break;
1252             }
1253           break;
1254         }
1255       tp->type = token_type_white_space;
1256       return;
1257
1258     case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G':
1259     case 'H': case 'I': case 'J': case 'K': case 'L': case 'M': case 'N':
1260     case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U':
1261     case 'V': case 'W': case 'X': case 'Y': case 'Z':
1262     case '_':
1263     case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g':
1264     case 'h': case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
1265     case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u':
1266     case 'v': case 'w': case 'x': case 'y': case 'z':
1267       bufpos = 0;
1268       for (;;)
1269         {
1270           if (bufpos >= bufmax)
1271             {
1272               bufmax = 2 * bufmax + 10;
1273               buffer = xrealloc (buffer, bufmax);
1274             }
1275           buffer[bufpos++] = c;
1276           c = phase4_getc ();
1277           switch (c)
1278             {
1279             case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1280             case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1281             case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1282             case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1283             case 'Y': case 'Z':
1284             case '_':
1285             case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1286             case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1287             case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1288             case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1289             case 'y': case 'z':
1290             case '0': case '1': case '2': case '3': case '4':
1291             case '5': case '6': case '7': case '8': case '9':
1292               continue;
1293
1294             default:
1295               /* Recognize string literals prefixed by R, u8, u8R, u,
1296                  uR, U, UR, L, or LR.  It is defined in the C standard
1297                  ISO/IEC 9899:201x and the C++ standard ISO/IEC
1298                  14882:2011.  The raw string literals prefixed by R,
1299                  u8R, uR, UR, or LR are only valid in C++.
1300
1301                  Since gettext's argument is a byte sequence, we are
1302                  only interested in u8, R, and u8R.  */
1303               if (c == '"')
1304                 {
1305                   bool is_prefix = false;
1306
1307                   switch (buffer[0])
1308                     {
1309                     case 'R':
1310                       if (cxx_extensions && bufpos == 1)
1311                         {
1312                           is_prefix = true;
1313                           raw_expected = true;
1314                         }
1315                       break;
1316                     case 'u':
1317                       if (bufpos == 1)
1318                         is_prefix = true;
1319                       else
1320                         switch (buffer[1])
1321                           {
1322                           case 'R':
1323                             if (cxx_extensions && bufpos == 2)
1324                               {
1325                                 is_prefix = true;
1326                                 raw_expected = true;
1327                               }
1328                             break;
1329                           case '8':
1330                             if (bufpos == 2)
1331                               is_prefix = true;
1332                             else if (cxx_extensions
1333                                      && bufpos == 3 && buffer[2] == 'R')
1334                               {
1335                                 is_prefix = true;
1336                                 raw_expected = true;
1337                               }
1338                             break;
1339                           }
1340                       break;
1341                     case 'U':
1342                     case 'L':
1343                       if (bufpos == 1)
1344                         is_prefix = true;
1345                       else if (cxx_extensions
1346                                && bufpos == 2 && buffer[1] == 'R')
1347                         {
1348                           is_prefix = true;
1349                           raw_expected = true;
1350                         }
1351                       break;
1352                     }
1353
1354                   if (is_prefix)
1355                     goto string;
1356                 }
1357               phase4_ungetc (c);
1358               break;
1359             }
1360           break;
1361         }
1362       if (bufpos >= bufmax)
1363         {
1364           bufmax = 2 * bufmax + 10;
1365           buffer = xrealloc (buffer, bufmax);
1366         }
1367       buffer[bufpos] = 0;
1368       tp->string = xstrdup (buffer);
1369       tp->type = token_type_name;
1370       return;
1371
1372     case '.':
1373       c = phase4_getc ();
1374       phase4_ungetc (c);
1375       switch (c)
1376         {
1377         default:
1378           tp->type = token_type_symbol;
1379           return;
1380
1381         case '0': case '1': case '2': case '3': case '4':
1382         case '5': case '6': case '7': case '8': case '9':
1383           c = '.';
1384           break;
1385         }
1386       /* FALLTHROUGH */
1387
1388     case '0': case '1': case '2': case '3': case '4':
1389     case '5': case '6': case '7': case '8': case '9':
1390       /* The preprocessing number token is more "generous" than the C
1391          number tokens.  This is mostly due to token pasting (another
1392          thing we can ignore here).  */
1393       bufpos = 0;
1394       for (;;)
1395         {
1396           if (bufpos >= bufmax)
1397             {
1398               bufmax = 2 * bufmax + 10;
1399               buffer = xrealloc (buffer, bufmax);
1400             }
1401           buffer[bufpos++] = c;
1402           c = phase4_getc ();
1403           switch (c)
1404             {
1405             case 'e':
1406             case 'E':
1407               if (bufpos >= bufmax)
1408                 {
1409                   bufmax = 2 * bufmax + 10;
1410                   buffer = xrealloc (buffer, bufmax);
1411                 }
1412               buffer[bufpos++] = c;
1413               c = phase4_getc ();
1414               if (c != '+' && c != '-')
1415                 {
1416                   phase4_ungetc (c);
1417                   break;
1418                 }
1419               continue;
1420
1421             case 'A': case 'B': case 'C': case 'D':           case 'F':
1422             case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1423             case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1424             case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1425             case 'Y': case 'Z':
1426             case 'a': case 'b': case 'c': case 'd':           case 'f':
1427             case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1428             case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1429             case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1430             case 'y': case 'z':
1431             case '0': case '1': case '2': case '3': case '4':
1432             case '5': case '6': case '7': case '8': case '9':
1433             case '.':
1434               continue;
1435
1436             default:
1437               phase4_ungetc (c);
1438               break;
1439             }
1440           break;
1441         }
1442       if (bufpos >= bufmax)
1443         {
1444           bufmax = 2 * bufmax + 10;
1445           buffer = xrealloc (buffer, bufmax);
1446         }
1447       buffer[bufpos] = 0;
1448       tp->type = token_type_number;
1449       tp->number = atol (buffer);
1450       return;
1451
1452     case '\'':
1453       /* We could worry about the 'L' before wide character constants,
1454          but ignoring it has no effect unless one of the keywords is
1455          "L".  Just pretend it won't happen.  Also, we don't need to
1456          remember the character constant.  */
1457       last_was_backslash = false;
1458       for (;;)
1459         {
1460           c = phase3_getc ();
1461           if (last_was_backslash)
1462             {
1463               last_was_backslash = false;
1464               continue;
1465             }
1466           switch (c)
1467             {
1468             case '\\':
1469               last_was_backslash = true;
1470               /* FALLTHROUGH */
1471             default:
1472               continue;
1473             case '\n':
1474               error_with_progname = false;
1475               error (0, 0, _("%s:%d: warning: unterminated character constant"),
1476                      logical_file_name, line_number - 1);
1477               error_with_progname = true;
1478               phase3_ungetc ('\n');
1479               break;
1480             case EOF: case '\'':
1481               break;
1482             }
1483           break;
1484         }
1485       tp->type = token_type_character_constant;
1486       return;
1487
1488     case '"':
1489       {
1490       string:
1491         /* We could worry about the 'L' before wide string constants,
1492            but since gettext's argument is not a wide character string,
1493            let the compiler complain about the argument not matching the
1494            prototype.  Just pretend it won't happen.  */
1495         last_was_backslash = false;
1496         delimiter_left_end = -1;
1497         delimiter_right_start = -1;
1498         last_rparen = -1;
1499         bufpos = 0;
1500         for (;;)
1501           {
1502             c = phase3_getc ();
1503             if (last_was_backslash && !raw_expected)
1504               {
1505                 last_was_backslash = false;
1506                 if (bufpos >= bufmax)
1507                   {
1508                     bufmax = 2 * bufmax + 10;
1509                     buffer = xrealloc (buffer, bufmax);
1510                   }
1511                 buffer[bufpos++] = c;
1512                 continue;
1513               }
1514             switch (c)
1515               {
1516               case '\\':
1517                 last_was_backslash = true;
1518                 /* FALLTHROUGH */
1519               default:
1520                 if (raw_expected)
1521                   {
1522                     if (c == '(' && delimiter_left_end < 0)
1523                       delimiter_left_end = bufpos;
1524                     else if (c == ')' && delimiter_left_end >= 0)
1525                       last_rparen = bufpos;
1526                   }
1527                 else if (c == '\n')
1528                   {
1529                     error_with_progname = false;
1530                     error (0, 0,
1531                            _("%s:%d: warning: unterminated string literal"),
1532                            logical_file_name, line_number - 1);
1533                     error_with_progname = true;
1534                     phase3_ungetc ('\n');
1535                     break;
1536                   }
1537                 if (bufpos >= bufmax)
1538                   {
1539                     bufmax = 2 * bufmax + 10;
1540                     buffer = xrealloc (buffer, bufmax);
1541                   }
1542                 buffer[bufpos++] = c;
1543                 continue;
1544
1545               case '"':
1546                 if (raw_expected && delimiter_left_end >= 0)
1547                   {
1548                     if (last_rparen < 0
1549                         || delimiter_left_end != bufpos - (last_rparen + 1)
1550                         || strncmp (buffer, buffer + last_rparen + 1,
1551                                     delimiter_left_end) != 0)
1552                       {
1553                         if (bufpos >= bufmax)
1554                           {
1555                             bufmax = 2 * bufmax + 10;
1556                             buffer = xrealloc (buffer, bufmax);
1557                           }
1558                         buffer[bufpos++] = c;
1559                         continue;
1560                       }
1561                     delimiter_right_start = last_rparen;
1562                   }
1563                 break;
1564
1565               case EOF:
1566                 break;
1567               }
1568             break;
1569           }
1570         if (bufpos >= bufmax)
1571           {
1572             bufmax = 2 * bufmax + 10;
1573             buffer = xrealloc (buffer, bufmax);
1574           }
1575         buffer[bufpos] = 0;
1576
1577         if (raw_expected)
1578           {
1579             if (delimiter_left_end < 0 || delimiter_right_start < 0)
1580               {
1581                 error_with_progname = false;
1582                 error (0, 0, _("%s:%d: warning: unterminated string literal"),
1583                        logical_file_name, line_number - 1);
1584                 error_with_progname = true;
1585               }
1586             else
1587               {
1588                 buffer[delimiter_right_start] = '\0';
1589                 tp->type = token_type_string_literal;
1590                 tp->string = xstrdup (&buffer[delimiter_left_end + 1]);
1591                 tp->escape = LET_NONE;
1592                 tp->comment = add_reference (savable_comment);
1593                 return;
1594               }
1595           }
1596         tp->type = token_type_string_literal;
1597         tp->string = xstrdup (buffer);
1598         tp->escape = LET_ANSI_C | LET_UNICODE;
1599         tp->comment = add_reference (savable_comment);
1600         return;
1601       }
1602
1603     case '(':
1604       tp->type = token_type_lparen;
1605       return;
1606
1607     case ')':
1608       tp->type = token_type_rparen;
1609       return;
1610
1611     case ',':
1612       tp->type = token_type_comma;
1613       return;
1614
1615     case '#':
1616       tp->type = token_type_hash;
1617       return;
1618
1619     case ':':
1620       tp->type = token_type_colon;
1621       return;
1622
1623     case '@':
1624       if (objc_extensions)
1625         {
1626           tp->type = token_type_objc_special;
1627           tp->comment = add_reference (savable_comment);
1628           return;
1629         }
1630       /* FALLTHROUGH */
1631
1632     default:
1633       /* We could carefully recognize each of the 2 and 3 character
1634          operators, but it is not necessary, as we only need to recognize
1635          gettext invocations.  Don't bother.  */
1636       tp->type = token_type_symbol;
1637       return;
1638     }
1639 }
1640
1641
1642 /* Supports only one pushback token.  */
1643 static void
1644 phase5_unget (token_ty *tp)
1645 {
1646   if (tp->type != token_type_eof)
1647     {
1648       if (phase5_pushback_length == SIZEOF (phase5_pushback))
1649         abort ();
1650       phase5_pushback[phase5_pushback_length++] = *tp;
1651     }
1652 }
1653
1654
1655 /* X. Recognize a leading # symbol.  Leave leading hash as a hash, but
1656    turn hash in the middle of a line into a plain symbol token.  This
1657    makes the phase 6 easier.  */
1658
1659 static void
1660 phaseX_get (token_ty *tp)
1661 {
1662   static bool middle;   /* false at the beginning of a line, true otherwise.  */
1663
1664   phase5_get (tp);
1665
1666   if (tp->type == token_type_eoln || tp->type == token_type_eof)
1667     middle = false;
1668   else
1669     {
1670       if (middle)
1671         {
1672           /* Turn hash in the middle of a line into a plain symbol token.  */
1673           if (tp->type == token_type_hash)
1674             tp->type = token_type_symbol;
1675         }
1676       else
1677         {
1678           /* When we see leading whitespace followed by a hash sign,
1679              discard the leading white space token.  The hash is all
1680              phase 6 is interested in.  */
1681           if (tp->type == token_type_white_space)
1682             {
1683               token_ty next;
1684
1685               phase5_get (&next);
1686               if (next.type == token_type_hash)
1687                 *tp = next;
1688               else
1689                 phase5_unget (&next);
1690             }
1691           middle = true;
1692         }
1693     }
1694 }
1695
1696
1697 /* 6. Recognize and carry out directives (it also expands macros on
1698    non-directive lines, which we do not do here).  The only directive
1699    we care about are the #line and #define directive.  We throw all the
1700    others away.  */
1701
1702 static token_ty phase6_pushback[2];
1703 static int phase6_pushback_length;
1704
1705
1706 static void
1707 phase6_get (token_ty *tp)
1708 {
1709   static token_ty *buf;
1710   static int bufmax;
1711   int bufpos;
1712   int j;
1713
1714   if (phase6_pushback_length)
1715     {
1716       *tp = phase6_pushback[--phase6_pushback_length];
1717       return;
1718     }
1719   for (;;)
1720     {
1721       /* Get the next token.  If it is not a '#' at the beginning of a
1722          line (ignoring whitespace), return immediately.  */
1723       phaseX_get (tp);
1724       if (tp->type != token_type_hash)
1725         return;
1726
1727       /* Accumulate the rest of the directive in a buffer, until the
1728          "define" keyword is seen or until end of line.  */
1729       bufpos = 0;
1730       for (;;)
1731         {
1732           phaseX_get (tp);
1733           if (tp->type == token_type_eoln || tp->type == token_type_eof)
1734             break;
1735
1736           /* Before the "define" keyword and inside other directives
1737              white space is irrelevant.  So just throw it away.  */
1738           if (tp->type != token_type_white_space)
1739             {
1740               /* If it is a #define directive, return immediately,
1741                  thus treating the body of the #define directive like
1742                  normal input.  */
1743               if (bufpos == 0
1744                   && tp->type == token_type_name
1745                   && strcmp (tp->string, "define") == 0)
1746                 return;
1747
1748               /* Accumulate.  */
1749               if (bufpos >= bufmax)
1750                 {
1751                   bufmax = 2 * bufmax + 10;
1752                   buf = xrealloc (buf, bufmax * sizeof (buf[0]));
1753                 }
1754               buf[bufpos++] = *tp;
1755             }
1756         }
1757
1758       /* If it is a #line directive, with no macros to expand, act on
1759          it.  Ignore all other directives.  */
1760       if (bufpos >= 3 && buf[0].type == token_type_name
1761           && strcmp (buf[0].string, "line") == 0
1762           && buf[1].type == token_type_number
1763           && buf[2].type == token_type_string_literal)
1764         {
1765           logical_file_name = xstrdup (buf[2].string);
1766           line_number = buf[1].number;
1767         }
1768       if (bufpos >= 2 && buf[0].type == token_type_number
1769           && buf[1].type == token_type_string_literal)
1770         {
1771           logical_file_name = xstrdup (buf[1].string);
1772           line_number = buf[0].number;
1773         }
1774
1775       /* Release the storage held by the directive.  */
1776       for (j = 0; j < bufpos; ++j)
1777         free_token (&buf[j]);
1778
1779       /* We must reset the selected comments.  */
1780       savable_comment_reset ();
1781     }
1782 }
1783
1784
1785 /* Supports 2 tokens of pushback.  */
1786 static void
1787 phase6_unget (token_ty *tp)
1788 {
1789   if (tp->type != token_type_eof)
1790     {
1791       if (phase6_pushback_length == SIZEOF (phase6_pushback))
1792         abort ();
1793       phase6_pushback[phase6_pushback_length++] = *tp;
1794     }
1795 }
1796
1797
1798 /* 8a. Convert ISO C 99 section 7.8.1 format string directives to string
1799    literal placeholders.  */
1800
/* Test whether NAME is an ISO C 99 section 7.8.1 format string
   directive macro.  The accepted syntax is
     P R I { d | i | o | u | x | X }
     { { | LEAST | FAST } { 8 | 16 | 32 | 64 } | MAX | PTR }
   and NAME must match it completely; trailing characters make the
   test fail.  */
static bool
is_inttypes_macro (const char *name)
{
  const char *rest;

  if (strncmp (name, "PRI", 3) != 0)
    return false;

  /* The conversion letter.  The explicit NUL test is needed because
     strchr would otherwise find the terminator of the letter list.  */
  if (name[3] == '\0' || strchr ("diouxX", name[3]) == NULL)
    return false;
  rest = name + 4;

  /* MAX and PTR take no LEAST/FAST prefix.  */
  if (strcmp (rest, "MAX") == 0 || strcmp (rest, "PTR") == 0)
    return true;

  /* Optional LEAST or FAST, followed by a mandatory width.  */
  if (strncmp (rest, "LEAST", 5) == 0)
    rest += 5;
  else if (strncmp (rest, "FAST", 4) == 0)
    rest += 4;

  return (strcmp (rest, "8") == 0
          || strcmp (rest, "16") == 0
          || strcmp (rest, "32") == 0
          || strcmp (rest, "64") == 0);
}
1839
1840 static void
1841 phase8a_get (token_ty *tp)
1842 {
1843   phase6_get (tp);
1844   if (tp->type == token_type_name && is_inttypes_macro (tp->string))
1845     {
1846       /* Turn PRIdXXX into "<PRIdXXX>".  */
1847       char *new_string = xasprintf ("<%s>", tp->string);
1848       free (tp->string);
1849       tp->string = new_string;
1850       tp->comment = add_reference (savable_comment);
1851       tp->type = token_type_string_literal;
1852       tp->escape = LET_ANSI_C | LET_UNICODE;
1853     }
1854 }
1855
1856 /* Supports 2 tokens of pushback.  */
1857 static inline void
1858 phase8a_unget (token_ty *tp)
1859 {
1860   phase6_unget (tp);
1861 }
1862
1863
1864 /* 8b. Drop whitespace.  */
1865 static void
1866 phase8b_get (token_ty *tp)
1867 {
1868   for (;;)
1869     {
1870       phase8a_get (tp);
1871
1872       if (tp->type == token_type_white_space)
1873         continue;
1874       if (tp->type == token_type_eoln)
1875         {
1876           /* We have to track the last occurrence of a string.  One
1877              mode of xgettext allows to group an extracted message
1878              with a comment for documentation.  The rule which states
1879              which comment is assumed to be grouped with the message
1880              says it should immediately precede it.  Our
1881              interpretation: between the last line of the comment and
1882              the line in which the keyword is found must be no line
1883              with non-white space tokens.  */
1884           ++newline_count;
1885           if (last_non_comment_line > last_comment_line)
1886             savable_comment_reset ();
1887           continue;
1888         }
1889       break;
1890     }
1891 }
1892
1893 /* Supports 2 tokens of pushback.  */
1894 static inline void
1895 phase8b_unget (token_ty *tp)
1896 {
1897   phase8a_unget (tp);
1898 }
1899
1900
1901 /* 8c. In ObjectiveC mode, drop '@' before a literal string.  We need to
1902    do this before performing concatenation of adjacent string literals.  */
1903 static void
1904 phase8c_get (token_ty *tp)
1905 {
1906   token_ty tmp;
1907
1908   phase8b_get (tp);
1909   if (tp->type != token_type_objc_special)
1910     return;
1911   phase8b_get (&tmp);
1912   if (tmp.type != token_type_string_literal)
1913     {
1914       phase8b_unget (&tmp);
1915       return;
1916     }
1917   /* Drop the '@' token and return immediately the following string.  */
1918   drop_reference (tmp.comment);
1919   tmp.comment = tp->comment;
1920   *tp = tmp;
1921 }
1922
1923 /* Supports only one pushback token.  */
1924 static inline void
1925 phase8c_unget (token_ty *tp)
1926 {
1927   phase8b_unget (tp);
1928 }
1929
1930
1931 /* 8. Concatenate adjacent string literals to form single string
1932    literals (because we don't expand macros, there are a few things we
1933    will miss).
1934
1935    FIXME: handle the case when the string literals have different
1936    tp->escape setting.  */
1937
1938 static void
1939 phase8_get (token_ty *tp)
1940 {
1941   phase8c_get (tp);
1942   if (tp->type != token_type_string_literal)
1943     return;
1944   for (;;)
1945     {
1946       token_ty tmp;
1947       size_t len;
1948
1949       phase8c_get (&tmp);
1950       if (tmp.type != token_type_string_literal)
1951         {
1952           phase8c_unget (&tmp);
1953           return;
1954         }
1955       len = strlen (tp->string);
1956       tp->string = xrealloc (tp->string, len + strlen (tmp.string) + 1);
1957       strcpy (tp->string + len, tmp.string);
1958       free_token (&tmp);
1959     }
1960 }
1961
1962
1963 /* ===================== Reading of high-level tokens.  ==================== */
1964
1965
/* Kinds of high-level token that x_c_lex produces.  */
typedef enum xgettext_token_type_ty
{
  xgettext_token_type_eof,            /* end of input */
  xgettext_token_type_keyword,        /* name found in the keyword table */
  xgettext_token_type_symbol,         /* any other name */
  xgettext_token_type_lparen,         /* '(' */
  xgettext_token_type_rparen,         /* ')' */
  xgettext_token_type_comma,          /* ',' */
  xgettext_token_type_colon,          /* ':' */
  xgettext_token_type_string_literal, /* string literal */
  xgettext_token_type_other           /* everything else */
} xgettext_token_type_ty;
1979
1980 typedef struct xgettext_token_ty xgettext_token_ty;
1981 struct xgettext_token_ty
1982 {
1983   xgettext_token_type_ty type;
1984
1985   /* This field is used only for xgettext_token_type_keyword.  */
1986   const struct callshapes *shapes;
1987
1988   /* This field is used only for xgettext_token_type_string_literal,
1989      xgettext_token_type_keyword, xgettext_token_type_symbol.  */
1990   char *string;
1991
1992   /* This field is used only for xgettext_token_type_string_literal.  */
1993   enum literalstring_escape_type escape;
1994
1995   /* This field is used only for xgettext_token_type_string_literal.  */
1996   refcounted_string_list_ty *comment;
1997
1998   /* These fields are only for
1999        xgettext_token_type_keyword,
2000        xgettext_token_type_string_literal.  */
2001   lex_pos_ty pos;
2002 };
2003
2004
2005 /* 9. Convert the remaining preprocessing tokens to C tokens and
2006    discards any white space from the translation unit.  */
2007
2008 static void
2009 x_c_lex (xgettext_token_ty *tp)
2010 {
2011   for (;;)
2012     {
2013       token_ty token;
2014       void *keyword_value;
2015
2016       phase8_get (&token);
2017       switch (token.type)
2018         {
2019         case token_type_eof:
2020           tp->type = xgettext_token_type_eof;
2021           return;
2022
2023         case token_type_name:
2024           last_non_comment_line = newline_count;
2025
2026           if (hash_find_entry (objc_extensions ? &objc_keywords : &c_keywords,
2027                                token.string, strlen (token.string),
2028                                &keyword_value)
2029               == 0)
2030             {
2031               tp->type = xgettext_token_type_keyword;
2032               tp->shapes = (const struct callshapes *) keyword_value;
2033               tp->pos.file_name = logical_file_name;
2034               tp->pos.line_number = token.line_number;
2035             }
2036           else
2037             tp->type = xgettext_token_type_symbol;
2038           tp->string = token.string;
2039           return;
2040
2041         case token_type_lparen:
2042           last_non_comment_line = newline_count;
2043
2044           tp->type = xgettext_token_type_lparen;
2045           return;
2046
2047         case token_type_rparen:
2048           last_non_comment_line = newline_count;
2049
2050           tp->type = xgettext_token_type_rparen;
2051           return;
2052
2053         case token_type_comma:
2054           last_non_comment_line = newline_count;
2055
2056           tp->type = xgettext_token_type_comma;
2057           return;
2058
2059         case token_type_colon:
2060           last_non_comment_line = newline_count;
2061
2062           tp->type = xgettext_token_type_colon;
2063           return;
2064
2065         case token_type_string_literal:
2066           last_non_comment_line = newline_count;
2067
2068           tp->type = xgettext_token_type_string_literal;
2069           tp->string = token.string;
2070           tp->escape = token.escape;
2071           tp->comment = token.comment;
2072           tp->pos.file_name = logical_file_name;
2073           tp->pos.line_number = token.line_number;
2074           return;
2075
2076         case token_type_objc_special:
2077           drop_reference (token.comment);
2078           /* FALLTHROUGH */
2079
2080         default:
2081           last_non_comment_line = newline_count;
2082
2083           tp->type = xgettext_token_type_other;
2084           return;
2085         }
2086     }
2087 }
2088
2089
2090 /* ========================= Extracting strings.  ========================== */
2091
2092
2093 /* Context lookup table.  */
2094 static flag_context_list_table_ty *flag_context_list_table;
2095
2096
2097 /* The file is broken into tokens.  Scan the token stream, looking for
2098    a keyword, followed by a left paren, followed by a string.  When we
2099    see this sequence, we have something to remember.  We assume we are
2100    looking at a valid C or C++ program, and leave the complaints about
2101    the grammar to the compiler.
2102
2103      Normal handling: Look for
2104        keyword ( ... msgid ... )
2105      Plural handling: Look for
2106        keyword ( ... msgid ... msgid_plural ... )
2107
2108    We use recursion because the arguments before msgid or between msgid
2109    and msgid_plural can contain subexpressions of the same form.  */
2110
2111
2112 /* Extract messages until the next balanced closing parenthesis.
2113    Extracted messages are added to MLP.
2114    Return true upon eof, false upon closing parenthesis.  */
2115 static bool
2116 extract_parenthesized (message_list_ty *mlp,
2117                        flag_context_ty outer_context,
2118                        flag_context_list_iterator_ty context_iter,
2119                        struct arglist_parser *argparser)
2120 {
2121   /* Current argument number.  */
2122   int arg = 1;
2123   /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
2124   int state;
2125   /* Parameters of the keyword just seen.  Defined only in state 1.  */
2126   const struct callshapes *next_shapes = NULL;
2127   /* Context iterator that will be used if the next token is a '('.  */
2128   flag_context_list_iterator_ty next_context_iter =
2129     passthrough_context_list_iterator;
2130   /* Context iterator that will be used if the next token is a ':'.
2131      (Objective C selector syntax.)  */
2132   flag_context_list_iterator_ty selectorcall_context_iter =
2133     passthrough_context_list_iterator;
2134   /* Current context.  */
2135   flag_context_ty inner_context =
2136     inherited_context (outer_context,
2137                        flag_context_list_iterator_advance (&context_iter));
2138
2139   /* Start state is 0.  */
2140   state = 0;
2141
2142   for (;;)
2143     {
2144       xgettext_token_ty token;
2145
2146       x_c_lex (&token);
2147       switch (token.type)
2148         {
2149         case xgettext_token_type_keyword:
2150           next_shapes = token.shapes;
2151           state = 1;
2152           goto keyword_or_symbol;
2153
2154         case xgettext_token_type_symbol:
2155           state = 0;
2156         keyword_or_symbol:
2157           next_context_iter =
2158             flag_context_list_iterator (
2159               flag_context_list_table_lookup (
2160                 flag_context_list_table,
2161                 token.string, strlen (token.string)));
2162           if (objc_extensions)
2163             {
2164               size_t token_string_len = strlen (token.string);
2165               token.string = xrealloc (token.string, token_string_len + 2);
2166               token.string[token_string_len] = ':';
2167               token.string[token_string_len + 1] = '\0';
2168               selectorcall_context_iter =
2169                 flag_context_list_iterator (
2170                   flag_context_list_table_lookup (
2171                     flag_context_list_table,
2172                     token.string, token_string_len + 1));
2173             }
2174           free (token.string);
2175           continue;
2176
2177         case xgettext_token_type_lparen:
2178           if (extract_parenthesized (mlp, inner_context, next_context_iter,
2179                                      arglist_parser_alloc (mlp,
2180                                                            state ? next_shapes : NULL)))
2181             {
2182               arglist_parser_done (argparser, arg);
2183               return true;
2184             }
2185           next_context_iter = null_context_list_iterator;
2186           selectorcall_context_iter = null_context_list_iterator;
2187           state = 0;
2188           continue;
2189
2190         case xgettext_token_type_rparen:
2191           arglist_parser_done (argparser, arg);
2192           return false;
2193
2194         case xgettext_token_type_comma:
2195           arg++;
2196           inner_context =
2197             inherited_context (outer_context,
2198                                flag_context_list_iterator_advance (
2199                                  &context_iter));
2200           next_context_iter = passthrough_context_list_iterator;
2201           selectorcall_context_iter = passthrough_context_list_iterator;
2202           state = 0;
2203           continue;
2204
2205         case xgettext_token_type_colon:
2206           if (objc_extensions)
2207             {
2208               context_iter = selectorcall_context_iter;
2209               inner_context =
2210                 inherited_context (inner_context,
2211                                    flag_context_list_iterator_advance (
2212                                      &context_iter));
2213               next_context_iter = passthrough_context_list_iterator;
2214               selectorcall_context_iter = passthrough_context_list_iterator;
2215             }
2216           else
2217             {
2218               next_context_iter = null_context_list_iterator;
2219               selectorcall_context_iter = null_context_list_iterator;
2220             }
2221           state = 0;
2222           continue;
2223
2224         case xgettext_token_type_string_literal:
2225           if (extract_all)
2226             {
2227               char *string;
2228               refcounted_string_list_ty *comment;
2229               const char *encoding;
2230
2231               string = literalstring_parse (token.string, &token.pos,
2232                                             token.escape);
2233               free (token.string);
2234               token.string = string;
2235
2236               if (token.comment != NULL)
2237                 {
2238                   comment = savable_comment_convert_encoding (token.comment,
2239                                                               &token.pos);
2240                   drop_reference (token.comment);
2241                   token.comment = comment;
2242                 }
2243
2244               /* token.string and token.comment are already converted
2245                  to UTF-8.  Prevent further conversion in
2246                  remember_a_message.  */
2247               encoding = xgettext_current_source_encoding;
2248               xgettext_current_source_encoding = po_charset_utf8;
2249               remember_a_message (mlp, NULL, token.string, inner_context,
2250                                   &token.pos, NULL, token.comment);
2251               xgettext_current_source_encoding = encoding;
2252             }
2253           else
2254             arglist_parser_remember_literal (argparser, arg, token.string,
2255                                              inner_context,
2256                                              token.pos.file_name,
2257                                              token.pos.line_number,
2258                                              token.comment,
2259                                              token.escape);
2260           drop_reference (token.comment);
2261           next_context_iter = null_context_list_iterator;
2262           selectorcall_context_iter = null_context_list_iterator;
2263           state = 0;
2264           continue;
2265
2266         case xgettext_token_type_other:
2267           next_context_iter = null_context_list_iterator;
2268           selectorcall_context_iter = null_context_list_iterator;
2269           state = 0;
2270           continue;
2271
2272         case xgettext_token_type_eof:
2273           arglist_parser_done (argparser, arg);
2274           return true;
2275
2276         default:
2277           abort ();
2278         }
2279     }
2280 }
2281
2282
2283 static void
2284 extract_whole_file (FILE *f,
2285                     const char *real_filename, const char *logical_filename,
2286                     flag_context_list_table_ty *flag_table,
2287                     msgdomain_list_ty *mdlp)
2288 {
2289   message_list_ty *mlp = mdlp->item[0]->messages;
2290
2291   fp = f;
2292   real_file_name = real_filename;
2293   logical_file_name = xstrdup (logical_filename);
2294   line_number = 1;
2295
2296   newline_count = 0;
2297   last_comment_line = -1;
2298   last_non_comment_line = -1;
2299
2300   flag_context_list_table = flag_table;
2301
2302   init_keywords ();
2303
2304   /* Eat tokens until eof is seen.  When extract_parenthesized returns
2305      due to an unbalanced closing parenthesis, just restart it.  */
2306   while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
2307                                  arglist_parser_alloc (mlp, NULL)))
2308     ;
2309
2310   /* Close scanner.  */
2311   fp = NULL;
2312   real_file_name = NULL;
2313   logical_file_name = NULL;
2314   line_number = 0;
2315 }
2316
2317
2318 void
2319 extract_c (FILE *f,
2320            const char *real_filename, const char *logical_filename,
2321            flag_context_list_table_ty *flag_table,
2322            msgdomain_list_ty *mdlp)
2323 {
2324   objc_extensions = false;
2325   cxx_extensions = false;
2326   extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);
2327 }
2328
2329 void
2330 extract_cxx (FILE *f,
2331              const char *real_filename, const char *logical_filename,
2332              flag_context_list_table_ty *flag_table,
2333              msgdomain_list_ty *mdlp)
2334 {
2335   objc_extensions = false;
2336   cxx_extensions = true;
2337   extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);
2338 }
2339
2340 void
2341 extract_objc (FILE *f,
2342               const char *real_filename, const char *logical_filename,
2343               flag_context_list_table_ty *flag_table,
2344               msgdomain_list_ty *mdlp)
2345 {
2346   objc_extensions = true;
2347   cxx_extensions = false;
2348   extract_whole_file (f, real_filename, logical_filename, flag_table, mdlp);
2349 }