gettext-tools/src/x-awk.c

   1 /* xgettext awk backend.
   2    Copyright (C) 2002-2003, 2005-2009 Free Software Foundation, Inc.
   3
   4    This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
   5
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation; either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  18
  19 #ifdef HAVE_CONFIG_H
  20 # include "config.h"
  21 #endif
  22
  23 /* Specification.  */
  24 #include "x-awk.h"
  25
  26 #include <errno.h>
  27 #include <stdbool.h>
  28 #include <stdio.h>
  29 #include <stdlib.h>
  30 #include <string.h>
  31
  32 #include "message.h"
  33 #include "xgettext.h"
  34 #include "error.h"
  35 #include "error-progname.h"
  36 #include "xalloc.h"
  37 #include "gettext.h"
  38
  39 #define _(s) gettext(s)
  40
  41
  42 /* The awk syntax is defined in the gawk manual page and documentation.
  43    See also gawk/awkgram.y.  */
  44
  45
  46 /* ====================== Keyword set customization.  ====================== */
  47
  48 /* If true extract all strings.  */
  49 static bool extract_all = false;
  50
  51 static hash_table keywords;
  52 static bool default_keywords = true;
  53
  54
  55 void
  56 x_awk_extract_all ()
  57 {
  58   extract_all = true;
  59 }
  60
  61
  62 void
  63 x_awk_keyword (const char *name)
  64 {
  65   if (name == NULL)
  66     default_keywords = false;
  67   else
  68     {
  69       const char *end;
  70       struct callshape shape;
  71       const char *colon;
  72
  73       if (keywords.table == NULL)
  74         hash_init (&keywords, 100);
  75
  76       split_keywordspec (name, &end, &shape);
  77
  78       /* The characters between name and end should form a valid C identifier.
  79          A colon means an invalid parse in split_keywordspec().  */
  80       colon = strchr (name, ':');
  81       if (colon == NULL || colon >= end)
  82         insert_keyword_callshape (&keywords, name, end - name, &shape);
  83     }
  84 }
  85
  86 /* Finish initializing the keywords hash table.
  87    Called after argument processing, before each file is processed.  */
  88 static void
  89 init_keywords ()
  90 {
  91   if (default_keywords)
  92     {
  93       /* When adding new keywords here, also update the documentation in
  94          xgettext.texi!  */
  95       x_awk_keyword ("dcgettext");
  96       x_awk_keyword ("dcngettext:1,2");
  97       default_keywords = false;
  98     }
  99 }
 100
 101 void
 102 init_flag_table_awk ()
 103 {
 104   xgettext_record_flag ("dcgettext:1:pass-awk-format");
 105   xgettext_record_flag ("dcngettext:1:pass-awk-format");
 106   xgettext_record_flag ("dcngettext:2:pass-awk-format");
 107   xgettext_record_flag ("printf:1:awk-format");
 108 }
 109
 110
 111 /* ======================== Reading of characters.  ======================== */
 112
 113 /* Real filename, used in error messages about the input file.  */
 114 static const char *real_file_name;
 115
 116 /* Logical filename and line number, used to label the extracted messages.  */
 117 static char *logical_file_name;
 118 static int line_number;
 119
 120 /* The input file stream.  */
 121 static FILE *fp;
 122
 123 /* These are for tracking whether comments count as immediately before
 124    keyword.  */
 125 static int last_comment_line;
 126 static int last_non_comment_line;
 127
 128
 129 /* 1. line_number handling.  */
 130
 131 static int
 132 phase1_getc ()
 133 {
 134   int c = getc (fp);
 135
 136   if (c == EOF)
 137     {
 138       if (ferror (fp))
 139         error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
 140                real_file_name);
 141       return EOF;
 142     }
 143
 144   if (c == '\n')
 145     line_number++;
 146
 147   return c;
 148 }
 149
 150 /* Supports only one pushback character.  */
 151 static void
 152 phase1_ungetc (int c)
 153 {
 154   if (c != EOF)
 155     {
 156       if (c == '\n')
 157         --line_number;
 158
 159       ungetc (c, fp);
 160     }
 161 }
 162
 163
 164 /* 2. Replace each comment that is not inside a string literal or regular
 165    expression with a newline character.  We need to remember the comment
 166    for later, because it may be attached to a keyword string.  */
 167
 168 static int
 169 phase2_getc ()
 170 {
 171   static char *buffer;
 172   static size_t bufmax;
 173   size_t buflen;
 174   int lineno;
 175   int c;
 176
 177   c = phase1_getc ();
 178   if (c == '#')
 179     {
 180       buflen = 0;
 181       lineno = line_number;
 182       for (;;)
 183         {
 184           c = phase1_getc ();
 185           if (c == '\n' || c == EOF)
 186             break;
 187           /* We skip all leading white space, but not EOLs.  */
 188           if (!(buflen == 0 && (c == ' ' || c == '\t')))
 189             {
 190               if (buflen >= bufmax)
 191                 {
 192                   bufmax = 2 * bufmax + 10;
 193                   buffer = xrealloc (buffer, bufmax);
 194                 }
 195               buffer[buflen++] = c;
 196             }
 197         }
 198       if (buflen >= bufmax)
 199         {
 200           bufmax = 2 * bufmax + 10;
 201           buffer = xrealloc (buffer, bufmax);
 202         }
 203       buffer[buflen] = '\0';
 204       savable_comment_add (buffer);
 205       last_comment_line = lineno;
 206     }
 207   return c;
 208 }
 209
 210 /* Supports only one pushback character.  */
 211 static void
 212 phase2_ungetc (int c)
 213 {
 214   if (c != EOF)
 215     phase1_ungetc (c);
 216 }
 217
 218
 219 /* ========================== Reading of tokens.  ========================== */
 220
 221
 222 enum token_type_ty
 223 {
 224   token_type_eof,
 225   token_type_lparen,            /* ( */
 226   token_type_rparen,            /* ) */
 227   token_type_comma,             /* , */
 228   token_type_string,            /* "abc" */
 229   token_type_i18nstring,        /* _"abc" */
 230   token_type_symbol,            /* symbol, number */
 231   token_type_semicolon,         /* ; */
 232   token_type_other              /* regexp, misc. operator */
 233 };
 234 typedef enum token_type_ty token_type_ty;
 235
 236 typedef struct token_ty token_ty;
 237 struct token_ty
 238 {
 239   token_type_ty type;
 240   char *string;         /* for token_type_{symbol,string,i18nstring} */
 241   int line_number;
 242 };
 243
 244
 245 /* 7. Replace escape sequences within character strings with their
 246    single character equivalents.  */
 247
 248 #define P7_QUOTES (1000 + '"')
 249
 250 static int
 251 phase7_getc ()
 252 {
 253   int c;
 254
 255   for (;;)
 256     {
 257       /* Use phase 1, because phase 2 elides comments.  */
 258       c = phase1_getc ();
 259
 260       if (c == EOF || c == '\n')
 261         break;
 262       if (c == '"')
 263         return P7_QUOTES;
 264       if (c != '\\')
 265         return c;
 266       c = phase1_getc ();
 267       if (c == EOF)
 268         break;
 269       if (c != '\n')
 270         switch (c)
 271           {
 272           case 'a':
 273             return '\a';
 274           case 'b':
 275             return '\b';
 276           case 'f':
 277             return '\f';
 278           case 'n':
 279             return '\n';
 280           case 'r':
 281             return '\r';
 282           case 't':
 283             return '\t';
 284           case 'v':
 285             return '\v';
 286           case '0': case '1': case '2': case '3': case '4':
 287           case '5': case '6': case '7':
 288             {
 289               int n = c - '0';
 290
 291               c = phase1_getc ();
 292               if (c != EOF)
 293                 {
 294                   if (c >= '0' && c <= '7')
 295                     {
 296                       n = (n << 3) + (c - '0');
 297                       c = phase1_getc ();
 298                       if (c != EOF)
 299                         {
 300                           if (c >= '0' && c <= '7')
 301                             n = (n << 3) + (c - '0');
 302                           else
 303                             phase1_ungetc (c);
 304                         }
 305                     }
 306                   else
 307                     phase1_ungetc (c);
 308                 }
 309               return (unsigned char) n;
 310             }
 311           case 'x':
 312             {
 313               int n = 0;
 314
 315               for (;;)
 316                 {
 317                   c = phase1_getc ();
 318                   if (c == EOF)
 319                     break;
 320                   else if (c >= '0' && c <= '9')
 321                     n = (n << 4) + (c - '0');
 322                   else if (c >= 'A' && c <= 'F')
 323                     n = (n << 4) + (c - 'A' + 10);
 324                   else if (c >= 'a' && c <= 'f')
 325                     n = (n << 4) + (c - 'a' + 10);
 326                   else
 327                     {
 328                       phase1_ungetc (c);
 329                       break;
 330                     }
 331                 }
 332               return (unsigned char) n;
 333             }
 334           default:
 335             return c;
 336           }
 337     }
 338
 339   phase1_ungetc (c);
 340   error_with_progname = false;
 341   error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name,
 342          line_number);
 343   error_with_progname = true;
 344   return P7_QUOTES;
 345 }
 346
 347
 348 /* Free the memory pointed to by a 'struct token_ty'.  */
 349 static inline void
 350 free_token (token_ty *tp)
 351 {
 352   switch (tp->type)
 353     {
 354     case token_type_string:
 355     case token_type_i18nstring:
 356     case token_type_symbol:
 357       free (tp->string);
 358       break;
 359     default:
 360       break;
 361     }
 362 }
 363
 364
 365 /* Combine characters into tokens.  Discard whitespace.  */
 366
 367 /* There is an ambiguity about '/': It can start a division operator ('/' or
 368    '/=') or it can start a regular expression.  The distinction is important
 369    because inside regular expressions, '#' and '"' lose its special meanings.
 370    If you look at the awk grammar, you see that the operator is only allowed
 371    right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
 372    can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
 373    So we prefer the division operator interpretation only right after
 374    symbol, string, number, ')', ']', with whitespace but no newline allowed
 375    in between.  */
 376 static bool prefer_division_over_regexp;
 377
 378 static void
 379 x_awk_lex (token_ty *tp)
 380 {
 381   static char *buffer;
 382   static int bufmax;
 383   int bufpos;
 384   int c;
 385
 386   for (;;)
 387     {
 388       tp->line_number = line_number;
 389       c = phase2_getc ();
 390
 391       switch (c)
 392         {
 393         case EOF:
 394           tp->type = token_type_eof;
 395           return;
 396
 397         case '\n':
 398           if (last_non_comment_line > last_comment_line)
 399             savable_comment_reset ();
 400           /* Newline is not allowed inside expressions.  It usually
 401              introduces a fresh statement.
 402              FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else'
 403              does *not* introduce a fresh statement.  */
 404           prefer_division_over_regexp = false;
 405           /* FALLTHROUGH */
 406         case '\t':
 407         case ' ':
 408           /* Ignore whitespace and comments.  */
 409           continue;
 410
 411         case '\\':
 412           /* Backslash ought to be immediately followed by a newline.  */
 413           continue;
 414         }
 415
 416       last_non_comment_line = tp->line_number;
 417
 418       switch (c)
 419         {
 420         case '.':
 421           {
 422             int c2 = phase2_getc ();
 423             phase2_ungetc (c2);
 424             if (!(c2 >= '0' && c2 <= '9'))
 425               {
 426
 427                 tp->type = token_type_other;
 428                 prefer_division_over_regexp = false;
 429                 return;
 430               }
 431           }
 432           /* FALLTHROUGH */
 433         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 434         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 435         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 436         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 437         case 'Y': case 'Z':
 438         case '_':
 439         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 440         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 441         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 442         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 443         case 'y': case 'z':
 444         case '0': case '1': case '2': case '3': case '4':
 445         case '5': case '6': case '7': case '8': case '9':
 446           /* Symbol, or part of a number.  */
 447           bufpos = 0;
 448           for (;;)
 449             {
 450               if (bufpos >= bufmax)
 451                 {
 452                   bufmax = 2 * bufmax + 10;
 453                   buffer = xrealloc (buffer, bufmax);
 454                 }
 455               buffer[bufpos++] = c;
 456               c = phase2_getc ();
 457               switch (c)
 458                 {
 459                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 460                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 461                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 462                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 463                 case 'Y': case 'Z':
 464                 case '_':
 465                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 466                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 467                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 468                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 469                 case 'y': case 'z':
 470                 case '0': case '1': case '2': case '3': case '4':
 471                 case '5': case '6': case '7': case '8': case '9':
 472                   continue;
 473                 default:
 474                   if (bufpos == 1 && buffer[0] == '_' && c == '"')
 475                     {
 476                       tp->type = token_type_i18nstring;
 477                       goto case_string;
 478                     }
 479                   phase2_ungetc (c);
 480                   break;
 481                 }
 482               break;
 483             }
 484           if (bufpos >= bufmax)
 485             {
 486               bufmax = 2 * bufmax + 10;
 487               buffer = xrealloc (buffer, bufmax);
 488             }
 489           buffer[bufpos] = '\0';
 490           tp->string = xstrdup (buffer);
 491           tp->type = token_type_symbol;
 492           /* Most identifiers can be variable names; after them we must
 493              interpret '/' as division operator.  But for awk's builtin
 494              keywords we have three cases:
 495              (a) Must interpret '/' as division operator. "length".
 496              (b) Must interpret '/' as start of a regular expression.
 497                  "do", "exit", "print", "printf", "return".
 498              (c) '/' after this keyword in invalid anyway. All others.
 499              I used the following script for the distinction.
 500                 for k in $awk_keywords; do
 501                   echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
 502                 done
 503            */
 504           if (strcmp (buffer, "do") == 0
 505               || strcmp (buffer, "exit") == 0
 506               || strcmp (buffer, "print") == 0
 507               || strcmp (buffer, "printf") == 0
 508               || strcmp (buffer, "return") == 0)
 509             prefer_division_over_regexp = false;
 510           else
 511             prefer_division_over_regexp = true;
 512           return;
 513
 514         case '"':
 515           tp->type = token_type_string;
 516         case_string:
 517           bufpos = 0;
 518           for (;;)
 519             {
 520               c = phase7_getc ();
 521               if (c == EOF || c == P7_QUOTES)
 522                 break;
 523               if (bufpos >= bufmax)
 524                 {
 525                   bufmax = 2 * bufmax + 10;
 526                   buffer = xrealloc (buffer, bufmax);
 527                 }
 528               buffer[bufpos++] = c;
 529             }
 530           if (bufpos >= bufmax)
 531             {
 532               bufmax = 2 * bufmax + 10;
 533               buffer = xrealloc (buffer, bufmax);
 534             }
 535           buffer[bufpos] = '\0';
 536           tp->string = xstrdup (buffer);
 537           prefer_division_over_regexp = true;
 538           return;
 539
 540         case '(':
 541           tp->type = token_type_lparen;
 542           prefer_division_over_regexp = false;
 543           return;
 544
 545         case ')':
 546           tp->type = token_type_rparen;
 547           prefer_division_over_regexp = true;
 548           return;
 549
 550         case ',':
 551           tp->type = token_type_comma;
 552           prefer_division_over_regexp = false;
 553           return;
 554
 555         case ';':
 556           tp->type = token_type_semicolon;
 557           prefer_division_over_regexp = false;
 558           return;
 559
 560         case ']':
 561           tp->type = token_type_other;
 562           prefer_division_over_regexp = true;
 563           return;
 564
 565         case '/':
 566           if (!prefer_division_over_regexp)
 567             {
 568               /* Regular expression.
 569                  Counting brackets is non-trivial. [[] is balanced, and so is
 570                  [\]]. Also, /[/]/ is balanced and ends at the third slash.
 571                  Do not count [ or ] if either one is preceded by a \.
 572                  A '[' should be counted if
 573                   a) it is the first one so far (brackets == 0), or
 574                   b) it is the '[' in '[:'.
 575                  A ']' should be counted if not preceded by a \.
 576                  According to POSIX, []] is how you put a ] into a set.
 577                  Try to handle that too.
 578                */
 579               int brackets = 0;
 580               bool pos0 = true;         /* true at start of regexp */
 581               bool pos1_open = false;   /* true after [ at start of regexp */
 582               bool pos2_open_not = false; /* true after [^ at start of regexp */
 583
 584               for (;;)
 585                 {
 586                   c = phase1_getc ();
 587
 588                   if (c == EOF || c == '\n')
 589                     {
 590                       phase1_ungetc (c);
 591                       error_with_progname = false;
 592                       error (0, 0, _("%s:%d: warning: unterminated regular expression"),
 593                              logical_file_name, line_number);
 594                       error_with_progname = true;
 595                       break;
 596                     }
 597                   else if (c == '[')
 598                     {
 599                       if (brackets == 0)
 600                         brackets++;
 601                       else
 602                         {
 603                           c = phase1_getc ();
 604                           if (c == ':')
 605                             brackets++;
 606                           phase1_ungetc (c);
 607                         }
 608                       if (pos0)
 609                         {
 610                           pos0 = false;
 611                           pos1_open = true;
 612                           continue;
 613                         }
 614                     }
 615                   else if (c == ']')
 616                     {
 617                       if (!(pos1_open || pos2_open_not))
 618                         brackets--;
 619                     }
 620                   else if (c == '^')
 621                     {
 622                       if (pos1_open)
 623                         {
 624                           pos1_open = false;
 625                           pos2_open_not = true;
 626                           continue;
 627                         }
 628                     }
 629                   else if (c == '\\')
 630                     {
 631                       c = phase1_getc ();
 632                       /* Backslash-newline is valid and ignored.  */
 633                     }
 634                   else if (c == '/')
 635                     {
 636                       if (brackets <= 0)
 637                         break;
 638                     }
 639
 640                   pos0 = false;
 641                   pos1_open = false;
 642                   pos2_open_not = false;
 643                 }
 644
 645               tp->type = token_type_other;
 646               prefer_division_over_regexp = false;
 647               return;
 648             }
 649           /* FALLTHROUGH */
 650
 651         default:
 652           /* We could carefully recognize each of the 2 and 3 character
 653              operators, but it is not necessary, as we only need to recognize
 654              gettext invocations.  Don't bother.  */
 655           tp->type = token_type_other;
 656           prefer_division_over_regexp = false;
 657           return;
 658         }
 659     }
 660 }
 661
 662
 663 /* ========================= Extracting strings.  ========================== */
 664
 665
/* Context lookup table.  Set by extract_awk() from its FLAG_TABLE argument
   and consulted by extract_parenthesized() for every symbol token.  */
static flag_context_list_table_ty *flag_context_list_table;
 668
 669
/* The file is broken into tokens.  Scan the token stream, looking for
   a keyword, followed by a left paren, followed by a string.  When we
   see this sequence, we have something to remember.  We assume we are
   looking at a valid awk program, and leave the complaints about
   the grammar to the awk interpreter.

     Normal handling: Look for
       keyword ( ... msgid ... )
     Plural handling: Look for
       keyword ( ... msgid ... msgid_plural ... )

   We use recursion because the arguments before msgid or between msgid
   and msgid_plural can contain subexpressions of the same form.  */
 683
 684
 685 /* Extract messages until the next balanced closing parenthesis.
 686    Extracted messages are added to MLP.
 687    Return true upon eof, false upon closing parenthesis.  */
 688 static bool
 689 extract_parenthesized (message_list_ty *mlp,
 690                        flag_context_ty outer_context,
 691                        flag_context_list_iterator_ty context_iter,
 692                        struct arglist_parser *argparser)
 693 {
 694   /* Current argument number.  */
 695   int arg = 1;
 696   /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
 697   int state;
 698   /* Parameters of the keyword just seen.  Defined only in state 1.  */
 699   const struct callshapes *next_shapes = NULL;
 700   /* Whether to implicitly assume the next tokens are arguments even without
 701      a '('.  */
 702   bool next_is_argument = false;
 703   /* Context iterator that will be used if the next token is a '('.  */
 704   flag_context_list_iterator_ty next_context_iter =
 705     passthrough_context_list_iterator;
 706   /* Current context.  */
 707   flag_context_ty inner_context =
 708     inherited_context (outer_context,
 709                        flag_context_list_iterator_advance (&context_iter));
 710
 711   /* Start state is 0.  */
 712   state = 0;
 713
 714   for (;;)
 715     {
 716       token_ty token;
 717
 718       x_awk_lex (&token);
 719
 720       if (next_is_argument && token.type != token_type_lparen)
 721         {
 722           /* An argument list starts, even though there is no '('.  */
 723           context_iter = next_context_iter;
 724           outer_context = inner_context;
 725           inner_context =
 726             inherited_context (outer_context,
 727                                flag_context_list_iterator_advance (
 728                                  &context_iter));
 729         }
 730
 731       switch (token.type)
 732         {
 733         case token_type_symbol:
 734           {
 735             void *keyword_value;
 736
 737             if (hash_find_entry (&keywords, token.string, strlen (token.string),
 738                                  &keyword_value)
 739                 == 0)
 740               {
 741                 next_shapes = (const struct callshapes *) keyword_value;
 742                 state = 1;
 743               }
 744             else
 745               state = 0;
 746           }
 747           next_is_argument =
 748             (strcmp (token.string, "print") == 0
 749              || strcmp (token.string, "printf") == 0);
 750           next_context_iter =
 751             flag_context_list_iterator (
 752               flag_context_list_table_lookup (
 753                 flag_context_list_table,
 754                 token.string, strlen (token.string)));
 755           free (token.string);
 756           continue;
 757
 758         case token_type_lparen:
 759           if (extract_parenthesized (mlp, inner_context, next_context_iter,
 760                                      arglist_parser_alloc (mlp,
 761                                                            state ? next_shapes : NULL)))
 762             {
 763               arglist_parser_done (argparser, arg);
 764               return true;
 765             }
 766           next_is_argument = false;
 767           next_context_iter = null_context_list_iterator;
 768           state = 0;
 769           continue;
 770
 771         case token_type_rparen:
 772           arglist_parser_done (argparser, arg);
 773           return false;
 774
 775         case token_type_comma:
 776           arg++;
 777           inner_context =
 778             inherited_context (outer_context,
 779                                flag_context_list_iterator_advance (
 780                                  &context_iter));
 781           next_is_argument = false;
 782           next_context_iter = passthrough_context_list_iterator;
 783           state = 0;
 784           continue;
 785
 786         case token_type_string:
 787           {
 788             lex_pos_ty pos;
 789             pos.file_name = logical_file_name;
 790             pos.line_number = token.line_number;
 791
 792             if (extract_all)
 793               remember_a_message (mlp, NULL, token.string, inner_context, &pos,
 794                                   NULL, savable_comment);
 795             else
 796               arglist_parser_remember (argparser, arg, token.string,
 797                                        inner_context,
 798                                        pos.file_name, pos.line_number,
 799                                        savable_comment);
 800           }
 801           next_is_argument = false;
 802           next_context_iter = null_context_list_iterator;
 803           state = 0;
 804           continue;
 805
 806         case token_type_i18nstring:
 807           {
 808             lex_pos_ty pos;
 809             pos.file_name = logical_file_name;
 810             pos.line_number = token.line_number;
 811
 812             remember_a_message (mlp, NULL, token.string, inner_context, &pos,
 813                                 NULL, savable_comment);
 814           }
 815           next_is_argument = false;
 816           next_context_iter = null_context_list_iterator;
 817           state = 0;
 818           continue;
 819
 820         case token_type_semicolon:
 821           /* An argument list ends, and a new statement begins.  */
 822           /* FIXME: Should handle newline that acts as statement separator
 823              in the same way.  */
 824           /* FIXME: Instead of resetting outer_context here, it may be better
 825              to recurse in the next_is_argument handling above, waiting for
 826              the next semicolon or other statement terminator.  */
 827           outer_context = null_context;
 828           context_iter = null_context_list_iterator;
 829           next_is_argument = false;
 830           next_context_iter = passthrough_context_list_iterator;
 831           inner_context =
 832             inherited_context (outer_context,
 833                                flag_context_list_iterator_advance (
 834                                  &context_iter));
 835           state = 0;
 836           continue;
 837
 838         case token_type_eof:
 839           arglist_parser_done (argparser, arg);
 840           return true;
 841
 842         case token_type_other:
 843           next_is_argument = false;
 844           next_context_iter = null_context_list_iterator;
 845           state = 0;
 846           continue;
 847
 848         default:
 849           abort ();
 850         }
 851     }
 852 }
 853
 854
 855 void
 856 extract_awk (FILE *f,
 857              const char *real_filename, const char *logical_filename,
 858              flag_context_list_table_ty *flag_table,
 859              msgdomain_list_ty *mdlp)
 860 {
 861   message_list_ty *mlp = mdlp->item[0]->messages;
 862
 863   fp = f;
 864   real_file_name = real_filename;
 865   logical_file_name = xstrdup (logical_filename);
 866   line_number = 1;
 867
 868   last_comment_line = -1;
 869   last_non_comment_line = -1;
 870
 871   prefer_division_over_regexp = false;
 872
 873   flag_context_list_table = flag_table;
 874
 875   init_keywords ();
 876
 877   /* Eat tokens until eof is seen.  When extract_parenthesized returns
 878      due to an unbalanced closing parenthesis, just restart it.  */
 879   while (!extract_parenthesized (mlp, null_context, null_context_list_iterator,
 880                                  arglist_parser_alloc (mlp, NULL)))
 881     ;
 882
 883   fp = NULL;
 884   real_file_name = NULL;
 885   logical_file_name = NULL;
 886   line_number = 0;
 887 }