gettext-tools/src/x-awk.c

   1 /* xgettext awk backend.
   2    Copyright (C) 2002-2003, 2005-2009, 2015 Free Software Foundation,
   3    Inc.
   4
   5    This file was written by Bruno Haible <haible@clisp.cons.org>, 2002.
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 /* Specification.  */
  25 #include "x-awk.h"
  26
  27 #include <errno.h>
  28 #include <stdbool.h>
  29 #include <stdio.h>
  30 #include <stdlib.h>
  31 #include <string.h>
  32
  33 #include "message.h"
  34 #include "xgettext.h"
  35 #include "error.h"
  36 #include "error-progname.h"
  37 #include "xalloc.h"
  38 #include "gettext.h"
  39
  40 #define _(s) gettext(s)
  41
  42
  43 /* The awk syntax is defined in the gawk manual page and documentation.
  44    See also gawk/awkgram.y.  */
  45
  46
  47 /* ====================== Keyword set customization.  ====================== */
  48
  49 /* If true, extract all strings.  Set by x_awk_extract_all.  */
  50 static bool extract_all = false;
  51 /* Keyword table, filled by x_awk_keyword via insert_keyword_callshape.  */
  52 static hash_table keywords;
  53 static bool default_keywords = true; /* Cleared by x_awk_keyword (NULL) and by init_keywords.  */
  54
  55 /* Turn on extract_all, so that every plain string token is recorded.  */
  56 void
  57 x_awk_extract_all ()
  58 {
  59   extract_all = true;
  60 }
  61
  62 /* Register the keyword spec NAME; a NULL NAME disables the default keywords.  */
  63 void
  64 x_awk_keyword (const char *name)
  65 {
  66   const char *spec_end;
  67   struct callshape shape;
  68   const char *first_colon;
  69
  70   if (name == NULL)
  71     {
  72       default_keywords = false;
  73       return;
  74     }
  75
  76   if (keywords.table == NULL)
  77     hash_init (&keywords, 100);
  78
  79   split_keywordspec (name, &spec_end, &shape);
  80   /* The characters between name and spec_end should form a valid C
  81      identifier.  A colon there means split_keywordspec() parsed badly.  */
  82   first_colon = strchr (name, ':');
  83   if (!(first_colon != NULL && first_colon < spec_end))
  84     insert_keyword_callshape (&keywords, name, spec_end - name, &shape);
  85 }
  86
  87 /* Finish initializing the keywords hash table.
  88    Called after argument processing, before each file is processed.  */
  89 static void
  90 init_keywords ()
  91 {
  92   /* The defaults are added at most once, and not at all when disabled.  */
  93   if (!default_keywords)
  94     return;
  95
  96   /* When adding new keywords here, also update xgettext.texi!  */
  97   x_awk_keyword ("dcgettext");
  98   x_awk_keyword ("dcngettext:1,2");
  99   default_keywords = false;
 100 }
 101 /* Pass the flag specs for dcgettext, dcngettext and printf to xgettext_record_flag.  */
 102 void
 103 init_flag_table_awk ()
 104 {
 105   xgettext_record_flag ("dcgettext:1:pass-awk-format");
 106   xgettext_record_flag ("dcngettext:1:pass-awk-format");
 107   xgettext_record_flag ("dcngettext:2:pass-awk-format");
 108   xgettext_record_flag ("printf:1:awk-format");
 109 }
 110
 111
 112 /* ======================== Reading of characters.  ======================== */
 113
 114 /* Real filename, used in error messages about the input file.  */
 115 static const char *real_file_name;
 116
 117 /* Logical filename and line number, used to label the extracted messages.  */
 118 static char *logical_file_name;
 119 static int line_number; /* Kept up to date by phase1_getc and phase1_ungetc.  */
 120
 121 /* The input file stream.  */
 122 static FILE *fp; /* Set by extract_awk while a file is being scanned.  */
 123
 124 /* These are for tracking whether comments count as immediately before
 125    a keyword.  */
 126 static int last_comment_line; /* Line of the most recent '#' comment.  */
 127 static int last_non_comment_line; /* Line of the most recent non-whitespace token.  */
 128
 129
 130 /* 1. line_number handling.  */
 131 /* Return the next byte of fp, or EOF; a read error is reported as fatal.  */
 132 static int
 133 phase1_getc ()
 134 {
 135   int ch = getc (fp);
 136
 137   /* Count each newline as it is consumed.  */
 138   if (ch == '\n')
 139     line_number++;
 140   else if (ch == EOF && ferror (fp))
 141     {
 142       /* EOF caused by a read error rather than by end of input.  */
 143       error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
 144              real_file_name);
 145     }
 146
 147   /* A plain end of input is passed through as EOF.  */
 148   return ch;
 149 }
 150
 151 /* Undo phase1_getc for C.  Supports only one pushback character.  */
 152 static void
 153 phase1_ungetc (int c)
 154 {
 155   if (c == EOF)
 156     return;
 157
 158   /* Keep line_number in step with the newline being un-read.  */
 159   if (c == '\n')
 160     line_number--;
 161   ungetc (c, fp);
 162 }
 163
 164
 165 /* 2. Replace each comment that is not inside a string literal or regular
 166    expression with a newline character.  The comment text is remembered,
 167    because it may be attached to a keyword string later.  */
 168
 169 static int
 170 phase2_getc ()
 171 {
 172   static char *text;
 173   static size_t text_size;
 174   size_t len;
 175   int start_line;
 176   int c;
 177
 178   c = phase1_getc ();
 179   if (c != '#')
 180     return c;
 181
 182   /* Gather the comment body, up to but excluding the line end.  */
 183   len = 0;
 184   start_line = line_number;
 185   while ((c = phase1_getc ()) != '\n' && c != EOF)
 186     {
 187       /* We skip all leading white space, but not EOLs.  */
 188       if (len == 0 && (c == ' ' || c == '\t'))
 189         continue;
 190       if (len >= text_size)
 191         {
 192           text_size = 2 * text_size + 10;
 193           text = xrealloc (text, text_size);
 194         }
 195       text[len++] = c;
 196     }
 197
 198   /* Ensure there is room for the terminating NUL.  */
 199   if (len >= text_size)
 200     {
 201       text_size = 2 * text_size + 10;
 202       text = xrealloc (text, text_size);
 203     }
 204   text[len] = '\0';
 205   savable_comment_add (text);
 206   last_comment_line = start_line;
 207
 208   return c; /* the '\n' or EOF that ended the comment */
 209 }
 210
 211 /* Push C back by forwarding to phase 1.  Supports only one pushback character.  */
 212 static void
 213 phase2_ungetc (int c)
 214 {
 215   if (c != EOF) /* EOF is never pushed back.  */
 216     phase1_ungetc (c);
 217 }
 218
 219
 220 /* ========================== Reading of tokens.  ========================== */
 221
 222 /* The kinds of token that x_awk_lex reports.  */
 223 enum token_type_ty
 224 {
 225   token_type_eof,               /* end of input */
 226   token_type_lparen,            /* ( */
 227   token_type_rparen,            /* ) */
 228   token_type_comma,             /* , */
 229   token_type_string,            /* "abc" */
 230   token_type_i18nstring,        /* _"abc" */
 231   token_type_symbol,            /* symbol, number */
 232   token_type_semicolon,         /* ; */
 233   token_type_other              /* regexp, misc. operator */
 234 };
 235 typedef enum token_type_ty token_type_ty;
 236 /* One token, as filled in by x_awk_lex.  */
 237 typedef struct token_ty token_ty;
 238 struct token_ty
 239 {
 240   token_type_ty type;
 241   char *string;         /* for token_type_{symbol,string,i18nstring} */
 242   int line_number;      /* value of line_number when the token was started */
 243 };
 244
 245
 246 /* 7. Replace escape sequences within character strings with their
 247    single character equivalents.  */
 248 /* P7_QUOTES stands for the unescaped, closing '"'; an escaped \" yields '"'.  */
 249 #define P7_QUOTES (1000 + '"')
 250 /* Return the next character of a string body, or P7_QUOTES at its end.  */
 251 static int
 252 phase7_getc ()
 253 {
 254   int c;
 255
 256   for (;;)
 257     {
 258       /* Use phase 1, because phase 2 elides comments.  */
 259       c = phase1_getc ();
 260
 261       if (c == EOF || c == '\n')
 262         break;
 263       if (c == '"')
 264         return P7_QUOTES;
 265       if (c != '\\')
 266         return c;
 267       c = phase1_getc ();
 268       if (c == EOF)
 269         break;
 270       if (c != '\n') /* Backslash-newline is dropped; the loop goes on.  */
 271         switch (c)
 272           {
 273           case 'a':
 274             return '\a';
 275           case 'b':
 276             return '\b';
 277           case 'f':
 278             return '\f';
 279           case 'n':
 280             return '\n';
 281           case 'r':
 282             return '\r';
 283           case 't':
 284             return '\t';
 285           case 'v':
 286             return '\v';
 287           case '0': case '1': case '2': case '3': case '4':
 288           case '5': case '6': case '7':
 289             {
 290               int n = c - '0'; /* At most three octal digits are read.  */
 291
 292               c = phase1_getc ();
 293               if (c != EOF)
 294                 {
 295                   if (c >= '0' && c <= '7')
 296                     {
 297                       n = (n << 3) + (c - '0');
 298                       c = phase1_getc ();
 299                       if (c != EOF)
 300                         {
 301                           if (c >= '0' && c <= '7')
 302                             n = (n << 3) + (c - '0');
 303                           else
 304                             phase1_ungetc (c);
 305                         }
 306                     }
 307                   else
 308                     phase1_ungetc (c);
 309                 }
 310               return (unsigned char) n;
 311             }
 312           case 'x':
 313             {
 314               unsigned int n = 0;
 315               /* Unsigned: an overlong digit run wraps, no signed overflow.  */
 316               for (;;)
 317                 {
 318                   c = phase1_getc ();
 319                   if (c == EOF)
 320                     break;
 321                   else if (c >= '0' && c <= '9')
 322                     n = (n << 4) + (c - '0');
 323                   else if (c >= 'A' && c <= 'F')
 324                     n = (n << 4) + (c - 'A' + 10);
 325                   else if (c >= 'a' && c <= 'f')
 326                     n = (n << 4) + (c - 'a' + 10);
 327                   else
 328                     {
 329                       phase1_ungetc (c);
 330                       break;
 331                     }
 332                 }
 333               return (unsigned char) n; /* Only the low 8 bits are kept.  */
 334             }
 335           default:
 336             return c; /* Any other escaped character stands for itself.  */
 337           }
 338     }
 339   /* Unterminated: EOF or an unescaped newline ended the string.  */
 340   phase1_ungetc (c);
 341   error_with_progname = false;
 342   error (0, 0, _("%s:%d: warning: unterminated string"), logical_file_name,
 343          line_number);
 344   error_with_progname = true;
 345   return P7_QUOTES;
 346 }
 347
 348
 349 /* Free the memory pointed to by a 'struct token_ty'.  */
 350 static inline void
 351 free_token (token_ty *tp)
 352 {
 353   const token_type_ty kind = tp->type;
 354   bool owns_string;
 355
 356   /* Only symbol and (i18n) string tokens carry an allocated string.  */
 357   owns_string = (kind == token_type_string
 358                  || kind == token_type_i18nstring
 359                  || kind == token_type_symbol);
 360
 361   if (owns_string)
 362     free (tp->string);
 363 }
 364
 365
 366 /* Combine characters into tokens.  Discard whitespace.  */
 367
 368 /* There is an ambiguity about '/': It can start a division operator ('/' or
 369    '/=') or it can start a regular expression.  The distinction is important
 370    because inside regular expressions, '#' and '"' lose their special meanings.
 371    If you look at the awk grammar, you see that the operator is only allowed
 372    right after a 'variable' or 'simp_exp' nonterminal, and these nonterminals
 373    can only end in the NAME, LENGTH, YSTRING, YNUMBER, ')', ']' terminals.
 374    So we prefer the division operator interpretation only right after
 375    symbol, string, number, ')', ']', with whitespace but no newline allowed
 376    in between.  */
 377 static bool prefer_division_over_regexp;
 378 /* Read the next token into *TP, skipping whitespace and comments.  */
 379 static void
 380 x_awk_lex (token_ty *tp)
 381 {
 382   static char *buffer; /* Scratch buffer, kept across calls.  */
 383   static int bufmax;
 384   int bufpos;
 385   int c;
 386
 387   for (;;)
 388     {
 389       tp->line_number = line_number;
 390       c = phase2_getc ();
 391
 392       switch (c)
 393         {
 394         case EOF:
 395           tp->type = token_type_eof;
 396           return;
 397
 398         case '\n':
 399           if (last_non_comment_line > last_comment_line)
 400             savable_comment_reset ();
 401           /* Newline is not allowed inside expressions.  It usually
 402              introduces a fresh statement.
 403              FIXME: Newlines after any of ',' '{' '?' ':' '||' '&&' 'do' 'else'
 404              do *not* introduce a fresh statement.  */
 405           prefer_division_over_regexp = false;
 406           /* FALLTHROUGH */
 407         case '\t':
 408         case ' ':
 409           /* Ignore whitespace and comments.  */
 410           continue;
 411
 412         case '\\':
 413           /* Backslash ought to be immediately followed by a newline.  */
 414           continue;
 415         }
 416
 417       last_non_comment_line = tp->line_number;
 418
 419       switch (c)
 420         {
 421         case '.':
 422           {
 423             int c2 = phase2_getc ();
 424             phase2_ungetc (c2);
 425             if (!(c2 >= '0' && c2 <= '9'))
 426               {
 427                 /* A '.' that is not followed by a digit.  */
 428                 tp->type = token_type_other;
 429                 prefer_division_over_regexp = false;
 430                 return;
 431               }
 432           }
 433           /* FALLTHROUGH */
 434         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 435         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 436         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 437         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 438         case 'Y': case 'Z':
 439         case '_':
 440         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 441         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 442         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 443         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 444         case 'y': case 'z':
 445         case '0': case '1': case '2': case '3': case '4':
 446         case '5': case '6': case '7': case '8': case '9':
 447           /* Symbol, or part of a number.  */
 448           bufpos = 0;
 449           for (;;)
 450             {
 451               if (bufpos >= bufmax)
 452                 {
 453                   bufmax = 2 * bufmax + 10;
 454                   buffer = xrealloc (buffer, bufmax);
 455                 }
 456               buffer[bufpos++] = c;
 457               c = phase2_getc ();
 458               switch (c)
 459                 {
 460                 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
 461                 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
 462                 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
 463                 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
 464                 case 'Y': case 'Z':
 465                 case '_':
 466                 case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
 467                 case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
 468                 case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
 469                 case 's': case 't': case 'u': case 'v': case 'w': case 'x':
 470                 case 'y': case 'z':
 471                 case '0': case '1': case '2': case '3': case '4':
 472                 case '5': case '6': case '7': case '8': case '9':
 473                   continue;
 474                 default:
 475                   if (bufpos == 1 && buffer[0] == '_' && c == '"') /* _"..." */
 476                     {
 477                       tp->type = token_type_i18nstring;
 478                       goto case_string;
 479                     }
 480                   phase2_ungetc (c);
 481                   break;
 482                 }
 483               break;
 484             }
 485           if (bufpos >= bufmax)
 486             {
 487               bufmax = 2 * bufmax + 10;
 488               buffer = xrealloc (buffer, bufmax);
 489             }
 490           buffer[bufpos] = '\0';
 491           tp->string = xstrdup (buffer);
 492           tp->type = token_type_symbol;
 493           /* Most identifiers can be variable names; after them we must
 494              interpret '/' as division operator.  But for awk's builtin
 495              keywords we have three cases:
 496              (a) Must interpret '/' as division operator. "length".
 497              (b) Must interpret '/' as start of a regular expression.
 498                  "do", "exit", "print", "printf", "return".
 499              (c) '/' after this keyword is invalid anyway. All others.
 500              I used the following script for the distinction.
 501                 for k in $awk_keywords; do
 502                   echo; echo $k; awk "function foo () { $k / 10 }" < /dev/null
 503                 done
 504            */
 505           if (strcmp (buffer, "do") == 0
 506               || strcmp (buffer, "exit") == 0
 507               || strcmp (buffer, "print") == 0
 508               || strcmp (buffer, "printf") == 0
 509               || strcmp (buffer, "return") == 0)
 510             prefer_division_over_regexp = false;
 511           else
 512             prefer_division_over_regexp = true;
 513           return;
 514
 515         case '"':
 516           tp->type = token_type_string;
 517         case_string: /* Also entered by goto for _"...", with tp->type already set.  */
 518           bufpos = 0;
 519           for (;;)
 520             {
 521               c = phase7_getc ();
 522               if (c == EOF || c == P7_QUOTES)
 523                 break;
 524               if (bufpos >= bufmax)
 525                 {
 526                   bufmax = 2 * bufmax + 10;
 527                   buffer = xrealloc (buffer, bufmax);
 528                 }
 529               buffer[bufpos++] = c;
 530             }
 531           if (bufpos >= bufmax)
 532             {
 533               bufmax = 2 * bufmax + 10;
 534               buffer = xrealloc (buffer, bufmax);
 535             }
 536           buffer[bufpos] = '\0';
 537           tp->string = xstrdup (buffer);
 538           prefer_division_over_regexp = true;
 539           return;
 540
 541         case '(':
 542           tp->type = token_type_lparen;
 543           prefer_division_over_regexp = false;
 544           return;
 545
 546         case ')':
 547           tp->type = token_type_rparen;
 548           prefer_division_over_regexp = true;
 549           return;
 550
 551         case ',':
 552           tp->type = token_type_comma;
 553           prefer_division_over_regexp = false;
 554           return;
 555
 556         case ';':
 557           tp->type = token_type_semicolon;
 558           prefer_division_over_regexp = false;
 559           return;
 560
 561         case ']':
 562           tp->type = token_type_other;
 563           prefer_division_over_regexp = true;
 564           return;
 565
 566         case '/':
 567           if (!prefer_division_over_regexp)
 568             {
 569               /* Regular expression.
 570                  Counting brackets is non-trivial. [[] is balanced, and so is
 571                  [\]]. Also, /[/]/ is balanced and ends at the third slash.
 572                  Do not count [ or ] if either one is preceded by a \.
 573                  A '[' should be counted if
 574                   a) it is the first one so far (brackets == 0), or
 575                   b) it is the '[' in '[:'.
 576                  A ']' should be counted if not preceded by a \.
 577                  According to POSIX, []] is how you put a ] into a set.
 578                  Try to handle that too.
 579                */
 580               int brackets = 0;
 581               bool pos0 = true;         /* true at start of regexp */
 582               bool pos1_open = false;   /* true after [ at start of regexp */
 583               bool pos2_open_not = false; /* true after [^ at start of regexp */
 584
 585               for (;;)
 586                 {
 587                   c = phase1_getc (); /* Phase 1: '#' is not a comment here.  */
 588
 589                   if (c == EOF || c == '\n')
 590                     {
 591                       phase1_ungetc (c);
 592                       error_with_progname = false;
 593                       error (0, 0, _("%s:%d: warning: unterminated regular expression"),
 594                              logical_file_name, line_number);
 595                       error_with_progname = true;
 596                       break;
 597                     }
 598                   else if (c == '[')
 599                     {
 600                       if (brackets == 0)
 601                         brackets++;
 602                       else
 603                         {
 604                           c = phase1_getc ();
 605                           if (c == ':')
 606                             brackets++;
 607                           phase1_ungetc (c);
 608                         }
 609                       if (pos0)
 610                         {
 611                           pos0 = false;
 612                           pos1_open = true;
 613                           continue;
 614                         }
 615                     }
 616                   else if (c == ']')
 617                     {
 618                       if (!(pos1_open || pos2_open_not))
 619                         brackets--;
 620                     }
 621                   else if (c == '^')
 622                     {
 623                       if (pos1_open)
 624                         {
 625                           pos1_open = false;
 626                           pos2_open_not = true;
 627                           continue;
 628                         }
 629                     }
 630                   else if (c == '\\')
 631                     {
 632                       c = phase1_getc ();
 633                       /* Backslash-newline is valid and ignored.  */
 634                     }
 635                   else if (c == '/')
 636                     {
 637                       if (brackets <= 0)
 638                         break;
 639                     }
 640
 641                   pos0 = false;
 642                   pos1_open = false;
 643                   pos2_open_not = false;
 644                 }
 645
 646               tp->type = token_type_other;
 647               prefer_division_over_regexp = false;
 648               return;
 649             }
 650           /* FALLTHROUGH */
 651
 652         default:
 653           /* We could carefully recognize each of the 2 and 3 character
 654              operators, but it is not necessary, as we only need to recognize
 655              gettext invocations.  Don't bother.  */
 656           tp->type = token_type_other;
 657           prefer_division_over_regexp = false;
 658           return;
 659         }
 660     }
 661 }
 662
 663
 664 /* ========================= Extracting strings.  ========================== */
 665
 666
 667 /* Context lookup table; set by extract_awk, consulted for each symbol.  */
 668 static flag_context_list_table_ty *flag_context_list_table;
 669
 670
 671 /* The file is broken into tokens.  Scan the token stream, looking for
 672    a keyword, followed by a left paren, followed by a string.  When we
 673    see this sequence, we have something to remember.  We assume we are
 674    looking at a valid awk program, and leave the complaints about
 675    the grammar to the awk interpreter.
 676
 677      Normal handling: Look for
 678        keyword ( ... msgid ... )
 679      Plural handling: Look for
 680        keyword ( ... msgid ... msgid_plural ... )
 681
 682    We use recursion because the arguments before msgid or between msgid
 683    and msgid_plural can contain subexpressions of the same form.  */
 684 /* ARGPARSER receives the strings seen at this level; it is finished before returning.  */
 685 /* CONTEXT_ITER is advanced once per argument and combined with OUTER_CONTEXT.  */
 686 /* Extract messages until the next balanced closing parenthesis.
 687    Extracted messages are added to MLP.
 688    Return true upon eof, false upon closing parenthesis.  */
 689 static bool
 690 extract_parenthesized (message_list_ty *mlp,
 691                        flag_context_ty outer_context,
 692                        flag_context_list_iterator_ty context_iter,
 693                        struct arglist_parser *argparser)
 694 {
 695   /* Current argument number.  */
 696   int arg = 1;
 697   /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
 698   int state;
 699   /* Parameters of the keyword just seen.  Defined only in state 1.  */
 700   const struct callshapes *next_shapes = NULL;
 701   /* Whether to implicitly assume the next tokens are arguments even without
 702      a '('.  */
 703   bool next_is_argument = false;
 704   /* Context iterator that will be used if the next token is a '('.  */
 705   flag_context_list_iterator_ty next_context_iter =
 706     passthrough_context_list_iterator;
 707   /* Current context.  */
 708   flag_context_ty inner_context =
 709     inherited_context (outer_context,
 710                        flag_context_list_iterator_advance (&context_iter));
 711
 712   /* Start state is 0.  */
 713   state = 0;
 714
 715   for (;;)
 716     {
 717       token_ty token;
 718
 719       x_awk_lex (&token);
 720
 721       if (next_is_argument && token.type != token_type_lparen)
 722         {
 723           /* An argument list starts, even though there is no '('.  */
 724           context_iter = next_context_iter;
 725           outer_context = inner_context;
 726           inner_context =
 727             inherited_context (outer_context,
 728                                flag_context_list_iterator_advance (
 729                                  &context_iter));
 730         }
 731
 732       switch (token.type)
 733         {
 734         case token_type_symbol:
 735           {
 736             void *keyword_value;
 737
 738             if (hash_find_entry (&keywords, token.string, strlen (token.string),
 739                                  &keyword_value)
 740                 == 0)
 741               {
 742                 next_shapes = (const struct callshapes *) keyword_value;
 743                 state = 1;
 744               }
 745             else
 746               state = 0;
 747           }
 748           next_is_argument =
 749             (strcmp (token.string, "print") == 0
 750              || strcmp (token.string, "printf") == 0);
 751           next_context_iter =
 752             flag_context_list_iterator (
 753               flag_context_list_table_lookup (
 754                 flag_context_list_table,
 755                 token.string, strlen (token.string)));
 756           free (token.string);
 757           continue;
 758
 759         case token_type_lparen:
 760           if (extract_parenthesized (mlp, inner_context, next_context_iter,
 761                                      arglist_parser_alloc (mlp,
 762                                                            state ? next_shapes : NULL)))
 763             {
 764               arglist_parser_done (argparser, arg); /* The nested call hit eof.  */
 765               return true;
 766             }
 767           next_is_argument = false;
 768           next_context_iter = null_context_list_iterator;
 769           state = 0;
 770           continue;
 771
 772         case token_type_rparen:
 773           arglist_parser_done (argparser, arg);
 774           return false;
 775
 776         case token_type_comma:
 777           arg++;
 778           inner_context =
 779             inherited_context (outer_context,
 780                                flag_context_list_iterator_advance (
 781                                  &context_iter));
 782           next_is_argument = false;
 783           next_context_iter = passthrough_context_list_iterator;
 784           state = 0;
 785           continue;
 786
 787         case token_type_string:
 788           {
 789             lex_pos_ty pos;
 790             pos.file_name = logical_file_name;
 791             pos.line_number = token.line_number;
 792
 793             if (extract_all)
 794               remember_a_message (mlp, NULL, token.string, inner_context, &pos,
 795                                   NULL, savable_comment);
 796             else
 797               arglist_parser_remember (argparser, arg, token.string,
 798                                        inner_context,
 799                                        pos.file_name, pos.line_number,
 800                                        savable_comment);
 801           }
 802           next_is_argument = false;
 803           next_context_iter = null_context_list_iterator;
 804           state = 0;
 805           continue;
 806
 807         case token_type_i18nstring:
 808           {
 809             lex_pos_ty pos;
 810             pos.file_name = logical_file_name;
 811             pos.line_number = token.line_number;
 812
 813             remember_a_message (mlp, NULL, token.string, inner_context, &pos,
 814                                 NULL, savable_comment); /* _"..." is always recorded.  */
 815           }
 816           next_is_argument = false;
 817           next_context_iter = null_context_list_iterator;
 818           state = 0;
 819           continue;
 820
 821         case token_type_semicolon:
 822           /* An argument list ends, and a new statement begins.  */
 823           /* FIXME: Should handle newline that acts as statement separator
 824              in the same way.  */
 825           /* FIXME: Instead of resetting outer_context here, it may be better
 826              to recurse in the next_is_argument handling above, waiting for
 827              the next semicolon or other statement terminator.  */
 828           outer_context = null_context;
 829           context_iter = null_context_list_iterator;
 830           next_is_argument = false;
 831           next_context_iter = passthrough_context_list_iterator;
 832           inner_context =
 833             inherited_context (outer_context,
 834                                flag_context_list_iterator_advance (
 835                                  &context_iter));
 836           state = 0;
 837           continue;
 838
 839         case token_type_eof:
 840           arglist_parser_done (argparser, arg);
 841           return true;
 842
 843         case token_type_other:
 844           next_is_argument = false;
 845           next_context_iter = null_context_list_iterator;
 846           state = 0;
 847           continue;
 848
 849         default:
 850           abort ();
 851         }
 852     }
 853 }
 854
 855 /* Scan the awk source F and add the extracted messages to MDLP's first list.  */
 856 void
 857 extract_awk (FILE *f,
 858              const char *real_filename, const char *logical_filename,
 859              flag_context_list_table_ty *flag_table,
 860              msgdomain_list_ty *mdlp)
 861 {
 862   message_list_ty *mlp = mdlp->item[0]->messages;
 863   bool at_eof;
 864
 865   /* Reader state.  */
 866   fp = f;
 867   real_file_name = real_filename;
 868   logical_file_name = xstrdup (logical_filename);
 869   line_number = 1;
 870   last_comment_line = -1;
 871   last_non_comment_line = -1;
 872   /* Tokenizer and extractor state.  */
 873   prefer_division_over_regexp = false;
 874   flag_context_list_table = flag_table;
 875   init_keywords ();
 876
 877   /* Eat tokens until eof is seen.  When extract_parenthesized returns
 878      due to an unbalanced closing parenthesis, just restart it.  */
 879   do
 880     at_eof = extract_parenthesized (mlp, null_context, null_context_list_iterator,
 881                                     arglist_parser_alloc (mlp, NULL));
 882   while (!at_eof);
 883
 884   fp = NULL;
 885   real_file_name = NULL;
 886   logical_file_name = NULL; /* The copy made above is not freed here.  */
 887   line_number = 0;
 888 }