gettext-tools/src/x-javascript.c

   1 /* xgettext JavaScript backend.
   2    Copyright (C) 2002-2003, 2005-2009, 2013, 2015 Free Software
   3    Foundation, Inc.
   4
   5    This file was written by Andreas Stricker <andy@knitter.ch>, 2010
   6    It's based on x-python from Bruno Haible.
   7
   8    This program is free software: you can redistribute it and/or modify
   9    it under the terms of the GNU General Public License as published by
  10    the Free Software Foundation; either version 3 of the License, or
  11    (at your option) any later version.
  12
  13    This program is distributed in the hope that it will be useful,
  14    but WITHOUT ANY WARRANTY; without even the implied warranty of
  15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16    GNU General Public License for more details.
  17
  18    You should have received a copy of the GNU General Public License
  19    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21 #ifdef HAVE_CONFIG_H
  22 # include "config.h"
  23 #endif
  24
  25 /* Specification.  */
  26 #include "x-javascript.h"
  27
  28 #include <assert.h>
  29 #include <errno.h>
  30 #include <stdbool.h>
  31 #include <stdio.h>
  32 #include <stdlib.h>
  33 #include <string.h>
  34
  35 #include "message.h"
  36 #include "xgettext.h"
  37 #include "error.h"
  38 #include "error-progname.h"
  39 #include "progname.h"
  40 #include "basename.h"
  41 #include "xerror.h"
  42 #include "xvasprintf.h"
  43 #include "xalloc.h"
  44 #include "c-strstr.h"
  45 #include "c-ctype.h"
  46 #include "po-charset.h"
  47 #include "unistr.h"
  48 #include "gettext.h"
  49
  /* _ (s) is shorthand for gettext (s).  */
  50 #define _(s) gettext(s)
  51
     /* Larger of A and B.  The selected argument is evaluated twice, so do
        not pass expressions with side effects.  */
  52 #define max(a,b) ((a) > (b) ? (a) : (b))
  53
     /* Number of elements of A.  Only meaningful when A is an actual array,
        not a pointer.  */
  54 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
  55
  56 /* The JavaScript aka ECMA-Script syntax is defined in ECMA-262
  57    specification:
  58    http://www.ecma-international.org/publications/standards/Ecma-262.htm */
  59
  60 /* ====================== Keyword set customization.  ====================== */
  61
  62 /* If true extract all strings.  */
     /* Set by x_javascript_extract_all.  */
  63 static bool extract_all = false;
  64
     /* Keyword table.  x_javascript_keyword initializes it on first use (while
        keywords.table is still NULL) and inserts into it.  */
  65 static hash_table keywords;
     /* True until init_keywords has registered the built-in keywords, or until
        x_javascript_keyword (NULL) has disabled them.  */
  66 static bool default_keywords = true;
  67
  68
  69 void
  70 x_javascript_extract_all ()
  71 {
  72   extract_all = true;
  73 }
  74
  75
  76 void
  77 x_javascript_keyword (const char *name)
  78 {
  79   if (name == NULL)
  80     default_keywords = false;
  81   else
  82     {
  83       const char *end;
  84       struct callshape shape;
  85       const char *colon;
  86
  87       if (keywords.table == NULL)
  88         hash_init (&keywords, 100);
  89
  90       split_keywordspec (name, &end, &shape);
  91
  92       /* The characters between name and end should form a valid C identifier.
  93          A colon means an invalid parse in split_keywordspec().  */
  94       colon = strchr (name, ':');
  95       if (colon == NULL || colon >= end)
  96         insert_keyword_callshape (&keywords, name, end - name, &shape);
  97     }
  98 }
  99
 100 /* Finish initializing the keywords hash table.
 101    Called after argument processing, before each file is processed.  */
 102 static void
 103 init_keywords ()
 104 {
 105   if (default_keywords)
 106     {
 107       /* When adding new keywords here, also update the documentation in
 108          xgettext.texi!  */
 109       x_javascript_keyword ("gettext");
 110       x_javascript_keyword ("dgettext:2");
 111       x_javascript_keyword ("dcgettext:2");
 112       x_javascript_keyword ("ngettext:1,2");
 113       x_javascript_keyword ("dngettext:2,3");
 114       x_javascript_keyword ("pgettext:1c,2");
 115       x_javascript_keyword ("dpgettext:2c,3");
 116       x_javascript_keyword ("_");
 117       default_keywords = false;
 118     }
 119 }
 120
 /* Hand the flag specifications of the built-in keywords to
    xgettext_record_flag, one call per specification.  */
 void
 init_flag_table_javascript ()
 {
   static const char *const flag_specs[] =
     {
       "gettext:1:pass-javascript-format",
       "dgettext:2:pass-javascript-format",
       "dcgettext:2:pass-javascript-format",
       "ngettext:1:pass-javascript-format",
       "ngettext:2:pass-javascript-format",
       "dngettext:2:pass-javascript-format",
       "dngettext:3:pass-javascript-format",
       "pgettext:2:pass-javascript-format",
       "dpgettext:3:pass-javascript-format",
       "_:1:pass-javascript-format"
     };
   size_t i;

   for (i = 0; i < SIZEOF (flag_specs); i++)
     xgettext_record_flag (flag_specs[i]);
 }
 135
 136
 137 /* ======================== Reading of characters.  ======================== */
 138
 139 /* Real filename, used in error messages about the input file.  */
 140 static const char *real_file_name;
 141
 142 /* Logical filename and line number, used to label the extracted messages.  */
 143 static char *logical_file_name;
     /* phase1_getc increments line_number for every newline it returns, and
        phase1_ungetc decrements it when a newline is pushed back.  */
 144 static int line_number;
 145
 146 /* The input file stream.  */
     /* Read by phase1_getc through getc.  */
 147 static FILE *fp;
 148
 149
 150 /* 1. line_number handling.  */
 151
 152 /* Maximum used, roughly a safer MB_LEN_MAX.  */
 153 #define MAX_PHASE1_PUSHBACK 16
     /* Stack of pushed-back bytes.  phase1_ungetc stores at index
        phase1_pushback_length and increments it; phase1_getc decrements it
        and returns the byte at the new index, so the most recently pushed
        byte comes back first.  */
 154 static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
 155 static int phase1_pushback_length;
 156
 157 /* Read the next single byte from the input file.  */
 158 static int
 159 phase1_getc ()
 160 {
 161   int c;
 162
 163   if (phase1_pushback_length)
 164     c = phase1_pushback[--phase1_pushback_length];
 165   else
 166     {
 167       c = getc (fp);
 168
 169       if (c == EOF)
 170         {
 171           if (ferror (fp))
 172             error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
 173                    real_file_name);
 174           return EOF;
 175         }
 176     }
 177
 178   if (c == '\n')
 179     ++line_number;
 180
 181   return c;
 182 }
 183
 184 /* Supports MAX_PHASE1_PUSHBACK characters of pushback.  */
 185 static void
 186 phase1_ungetc (int c)
 187 {
 188   if (c != EOF)
 189     {
 190       if (c == '\n')
 191         --line_number;
 192
 193       if (phase1_pushback_length == SIZEOF (phase1_pushback))
 194         abort ();
 195       phase1_pushback[phase1_pushback_length++] = c;
 196     }
 197 }
 198
 199
 200 /* Phase 2: Conversion to Unicode.
 201    For now, we expect JavaScript files to be encoded as UTF-8.  */
 202
 203 /* End-of-file indicator for functions returning an UCS-4 character.  */
 204 #define UEOF -1
 205
     /* Current lexical context.  comment_start sets it to lc_comment and
        comment_line_end sets it back to lc_outside; phase2_getc passes it to
        non_ascii_error_message.  */
 206 static lexical_context_ty lexical_context;
 207
 208 /* Maximum used, length of "<![CDATA[" tag minus one.  */
     /* Stack of pushed-back characters: filled by phase2_ungetc, emptied from
        the end by phase2_getc.  */
 209 static int phase2_pushback[8];
 210 static int phase2_pushback_length;
 211
 212 /* Read the next Unicode UCS-4 character from the input file.
        Characters pushed back with phase2_ungetc are returned first, the most
        recently pushed one first.  Otherwise bytes are fetched through
        phase1_getc and decoded according to xgettext_current_source_encoding:
          - ASCII: bytes are passed through; a non-ASCII byte is a fatal error.
          - UTF-8: the sequence is decoded directly.
          - anything else: the bytes are converted to UTF-8 through iconv
            (only if HAVE_ICONV), then decoded.
        Returns UEOF at end of input.  */
 213 static int
 214 phase2_getc ()
 215 {
 216   if (phase2_pushback_length)
 217     return phase2_pushback[--phase2_pushback_length];
 218
       /* Case 1: the source is declared to be ASCII.  */
 219   if (xgettext_current_source_encoding == po_charset_ascii)
 220     {
 221       int c = phase1_getc ();
 222       if (c == EOF)
 223         return UEOF;
 224       if (!c_isascii (c))
 225         {
 226           multiline_error (xstrdup (""),
 227                            xasprintf ("%s\n%s\n",
 228                                       non_ascii_error_message (lexical_context,
 229                                                                real_file_name,
 230                                                                line_number),
 231                                       _("\
 232 Please specify the source encoding through --from-code\n")));
 233           exit (EXIT_FAILURE);
 234         }
 235       return c;
 236     }
       /* Case 2: some encoding other than ASCII and UTF-8.  */
 237   else if (xgettext_current_source_encoding != po_charset_utf8)
 238     {
 239 #if HAVE_ICONV
 240       /* Use iconv on an increasing number of bytes.  Read only as many bytes
 241          through phase1_getc as needed.  This is needed to give reasonable
 242          interactive behaviour when fp is connected to an interactive tty.  */
 243       unsigned char buf[MAX_PHASE1_PUSHBACK];
 244       size_t bufcount;
 245       int c = phase1_getc ();
 246       if (c == EOF)
 247         return UEOF;
 248       buf[0] = (unsigned char) c;
 249       bufcount = 1;
 250
           /* Each iteration retries the conversion on buf[0..bufcount-1]; the
              loop is left only by returning a character or by a fatal error.  */
 251       for (;;)
 252         {
 253           unsigned char scratchbuf[6];
 254           const char *inptr = (const char *) &buf[0];
 255           size_t insize = bufcount;
 256           char *outptr = (char *) &scratchbuf[0];
 257           size_t outsize = sizeof (scratchbuf);
 258
 259           size_t res = iconv (xgettext_current_source_iconv,
 260                               (ICONV_CONST char **) &inptr, &insize,
 261                               &outptr, &outsize);
 262           /* We expect that a character has been produced if and only if
 263              some input bytes have been consumed.  */
 264           if ((insize < bufcount) != (outsize < sizeof (scratchbuf)))
 265             abort ();
 266           if (outsize == sizeof (scratchbuf))
 267             {
 268               /* No character has been produced.  Must be an error.  */
 269               if (res != (size_t)(-1))
 270                 abort ();
 271
 272               if (errno == EILSEQ)
 273                 {
 274                   /* An invalid multibyte sequence was encountered.  */
 275                   multiline_error (xstrdup (""),
 276                                    xasprintf (_("\
 277 %s:%d: Invalid multibyte sequence.\n\
 278 Please specify the correct source encoding through --from-code\n"),
 279                                    real_file_name, line_number));
 280                   exit (EXIT_FAILURE);
 281                 }
 282               else if (errno == EINVAL)
 283                 {
 284                   /* An incomplete multibyte character.  */
                       /* This c shadows the one declared at the top of the
                          branch.  */
 285                   int c;
 286
 287                   if (bufcount == MAX_PHASE1_PUSHBACK)
 288                     {
 289                       /* An overlong incomplete multibyte sequence was
 290                          encountered.  */
 291                       multiline_error (xstrdup (""),
 292                                        xasprintf (_("\
 293 %s:%d: Long incomplete multibyte sequence.\n\
 294 Please specify the correct source encoding through --from-code\n"),
 295                                        real_file_name, line_number));
 296                       exit (EXIT_FAILURE);
 297                     }
 298
 299                   /* Read one more byte and retry iconv.  */
 300                   c = phase1_getc ();
 301                   if (c == EOF)
 302                     {
 303                       multiline_error (xstrdup (""),
 304                                        xasprintf (_("\
 305 %s:%d: Incomplete multibyte sequence at end of file.\n\
 306 Please specify the correct source encoding through --from-code\n"),
 307                                        real_file_name, line_number));
 308                       exit (EXIT_FAILURE);
 309                     }
 310                   if (c == '\n')
 311                     {
                           /* phase1_getc has already counted this newline,
                              hence line_number - 1 in the message.  */
 312                       multiline_error (xstrdup (""),
 313                                        xasprintf (_("\
 314 %s:%d: Incomplete multibyte sequence at end of line.\n\
 315 Please specify the correct source encoding through --from-code\n"),
 316                                        real_file_name, line_number - 1));
 317                       exit (EXIT_FAILURE);
 318                     }
 319                   buf[bufcount++] = (unsigned char) c;
 320                 }
 321               else
 322                 error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"),
 323                        real_file_name, line_number);
 324             }
 325           else
 326             {
 327               size_t outbytes = sizeof (scratchbuf) - outsize;
 328               size_t bytes = bufcount - insize;
 329               ucs4_t uc;
 330
 331               /* We expect that one character has been produced.  */
 332               if (bytes == 0)
 333                 abort ();
 334               if (outbytes == 0)
 335                 abort ();
 336               /* Push back the unused bytes.  */
 337               while (insize > 0)
 338                 phase1_ungetc (buf[--insize]);
 339               /* Convert the character from UTF-8 to UCS-4.  */
 340               if (u8_mbtoucr (&uc, scratchbuf, outbytes) < (int) outbytes)
 341                 {
 342                   /* scratchbuf contains an out-of-range Unicode character
 343                      (> 0x10ffff).  */
 344                   multiline_error (xstrdup (""),
 345                                    xasprintf (_("\
 346 %s:%d: Invalid multibyte sequence.\n\
 347 Please specify the source encoding through --from-code\n"),
 348                                    real_file_name, line_number));
 349                   exit (EXIT_FAILURE);
 350                 }
 351               return uc;
 352             }
 353         }
 354 #else
 355       /* If we don't have iconv(), the only supported values for
 356          xgettext_global_source_encoding and thus also for
 357          xgettext_current_source_encoding are ASCII and UTF-8.  */
 358       abort ();
 359 #endif
 360     }
       /* Case 3: the source is UTF-8.  */
 361   else
 362     {
 363       /* Read an UTF-8 encoded character.  */
 364       unsigned char buf[6];
 365       unsigned int count;
 366       int c;
 367       ucs4_t uc;
 368
 369       c = phase1_getc ();
 370       if (c == EOF)
 371         return UEOF;
 372       buf[0] = c;
 373       count = 1;
 374
           /* The first byte decides how many more bytes may belong to the
              character.  A further byte is fetched only while the first byte
              calls for it and every byte fetched after the first is a
              continuation byte; (b ^ 0x80) < 0x40 is true exactly for b in
              0x80..0xbf.  If EOF is hit in the middle of a sequence, UEOF is
              returned and the bytes read so far are dropped.  */
 375       if (buf[0] >= 0xc0)
 376         {
 377           c = phase1_getc ();
 378           if (c == EOF)
 379             return UEOF;
 380           buf[1] = c;
 381           count = 2;
 382         }
 383
 384       if (buf[0] >= 0xe0
 385           && ((buf[1] ^ 0x80) < 0x40))
 386         {
 387           c = phase1_getc ();
 388           if (c == EOF)
 389             return UEOF;
 390           buf[2] = c;
 391           count = 3;
 392         }
 393
 394       if (buf[0] >= 0xf0
 395           && ((buf[1] ^ 0x80) < 0x40)
 396           && ((buf[2] ^ 0x80) < 0x40))
 397         {
 398           c = phase1_getc ();
 399           if (c == EOF)
 400             return UEOF;
 401           buf[3] = c;
 402           count = 4;
 403         }
 404
 405       if (buf[0] >= 0xf8
 406           && ((buf[1] ^ 0x80) < 0x40)
 407           && ((buf[2] ^ 0x80) < 0x40)
 408           && ((buf[3] ^ 0x80) < 0x40))
 409         {
 410           c = phase1_getc ();
 411           if (c == EOF)
 412             return UEOF;
 413           buf[4] = c;
 414           count = 5;
 415         }
 416
 417       if (buf[0] >= 0xfc
 418           && ((buf[1] ^ 0x80) < 0x40)
 419           && ((buf[2] ^ 0x80) < 0x40)
 420           && ((buf[3] ^ 0x80) < 0x40)
 421           && ((buf[4] ^ 0x80) < 0x40))
 422         {
 423           c = phase1_getc ();
 424           if (c == EOF)
 425             return UEOF;
 426           buf[5] = c;
 427           count = 6;
 428         }
 429
           /* Decode the count bytes collected in buf into uc.  */
 430       u8_mbtouc (&uc, buf, count);
 431       return uc;
 432     }
 433 }
 434
 435 /* Supports max (9, UNINAME_MAX + 3) pushback characters.  */
 436 static void
 437 phase2_ungetc (int c)
 438 {
 439   if (c != UEOF)
 440     {
 441       if (phase2_pushback_length == SIZEOF (phase2_pushback))
 442         abort ();
 443       phase2_pushback[phase2_pushback_length++] = c;
 444     }
 445 }
 446
 447
 448 /* ========================= Accumulating strings.  ======================== */
 449
 450 /* A string buffer type that allows appending Unicode characters.
 451    Returns the entire string in UTF-8 encoding.  */
 452
 453 struct unicode_string_buffer
 454 {
 455   /* The part of the string that has already been converted to UTF-8.  */
 456   char *utf8_buffer;
       /* Number of bytes of utf8_buffer in use.  The NUL stored by
          unicode_string_buffer_result is not counted.  */
 457   size_t utf8_buflen;
       /* Number of bytes allocated for utf8_buffer.  */
 458   size_t utf8_allocated;
 459 };
 460
 461 /* Initialize a 'struct unicode_string_buffer' to empty.  */
 462 static inline void
 463 init_unicode_string_buffer (struct unicode_string_buffer *bp)
 464 {
 465   bp->utf8_buffer = NULL;
 466   bp->utf8_buflen = 0;
 467   bp->utf8_allocated = 0;
 468 }
 469
 470 /* Auxiliary function: Ensure count more bytes are available in bp->utf8.  */
 471 static inline void
 472 unicode_string_buffer_append_unicode_grow (struct unicode_string_buffer *bp,
 473                                            size_t count)
 474 {
 475   if (bp->utf8_buflen + count > bp->utf8_allocated)
 476     {
 477       size_t new_allocated = 2 * bp->utf8_allocated + 10;
 478       if (new_allocated < bp->utf8_buflen + count)
 479         new_allocated = bp->utf8_buflen + count;
 480       bp->utf8_allocated = new_allocated;
 481       bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
 482     }
 483 }
 484
 485 /* Auxiliary function: Append a Unicode character to bp->utf8.
 486    uc must be < 0x110000.  */
 487 static inline void
 488 unicode_string_buffer_append_unicode (struct unicode_string_buffer *bp,
 489                                       unsigned int uc)
 490 {
 491   unsigned char utf8buf[6];
 492   int count = u8_uctomb (utf8buf, uc, 6);
 493
 494   if (count < 0)
 495     /* The caller should have ensured that uc is not out-of-range.  */
 496     abort ();
 497
 498   unicode_string_buffer_append_unicode_grow (bp, count);
 499   memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
 500   bp->utf8_buflen += count;
 501 }
 502
 503 /* Return the string buffer's contents.  */
 504 static char *
 505 unicode_string_buffer_result (struct unicode_string_buffer *bp)
 506 {
 507   /* NUL-terminate it.  */
 508   unicode_string_buffer_append_unicode_grow (bp, 1);
 509   bp->utf8_buffer[bp->utf8_buflen] = '\0';
 510   /* Return it.  */
 511   return bp->utf8_buffer;
 512 }
 513
 514 /* Free the memory pointed to by a 'struct unicode_string_buffer'.  */
 515 static inline void
 516 free_unicode_string_buffer (struct unicode_string_buffer *bp)
 517 {
 518   free (bp->utf8_buffer);
 519 }
 520
 521
 522 /* ======================== Accumulating comments.  ======================== */
 523
 524
 525 /* Accumulating a single comment line.  */
 526
     /* Text of the comment line being read.  comment_start empties it by
        resetting utf8_buflen only, so the allocated storage is reused from
        one comment line to the next.  */
 527 static struct unicode_string_buffer comment_buffer;
 528
 529 static inline void
 530 comment_start ()
 531 {
 532   lexical_context = lc_comment;
 533   comment_buffer.utf8_buflen = 0;
 534 }
 535
 536 static inline bool
 537 comment_at_start ()
 538 {
 539   return (comment_buffer.utf8_buflen == 0);
 540 }
 541
 542 static inline void
 543 comment_add (int c)
 544 {
 545   unicode_string_buffer_append_unicode (&comment_buffer, c);
 546 }
 547
 548 static inline const char *
 549 comment_line_end (size_t chars_to_remove)
 550 {
 551   char *buffer = unicode_string_buffer_result (&comment_buffer);
 552   size_t buflen = strlen (buffer) - chars_to_remove;
 553
 554   while (buflen >= 1
 555          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
 556     --buflen;
 557   buffer[buflen] = '\0';
 558   savable_comment_add (buffer);
 559   lexical_context = lc_outside;
 560   return buffer;
 561 }
 562
 563
 564 /* These are for tracking whether comments count as immediately before
 565    keyword.  */
     /* phase3_getc sets last_comment_line to line_number when it enters a
        comment.  */
 566 static int last_comment_line;
     /* NOTE(review): the code that updates last_non_comment_line is outside
        this excerpt.  */
 567 static int last_non_comment_line;
 568
 569
 570 /* ======================== Recognizing comments.  ======================== */
 571
 572
 573 /* Canonicalized encoding name for the current input file.  */
 574 static const char *xgettext_current_file_source_encoding;
 575
 576 #if HAVE_ICONV
 577 /* Converter from xgettext_current_file_source_encoding to UTF-8 (except from
 578    ASCII or UTF-8, when this conversion is a no-op).  */
 579 static iconv_t xgettext_current_file_source_iconv;
 580 #endif
 581
 582 /* Tracking whether the current line is a continuation line or contains a
 583    non-blank character.  */
     /* Maintained by phase3_getc: set to true after a backslash-newline and
        when a character other than space, tab, form feed or newline is
        returned; set to false when a newline is returned and after a
        comment.  */
 584 static bool continuation_or_nonblank_line = false;
 585
 586
 587 /* Phase 3: Outside strings, replace backslash-newline with nothing and a
 588    comment with nothing.  */
 589
 590 static int
 591 phase3_getc ()
 592 {
 593   int c;
 594
 595   for (;;)
 596     {
 597       c = phase2_getc ();
 598       if (c == '\\')
 599         {
 600           c = phase2_getc ();
 601           if (c != '\n')
 602             {
 603               phase2_ungetc (c);
 604               /* This shouldn't happen usually, because "A backslash is
 605                  illegal elsewhere on a line outside a string literal."  */
 606               return '\\';
 607             }
 608           /* Eat backslash-newline.  */
 609           continuation_or_nonblank_line = true;
 610         }
 611       else if (c == '/')
 612         {
 613           c = phase2_getc ();
 614           if (c == '/')
 615             {
 616               /* C++ style comment.  */
 617               last_comment_line = line_number;
 618               comment_start ();
 619               for (;;)
 620                 {
 621                   c = phase2_getc ();
 622                   if (c == UEOF || c == '\n')
 623                     {
 624                       comment_line_end (0);
 625                       break;
 626                     }
 627                   /* We skip all leading white space, but not EOLs.  */
 628                   if (!(comment_at_start () && (c == ' ' || c == '\t')))
 629                     comment_add (c);
 630                 }
 631               continuation_or_nonblank_line = false;
 632               return c;
 633             }
 634           else if (c == '*')
 635             {
 636               /* C style comment.  */
 637               bool last_was_star = false;
 638               last_comment_line = line_number;
 639               comment_start ();
 640               for (;;)
 641                 {
 642                   c = phase2_getc ();
 643                   if (c == UEOF)
 644                     break;
 645                   /* We skip all leading white space, but not EOLs.  */
 646                   if (!(comment_at_start () && (c == ' ' || c == '\t')))
 647                     comment_add (c);
 648                   switch (c)
 649                     {
 650                     case '\n':
 651                       comment_line_end (1);
 652                       comment_start ();
 653                       last_was_star = false;
 654                       continue;
 655
 656                     case '*':
 657                       last_was_star = true;
 658                       continue;
 659                     case '/':
 660                       if (last_was_star)
 661                         {
 662                           comment_line_end (2);
 663                           break;
 664                         }
 665                       /* FALLTHROUGH */
 666
 667                     default:
 668                       last_was_star = false;
 669                       continue;
 670                     }
 671                   break;
 672                 }
 673               continuation_or_nonblank_line = false;
 674             }
 675           else
 676             {
 677               phase2_ungetc (c);
 678               return '/';
 679             }
 680         }
 681       else
 682         {
 683           if (c == '\n')
 684             continuation_or_nonblank_line = false;
 685           else if (!(c == ' ' || c == '\t' || c == '\f'))
 686             continuation_or_nonblank_line = true;
 687           return c;
 688         }
 689     }
 690 }
 691
 /* Push the character C back for phase3_getc.  This simply forwards to
    phase2_ungetc.
    Supports only one pushback character.  */
 static void
 phase3_ungetc (int c)
 {
   phase2_ungetc (c);
 }
 698
 699
 700 /* ========================= Accumulating strings.  ======================== */
 701
 702 /* Return value of phase7_getuc when EOF is reached.  */
 703 #define P7_EOF (-1)
     /* Return value of phase7_getuc when the string literal ends, either at
        the closing quote or at an unescaped newline.  */
 704 #define P7_STRING_END (-2)
 705
 706 /* Convert an UTF-16 or UTF-32 code point to a return value that can be
 707    distinguished from a single-byte return value.  */
 708 #define UNICODE(code) (0x100 + (code))
 709
 710 /* Test a return value of phase7_getuc whether it designates an UTF-16 or
 711    UTF-32 code point.  */
     /* P7_EOF and P7_STRING_END are negative, so they never satisfy this
        test.  */
 712 #define IS_UNICODE(p7_result) ((p7_result) >= 0x100)
 713
 714 /* Extract the UTF-16 or UTF-32 code of a return value that satisfies
 715    IS_UNICODE.  */
 716 #define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
 717
 718
 719 /* ========================== Reading of tokens.  ========================== */
 720
 721
     /* Kind of a token; stored in the 'type' member of struct token_ty.  */
 722 enum token_type_ty
 723 {
 724   token_type_eof,
 725   token_type_lparen,            /* ( */
 726   token_type_rparen,            /* ) */
 727   token_type_comma,             /* , */
 728   token_type_lbracket,          /* [ */
 729   token_type_rbracket,          /* ] */
 730   token_type_plus,              /* + */
 731   token_type_regexp,            /* /.../ */
 732   token_type_operator,          /* - * / % . < > = ~ ! | & ? : ^ */
 733   token_type_equal,             /* = */
 734   token_type_string,            /* "abc", 'abc' */
 735   token_type_keyword,           /* return, else */
 736   token_type_symbol,            /* symbol, number */
 737   token_type_other              /* misc. operator */
 738 };
 739 typedef enum token_type_ty token_type_ty;
 740
 741 typedef struct token_ty token_ty;
     /* A token.  The 'string' and 'comment' members are meaningful only for
        the token types named next to them.  */
 742 struct token_ty
 743 {
 744   token_type_ty type;
 745   char *string;         /* for token_type_string, token_type_symbol,
 746                            token_type_keyword */
 747   refcounted_string_list_ty *comment;   /* for token_type_string */
       /* Presumably the input line on which the token was read; it is set
          outside this excerpt -- TODO confirm.  */
 748   int line_number;
 749 };
 750
 751
 752 /* Free the memory pointed to by a 'struct token_ty'.  */
 753 static inline void
 754 free_token (token_ty *tp)
 755 {
 756   if (tp->type == token_type_string || tp->type == token_type_symbol)
 757     free (tp->string);
 758   if (tp->type == token_type_string)
 759     drop_reference (tp->comment);
 760 }
 761
 762
 763 /* JavaScript provides strings with either double or single quotes:
 764      "abc" or 'abc'
 765    Both may contain special sequences after a backslash:
 766      \', \", \\, \b, \f, \n, \r, \t, \v
 767    Special characters can be entered using hexadecimal escape
 768    sequences or deprecated octal escape sequences:
 769      \xXX, \OOO
 770    Any unicode point can be entered using Unicode escape sequences:
 771      \uNNNN
 772    If a sequence after a backslash is not a legitimate character
 773    escape sequence, the character value is the sequence itself without
 774    a backslash.  For example, \xxx is treated as xxx.  */
 775
 776 static int
 777 phase7_getuc (int quote_char)
 778 {
 779   int c;
 780
 781   for (;;)
 782     {
 783       /* Use phase 2, because phase 3 elides comments.  */
 784       c = phase2_getc ();
 785
 786       if (c == UEOF)
 787         return P7_EOF;
 788
 789       if (c == quote_char)
 790         return P7_STRING_END;
 791
 792       if (c == '\n')
 793         {
 794           phase2_ungetc (c);
 795           error_with_progname = false;
 796           error (0, 0, _("%s:%d: warning: unterminated string"),
 797                  logical_file_name, line_number);
 798           error_with_progname = true;
 799           return P7_STRING_END;
 800         }
 801
 802       if (c != '\\')
 803         return UNICODE (c);
 804
 805       /* Dispatch according to the character following the backslash.  */
 806       c = phase2_getc ();
 807       if (c == UEOF)
 808         return P7_EOF;
 809
 810       switch (c)
 811         {
 812         case '\n':
 813           continue;
 814         case 'b':
 815           return UNICODE ('\b');
 816         case 'f':
 817           return UNICODE ('\f');
 818         case 'n':
 819           return UNICODE ('\n');
 820         case 'r':
 821           return UNICODE ('\r');
 822         case 't':
 823           return UNICODE ('\t');
 824         case 'v':
 825           return UNICODE ('\v');
 826         case '0': case '1': case '2': case '3': case '4':
 827         case '5': case '6': case '7':
 828           {
 829             int n = c - '0';
 830
 831             c = phase2_getc ();
 832             if (c != UEOF)
 833               {
 834                 if (c >= '0' && c <= '7')
 835                   {
 836                     n = (n << 3) + (c - '0');
 837                     c = phase2_getc ();
 838                     if (c != UEOF)
 839                       {
 840                         if (c >= '0' && c <= '7')
 841                           n = (n << 3) + (c - '0');
 842                         else
 843                           phase2_ungetc (c);
 844                       }
 845                   }
 846                 else
 847                   phase2_ungetc (c);
 848               }
 849             return UNICODE (n);
 850           }
 851         case 'x':
 852           {
 853             int c1 = phase2_getc ();
 854             int n1;
 855
 856             if (c1 >= '0' && c1 <= '9')
 857               n1 = c1 - '0';
 858             else if (c1 >= 'A' && c1 <= 'F')
 859               n1 = c1 - 'A' + 10;
 860             else if (c1 >= 'a' && c1 <= 'f')
 861               n1 = c1 - 'a' + 10;
 862             else
 863               n1 = -1;
 864
 865             if (n1 >= 0)
 866               {
 867                 int c2 = phase2_getc ();
 868                 int n2;
 869
 870                 if (c2 >= '0' && c2 <= '9')
 871                   n2 = c2 - '0';
 872                 else if (c2 >= 'A' && c2 <= 'F')
 873                   n2 = c2 - 'A' + 10;
 874                 else if (c2 >= 'a' && c2 <= 'f')
 875                   n2 = c2 - 'a' + 10;
 876                 else
 877                   n2 = -1;
 878
 879                 if (n2 >= 0)
 880                   {
 881                     int n = (n1 << 4) + n2;
 882                     return UNICODE (n);
 883                   }
 884
 885                 phase2_ungetc (c2);
 886               }
 887             phase2_ungetc (c1);
 888             return UNICODE (c);
 889           }
 890         case 'u':
 891           {
 892             unsigned char buf[4];
 893             unsigned int n = 0;
 894             int i;
 895
 896             for (i = 0; i < 4; i++)
 897               {
 898                 int c1 = phase2_getc ();
 899
 900                 if (c1 >= '0' && c1 <= '9')
 901                   n = (n << 4) + (c1 - '0');
 902                 else if (c1 >= 'A' && c1 <= 'F')
 903                   n = (n << 4) + (c1 - 'A' + 10);
 904                 else if (c1 >= 'a' && c1 <= 'f')
 905                   n = (n << 4) + (c1 - 'a' + 10);
 906                 else
 907                   {
 908                     phase2_ungetc (c1);
 909                     while (--i >= 0)
 910                       phase2_ungetc (buf[i]);
 911                     return UNICODE (c);
 912                   }
 913
 914                 buf[i] = c1;
 915               }
 916             return UNICODE (n);
 917           }
 918         default:
 919           return UNICODE (c);
 920         }
 921     }
 922 }
 923
 924
 925 /* Combine characters into tokens.  Discard whitespace except newlines at
 926    the end of logical lines.  */
 927
     /* Storage for up to 2 pushed-back tokens and the number currently held.
        NOTE(review): the functions using them are outside this excerpt.  */
 928 static token_ty phase5_pushback[2];
 929 static int phase5_pushback_length;
 930
     /* Presumably the type of the most recently read token; it is updated
        outside this excerpt -- TODO confirm.  */
 931 static token_type_ty last_token_type = token_type_other;
 932
 933 static void
 934 phase5_scan_regexp ()
 935 {
 936     int c;
 937
 938     /* Scan for end of RegExp literal ('/').  */
 939     for (;;)
 940       {
 941         /* Must use phase2 as there can't be comments.  */
 942         c = phase2_getc ();
 943         if (c == '/')
 944           break;
 945         if (c == '\\')
 946           {
 947             c = phase2_getc ();
 948             if (c != UEOF)
 949               continue;
 950           }
 951         if (c == UEOF)
 952           {
 953             error_with_progname = false;
 954             error (0, 0,
 955                    _("%s:%d: warning: RegExp literal terminated too early"),
 956                    logical_file_name, line_number);
 957             error_with_progname = true;
 958             return;
 959           }
 960       }
 961
 962     /* Scan for modifier flags (ECMA-262 5th section 15.10.4.1).  */
 963     c = phase2_getc ();
 964     if (!(c == 'g' || c == 'i' || c == 'm'))
 965       phase2_ungetc (c);
 966 }
 967
/* Nesting depth of XML elements currently open.  phase5_get increments it
   when a '<' opens an element and decrements it when the '>' of a closing
   tag is seen.  */
static int xml_element_depth = 0;
/* True between a '{' and the matching '}' seen while xml_element_depth is
   positive, i.e. while lexing a JavaScript expression embedded in XML.  */
static bool inside_embedded_js_in_xml = false;
 970
 971 static bool
 972 phase5_scan_xml_markup (token_ty *tp)
 973 {
 974   struct
 975   {
 976     const char *start;
 977     const char *end;
 978   } markers[] =
 979       {
 980         { "!--", "--" },
 981         { "![CDATA[", "]]" },
 982         { "?", "?" }
 983       };
 984   int i;
 985
 986   for (i = 0; i < SIZEOF (markers); i++)
 987     {
 988       const char *start = markers[i].start;
 989       const char *end = markers[i].end;
 990       int j;
 991
 992       /* Look for a start marker.  */
 993       for (j = 0; start[j] != '\0'; j++)
 994         {
 995           int c;
 996
 997           assert (phase2_pushback_length + j < SIZEOF (phase2_pushback));
 998           c = phase2_getc ();
 999           if (c == UEOF)
1000             goto eof;
1001           if (c != start[j])
1002             {
1003               int k = j;
1004
1005               phase2_ungetc (c);
1006               k--;
1007
1008               for (; k >= 0; k--)
1009                 phase2_ungetc (start[k]);
1010               break;
1011             }
1012         }
1013
1014       if (start[j] != '\0')
1015         continue;
1016
1017       /* Skip until the end marker.  */
1018       for (;;)
1019         {
1020           int c;
1021
1022           for (j = 0; end[j] != '\0'; j++)
1023             {
1024               assert (phase2_pushback_length + 1 < SIZEOF (phase2_pushback));
1025               c = phase2_getc ();
1026               if (c == UEOF)
1027                 goto eof;
1028               if (c != end[j])
1029                 {
1030                   /* Don't push the first character back so the next
1031                      iteration start from the second character.  */
1032                   if (j > 0)
1033                     {
1034                       int k = j;
1035
1036                       phase2_ungetc (c);
1037                       k--;
1038
1039                       for (; k > 0; k--)
1040                         phase2_ungetc (end[k]);
1041                     }
1042                   break;
1043                 }
1044             }
1045
1046           if (end[j] != '\0')
1047             continue;
1048
1049           c = phase2_getc ();
1050           if (c == UEOF)
1051             goto eof;
1052           if (c != '>')
1053             {
1054               error_with_progname = false;
1055               error (0, 0,
1056                      _("%s:%d: warning: %s is not allowed"),
1057                      logical_file_name, line_number,
1058                      end);
1059               error_with_progname = true;
1060               return false;
1061             }
1062           return true;
1063         }
1064     }
1065   return false;
1066
1067  eof:
1068   error_with_progname = false;
1069   error (0, 0,
1070          _("%s:%d: warning: unterminated XML markup"),
1071          logical_file_name, line_number);
1072   error_with_progname = true;
1073   return false;
1074 }
1075
1076 static void
1077 phase5_get (token_ty *tp)
1078 {
1079   int c;
1080
1081   if (phase5_pushback_length)
1082     {
1083       *tp = phase5_pushback[--phase5_pushback_length];
1084       last_token_type = tp->type;
1085       return;
1086     }
1087
1088   for (;;)
1089     {
1090       tp->line_number = line_number;
1091       c = phase3_getc ();
1092
1093       switch (c)
1094         {
1095         case UEOF:
1096           tp->type = last_token_type = token_type_eof;
1097           return;
1098
1099         case '\n':
1100           if (last_non_comment_line > last_comment_line)
1101             savable_comment_reset ();
1102           /* FALLTHROUGH */
1103         case ' ':
1104         case '\t':
1105         case '\f':
1106           /* Ignore whitespace and comments.  */
1107           continue;
1108         }
1109
1110       last_non_comment_line = tp->line_number;
1111
1112       switch (c)
1113         {
1114         case '.':
1115           {
1116             int c1 = phase3_getc ();
1117             phase3_ungetc (c1);
1118             if (!(c1 >= '0' && c1 <= '9'))
1119               {
1120
1121                 tp->type = last_token_type = token_type_other;
1122                 return;
1123               }
1124           }
1125           /* FALLTHROUGH */
1126         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1127         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1128         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1129         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1130         case 'Y': case 'Z':
1131         case '_':
1132         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1133         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1134         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1135         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1136         case 'y': case 'z':
1137         case '0': case '1': case '2': case '3': case '4':
1138         case '5': case '6': case '7': case '8': case '9':
1139           /* Symbol, or part of a number.  */
1140           {
1141             static char *buffer;
1142             static int bufmax;
1143             int bufpos;
1144
1145             bufpos = 0;
1146             for (;;)
1147               {
1148                 if (bufpos >= bufmax)
1149                   {
1150                     bufmax = 2 * bufmax + 10;
1151                     buffer = xrealloc (buffer, bufmax);
1152                   }
1153                 buffer[bufpos++] = c;
1154                 c = phase3_getc ();
1155                 switch (c)
1156                   {
1157                   case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1158                   case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1159                   case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1160                   case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1161                   case 'Y': case 'Z':
1162                   case '_':
1163                   case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1164                   case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1165                   case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1166                   case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1167                   case 'y': case 'z':
1168                   case '0': case '1': case '2': case '3': case '4':
1169                   case '5': case '6': case '7': case '8': case '9':
1170                     continue;
1171                   default:
1172                     phase3_ungetc (c);
1173                     break;
1174                   }
1175                 break;
1176               }
1177             if (bufpos >= bufmax)
1178               {
1179                 bufmax = 2 * bufmax + 10;
1180                 buffer = xrealloc (buffer, bufmax);
1181               }
1182             buffer[bufpos] = '\0';
1183             tp->string = xstrdup (buffer);
1184             if (strcmp (buffer, "return") == 0
1185                 || strcmp (buffer, "else") == 0)
1186               tp->type = last_token_type = token_type_keyword;
1187             else
1188               tp->type = last_token_type = token_type_symbol;
1189             return;
1190           }
1191
1192         /* Strings.  */
1193           {
1194             struct mixed_string_buffer *bp;
1195             int quote_char;
1196
1197             case '"': case '\'':
1198               quote_char = c;
1199               lexical_context = lc_string;
1200               /* Start accumulating the string.  */
1201               bp = mixed_string_buffer_alloc (lexical_context,
1202                                               logical_file_name,
1203                                               line_number);
1204               for (;;)
1205                 {
1206                   int uc = phase7_getuc (quote_char);
1207
1208                   /* Keep line_number in sync.  */
1209                   bp->line_number = line_number;
1210
1211                   if (uc == P7_EOF || uc == P7_STRING_END)
1212                     break;
1213
1214                   if (IS_UNICODE (uc))
1215                     {
1216                       assert (UNICODE_VALUE (uc) >= 0
1217                               && UNICODE_VALUE (uc) < 0x110000);
1218                       mixed_string_buffer_append_unicode (bp,
1219                                                           UNICODE_VALUE (uc));
1220                     }
1221                   else
1222                     mixed_string_buffer_append_char (bp, uc);
1223                 }
1224               tp->string = mixed_string_buffer_done (bp);
1225               tp->comment = add_reference (savable_comment);
1226               lexical_context = lc_outside;
1227               tp->type = last_token_type = token_type_string;
1228               return;
1229           }
1230
1231         case '+':
1232           tp->type = last_token_type = token_type_plus;
1233           return;
1234
1235         /* Identify operators. The multiple character ones are simply ignored
1236          * as they are recognized here and are otherwise not relevant. */
1237         case '-': case '*': /* '+' and '/' are not listed here! */
1238         case '%':
1239         case '~': case '!': case '|': case '&': case '^':
1240         case '?': case ':':
1241           tp->type = last_token_type = token_type_operator;
1242           return;
1243
1244         case '=':
1245           tp->type = last_token_type = token_type_equal;
1246           return;
1247
1248         case '<':
1249           {
1250             /* We assume:
1251                - XMLMarkup and XMLElement are only allowed after '=' or '('
1252                - embedded JavaScript expressions in XML do not recurse
1253              */
1254             if (xml_element_depth > 0
1255                 || (!inside_embedded_js_in_xml
1256                     && (last_token_type == token_type_equal
1257                         || last_token_type == token_type_lparen)))
1258               {
1259                 /* Comments, PI, or CDATA.  */
1260                 if (phase5_scan_xml_markup (tp))
1261                   return;
1262                 c = phase2_getc ();
1263
1264                 /* Closing tag.  */
1265                 if (c == '/')
1266                   lexical_context = lc_xml_close_tag;
1267
1268                 /* Opening element.  */
1269                 else
1270                   {
1271                     phase2_ungetc (c);
1272                     lexical_context = lc_xml_open_tag;
1273                     xml_element_depth++;
1274                   }
1275
1276                 tp->type = last_token_type = token_type_other;
1277               }
1278             else
1279               tp->type = last_token_type = token_type_operator;
1280           }
1281           return;
1282
1283         case '>':
1284           if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
1285             {
1286               switch (lexical_context)
1287                 {
1288                 case lc_xml_open_tag:
1289                   lexical_context = lc_xml_content;
1290                   break;
1291
1292                 case lc_xml_close_tag:
1293                   if (xml_element_depth-- > 0)
1294                     lexical_context = lc_xml_content;
1295                   else
1296                     lexical_context = lc_outside;
1297                   break;
1298
1299                 default:
1300                   break;
1301                 }
1302               tp->type = last_token_type = token_type_other;
1303             }
1304           else
1305             tp->type = last_token_type = token_type_operator;
1306           return;
1307
1308         case '/':
1309           if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
1310             {
1311               /* If it appears in an opening tag of an XML element, it's
1312                  part of '/>'.  */
1313               if (lexical_context == lc_xml_open_tag)
1314                 {
1315                   c = phase2_getc ();
1316                   if (c == '>')
1317                     lexical_context = lc_outside;
1318                   else
1319                     phase2_ungetc (c);
1320                 }
1321               tp->type = last_token_type = token_type_other;
1322               return;
1323             }
1324
1325           /* Either a division operator or the start of a regular
1326              expression literal.  If the '/' token is spotted after a
1327              symbol it's a division, otherwise it's a regular
1328              expression.  */
1329           if (last_token_type == token_type_symbol
1330               || last_token_type == token_type_rparen
1331               || last_token_type == token_type_rbracket)
1332             tp->type = last_token_type = token_type_operator;
1333           else
1334             {
1335               phase5_scan_regexp (tp);
1336               tp->type = last_token_type = token_type_regexp;
1337             }
1338           return;
1339
1340         case '{':
1341           if (xml_element_depth > 0 && !inside_embedded_js_in_xml)
1342             inside_embedded_js_in_xml = true;
1343           tp->type = last_token_type = token_type_other;
1344           return;
1345
1346         case '}':
1347           if (xml_element_depth > 0 && inside_embedded_js_in_xml)
1348             inside_embedded_js_in_xml = false;
1349           tp->type = last_token_type = token_type_other;
1350           return;
1351
1352         case '(':
1353           tp->type = last_token_type = token_type_lparen;
1354           return;
1355
1356         case ')':
1357           tp->type = last_token_type = token_type_rparen;
1358           return;
1359
1360         case ',':
1361           tp->type = last_token_type = token_type_comma;
1362           return;
1363
1364         case '[':
1365           tp->type = last_token_type = token_type_lbracket;
1366           return;
1367
1368         case ']':
1369           tp->type = last_token_type = token_type_rbracket;
1370           return;
1371
1372         default:
1373           /* We could carefully recognize each of the 2 and 3 character
1374              operators, but it is not necessary, as we only need to recognize
1375              gettext invocations.  Don't bother.  */
1376           tp->type = last_token_type = token_type_other;
1377           return;
1378         }
1379     }
1380 }
1381
1382 /* Supports only one pushback token.  */
1383 static void
1384 phase5_unget (token_ty *tp)
1385 {
1386   if (tp->type != token_type_eof)
1387     {
1388       if (phase5_pushback_length == SIZEOF (phase5_pushback))
1389         abort ();
1390       phase5_pushback[phase5_pushback_length++] = *tp;
1391     }
1392 }
1393
1394
1395 /* String concatenation with '+'.  */
1396
1397 static void
1398 x_javascript_lex (token_ty *tp)
1399 {
1400   phase5_get (tp);
1401   if (tp->type == token_type_string)
1402     {
1403       char *sum = tp->string;
1404       size_t sum_len = strlen (sum);
1405
1406       for (;;)
1407         {
1408           token_ty token2;
1409
1410           phase5_get (&token2);
1411           if (token2.type == token_type_plus)
1412             {
1413               token_ty token3;
1414
1415               phase5_get (&token3);
1416               if (token3.type == token_type_string)
1417                 {
1418                   char *addend = token3.string;
1419                   size_t addend_len = strlen (addend);
1420
1421                   sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
1422                   memcpy (sum + sum_len, addend, addend_len + 1);
1423                   sum_len += addend_len;
1424
1425                   free_token (&token3);
1426                   free_token (&token2);
1427                   continue;
1428                 }
1429               phase5_unget (&token3);
1430             }
1431           phase5_unget (&token2);
1432           break;
1433         }
1434       tp->string = sum;
1435     }
1436 }
1437
1438
1439 /* ========================= Extracting strings.  ========================== */
1440
1441
/* Context lookup table.  Set by extract_javascript from its FLAG_TABLE
   argument; extract_balanced looks up every symbol in it to obtain the
   context iterator for a following argument list.  */
static flag_context_list_table_ty *flag_context_list_table;
1444
1445
1446 /* The file is broken into tokens.  Scan the token stream, looking for
1447    a keyword, followed by a left paren, followed by a string.  When we
1448    see this sequence, we have something to remember.  We assume we are
1449    looking at a valid JavaScript program, and leave the complaints about
1450    the grammar to the compiler.
1451
1452      Normal handling: Look for
1453        keyword ( ... msgid ... )
1454      Plural handling: Look for
1455        keyword ( ... msgid ... msgid_plural ... )
1456
1457    We use recursion because the arguments before msgid or between msgid
1458    and msgid_plural can contain subexpressions of the same form.  */
1459
1460
1461 /* Extract messages until the next balanced closing parenthesis or bracket.
1462    Extracted messages are added to MLP.
1463    DELIM can be either token_type_rparen or token_type_rbracket, or
1464    token_type_eof to accept both.
1465    Return true upon eof, false upon closing parenthesis or bracket.  */
1466 static bool
1467 extract_balanced (message_list_ty *mlp,
1468                   token_type_ty delim,
1469                   flag_context_ty outer_context,
1470                   flag_context_list_iterator_ty context_iter,
1471                   struct arglist_parser *argparser)
1472 {
1473   /* Current argument number.  */
1474   int arg = 1;
1475   /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
1476   int state;
1477   /* Parameters of the keyword just seen.  Defined only in state 1.  */
1478   const struct callshapes *next_shapes = NULL;
1479   /* Context iterator that will be used if the next token is a '('.  */
1480   flag_context_list_iterator_ty next_context_iter =
1481     passthrough_context_list_iterator;
1482   /* Current context.  */
1483   flag_context_ty inner_context =
1484     inherited_context (outer_context,
1485                        flag_context_list_iterator_advance (&context_iter));
1486
1487   /* Start state is 0.  */
1488   state = 0;
1489
1490   for (;;)
1491     {
1492       token_ty token;
1493
1494       x_javascript_lex (&token);
1495       switch (token.type)
1496         {
1497         case token_type_symbol:
1498           {
1499             void *keyword_value;
1500
1501             if (hash_find_entry (&keywords, token.string, strlen (token.string),
1502                                  &keyword_value)
1503                 == 0)
1504               {
1505                 next_shapes = (const struct callshapes *) keyword_value;
1506                 state = 1;
1507               }
1508             else
1509               state = 0;
1510           }
1511           next_context_iter =
1512             flag_context_list_iterator (
1513               flag_context_list_table_lookup (
1514                 flag_context_list_table,
1515                 token.string, strlen (token.string)));
1516           free (token.string);
1517           continue;
1518
1519         case token_type_lparen:
1520           if (extract_balanced (mlp, token_type_rparen,
1521                                 inner_context, next_context_iter,
1522                                 arglist_parser_alloc (mlp,
1523                                                       state ? next_shapes : NULL)))
1524             {
1525               xgettext_current_source_encoding = po_charset_utf8;
1526               arglist_parser_done (argparser, arg);
1527               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1528               return true;
1529             }
1530           next_context_iter = null_context_list_iterator;
1531           state = 0;
1532           continue;
1533
1534         case token_type_rparen:
1535           if (delim == token_type_rparen || delim == token_type_eof)
1536             {
1537               xgettext_current_source_encoding = po_charset_utf8;
1538               arglist_parser_done (argparser, arg);
1539               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1540               return false;
1541             }
1542           next_context_iter = null_context_list_iterator;
1543           state = 0;
1544           continue;
1545
1546         case token_type_comma:
1547           arg++;
1548           inner_context =
1549             inherited_context (outer_context,
1550                                flag_context_list_iterator_advance (
1551                                  &context_iter));
1552           next_context_iter = passthrough_context_list_iterator;
1553           state = 0;
1554           continue;
1555
1556         case token_type_lbracket:
1557           if (extract_balanced (mlp, token_type_rbracket,
1558                                 null_context, null_context_list_iterator,
1559                                 arglist_parser_alloc (mlp, NULL)))
1560             {
1561               xgettext_current_source_encoding = po_charset_utf8;
1562               arglist_parser_done (argparser, arg);
1563               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1564               return true;
1565             }
1566           next_context_iter = null_context_list_iterator;
1567           state = 0;
1568           continue;
1569
1570         case token_type_rbracket:
1571           if (delim == token_type_rbracket || delim == token_type_eof)
1572             {
1573               xgettext_current_source_encoding = po_charset_utf8;
1574               arglist_parser_done (argparser, arg);
1575               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1576               return false;
1577             }
1578           next_context_iter = null_context_list_iterator;
1579           state = 0;
1580           continue;
1581
1582         case token_type_string:
1583           {
1584             lex_pos_ty pos;
1585             pos.file_name = logical_file_name;
1586             pos.line_number = token.line_number;
1587
1588             xgettext_current_source_encoding = po_charset_utf8;
1589             if (extract_all)
1590               remember_a_message (mlp, NULL, token.string, inner_context,
1591                                   &pos, NULL, token.comment);
1592             else
1593               arglist_parser_remember (argparser, arg, token.string,
1594                                        inner_context,
1595                                        pos.file_name, pos.line_number,
1596                                        token.comment);
1597             xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1598           }
1599           drop_reference (token.comment);
1600           next_context_iter = null_context_list_iterator;
1601           state = 0;
1602           continue;
1603
1604         case token_type_eof:
1605           xgettext_current_source_encoding = po_charset_utf8;
1606           arglist_parser_done (argparser, arg);
1607           xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1608           return true;
1609
1610         case token_type_keyword:
1611         case token_type_plus:
1612         case token_type_regexp:
1613         case token_type_operator:
1614         case token_type_equal:
1615         case token_type_other:
1616           next_context_iter = null_context_list_iterator;
1617           state = 0;
1618           continue;
1619
1620         default:
1621           abort ();
1622         }
1623     }
1624 }
1625
1626
1627 void
1628 extract_javascript (FILE *f,
1629                 const char *real_filename, const char *logical_filename,
1630                 flag_context_list_table_ty *flag_table,
1631                 msgdomain_list_ty *mdlp)
1632 {
1633   message_list_ty *mlp = mdlp->item[0]->messages;
1634
1635   fp = f;
1636   real_file_name = real_filename;
1637   logical_file_name = xstrdup (logical_filename);
1638   line_number = 1;
1639
1640   lexical_context = lc_outside;
1641
1642   last_comment_line = -1;
1643   last_non_comment_line = -1;
1644
1645   xml_element_depth = 0;
1646
1647   xgettext_current_file_source_encoding = xgettext_global_source_encoding;
1648 #if HAVE_ICONV
1649   xgettext_current_file_source_iconv = xgettext_global_source_iconv;
1650 #endif
1651
1652   xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1653 #if HAVE_ICONV
1654   xgettext_current_source_iconv = xgettext_current_file_source_iconv;
1655 #endif
1656
1657   continuation_or_nonblank_line = false;
1658
1659   flag_context_list_table = flag_table;
1660
1661   init_keywords ();
1662
1663   /* Eat tokens until eof is seen.  When extract_balanced returns
1664      due to an unbalanced closing parenthesis, just restart it.  */
1665   while (!extract_balanced (mlp, token_type_eof,
1666                             null_context, null_context_list_iterator,
1667                             arglist_parser_alloc (mlp, NULL)))
1668     ;
1669
1670   fp = NULL;
1671   real_file_name = NULL;
1672   logical_file_name = NULL;
1673   line_number = 0;
1674 }