gettext-tools/src/x-javascript.c

   1 /* xgettext JavaScript backend.
   2    Copyright (C) 2002-2003, 2005-2009, 2013 Free Software Foundation, Inc.
   3
   4    This file was written by Andreas Stricker <andy@knitter.ch>, 2010
   5    It's based on x-python from Bruno Haible.
   6
   7    This program is free software: you can redistribute it and/or modify
   8    it under the terms of the GNU General Public License as published by
   9    the Free Software Foundation; either version 3 of the License, or
  10    (at your option) any later version.
  11
  12    This program is distributed in the hope that it will be useful,
  13    but WITHOUT ANY WARRANTY; without even the implied warranty of
  14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15    GNU General Public License for more details.
  16
  17    You should have received a copy of the GNU General Public License
  18    along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  19
  20 #ifdef HAVE_CONFIG_H
  21 # include "config.h"
  22 #endif
  23
  24 /* Specification.  */
  25 #include "x-javascript.h"
  26
  27 #include <assert.h>
  28 #include <errno.h>
  29 #include <stdbool.h>
  30 #include <stdio.h>
  31 #include <stdlib.h>
  32 #include <string.h>
  33
  34 #include "message.h"
  35 #include "xgettext.h"
  36 #include "error.h"
  37 #include "error-progname.h"
  38 #include "progname.h"
  39 #include "basename.h"
  40 #include "xerror.h"
  41 #include "xvasprintf.h"
  42 #include "xalloc.h"
  43 #include "c-strstr.h"
  44 #include "c-ctype.h"
  45 #include "po-charset.h"
  46 #include "uniname.h"
  47 #include "unistr.h"
  48 #include "gettext.h"
  49
  50 #define _(s) gettext(s)
  51
  52 #define max(a,b) ((a) > (b) ? (a) : (b))
  53
  54 #define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
  55
  56 /* The JavaScript aka ECMA-Script syntax is defined in ECMA-262
  57    specification:
  58    http://www.ecma-international.org/publications/standards/Ecma-262.htm */
  59
  60 /* ====================== Keyword set customization.  ====================== */
  61
  62 /* If true extract all strings.  */
  63 static bool extract_all = false;
  64
  65 static hash_table keywords;
  66 static bool default_keywords = true;
  67
  68
  69 void
  70 x_javascript_extract_all ()
  71 {
  72   extract_all = true;
  73 }
  74
  75
  76 void
  77 x_javascript_keyword (const char *name)
  78 {
  79   if (name == NULL)
  80     default_keywords = false;
  81   else
  82     {
  83       const char *end;
  84       struct callshape shape;
  85       const char *colon;
  86
  87       if (keywords.table == NULL)
  88         hash_init (&keywords, 100);
  89
  90       split_keywordspec (name, &end, &shape);
  91
  92       /* The characters between name and end should form a valid C identifier.
  93          A colon means an invalid parse in split_keywordspec().  */
  94       colon = strchr (name, ':');
  95       if (colon == NULL || colon >= end)
  96         insert_keyword_callshape (&keywords, name, end - name, &shape);
  97     }
  98 }
  99
 100 /* Finish initializing the keywords hash table.
 101    Called after argument processing, before each file is processed.  */
 102 static void
 103 init_keywords ()
 104 {
 105   if (default_keywords)
 106     {
 107       /* When adding new keywords here, also update the documentation in
 108          xgettext.texi!  */
 109       x_javascript_keyword ("gettext");
 110       x_javascript_keyword ("dgettext:2");
 111       x_javascript_keyword ("dcgettext:2");
 112       x_javascript_keyword ("ngettext:1,2");
 113       x_javascript_keyword ("dngettext:2,3");
 114       x_javascript_keyword ("pgettext:1c,2");
 115       x_javascript_keyword ("dpgettext:2c,3");
 116       x_javascript_keyword ("_");
 117       default_keywords = false;
 118     }
 119 }
 120
 121 void
 122 init_flag_table_javascript ()
 123 {
 124   xgettext_record_flag ("gettext:1:pass-javascript-format");
 125   xgettext_record_flag ("dgettext:2:pass-javascript-format");
 126   xgettext_record_flag ("dcgettext:2:pass-javascript-format");
 127   xgettext_record_flag ("ngettext:1:pass-javascript-format");
 128   xgettext_record_flag ("ngettext:2:pass-javascript-format");
 129   xgettext_record_flag ("dngettext:2:pass-javascript-format");
 130   xgettext_record_flag ("dngettext:3:pass-javascript-format");
 131   xgettext_record_flag ("pgettext:2:pass-javascript-format");
 132   xgettext_record_flag ("dpgettext:3:pass-javascript-format");
 133   xgettext_record_flag ("_:1:pass-javascript-format");
 134 }
 135
 136
 137 /* ======================== Reading of characters.  ======================== */
 138
 139 /* Real filename, used in error messages about the input file.  */
 140 static const char *real_file_name;
 141
 142 /* Logical filename and line number, used to label the extracted messages.  */
 143 static char *logical_file_name;
 144 static int line_number;
 145
 146 /* The input file stream.  */
 147 static FILE *fp;
 148
 149
 150 /* 1. line_number handling.  */
 151
 152 /* Maximum used, roughly a safer MB_LEN_MAX.  */
 153 #define MAX_PHASE1_PUSHBACK 16
 154 static unsigned char phase1_pushback[MAX_PHASE1_PUSHBACK];
 155 static int phase1_pushback_length;
 156
 157 /* Read the next single byte from the input file.  */
 158 static int
 159 phase1_getc ()
 160 {
 161   int c;
 162
 163   if (phase1_pushback_length)
 164     c = phase1_pushback[--phase1_pushback_length];
 165   else
 166     {
 167       c = getc (fp);
 168
 169       if (c == EOF)
 170         {
 171           if (ferror (fp))
 172             error (EXIT_FAILURE, errno, _("error while reading \"%s\""),
 173                    real_file_name);
 174           return EOF;
 175         }
 176     }
 177
 178   if (c == '\n')
 179     ++line_number;
 180
 181   return c;
 182 }
 183
 184 /* Supports MAX_PHASE1_PUSHBACK characters of pushback.  */
 185 static void
 186 phase1_ungetc (int c)
 187 {
 188   if (c != EOF)
 189     {
 190       if (c == '\n')
 191         --line_number;
 192
 193       if (phase1_pushback_length == SIZEOF (phase1_pushback))
 194         abort ();
 195       phase1_pushback[phase1_pushback_length++] = c;
 196     }
 197 }
 198
 199
 200 /* Phase 2: Conversion to Unicode.
 201    For now, we expect JavaScript files to be encoded as UTF-8.  */
 202
 203 /* End-of-file indicator for functions returning an UCS-4 character.  */
 204 #define UEOF -1
 205
 206 static lexical_context_ty lexical_context;
 207
 208 static int phase2_pushback[max (9, UNINAME_MAX + 3)];
 209 static int phase2_pushback_length;
 210
 211 /* Read the next Unicode UCS-4 character from the input file.  */
/* Characters pushed back with phase2_ungetc are returned first.  Otherwise
   the function dispatches on xgettext_current_source_encoding:
     - po_charset_ascii: the byte from phase1_getc is returned unchanged;
       a non-ASCII byte is reported as an error and the program exits.
     - any other encoding except po_charset_utf8: bytes are fed to iconv,
       one more at each attempt, until a character is produced; the output
       is then decoded with u8_mbtoucr.  Without HAVE_ICONV this aborts.
     - po_charset_utf8: a lead byte and its continuation bytes are read and
       decoded with u8_mbtouc.
   Returns UEOF at end of input.  */
 212 static int
 213 phase2_getc ()
 214 {
       /* The pushback array is used as a stack: the most recently pushed
          character is returned first.  */
 215   if (phase2_pushback_length)
 216     return phase2_pushback[--phase2_pushback_length];
 217
 218   if (xgettext_current_source_encoding == po_charset_ascii)
 219     {
 220       int c = phase1_getc ();
 221       if (c == EOF)
 222         return UEOF;
 223       if (!c_isascii (c))
 224         {
 225           multiline_error (xstrdup (""),
 226                            xasprintf ("%s\n%s\n",
 227                                       non_ascii_error_message (lexical_context,
 228                                                                real_file_name,
 229                                                                line_number),
 230                                       _("\
 231 Please specify the source encoding through --from-code\n")));
 232           exit (EXIT_FAILURE);
 233         }
 234       return c;
 235     }
 236   else if (xgettext_current_source_encoding != po_charset_utf8)
 237     {
 238 #if HAVE_ICONV
 239       /* Use iconv on an increasing number of bytes.  Read only as many bytes
 240          through phase1_getc as needed.  This is needed to give reasonable
 241          interactive behaviour when fp is connected to an interactive tty.  */
 242       unsigned char buf[MAX_PHASE1_PUSHBACK];
 243       size_t bufcount;
 244       int c = phase1_getc ();
 245       if (c == EOF)
 246         return UEOF;
 247       buf[0] = (unsigned char) c;
 248       bufcount = 1;
 249
           /* Each iteration converts buf[0..bufcount-1] from scratch; the
              loop ends by returning a character or by exiting with an
              error.  */
 250       for (;;)
 251         {
 252           unsigned char scratchbuf[6];
 253           const char *inptr = (const char *) &buf[0];
 254           size_t insize = bufcount;
 255           char *outptr = (char *) &scratchbuf[0];
 256           size_t outsize = sizeof (scratchbuf);
 257
 258           size_t res = iconv (xgettext_current_source_iconv,
 259                               (ICONV_CONST char **) &inptr, &insize,
 260                               &outptr, &outsize);
 261           /* We expect that a character has been produced if and only if
 262              some input bytes have been consumed.  */
 263           if ((insize < bufcount) != (outsize < sizeof (scratchbuf)))
 264             abort ();
 265           if (outsize == sizeof (scratchbuf))
 266             {
 267               /* No character has been produced.  Must be an error.  */
 268               if (res != (size_t)(-1))
 269                 abort ();
 270
 271               if (errno == EILSEQ)
 272                 {
 273                   /* An invalid multibyte sequence was encountered.  */
 274                   multiline_error (xstrdup (""),
 275                                    xasprintf (_("\
 276 %s:%d: Invalid multibyte sequence.\n\
 277 Please specify the correct source encoding through --from-code\n"),
 278                                    real_file_name, line_number));
 279                   exit (EXIT_FAILURE);
 280                 }
 281               else if (errno == EINVAL)
 282                 {
 283                   /* An incomplete multibyte character.  */
 284                   int c;
 285
                       /* buf is full, so no further byte can be added.  */
 286                   if (bufcount == MAX_PHASE1_PUSHBACK)
 287                     {
 288                       /* An overlong incomplete multibyte sequence was
 289                          encountered.  */
 290                       multiline_error (xstrdup (""),
 291                                        xasprintf (_("\
 292 %s:%d: Long incomplete multibyte sequence.\n\
 293 Please specify the correct source encoding through --from-code\n"),
 294                                        real_file_name, line_number));
 295                       exit (EXIT_FAILURE);
 296                     }
 297
 298                   /* Read one more byte and retry iconv.  */
 299                   c = phase1_getc ();
 300                   if (c == EOF)
 301                     {
 302                       multiline_error (xstrdup (""),
 303                                        xasprintf (_("\
 304 %s:%d: Incomplete multibyte sequence at end of file.\n\
 305 Please specify the correct source encoding through --from-code\n"),
 306                                        real_file_name, line_number));
 307                       exit (EXIT_FAILURE);
 308                     }
 309                   if (c == '\n')
 310                     {
                           /* phase1_getc has already counted this newline,
                              hence the "- 1" in the reported line.  */
 311                       multiline_error (xstrdup (""),
 312                                        xasprintf (_("\
 313 %s:%d: Incomplete multibyte sequence at end of line.\n\
 314 Please specify the correct source encoding through --from-code\n"),
 315                                        real_file_name, line_number - 1));
 316                       exit (EXIT_FAILURE);
 317                     }
 318                   buf[bufcount++] = (unsigned char) c;
 319                 }
 320               else
 321                 error (EXIT_FAILURE, errno, _("%s:%d: iconv failure"),
 322                        real_file_name, line_number);
 323             }
 324           else
 325             {
 326               size_t outbytes = sizeof (scratchbuf) - outsize;
 327               size_t bytes = bufcount - insize;
 328               ucs4_t uc;
 329
 330               /* We expect that one character has been produced.  */
 331               if (bytes == 0)
 332                 abort ();
 333               if (outbytes == 0)
 334                 abort ();
 335               /* Push back the unused bytes.  */
 336               while (insize > 0)
 337                 phase1_ungetc (buf[--insize]);
 338               /* Convert the character from UTF-8 to UCS-4.  */
 339               if (u8_mbtoucr (&uc, scratchbuf, outbytes) < (int) outbytes)
 340                 {
 341                   /* scratchbuf contains an out-of-range Unicode character
 342                      (> 0x10ffff).  */
 343                   multiline_error (xstrdup (""),
 344                                    xasprintf (_("\
 345 %s:%d: Invalid multibyte sequence.\n\
 346 Please specify the source encoding through --from-code\n"),
 347                                    real_file_name, line_number));
 348                   exit (EXIT_FAILURE);
 349                 }
 350               return uc;
 351             }
 352         }
 353 #else
 354       /* If we don't have iconv(), the only supported values for
 355          xgettext_global_source_encoding and thus also for
 356          xgettext_current_source_encoding are ASCII and UTF-8.  */
 357       abort ();
 358 #endif
 359     }
 360   else
 361     {
 362       /* Read an UTF-8 encoded character.  */
           /* Each step below reads one more byte only if the lead byte
              announces a longer sequence and all bytes read so far after
              the lead byte are continuation bytes; (b ^ 0x80) < 0x40 holds
              exactly for b in 0x80..0xbf.  EOF in the middle of a sequence
              discards the bytes read so far and returns UEOF.  */
 363       unsigned char buf[6];
 364       unsigned int count;
 365       int c;
 366       ucs4_t uc;
 367
 368       c = phase1_getc ();
 369       if (c == EOF)
 370         return UEOF;
 371       buf[0] = c;
 372       count = 1;
 373
 374       if (buf[0] >= 0xc0)
 375         {
 376           c = phase1_getc ();
 377           if (c == EOF)
 378             return UEOF;
 379           buf[1] = c;
 380           count = 2;
 381         }
 382
 383       if (buf[0] >= 0xe0
 384           && ((buf[1] ^ 0x80) < 0x40))
 385         {
 386           c = phase1_getc ();
 387           if (c == EOF)
 388             return UEOF;
 389           buf[2] = c;
 390           count = 3;
 391         }
 392
 393       if (buf[0] >= 0xf0
 394           && ((buf[1] ^ 0x80) < 0x40)
 395           && ((buf[2] ^ 0x80) < 0x40))
 396         {
 397           c = phase1_getc ();
 398           if (c == EOF)
 399             return UEOF;
 400           buf[3] = c;
 401           count = 4;
 402         }
 403
 404       if (buf[0] >= 0xf8
 405           && ((buf[1] ^ 0x80) < 0x40)
 406           && ((buf[2] ^ 0x80) < 0x40)
 407           && ((buf[3] ^ 0x80) < 0x40))
 408         {
 409           c = phase1_getc ();
 410           if (c == EOF)
 411             return UEOF;
 412           buf[4] = c;
 413           count = 5;
 414         }
 415
 416       if (buf[0] >= 0xfc
 417           && ((buf[1] ^ 0x80) < 0x40)
 418           && ((buf[2] ^ 0x80) < 0x40)
 419           && ((buf[3] ^ 0x80) < 0x40)
 420           && ((buf[4] ^ 0x80) < 0x40))
 421         {
 422           c = phase1_getc ();
 423           if (c == EOF)
 424             return UEOF;
 425           buf[5] = c;
 426           count = 6;
 427         }
 428
           /* The return value of u8_mbtouc is not checked here; uc is
              returned as u8_mbtouc stored it.  */
 429       u8_mbtouc (&uc, buf, count);
 430       return uc;
 431     }
 432 }
 433
 434 /* Supports max (9, UNINAME_MAX + 3) pushback characters.  */
 435 static void
 436 phase2_ungetc (int c)
 437 {
 438   if (c != UEOF)
 439     {
 440       if (phase2_pushback_length == SIZEOF (phase2_pushback))
 441         abort ();
 442       phase2_pushback[phase2_pushback_length++] = c;
 443     }
 444 }
 445
 446
 447 /* ========================= Accumulating strings.  ======================== */
 448
 449 /* A string buffer type that allows appending Unicode characters.
 450    Returns the entire string in UTF-8 encoding.  */
 451
 452 struct unicode_string_buffer
 453 {
 454   /* The part of the string that has already been converted to UTF-8.  */
 455   char *utf8_buffer;
 456   size_t utf8_buflen;
 457   size_t utf8_allocated;
 458 };
 459
 460 /* Initialize a 'struct unicode_string_buffer' to empty.  */
 461 static inline void
 462 init_unicode_string_buffer (struct unicode_string_buffer *bp)
 463 {
 464   bp->utf8_buffer = NULL;
 465   bp->utf8_buflen = 0;
 466   bp->utf8_allocated = 0;
 467 }
 468
 469 /* Auxiliary function: Ensure count more bytes are available in bp->utf8.  */
 470 static inline void
 471 unicode_string_buffer_append_unicode_grow (struct unicode_string_buffer *bp,
 472                                            size_t count)
 473 {
 474   if (bp->utf8_buflen + count > bp->utf8_allocated)
 475     {
 476       size_t new_allocated = 2 * bp->utf8_allocated + 10;
 477       if (new_allocated < bp->utf8_buflen + count)
 478         new_allocated = bp->utf8_buflen + count;
 479       bp->utf8_allocated = new_allocated;
 480       bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
 481     }
 482 }
 483
 484 /* Auxiliary function: Append a Unicode character to bp->utf8.
 485    uc must be < 0x110000.  */
 486 static inline void
 487 unicode_string_buffer_append_unicode (struct unicode_string_buffer *bp,
 488                                       unsigned int uc)
 489 {
 490   unsigned char utf8buf[6];
 491   int count = u8_uctomb (utf8buf, uc, 6);
 492
 493   if (count < 0)
 494     /* The caller should have ensured that uc is not out-of-range.  */
 495     abort ();
 496
 497   unicode_string_buffer_append_unicode_grow (bp, count);
 498   memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
 499   bp->utf8_buflen += count;
 500 }
 501
 502 /* Return the string buffer's contents.  */
 503 static char *
 504 unicode_string_buffer_result (struct unicode_string_buffer *bp)
 505 {
 506   /* NUL-terminate it.  */
 507   unicode_string_buffer_append_unicode_grow (bp, 1);
 508   bp->utf8_buffer[bp->utf8_buflen] = '\0';
 509   /* Return it.  */
 510   return bp->utf8_buffer;
 511 }
 512
 513 /* Free the memory pointed to by a 'struct unicode_string_buffer'.  */
 514 static inline void
 515 free_unicode_string_buffer (struct unicode_string_buffer *bp)
 516 {
 517   free (bp->utf8_buffer);
 518 }
 519
 520
 521 /* ======================== Accumulating comments.  ======================== */
 522
 523
 524 /* Accumulating a single comment line.  */
 525
 526 static struct unicode_string_buffer comment_buffer;
 527
 528 static inline void
 529 comment_start ()
 530 {
 531   lexical_context = lc_comment;
 532   comment_buffer.utf8_buflen = 0;
 533 }
 534
 535 static inline bool
 536 comment_at_start ()
 537 {
 538   return (comment_buffer.utf8_buflen == 0);
 539 }
 540
 541 static inline void
 542 comment_add (int c)
 543 {
 544   unicode_string_buffer_append_unicode (&comment_buffer, c);
 545 }
 546
 547 static inline const char *
 548 comment_line_end ()
 549 {
 550   char *buffer = unicode_string_buffer_result (&comment_buffer);
 551   size_t buflen = strlen (buffer);
 552
 553   while (buflen >= 1
 554          && (buffer[buflen - 1] == ' ' || buffer[buflen - 1] == '\t'))
 555     --buflen;
 556   buffer[buflen] = '\0';
 557   savable_comment_add (buffer);
 558   lexical_context = lc_outside;
 559   return buffer;
 560 }
 561
 562
 563 /* These are for tracking whether comments count as immediately before
 564    keyword.  */
 565 static int last_comment_line;
 566 static int last_non_comment_line;
 567
 568
 569 /* ======================== Recognizing comments.  ======================== */
 570
 571
 572 /* Canonicalized encoding name for the current input file.  */
 573 static const char *xgettext_current_file_source_encoding;
 574
 575 #if HAVE_ICONV
 576 /* Converter from xgettext_current_file_source_encoding to UTF-8 (except from
 577    ASCII or UTF-8, when this conversion is a no-op).  */
 578 static iconv_t xgettext_current_file_source_iconv;
 579 #endif
 580
 581 /* Tracking whether the current line is a continuation line or contains a
 582    non-blank character.  */
 583 static bool continuation_or_nonblank_line = false;
 584
 585
 586 /* Phase 3: Outside strings, replace backslash-newline with nothing and a
 587    comment with nothing.  */
 588
 589 static int
 590 phase3_getc ()
 591 {
 592   int c;
 593
 594   for (;;)
 595     {
 596       c = phase2_getc ();
 597       if (c == '\\')
 598         {
 599           c = phase2_getc ();
 600           if (c != '\n')
 601             {
 602               phase2_ungetc (c);
 603               /* This shouldn't happen usually, because "A backslash is
 604                  illegal elsewhere on a line outside a string literal."  */
 605               return '\\';
 606             }
 607           /* Eat backslash-newline.  */
 608           continuation_or_nonblank_line = true;
 609         }
 610       else if (c == '/')
 611         {
 612           c = phase2_getc ();
 613           if (c == '/')
 614             {
 615               /* C++ style comment.  */
 616               last_comment_line = line_number;
 617               comment_start ();
 618               for (;;)
 619                 {
 620                   c = phase2_getc ();
 621                   if (c == UEOF || c == '\n')
 622                     break;
 623                   /* We skip all leading white space, but not EOLs.  */
 624                   if (!(comment_at_start () && (c == ' ' || c == '\t')))
 625                     comment_add (c);
 626                 }
 627               continuation_or_nonblank_line = false;
 628               return c;
 629             }
 630           else if (c == '*')
 631             {
 632               /* C style comment.  */
 633               bool last_was_star = false;
 634               last_comment_line = line_number;
 635               comment_start ();
 636               for (;;)
 637                 {
 638                   c = phase2_getc ();
 639                   if (c == UEOF)
 640                     break;
 641                   /* We skip all leading white space, but not EOLs.  */
 642                   if (!(comment_at_start () && (c == ' ' || c == '\t')))
 643                     comment_add (c);
 644                   switch (c)
 645                     {
 646                     case '\n':
 647                       comment_line_end (1);
 648                       comment_start ();
 649                       last_was_star = false;
 650                       continue;
 651
 652                     case '*':
 653                       last_was_star = true;
 654                       continue;
 655                     case '/':
 656                       if (last_was_star)
 657                         {
 658                           comment_line_end (2);
 659                           break;
 660                         }
 661                       /* FALLTHROUGH */
 662
 663                     default:
 664                       last_was_star = false;
 665                       continue;
 666                     }
 667                   break;
 668                 }
 669               continuation_or_nonblank_line = false;
 670             }
 671           else
 672             {
 673               phase2_ungetc (c);
 674               return '/';
 675             }
 676         }
 677       else
 678         {
 679           if (c == '\n')
 680             continuation_or_nonblank_line = false;
 681           else if (!(c == ' ' || c == '\t' || c == '\f'))
 682             continuation_or_nonblank_line = true;
 683           return c;
 684         }
 685     }
 686 }
 687
 688 /* Supports only one pushback character.  */
 689 static void
 690 phase3_ungetc (int c)
 691 {
 692   phase2_ungetc (c);
 693 }
 694
 695
 696 /* ========================= Accumulating strings.  ======================== */
 697
 698 /* Return value of phase7_getuc when EOF is reached.  */
 699 #define P7_EOF (-1)
 700 #define P7_STRING_END (-2)
 701
 702 /* Convert an UTF-16 or UTF-32 code point to a return value that can be
 703    distinguished from a single-byte return value.  */
 704 #define UNICODE(code) (0x100 + (code))
 705
 706 /* Test a return value of phase7_getuc whether it designates an UTF-16 or
 707    UTF-32 code point.  */
 708 #define IS_UNICODE(p7_result) ((p7_result) >= 0x100)
 709
 710 /* Extract the UTF-16 or UTF-32 code of a return value that satisfies
 711    IS_UNICODE.  */
 712 #define UNICODE_VALUE(p7_result) ((p7_result) - 0x100)
 713
 714 /* A string buffer type that allows appending bytes (in the
 715    xgettext_current_source_encoding) or Unicode characters.
 716    Returns the entire string in UTF-8 encoding.  */
 717
 718 struct mixed_string_buffer
 719 {
 720   /* The part of the string that has already been converted to UTF-8.  */
 721   char *utf8_buffer;
 722   size_t utf8_buflen;
 723   size_t utf8_allocated;
 724   /* The first half of an UTF-16 surrogate character.  */
 725   unsigned short utf16_surr;
 726   /* The part of the string that is still in the source encoding.  */
 727   char *curr_buffer;
 728   size_t curr_buflen;
 729   size_t curr_allocated;
 730   /* The lexical context.  Used only for error message purposes.  */
 731   lexical_context_ty lcontext;
 732 };
 733
 734 /* Initialize a 'struct mixed_string_buffer' to empty.  */
 735 static inline void
 736 init_mixed_string_buffer (struct mixed_string_buffer *bp, lexical_context_ty lcontext)
 737 {
 738   bp->utf8_buffer = NULL;
 739   bp->utf8_buflen = 0;
 740   bp->utf8_allocated = 0;
 741   bp->utf16_surr = 0;
 742   bp->curr_buffer = NULL;
 743   bp->curr_buflen = 0;
 744   bp->curr_allocated = 0;
 745   bp->lcontext = lcontext;
 746 }
 747
 748 /* Auxiliary function: Append a byte to bp->curr.  */
 749 static inline void
 750 mixed_string_buffer_append_byte (struct mixed_string_buffer *bp, unsigned char c)
 751 {
 752   if (bp->curr_buflen == bp->curr_allocated)
 753     {
 754       bp->curr_allocated = 2 * bp->curr_allocated + 10;
 755       bp->curr_buffer = xrealloc (bp->curr_buffer, bp->curr_allocated);
 756     }
 757   bp->curr_buffer[bp->curr_buflen++] = c;
 758 }
 759
 760 /* Auxiliary function: Ensure count more bytes are available in bp->utf8.  */
 761 static inline void
 762 mixed_string_buffer_append_unicode_grow (struct mixed_string_buffer *bp, size_t count)
 763 {
 764   if (bp->utf8_buflen + count > bp->utf8_allocated)
 765     {
 766       size_t new_allocated = 2 * bp->utf8_allocated + 10;
 767       if (new_allocated < bp->utf8_buflen + count)
 768         new_allocated = bp->utf8_buflen + count;
 769       bp->utf8_allocated = new_allocated;
 770       bp->utf8_buffer = xrealloc (bp->utf8_buffer, new_allocated);
 771     }
 772 }
 773
 774 /* Auxiliary function: Append a Unicode character to bp->utf8.
 775    uc must be < 0x110000.  */
 776 static inline void
 777 mixed_string_buffer_append_unicode (struct mixed_string_buffer *bp, ucs4_t uc)
 778 {
 779   unsigned char utf8buf[6];
 780   int count = u8_uctomb (utf8buf, uc, 6);
 781
 782   if (count < 0)
 783     /* The caller should have ensured that uc is not out-of-range.  */
 784     abort ();
 785
 786   mixed_string_buffer_append_unicode_grow (bp, count);
 787   memcpy (bp->utf8_buffer + bp->utf8_buflen, utf8buf, count);
 788   bp->utf8_buflen += count;
 789 }
 790
 791 /* Auxiliary function: Flush bp->utf16_surr into bp->utf8_buffer.  */
 792 static inline void
 793 mixed_string_buffer_flush_utf16_surr (struct mixed_string_buffer *bp)
 794 {
 795   if (bp->utf16_surr != 0)
 796     {
 797       /* A half surrogate is invalid, therefore use U+FFFD instead.  */
 798       mixed_string_buffer_append_unicode (bp, 0xfffd);
 799       bp->utf16_surr = 0;
 800     }
 801 }
 802
 803 /* Auxiliary function: Flush bp->curr_buffer into bp->utf8_buffer.  */
 804 static inline void
 805 mixed_string_buffer_flush_curr_buffer (struct mixed_string_buffer *bp, int lineno)
 806 {
 807   if (bp->curr_buflen > 0)
 808     {
 809       char *curr;
 810       size_t count;
 811
 812       mixed_string_buffer_append_byte (bp, '\0');
 813
 814       /* Convert from the source encoding to UTF-8.  */
 815       curr = from_current_source_encoding (bp->curr_buffer, bp->lcontext,
 816                                            logical_file_name, lineno);
 817
 818       /* Append it to bp->utf8_buffer.  */
 819       count = strlen (curr);
 820       mixed_string_buffer_append_unicode_grow (bp, count);
 821       memcpy (bp->utf8_buffer + bp->utf8_buflen, curr, count);
 822       bp->utf8_buflen += count;
 823
 824       if (curr != bp->curr_buffer)
 825         free (curr);
 826       bp->curr_buflen = 0;
 827     }
 828 }
 829
 830 /* Append a character or Unicode character to a 'struct mixed_string_buffer'.  */
 831 static void
 832 mixed_string_buffer_append (struct mixed_string_buffer *bp, int c)
 833 {
 834   if (IS_UNICODE (c))
 835     {
 836       /* Append a Unicode character.  */
 837
 838       /* Switch from multibyte character mode to Unicode character mode.  */
 839       mixed_string_buffer_flush_curr_buffer (bp, line_number);
 840
 841       /* Test whether this character and the previous one form a Unicode
 842          surrogate character pair.  */
 843       if (bp->utf16_surr != 0
 844           && (c >= UNICODE (0xdc00) && c < UNICODE (0xe000)))
 845         {
 846           unsigned short utf16buf[2];
 847           ucs4_t uc;
 848
 849           utf16buf[0] = bp->utf16_surr;
 850           utf16buf[1] = UNICODE_VALUE (c);
 851           if (u16_mbtouc (&uc, utf16buf, 2) != 2)
 852             abort ();
 853
 854           mixed_string_buffer_append_unicode (bp, uc);
 855           bp->utf16_surr = 0;
 856         }
 857       else
 858         {
 859           mixed_string_buffer_flush_utf16_surr (bp);
 860
 861           if (c >= UNICODE (0xd800) && c < UNICODE (0xdc00))
 862             bp->utf16_surr = UNICODE_VALUE (c);
 863           else if (c >= UNICODE (0xdc00) && c < UNICODE (0xe000))
 864             {
 865               /* A half surrogate is invalid, therefore use U+FFFD instead.  */
 866               mixed_string_buffer_append_unicode (bp, 0xfffd);
 867             }
 868           else
 869             mixed_string_buffer_append_unicode (bp, UNICODE_VALUE (c));
 870         }
 871     }
 872   else
 873     {
 874       /* Append a single byte.  */
 875
 876       /* Switch from Unicode character mode to multibyte character mode.  */
 877       mixed_string_buffer_flush_utf16_surr (bp);
 878
 879       /* When a newline is seen, convert the accumulated multibyte sequence.
 880          This ensures a correct line number in the error message in case of
 881          a conversion error.  The "- 1" is to account for the newline.  */
 882       if (c == '\n')
 883         mixed_string_buffer_flush_curr_buffer (bp, line_number - 1);
 884
 885       mixed_string_buffer_append_byte (bp, (unsigned char) c);
 886     }
 887 }
 888
 889 /* Return the string buffer's contents.  */
 890 static char *
 891 mixed_string_buffer_result (struct mixed_string_buffer *bp)
 892 {
 893   /* Flush all into bp->utf8_buffer.  */
 894   mixed_string_buffer_flush_utf16_surr (bp);
 895   mixed_string_buffer_flush_curr_buffer (bp, line_number);
 896   /* NUL-terminate it.  */
 897   mixed_string_buffer_append_unicode_grow (bp, 1);
 898   bp->utf8_buffer[bp->utf8_buflen] = '\0';
 899   /* Return it.  */
 900   return bp->utf8_buffer;
 901 }
 902
 903 /* Free the memory pointed to by a 'struct mixed_string_buffer'.  */
 904 static inline void
 905 free_mixed_string_buffer (struct mixed_string_buffer *bp)
 906 {
 907   free (bp->utf8_buffer);
 908   free (bp->curr_buffer);
 909 }
 910
 911
 912 /* ========================== Reading of tokens.  ========================== */
 913
 914
 915 enum token_type_ty
 916 {
 917   token_type_eof,
 918   token_type_lparen,            /* ( */
 919   token_type_rparen,            /* ) */
 920   token_type_comma,             /* , */
 921   token_type_lbracket,          /* [ */
 922   token_type_rbracket,          /* ] */
 923   token_type_plus,              /* + */
 924   token_type_regexp,            /* /.../ */
 925   token_type_operator,          /* - * / % . < > = ~ ! | & ? : ^ */
 926   token_type_string,            /* "abc", 'abc' */
 927   token_type_keyword,           /* return, else */
 928   token_type_symbol,            /* symbol, number */
 929   token_type_other              /* misc. operator */
 930 };
 931 typedef enum token_type_ty token_type_ty;
 932
 933 typedef struct token_ty token_ty;
 934 struct token_ty
 935 {
 936   token_type_ty type;
 937   char *string;         /* for token_type_string, token_type_symbol,
 938                            token_type_keyword */
 939   refcounted_string_list_ty *comment;   /* for token_type_string */
 940   int line_number;
 941 };
 942
 943
 944 /* Free the memory pointed to by a 'struct token_ty'.  */
 945 static inline void
 946 free_token (token_ty *tp)
 947 {
 948   if (tp->type == token_type_string || tp->type == token_type_symbol)
 949     free (tp->string);
 950   if (tp->type == token_type_string)
 951     drop_reference (tp->comment);
 952 }
 953
 954
 955 /* JavaScript provides strings with either double or single quotes:
 956      "abc" or 'abc'
 957    Both may contain special sequences after a backslash:
 958      \', \", \\, \b, \f, \n, \r, \t, \v
 959    Special characters can be entered using hexadecimal escape
 960    sequences or deprecated octal escape sequences:
 961      \xXX, \OOO
 962    Any unicode point can be entered using Unicode escape sequences:
 963      \uNNNN
 964    If a sequence after a backslash is not a legitimate character
 965    escape sequence, the character value is the sequence itself without
 966    a backslash.  For example, \xxx is treated as xxx.  */
 967
 968 static int
 969 phase7_getuc (int quote_char)
 970 {
 971   int c;
 972
 973   for (;;)
 974     {
 975       /* Use phase 2, because phase 3 elides comments.  */
 976       c = phase2_getc ();
 977
 978       if (c == UEOF)
 979         return P7_EOF;
 980
 981       if (c == quote_char)
 982         return P7_STRING_END;
 983
 984       if (c == '\n')
 985         {
 986           phase2_ungetc (c);
 987           error_with_progname = false;
 988           error (0, 0, _("%s:%d: warning: unterminated string"),
 989                  logical_file_name, line_number);
 990           error_with_progname = true;
 991           return P7_STRING_END;
 992         }
 993
 994       if (c != '\\')
 995         return UNICODE (c);
 996
 997       /* Dispatch according to the character following the backslash.  */
 998       c = phase2_getc ();
 999       if (c == UEOF)
1000         return P7_EOF;
1001
1002       switch (c)
1003         {
1004         case '\n':
1005           continue;
1006         case 'b':
1007           return UNICODE ('\b');
1008         case 'f':
1009           return UNICODE ('\f');
1010         case 'n':
1011           return UNICODE ('\n');
1012         case 'r':
1013           return UNICODE ('\r');
1014         case 't':
1015           return UNICODE ('\t');
1016         case 'v':
1017           return UNICODE ('\v');
1018         case '0': case '1': case '2': case '3': case '4':
1019         case '5': case '6': case '7':
1020           {
1021             int n = c - '0';
1022
1023             c = phase2_getc ();
1024             if (c != UEOF)
1025               {
1026                 if (c >= '0' && c <= '7')
1027                   {
1028                     n = (n << 3) + (c - '0');
1029                     c = phase2_getc ();
1030                     if (c != UEOF)
1031                       {
1032                         if (c >= '0' && c <= '7')
1033                           n = (n << 3) + (c - '0');
1034                         else
1035                           phase2_ungetc (c);
1036                       }
1037                   }
1038                 else
1039                   phase2_ungetc (c);
1040               }
1041             return UNICODE (n);
1042           }
1043         case 'x':
1044           {
1045             int c1 = phase2_getc ();
1046             int n1;
1047
1048             if (c1 >= '0' && c1 <= '9')
1049               n1 = c1 - '0';
1050             else if (c1 >= 'A' && c1 <= 'F')
1051               n1 = c1 - 'A' + 10;
1052             else if (c1 >= 'a' && c1 <= 'f')
1053               n1 = c1 - 'a' + 10;
1054             else
1055               n1 = -1;
1056
1057             if (n1 >= 0)
1058               {
1059                 int c2 = phase2_getc ();
1060                 int n2;
1061
1062                 if (c2 >= '0' && c2 <= '9')
1063                   n2 = c2 - '0';
1064                 else if (c2 >= 'A' && c2 <= 'F')
1065                   n2 = c2 - 'A' + 10;
1066                 else if (c2 >= 'a' && c2 <= 'f')
1067                   n2 = c2 - 'a' + 10;
1068                 else
1069                   n2 = -1;
1070
1071                 if (n2 >= 0)
1072                   {
1073                     int n = (n1 << 4) + n2;
1074                     return UNICODE (n);
1075                   }
1076
1077                 phase2_ungetc (c2);
1078               }
1079             phase2_ungetc (c1);
1080             return UNICODE (c);
1081           }
1082         case 'u':
1083           {
1084             unsigned char buf[4];
1085             unsigned int n = 0;
1086             int i;
1087
1088             for (i = 0; i < 4; i++)
1089               {
1090                 int c1 = phase2_getc ();
1091
1092                 if (c1 >= '0' && c1 <= '9')
1093                   n = (n << 4) + (c1 - '0');
1094                 else if (c1 >= 'A' && c1 <= 'F')
1095                   n = (n << 4) + (c1 - 'A' + 10);
1096                 else if (c1 >= 'a' && c1 <= 'f')
1097                   n = (n << 4) + (c1 - 'a' + 10);
1098                 else
1099                   {
1100                     phase2_ungetc (c1);
1101                     while (--i >= 0)
1102                       phase2_ungetc (buf[i]);
1103                     return UNICODE (c);
1104                   }
1105
1106                 buf[i] = c1;
1107               }
1108             return UNICODE (n);
1109           }
1110         default:
1111           return UNICODE (c);
1112         }
1113     }
1114 }
1115
1116
/* Combine characters into tokens.  Whitespace, including newlines, is
   discarded.  */
1119
/* Tokens handed back by phase5_unget, waiting to be returned again by
   phase5_get.  Used as a stack: the token pushed last is returned first.  */
static token_ty phase5_pushback[2];
/* Number of tokens currently stored in phase5_pushback.  */
static int phase5_pushback_length;

/* Type of the token most recently returned by phase5_get.  It decides
   whether a '/' is a division operator or the start of a RegExp literal.  */
static token_type_ty last_token_type = token_type_other;
1124
1125 static void
1126 phase5_scan_regexp ()
1127 {
1128     int c;
1129
1130     /* Scan for end of RegExp literal ('/').  */
1131     for (;;)
1132       {
1133         /* Must use phase2 as there can't be comments.  */
1134         c = phase2_getc ();
1135         if (c == '/')
1136           break;
1137         if (c == '\\')
1138           {
1139             c = phase2_getc ();
1140             if (c != UEOF)
1141               continue;
1142           }
1143         if (c == UEOF)
1144           {
1145             error_with_progname = false;
1146             error (0, 0,
1147                    _("%s:%d: warning: RegExp literal terminated too early"),
1148                    logical_file_name, line_number);
1149             error_with_progname = true;
1150             return;
1151           }
1152       }
1153
1154     /* Scan for modifier flags (ECMA-262 5th section 15.10.4.1).  */
1155     c = phase2_getc ();
1156     if (!(c == 'g' || c == 'i' || c == 'm'))
1157       phase2_ungetc (c);
1158 }
1159
1160 static void
1161 phase5_get (token_ty *tp)
1162 {
1163   int c;
1164
1165   if (phase5_pushback_length)
1166     {
1167       *tp = phase5_pushback[--phase5_pushback_length];
1168       last_token_type = tp->type;
1169       return;
1170     }
1171
1172   for (;;)
1173     {
1174       tp->line_number = line_number;
1175       c = phase3_getc ();
1176
1177       switch (c)
1178         {
1179         case UEOF:
1180           tp->type = last_token_type = token_type_eof;
1181           return;
1182
1183         case '\n':
1184           if (last_non_comment_line > last_comment_line)
1185             savable_comment_reset ();
1186           /* FALLTHROUGH */
1187         case ' ':
1188         case '\t':
1189         case '\f':
1190           /* Ignore whitespace and comments.  */
1191           continue;
1192         }
1193
1194       last_non_comment_line = tp->line_number;
1195
1196       switch (c)
1197         {
1198         case '.':
1199           {
1200             int c1 = phase3_getc ();
1201             phase3_ungetc (c1);
1202             if (!(c1 >= '0' && c1 <= '9'))
1203               {
1204
1205                 tp->type = last_token_type = token_type_other;
1206                 return;
1207               }
1208           }
1209           /* FALLTHROUGH */
1210         case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1211         case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1212         case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1213         case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1214         case 'Y': case 'Z':
1215         case '_':
1216         case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1217         case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1218         case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1219         case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1220         case 'y': case 'z':
1221         case '0': case '1': case '2': case '3': case '4':
1222         case '5': case '6': case '7': case '8': case '9':
1223           /* Symbol, or part of a number.  */
1224           {
1225             static char *buffer;
1226             static int bufmax;
1227             int bufpos;
1228
1229             bufpos = 0;
1230             for (;;)
1231               {
1232                 if (bufpos >= bufmax)
1233                   {
1234                     bufmax = 2 * bufmax + 10;
1235                     buffer = xrealloc (buffer, bufmax);
1236                   }
1237                 buffer[bufpos++] = c;
1238                 c = phase3_getc ();
1239                 switch (c)
1240                   {
1241                   case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
1242                   case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
1243                   case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
1244                   case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
1245                   case 'Y': case 'Z':
1246                   case '_':
1247                   case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
1248                   case 'g': case 'h': case 'i': case 'j': case 'k': case 'l':
1249                   case 'm': case 'n': case 'o': case 'p': case 'q': case 'r':
1250                   case 's': case 't': case 'u': case 'v': case 'w': case 'x':
1251                   case 'y': case 'z':
1252                   case '0': case '1': case '2': case '3': case '4':
1253                   case '5': case '6': case '7': case '8': case '9':
1254                     continue;
1255                   default:
1256                     phase3_ungetc (c);
1257                     break;
1258                   }
1259                 break;
1260               }
1261             if (bufpos >= bufmax)
1262               {
1263                 bufmax = 2 * bufmax + 10;
1264                 buffer = xrealloc (buffer, bufmax);
1265               }
1266             buffer[bufpos] = '\0';
1267             tp->string = xstrdup (buffer);
1268             if (strcmp (buffer, "return") == 0
1269                 || strcmp (buffer, "else") == 0)
1270               tp->type = last_token_type = token_type_keyword;
1271             else
1272               tp->type = last_token_type = token_type_symbol;
1273             return;
1274           }
1275
1276         /* Strings.  */
1277           {
1278             struct mixed_string_buffer literal;
1279             int quote_char;
1280
1281             case '"': case '\'':
1282               quote_char = c;
1283               lexical_context = lc_string;
1284               /* Start accumulating the string.  */
1285               init_mixed_string_buffer (&literal, lc_string);
1286               for (;;)
1287                 {
1288                   int uc = phase7_getuc (quote_char);
1289
1290                   if (uc == P7_EOF || uc == P7_STRING_END)
1291                     break;
1292
1293                   if (IS_UNICODE (uc))
1294                     assert (UNICODE_VALUE (uc) >= 0
1295                             && UNICODE_VALUE (uc) < 0x110000);
1296
1297                   mixed_string_buffer_append (&literal, uc);
1298                 }
1299               tp->string = xstrdup (mixed_string_buffer_result (&literal));
1300               free_mixed_string_buffer (&literal);
1301               tp->comment = add_reference (savable_comment);
1302               lexical_context = lc_outside;
1303               tp->type = last_token_type = token_type_string;
1304               return;
1305           }
1306
1307         case '+':
1308           tp->type = last_token_type = token_type_plus;
1309           return;
1310
1311         /* Identify operators. The multiple character ones are simply ignored
1312          * as they are recognized here and are otherwise not relevant. */
1313         case '-': case '*': /* '+' and '/' are not listed here! */
1314         case '%': case '<': case '>': case '=':
1315         case '~': case '!': case '|': case '&': case '^':
1316         case '?': case ':':
1317           tp->type = last_token_type = token_type_operator;
1318           return;
1319
1320         case '/':
1321           /* Either a division operator or the start of a regular
1322              expression literal.  If the '/' token is spotted after a
1323              symbol it's a division, otherwise it's a regular
1324              expression.  */
1325           if (last_token_type == token_type_symbol
1326               || last_token_type == token_type_rparen
1327               || last_token_type == token_type_rbracket)
1328             tp->type = last_token_type = token_type_operator;
1329           else
1330             {
1331               phase5_scan_regexp (tp);
1332               tp->type = last_token_type = token_type_regexp;
1333             }
1334           return;
1335
1336         case '(':
1337           tp->type = last_token_type = token_type_lparen;
1338           return;
1339
1340         case ')':
1341           tp->type = last_token_type = token_type_rparen;
1342           return;
1343
1344         case ',':
1345           tp->type = last_token_type = token_type_comma;
1346           return;
1347
1348         case '[':
1349           tp->type = last_token_type = token_type_lbracket;
1350           return;
1351
1352         case ']':
1353           tp->type = last_token_type = token_type_rbracket;
1354           return;
1355
1356         default:
1357           /* We could carefully recognize each of the 2 and 3 character
1358              operators, but it is not necessary, as we only need to recognize
1359              gettext invocations.  Don't bother.  */
1360           tp->type = last_token_type = token_type_other;
1361           return;
1362         }
1363     }
1364 }
1365
1366 /* Supports only one pushback token.  */
1367 static void
1368 phase5_unget (token_ty *tp)
1369 {
1370   if (tp->type != token_type_eof)
1371     {
1372       if (phase5_pushback_length == SIZEOF (phase5_pushback))
1373         abort ();
1374       phase5_pushback[phase5_pushback_length++] = *tp;
1375     }
1376 }
1377
1378
1379 /* String concatenation with '+'.  */
1380
1381 static void
1382 x_javascript_lex (token_ty *tp)
1383 {
1384   phase5_get (tp);
1385   if (tp->type == token_type_string)
1386     {
1387       char *sum = tp->string;
1388       size_t sum_len = strlen (sum);
1389
1390       for (;;)
1391         {
1392           token_ty token2;
1393
1394           phase5_get (&token2);
1395           if (token2.type == token_type_plus)
1396             {
1397               token_ty token3;
1398
1399               phase5_get (&token3);
1400               if (token3.type == token_type_string)
1401                 {
1402                   char *addend = token3.string;
1403                   size_t addend_len = strlen (addend);
1404
1405                   sum = (char *) xrealloc (sum, sum_len + addend_len + 1);
1406                   memcpy (sum + sum_len, addend, addend_len + 1);
1407                   sum_len += addend_len;
1408
1409                   free_token (&token3);
1410                   free_token (&token2);
1411                   continue;
1412                 }
1413               phase5_unget (&token3);
1414             }
1415           phase5_unget (&token2);
1416           break;
1417         }
1418       tp->string = sum;
1419     }
1420 }
1421
1422
1423 /* ========================= Extracting strings.  ========================== */
1424
1425
/* Context lookup table.  Set by extract_javascript from its flag_table
   argument and looked up by symbol name in extract_balanced.  */
static flag_context_list_table_ty *flag_context_list_table;
1428
1429
1430 /* The file is broken into tokens.  Scan the token stream, looking for
1431    a keyword, followed by a left paren, followed by a string.  When we
1432    see this sequence, we have something to remember.  We assume we are
1433    looking at a valid JavaScript program, and leave the complaints about
1434    the grammar to the compiler.
1435
1436      Normal handling: Look for
1437        keyword ( ... msgid ... )
1438      Plural handling: Look for
1439        keyword ( ... msgid ... msgid_plural ... )
1440
1441    We use recursion because the arguments before msgid or between msgid
1442    and msgid_plural can contain subexpressions of the same form.  */
1443
1444
1445 /* Extract messages until the next balanced closing parenthesis or bracket.
1446    Extracted messages are added to MLP.
1447    DELIM can be either token_type_rparen or token_type_rbracket, or
1448    token_type_eof to accept both.
1449    Return true upon eof, false upon closing parenthesis or bracket.  */
1450 static bool
1451 extract_balanced (message_list_ty *mlp,
1452                   token_type_ty delim,
1453                   flag_context_ty outer_context,
1454                   flag_context_list_iterator_ty context_iter,
1455                   struct arglist_parser *argparser)
1456 {
1457   /* Current argument number.  */
1458   int arg = 1;
1459   /* 0 when no keyword has been seen.  1 right after a keyword is seen.  */
1460   int state;
1461   /* Parameters of the keyword just seen.  Defined only in state 1.  */
1462   const struct callshapes *next_shapes = NULL;
1463   /* Context iterator that will be used if the next token is a '('.  */
1464   flag_context_list_iterator_ty next_context_iter =
1465     passthrough_context_list_iterator;
1466   /* Current context.  */
1467   flag_context_ty inner_context =
1468     inherited_context (outer_context,
1469                        flag_context_list_iterator_advance (&context_iter));
1470
1471   /* Start state is 0.  */
1472   state = 0;
1473
1474   for (;;)
1475     {
1476       token_ty token;
1477
1478       x_javascript_lex (&token);
1479       switch (token.type)
1480         {
1481         case token_type_symbol:
1482           {
1483             void *keyword_value;
1484
1485             if (hash_find_entry (&keywords, token.string, strlen (token.string),
1486                                  &keyword_value)
1487                 == 0)
1488               {
1489                 next_shapes = (const struct callshapes *) keyword_value;
1490                 state = 1;
1491               }
1492             else
1493               state = 0;
1494           }
1495           next_context_iter =
1496             flag_context_list_iterator (
1497               flag_context_list_table_lookup (
1498                 flag_context_list_table,
1499                 token.string, strlen (token.string)));
1500           free (token.string);
1501           continue;
1502
1503         case token_type_lparen:
1504           if (extract_balanced (mlp, token_type_rparen,
1505                                 inner_context, next_context_iter,
1506                                 arglist_parser_alloc (mlp,
1507                                                       state ? next_shapes : NULL)))
1508             {
1509               xgettext_current_source_encoding = po_charset_utf8;
1510               arglist_parser_done (argparser, arg);
1511               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1512               return true;
1513             }
1514           next_context_iter = null_context_list_iterator;
1515           state = 0;
1516           continue;
1517
1518         case token_type_rparen:
1519           if (delim == token_type_rparen || delim == token_type_eof)
1520             {
1521               xgettext_current_source_encoding = po_charset_utf8;
1522               arglist_parser_done (argparser, arg);
1523               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1524               return false;
1525             }
1526           next_context_iter = null_context_list_iterator;
1527           state = 0;
1528           continue;
1529
1530         case token_type_comma:
1531           arg++;
1532           inner_context =
1533             inherited_context (outer_context,
1534                                flag_context_list_iterator_advance (
1535                                  &context_iter));
1536           next_context_iter = passthrough_context_list_iterator;
1537           state = 0;
1538           continue;
1539
1540         case token_type_lbracket:
1541           if (extract_balanced (mlp, token_type_rbracket,
1542                                 null_context, null_context_list_iterator,
1543                                 arglist_parser_alloc (mlp, NULL)))
1544             {
1545               xgettext_current_source_encoding = po_charset_utf8;
1546               arglist_parser_done (argparser, arg);
1547               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1548               return true;
1549             }
1550           next_context_iter = null_context_list_iterator;
1551           state = 0;
1552           continue;
1553
1554         case token_type_rbracket:
1555           if (delim == token_type_rbracket || delim == token_type_eof)
1556             {
1557               xgettext_current_source_encoding = po_charset_utf8;
1558               arglist_parser_done (argparser, arg);
1559               xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1560               return false;
1561             }
1562           next_context_iter = null_context_list_iterator;
1563           state = 0;
1564           continue;
1565
1566         case token_type_string:
1567           {
1568             lex_pos_ty pos;
1569             pos.file_name = logical_file_name;
1570             pos.line_number = token.line_number;
1571
1572             xgettext_current_source_encoding = po_charset_utf8;
1573             if (extract_all)
1574               remember_a_message (mlp, NULL, token.string, inner_context,
1575                                   &pos, NULL, token.comment);
1576             else
1577               arglist_parser_remember (argparser, arg, token.string,
1578                                        inner_context,
1579                                        pos.file_name, pos.line_number,
1580                                        token.comment);
1581             xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1582           }
1583           drop_reference (token.comment);
1584           next_context_iter = null_context_list_iterator;
1585           state = 0;
1586           continue;
1587
1588         case token_type_eof:
1589           xgettext_current_source_encoding = po_charset_utf8;
1590           arglist_parser_done (argparser, arg);
1591           xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1592           return true;
1593
1594         case token_type_keyword:
1595         case token_type_plus:
1596         case token_type_regexp:
1597         case token_type_operator:
1598         case token_type_other:
1599           next_context_iter = null_context_list_iterator;
1600           state = 0;
1601           continue;
1602
1603         default:
1604           abort ();
1605         }
1606     }
1607 }
1608
1609
1610 void
1611 extract_javascript (FILE *f,
1612                 const char *real_filename, const char *logical_filename,
1613                 flag_context_list_table_ty *flag_table,
1614                 msgdomain_list_ty *mdlp)
1615 {
1616   message_list_ty *mlp = mdlp->item[0]->messages;
1617
1618   fp = f;
1619   real_file_name = real_filename;
1620   logical_file_name = xstrdup (logical_filename);
1621   line_number = 1;
1622
1623   lexical_context = lc_outside;
1624
1625   last_comment_line = -1;
1626   last_non_comment_line = -1;
1627
1628   xgettext_current_file_source_encoding = xgettext_global_source_encoding;
1629 #if HAVE_ICONV
1630   xgettext_current_file_source_iconv = xgettext_global_source_iconv;
1631 #endif
1632
1633   xgettext_current_source_encoding = xgettext_current_file_source_encoding;
1634 #if HAVE_ICONV
1635   xgettext_current_source_iconv = xgettext_current_file_source_iconv;
1636 #endif
1637
1638   continuation_or_nonblank_line = false;
1639
1640   flag_context_list_table = flag_table;
1641
1642   init_keywords ();
1643
1644   /* Eat tokens until eof is seen.  When extract_balanced returns
1645      due to an unbalanced closing parenthesis, just restart it.  */
1646   while (!extract_balanced (mlp, token_type_eof,
1647                             null_context, null_context_list_iterator,
1648                             arglist_parser_alloc (mlp, NULL)))
1649     ;
1650
1651   fp = NULL;
1652   real_file_name = NULL;
1653   logical_file_name = NULL;
1654   line_number = 0;
1655 }