js/src/jsscan.cpp

   1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
   2  * vim: set sw=4 ts=8 et tw=78:
   3  *
   4  * ***** BEGIN LICENSE BLOCK *****
   5  * Version: MPL 1.1/GPL 2.0/LGPL 2.1
   6  *
   7  * The contents of this file are subject to the Mozilla Public License Version
   8  * 1.1 (the "License"); you may not use this file except in compliance with
   9  * the License. You may obtain a copy of the License at
  10  * http://www.mozilla.org/MPL/
  11  *
  12  * Software distributed under the License is distributed on an "AS IS" basis,
  13  * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
  14  * for the specific language governing rights and limitations under the
  15  * License.
  16  *
  17  * The Original Code is Mozilla Communicator client code, released
  18  * March 31, 1998.
  19  *
  20  * The Initial Developer of the Original Code is
  21  * Netscape Communications Corporation.
  22  * Portions created by the Initial Developer are Copyright (C) 1998
  23  * the Initial Developer. All Rights Reserved.
  24  *
  25  * Contributor(s):
  26  *
  27  * Alternatively, the contents of this file may be used under the terms of
  28  * either of the GNU General Public License Version 2 or later (the "GPL"),
  29  * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
  30  * in which case the provisions of the GPL or the LGPL are applicable instead
  31  * of those above. If you wish to allow use of your version of this file only
  32  * under the terms of either the GPL or the LGPL, and not to allow others to
  33  * use your version of this file under the terms of the MPL, indicate your
  34  * decision by deleting the provisions above and replace them with the notice
  35  * and other provisions required by the GPL or the LGPL. If you do not delete
  36  * the provisions above, a recipient may use your version of this file under
  37  * the terms of any one of the MPL, the GPL or the LGPL.
  38  *
  39  * ***** END LICENSE BLOCK ***** */
  40
  41 /*
  42  * JS lexical scanner.
  43  */
  44 #include <stdio.h>      /* first to avoid trouble on some systems */
  45 #include <errno.h>
  46 #include <limits.h>
  47 #include <math.h>
  48 #ifdef HAVE_MEMORY_H
  49 #include <memory.h>
  50 #endif
  51 #include <stdarg.h>
  52 #include <stdlib.h>
  53 #include <string.h>
  54 #include "jstypes.h"
  55 #include "jsstdint.h"
  56 #include "jsarena.h"
  57 #include "jsbit.h"
  58 #include "jsutil.h"
  59 #include "jsprf.h"
  60 #include "jsapi.h"
  61 #include "jsatom.h"
  62 #include "jscntxt.h"
  63 #include "jsversion.h"
  64 #include "jsemit.h"
  65 #include "jsexn.h"
  66 #include "jsnum.h"
  67 #include "jsopcode.h"
  68 #include "jsparse.h"
  69 #include "jsregexp.h"
  70 #include "jsscan.h"
  71 #include "jsscript.h"
  72 #include "jsstaticcheck.h"
  73 #include "jsvector.h"
  74
  75 #include "jsscriptinlines.h"
  76
  77 #if JS_HAS_XML_SUPPORT
  78 #include "jsxml.h"
  79 #endif
  80
  81 using namespace js;
  82
  83 #define JS_KEYWORD(keyword, type, op, version) \
  84     const char js_##keyword##_str[] = #keyword;
  85 #include "jskeyword.tbl"
  86 #undef JS_KEYWORD
  87
  88 static const KeywordInfo keywords[] = {
  89 #define JS_KEYWORD(keyword, type, op, version) \
  90     {js_##keyword##_str, type, op, version},
  91 #include "jskeyword.tbl"
  92 #undef JS_KEYWORD
  93 };
  94
  95 namespace js {
  96
  97 const KeywordInfo *
  98 FindKeyword(const jschar *s, size_t length)
  99 {
 100     JS_ASSERT(length != 0);
 101
 102     register size_t i;
 103     const struct KeywordInfo *kw;
 104     const char *chars;
 105
 106 #define JSKW_LENGTH()           length
 107 #define JSKW_AT(column)         s[column]
 108 #define JSKW_GOT_MATCH(index)   i = (index); goto got_match;
 109 #define JSKW_TEST_GUESS(index)  i = (index); goto test_guess;
 110 #define JSKW_NO_MATCH()         goto no_match;
 111 #include "jsautokw.h"
 112 #undef JSKW_NO_MATCH
 113 #undef JSKW_TEST_GUESS
 114 #undef JSKW_GOT_MATCH
 115 #undef JSKW_AT
 116 #undef JSKW_LENGTH
 117
 118   got_match:
 119     return &keywords[i];
 120
 121   test_guess:
 122     kw = &keywords[i];
 123     chars = kw->chars;
 124     do {
 125         if (*s++ != (unsigned char)(*chars++))
 126             goto no_match;
 127     } while (--length != 0);
 128     return kw;
 129
 130   no_match:
 131     return NULL;
 132 }
 133
 134 } // namespace js
 135
 136 JSBool
 137 js_IsIdentifier(JSLinearString *str)
 138 {
 139     const jschar *chars = str->chars();
 140     size_t length = str->length();
 141
 142     if (length == 0)
 143         return JS_FALSE;
 144     jschar c = *chars;
 145     if (!JS_ISIDSTART(c))
 146         return JS_FALSE;
 147     const jschar *end = chars + length;
 148     while (++chars != end) {
 149         c = *chars;
 150         if (!JS_ISIDENT(c))
 151             return JS_FALSE;
 152     }
 153     return JS_TRUE;
 154 }
 155
/*
 * MSVC only: silence warning 4351 around the constructor below.
 * NOTE(review): C4351 is MSVC's notice about array members being
 * value-initialized from a constructor initializer list; presumably it is
 * triggered here by |tokens()| -- confirm against the class declaration.
 */
#ifdef _MSC_VER
#pragma warning(push)
#pragma warning(disable:4351)
#endif

/*
 * Initialize members that aren't initialized in |init|.
 *
 * Stores |cx|, value-initializes tokens, cursor, lookahead, flags and
 * listenerTSData, and constructs tokenbuf with |cx|.  The source buffer,
 * filename, line bookkeeping and lookup tables are set up later by init().
 */
TokenStream::TokenStream(JSContext *cx)
  : cx(cx), tokens(), cursor(), lookahead(), flags(), listenerTSData(), tokenbuf(cx)
{}

/* Restore the warning state saved by the push above. */
#ifdef _MSC_VER
#pragma warning(pop)
#endif
 169
 170 bool
 171 TokenStream::init(const jschar *base, size_t length, const char *fn, uintN ln, JSVersion v)
 172 {
 173     filename = fn;
 174     lineno = ln;
 175     version = v;
 176     xml = VersionHasXML(v);
 177
 178     userbuf.base = (jschar *)base;
 179     userbuf.limit = (jschar *)base + length;
 180     userbuf.ptr = (jschar *)base;
 181
 182     linebase = userbuf.base;
 183     prevLinebase = NULL;
 184
 185     listener = cx->debugHooks->sourceHandler;
 186     listenerData = cx->debugHooks->sourceHandlerData;
 187
 188     /* See getChar() for an explanation of maybeEOL[]. */
 189     memset(maybeEOL, 0, sizeof(maybeEOL));
 190     maybeEOL['\n'] = true;
 191     maybeEOL['\r'] = true;
 192     maybeEOL[LINE_SEPARATOR & 0xff] = true;
 193     maybeEOL[PARA_SEPARATOR & 0xff] = true;
 194
 195     /* See getTokenInternal() for an explanation of maybeStrSpecial[]. */
 196     memset(maybeStrSpecial, 0, sizeof(maybeStrSpecial));
 197     maybeStrSpecial['"'] = true;
 198     maybeStrSpecial['\''] = true;
 199     maybeStrSpecial['\\'] = true;
 200     maybeStrSpecial['\n'] = true;
 201     maybeStrSpecial['\r'] = true;
 202     maybeStrSpecial[LINE_SEPARATOR & 0xff] = true;
 203     maybeStrSpecial[PARA_SEPARATOR & 0xff] = true;
 204     maybeStrSpecial[EOF & 0xff] = true;
 205     return true;
 206 }
 207
 208 void
 209 TokenStream::close()
 210 {
 211     if (flags & TSF_OWNFILENAME)
 212         cx->free((void *) filename);
 213 }
 214
 215 /* Use the fastest available getc. */
 216 #if defined(HAVE_GETC_UNLOCKED)
 217 # define fast_getc getc_unlocked
 218 #elif defined(HAVE__GETC_NOLOCK)
 219 # define fast_getc _getc_nolock
 220 #else
 221 # define fast_getc getc
 222 #endif
 223
 224 JS_FRIEND_API(int)
 225 js_fgets(char *buf, int size, FILE *file)
 226 {
 227     int n, i, c;
 228     JSBool crflag;
 229
 230     n = size - 1;
 231     if (n < 0)
 232         return -1;
 233
 234     crflag = JS_FALSE;
 235     for (i = 0; i < n && (c = fast_getc(file)) != EOF; i++) {
 236         buf[i] = c;
 237         if (c == '\n') {        /* any \n ends a line */
 238             i++;                /* keep the \n; we know there is room for \0 */
 239             break;
 240         }
 241         if (crflag) {           /* \r not followed by \n ends line at the \r */
 242             ungetc(c, file);
 243             break;              /* and overwrite c in buf with \0 */
 244         }
 245         crflag = (c == '\r');
 246     }
 247
 248     buf[i] = '\0';
 249     return i;
 250 }
 251
 252 /* This gets the next char, normalizing all EOL sequences to '\n' as it goes. */
 253 int32
 254 TokenStream::getChar()
 255 {
 256     int32 c;
 257     if (JS_LIKELY(userbuf.ptr < userbuf.limit)) {
 258         c = *userbuf.ptr++;
 259
 260         /*
 261          * Normalize the jschar if it was a newline.  We need to detect any of
 262          * these four characters:  '\n' (0x000a), '\r' (0x000d),
 263          * LINE_SEPARATOR (0x2028), PARA_SEPARATOR (0x2029).  Testing for each
 264          * one in turn is slow, so we use a single probabilistic check, and if
 265          * that succeeds, test for them individually.
 266          *
 267          * We use the bottom 8 bits to index into a lookup table, succeeding
 268          * when d&0xff is 0xa, 0xd, 0x28 or 0x29.  Among ASCII chars (which
 269          * are by the far the most common) this gives false positives for '('
 270          * (0x0028) and ')' (0x0029).  We could avoid those by incorporating
 271          * the 13th bit of d into the lookup, but that requires extra shifting
 272          * and masking and isn't worthwhile.  See TokenStream::init() for the
 273          * initialization of the relevant entries in the table.
 274          */
 275         if (JS_UNLIKELY(maybeEOL[c & 0xff])) {
 276             if (c == '\n')
 277                 goto eol;
 278             if (c == '\r') {
 279                 if (userbuf.ptr < userbuf.limit && *userbuf.ptr == '\n') {
 280                     /* a \r\n sequence: treat as a single EOL, skip over the \n */
 281                     userbuf.ptr++;
 282                 }
 283                 goto eol;
 284             }
 285             if (c == LINE_SEPARATOR || c == PARA_SEPARATOR)
 286                 goto eol;
 287         }
 288         return c;
 289     }
 290
 291     flags |= TSF_EOF;
 292     return EOF;
 293
 294   eol:
 295     prevLinebase = linebase;
 296     linebase = userbuf.ptr;
 297     lineno++;
 298     return '\n';
 299 }
 300
 301 /*
 302  * This gets the next char. It does nothing special with EOL sequences, not
 303  * even updating the line counters.
 304  */
 305 int32
 306 TokenStream::getCharIgnoreEOL()
 307 {
 308     if (JS_LIKELY(userbuf.ptr < userbuf.limit))
 309         return *userbuf.ptr++;
 310
 311     flags |= TSF_EOF;
 312     return EOF;
 313 }
 314
 315 void
 316 TokenStream::ungetChar(int32 c)
 317 {
 318     if (c == EOF)
 319         return;
 320     JS_ASSERT(userbuf.ptr > userbuf.base);
 321     userbuf.ptr--;
 322     if (c == '\n') {
 323 #ifdef DEBUG
 324         int32 c2 = *userbuf.ptr;
 325         JS_ASSERT(c2 == '\n' || c2 == '\r' || c2 == LINE_SEPARATOR || c2 == PARA_SEPARATOR);
 326 #endif
 327         if (userbuf.ptr > userbuf.base && *(userbuf.ptr - 1) == '\r')
 328             userbuf.ptr--;          /* also unget the \r in a \r\n sequence */
 329         JS_ASSERT(prevLinebase);    /* we should never get more than one EOL char */
 330         linebase = prevLinebase;
 331         prevLinebase = NULL;
 332         lineno--;
 333     } else {
 334         JS_ASSERT(*userbuf.ptr == c);
 335     }
 336 }
 337
 338 void
 339 TokenStream::ungetCharIgnoreEOL(int32 c)
 340 {
 341     JS_ASSERT(c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR || c == EOF);
 342     if (c == EOF)
 343         return;
 344     JS_ASSERT(userbuf.ptr > userbuf.base);
 345     userbuf.ptr--;
 346 }
 347
 348 /*
 349  * Peek n chars ahead into ts.  Return true if n chars were read, false if
 350  * there weren't enough characters in the input stream.  This function cannot
 351  * be used to peek into or past a newline.
 352  */
 353 JSBool
 354 TokenStream::peekChars(intN n, jschar *cp)
 355 {
 356     intN i, j;
 357     int32 c;
 358
 359     for (i = 0; i < n; i++) {
 360         c = getChar();
 361         if (c == EOF)
 362             break;
 363         if (c == '\n') {
 364             ungetChar(c);
 365             break;
 366         }
 367         cp[i] = (jschar)c;
 368     }
 369     for (j = i - 1; j >= 0; j--)
 370         ungetChar(cp[j]);
 371     return i == n;
 372 }
 373
 374 jschar *
 375 TokenStream::findEOL()
 376 {
 377     TokenBuf tmpUserbuf = userbuf;
 378     jschar *tmpLinebase = linebase;
 379     jschar *tmpPrevLinebase = prevLinebase;
 380     uintN tmpFlags = flags;
 381     uintN tmpLineno = lineno;
 382
 383     while (true) {
 384         int32 c = getChar();
 385         if (c == '\n' || c == EOF)
 386             break;
 387     }
 388     jschar *linelimit = userbuf.ptr;
 389
 390     /* Need to restore everything changed by getChar(). */
 391     userbuf = tmpUserbuf;
 392     linebase = tmpLinebase;
 393     prevLinebase = tmpPrevLinebase;
 394     flags = tmpFlags;
 395     lineno = tmpLineno;
 396
 397     return linelimit;
 398 }
 399
 400 bool
 401 TokenStream::reportCompileErrorNumberVA(JSParseNode *pn, uintN flags, uintN errorNumber,
 402                                         va_list ap)
 403 {
 404     JSErrorReport report;
 405     char *message;
 406     size_t linelength;
 407     jschar *linechars;
 408     jschar *linelimit;
 409     char *linebytes;
 410     bool warning;
 411     JSBool ok;
 412     TokenPos *tp;
 413     uintN index, i;
 414     JSErrorReporter onError;
 415
 416     if (JSREPORT_IS_STRICT(flags) && !cx->hasStrictOption())
 417         return JS_TRUE;
 418
 419     warning = JSREPORT_IS_WARNING(flags);
 420     if (warning && cx->hasWErrorOption()) {
 421         flags &= ~JSREPORT_WARNING;
 422         warning = false;
 423     }
 424
 425     PodZero(&report);
 426     report.flags = flags;
 427     report.errorNumber = errorNumber;
 428     message = NULL;
 429     linechars = NULL;
 430     linebytes = NULL;
 431
 432     MUST_FLOW_THROUGH("out");
 433     ok = js_ExpandErrorArguments(cx, js_GetErrorMessage, NULL,
 434                                  errorNumber, &message, &report,
 435                                  !(flags & JSREPORT_UC), ap);
 436     if (!ok) {
 437         warning = false;
 438         goto out;
 439     }
 440
 441     report.filename = filename;
 442
 443     if (pn) {
 444         report.lineno = pn->pn_pos.begin.lineno;
 445         if (report.lineno != lineno)
 446             goto report;
 447         tp = &pn->pn_pos;
 448     } else {
 449         /* Point to the current token, not the next one to get. */
 450         tp = &tokens[cursor].pos;
 451     }
 452     report.lineno = lineno;
 453
 454     linelimit = findEOL();
 455     linelength = linelimit - linebase;
 456
 457     linechars = (jschar *)cx->malloc((linelength + 1) * sizeof(jschar));
 458     if (!linechars) {
 459         warning = false;
 460         goto out;
 461     }
 462     memcpy(linechars, linebase, linelength * sizeof(jschar));
 463     linechars[linelength] = 0;
 464     linebytes = js_DeflateString(cx, linechars, linelength);
 465     if (!linebytes) {
 466         warning = false;
 467         goto out;
 468     }
 469     report.linebuf = linebytes;     /* the offending source line, without final \n */
 470
 471     index = (tp->begin.lineno == tp->end.lineno)
 472             ? tp->begin.index         /* the column number of the start of the bad token */
 473             : 0;
 474
 475     report.tokenptr = report.linebuf + index;
 476     report.uclinebuf = linechars;
 477     report.uctokenptr = report.uclinebuf + index;
 478
 479     /*
 480      * If there's a runtime exception type associated with this error
 481      * number, set that as the pending exception.  For errors occuring at
 482      * compile time, this is very likely to be a JSEXN_SYNTAXERR.
 483      *
 484      * If an exception is thrown but not caught, the JSREPORT_EXCEPTION
 485      * flag will be set in report.flags.  Proper behavior for an error
 486      * reporter is to ignore a report with this flag for all but top-level
 487      * compilation errors.  The exception will remain pending, and so long
 488      * as the non-top-level "load", "eval", or "compile" native function
 489      * returns false, the top-level reporter will eventually receive the
 490      * uncaught exception report.
 491      *
 492      * XXX it'd probably be best if there was only one call to this
 493      * function, but there seem to be two error reporter call points.
 494      */
 495   report:
 496     onError = cx->errorReporter;
 497
 498     /*
 499      * Try to raise an exception only if there isn't one already set --
 500      * otherwise the exception will describe the last compile-time error,
 501      * which is likely spurious.
 502      */
 503     if (!(flags & TSF_ERROR)) {
 504         if (js_ErrorToException(cx, message, &report, NULL, NULL))
 505             onError = NULL;
 506     }
 507
 508     /*
 509      * Suppress any compile-time errors that don't occur at the top level.
 510      * This may still fail, as interplevel may be zero in contexts where we
 511      * don't really want to call the error reporter, as when js is called
 512      * by other code which could catch the error.
 513      */
 514     if (cx->interpLevel != 0 && !JSREPORT_IS_WARNING(flags))
 515         onError = NULL;
 516
 517     if (onError) {
 518         JSDebugErrorHook hook = cx->debugHooks->debugErrorHook;
 519
 520         /*
 521          * If debugErrorHook is present then we give it a chance to veto
 522          * sending the error on to the regular error reporter.
 523          */
 524         if (hook && !hook(cx, message, &report,
 525                           cx->debugHooks->debugErrorHookData)) {
 526             onError = NULL;
 527         }
 528     }
 529     if (onError)
 530         (*onError)(cx, message, &report);
 531
 532   out:
 533     if (linebytes)
 534         cx->free(linebytes);
 535     if (linechars)
 536         cx->free(linechars);
 537     if (message)
 538         cx->free(message);
 539     if (report.ucmessage)
 540         cx->free((void *)report.ucmessage);
 541
 542     if (report.messageArgs) {
 543         if (!(flags & JSREPORT_UC)) {
 544             i = 0;
 545             while (report.messageArgs[i])
 546                 cx->free((void *)report.messageArgs[i++]);
 547         }
 548         cx->free((void *)report.messageArgs);
 549     }
 550
 551     if (!JSREPORT_IS_WARNING(flags)) {
 552         /* Set the error flag to suppress spurious reports. */
 553         flags |= TSF_ERROR;
 554     }
 555
 556     return warning;
 557 }
 558
 559 bool
 560 js::ReportStrictModeError(JSContext *cx, TokenStream *ts, JSTreeContext *tc, JSParseNode *pn,
 561                           uintN errorNumber, ...)
 562 {
 563     JS_ASSERT(ts || tc);
 564     JS_ASSERT(cx == ts->getContext());
 565
 566     /* In strict mode code, this is an error, not merely a warning. */
 567     uintN flags;
 568     if ((ts && ts->isStrictMode()) || (tc && (tc->flags & TCF_STRICT_MODE_CODE))) {
 569         flags = JSREPORT_ERROR;
 570     } else {
 571         if (!cx->hasStrictOption())
 572             return true;
 573         flags = JSREPORT_WARNING;
 574     }
 575
 576     va_list ap;
 577     va_start(ap, errorNumber);
 578     bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 579     va_end(ap);
 580
 581     return result;
 582 }
 583
 584 bool
 585 js::ReportCompileErrorNumber(JSContext *cx, TokenStream *ts, JSParseNode *pn,
 586                              uintN flags, uintN errorNumber, ...)
 587 {
 588     va_list ap;
 589
 590     /*
 591      * We don't accept a JSTreeContext argument, so we can't implement
 592      * JSREPORT_STRICT_MODE_ERROR here.  Use ReportStrictModeError instead,
 593      * or do the checks in the caller and pass plain old JSREPORT_ERROR.
 594      */
 595     JS_ASSERT(!(flags & JSREPORT_STRICT_MODE_ERROR));
 596
 597     va_start(ap, errorNumber);
 598     JS_ASSERT(cx == ts->getContext());
 599     bool result = ts->reportCompileErrorNumberVA(pn, flags, errorNumber, ap);
 600     va_end(ap);
 601
 602     return result;
 603 }
 604
 605 #if JS_HAS_XML_SUPPORT
 606
 607 JSBool
 608 TokenStream::getXMLEntity()
 609 {
 610     ptrdiff_t offset, length, i;
 611     int c, d;
 612     JSBool ispair;
 613     jschar *bp, digit;
 614     char *bytes;
 615     JSErrNum msg;
 616
 617     CharBuffer &tb = tokenbuf;
 618
 619     /* Put the entity, including the '&' already scanned, in tokenbuf. */
 620     offset = tb.length();
 621     if (!tb.append('&'))
 622         return JS_FALSE;
 623     while ((c = getChar()) != ';') {
 624         if (c == EOF || c == '\n') {
 625             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_END_OF_XML_ENTITY);
 626             return JS_FALSE;
 627         }
 628         if (!tb.append(c))
 629             return JS_FALSE;
 630     }
 631
 632     /* Let length be the number of jschars after the '&', including the ';'. */
 633     length = tb.length() - offset;
 634     bp = tb.begin() + offset;
 635     c = d = 0;
 636     ispair = JS_FALSE;
 637     if (length > 2 && bp[1] == '#') {
 638         /* Match a well-formed XML Character Reference. */
 639         i = 2;
 640         if (length > 3 && JS_TOLOWER(bp[i]) == 'x') {
 641             if (length > 9)     /* at most 6 hex digits allowed */
 642                 goto badncr;
 643             while (++i < length) {
 644                 digit = bp[i];
 645                 if (!JS7_ISHEX(digit))
 646                     goto badncr;
 647                 c = (c << 4) + JS7_UNHEX(digit);
 648             }
 649         } else {
 650             while (i < length) {
 651                 digit = bp[i++];
 652                 if (!JS7_ISDEC(digit))
 653                     goto badncr;
 654                 c = (c * 10) + JS7_UNDEC(digit);
 655                 if (c < 0)
 656                     goto badncr;
 657             }
 658         }
 659
 660         if (0x10000 <= c && c <= 0x10FFFF) {
 661             /* Form a surrogate pair (c, d) -- c is the high surrogate. */
 662             d = 0xDC00 + (c & 0x3FF);
 663             c = 0xD7C0 + (c >> 10);
 664             ispair = JS_TRUE;
 665         } else {
 666             /* Enforce the http://www.w3.org/TR/REC-xml/#wf-Legalchar WFC. */
 667             if (c != 0x9 && c != 0xA && c != 0xD &&
 668                 !(0x20 <= c && c <= 0xD7FF) &&
 669                 !(0xE000 <= c && c <= 0xFFFD)) {
 670                 goto badncr;
 671             }
 672         }
 673     } else {
 674         /* Try to match one of the five XML 1.0 predefined entities. */
 675         switch (length) {
 676           case 3:
 677             if (bp[2] == 't') {
 678                 if (bp[1] == 'l')
 679                     c = '<';
 680                 else if (bp[1] == 'g')
 681                     c = '>';
 682             }
 683             break;
 684           case 4:
 685             if (bp[1] == 'a' && bp[2] == 'm' && bp[3] == 'p')
 686                 c = '&';
 687             break;
 688           case 5:
 689             if (bp[3] == 'o') {
 690                 if (bp[1] == 'a' && bp[2] == 'p' && bp[4] == 's')
 691                     c = '\'';
 692                 else if (bp[1] == 'q' && bp[2] == 'u' && bp[4] == 't')
 693                     c = '"';
 694             }
 695             break;
 696         }
 697         if (c == 0) {
 698             msg = JSMSG_UNKNOWN_XML_ENTITY;
 699             goto bad;
 700         }
 701     }
 702
 703     /* If we matched, retract tokenbuf and store the entity's value. */
 704     *bp++ = (jschar) c;
 705     if (ispair)
 706         *bp++ = (jschar) d;
 707     tb.shrinkBy(tb.end() - bp);
 708     return JS_TRUE;
 709
 710   badncr:
 711     msg = JSMSG_BAD_XML_NCR;
 712   bad:
 713     /* No match: throw a TypeError per ECMA-357 10.3.2.1 step 8(a). */
 714     JS_ASSERT((tb.end() - bp) >= 1);
 715     bytes = js_DeflateString(cx, bp + 1, (tb.end() - bp) - 1);
 716     if (bytes) {
 717         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, msg, bytes);
 718         cx->free(bytes);
 719     }
 720     return JS_FALSE;
 721 }
 722
 723 #endif /* JS_HAS_XML_SUPPORT */
 724
 725 /*
 726  * We have encountered a '\': check for a Unicode escape sequence after it.
 727  * Return 'true' and the character code value (by value) if we found a
 728  * Unicode escape sequence.  Otherwise, return 'false'.  In both cases, do not
 729  * advance along the buffer.
 730  */
 731 bool
 732 TokenStream::peekUnicodeEscape(int *result)
 733 {
 734     jschar cp[5];
 735
 736     if (peekChars(5, cp) && cp[0] == 'u' &&
 737         JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
 738         JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4]))
 739     {
 740         *result = (((((JS7_UNHEX(cp[1]) << 4)
 741                 + JS7_UNHEX(cp[2])) << 4)
 742               + JS7_UNHEX(cp[3])) << 4)
 743             + JS7_UNHEX(cp[4]);
 744         return true;
 745     }
 746     return false;
 747 }
 748
 749 bool
 750 TokenStream::matchUnicodeEscapeIdStart(int32 *cp)
 751 {
 752     if (peekUnicodeEscape(cp) && JS_ISIDSTART(*cp)) {
 753         skipChars(5);
 754         return true;
 755     }
 756     return false;
 757 }
 758
 759 bool
 760 TokenStream::matchUnicodeEscapeIdent(int32 *cp)
 761 {
 762     if (peekUnicodeEscape(cp) && JS_ISIDENT(*cp)) {
 763         skipChars(5);
 764         return true;
 765     }
 766     return false;
 767 }
 768
 769 Token *
 770 TokenStream::newToken(ptrdiff_t adjust)
 771 {
 772     cursor = (cursor + 1) & ntokensMask;
 773     Token *tp = &tokens[cursor];
 774     tp->ptr = userbuf.ptr + adjust;
 775     tp->pos.begin.index = tp->ptr - linebase;
 776     tp->pos.begin.lineno = tp->pos.end.lineno = lineno;
 777     return tp;
 778 }
 779
 780 static JS_ALWAYS_INLINE JSBool
 781 ScanAsSpace(jschar c)
 782 {
 783     /* Treat little- and big-endian BOMs as whitespace for compatibility. */
 784     if (JS_ISSPACE(c) || c == 0xfffe || c == 0xfeff)
 785         return JS_TRUE;
 786     return JS_FALSE;
 787 }
 788
 789 JS_ALWAYS_INLINE JSAtom *
 790 TokenStream::atomize(JSContext *cx, CharBuffer &cb)
 791 {
 792     return js_AtomizeChars(cx, cb.begin(), cb.length(), 0);
 793 }
 794
 795 TokenKind
 796 TokenStream::getTokenInternal()
 797 {
 798     TokenKind tt;
 799     int c, qc;
 800     Token *tp;
 801     JSAtom *atom;
 802     bool hadUnicodeEscape;
 803 #if JS_HAS_XML_SUPPORT
 804     JSBool inTarget;
 805     size_t targetLength;
 806     ptrdiff_t contentIndex;
 807 #endif
 808
 809 #if JS_HAS_XML_SUPPORT
 810     /*
 811      * Look for XML text.
 812      */
 813
 814     if (flags & TSF_XMLTEXTMODE) {
 815         tt = TOK_XMLSPACE;      /* veto if non-space, return TOK_XMLTEXT */
 816         tp = newToken(0);
 817         tokenbuf.clear();
 818         qc = (flags & TSF_XMLONLYMODE) ? '<' : '{';
 819
 820         while ((c = getChar()) != qc && c != '<' && c != EOF) {
 821             if (c == '&' && qc == '<') {
 822                 if (!getXMLEntity())
 823                     goto error;
 824                 tt = TOK_XMLTEXT;
 825                 continue;
 826             }
 827
 828             if (!JS_ISXMLSPACE(c))
 829                 tt = TOK_XMLTEXT;
 830             if (!tokenbuf.append(c))
 831                 goto error;
 832         }
 833         ungetChar(c);
 834
 835         if (tokenbuf.empty()) {
 836             atom = NULL;
 837         } else {
 838             atom = atomize(cx, tokenbuf);
 839             if (!atom)
 840                 goto error;
 841         }
 842         tp->pos.end.lineno = lineno;
 843         tp->t_op = JSOP_STRING;
 844         tp->t_atom = atom;
 845         goto out;
 846     }
 847
 848     /*
 849      * Look for XML tags.
 850      */
 851
 852     if (flags & TSF_XMLTAGMODE) {
 853         tp = newToken(0);
 854         c = getChar();
 855         if (JS_ISXMLSPACE(c)) {
 856             do {
 857                 c = getChar();
 858             } while (JS_ISXMLSPACE(c));
 859             ungetChar(c);
 860             tp->pos.end.lineno = lineno;
 861             tt = TOK_XMLSPACE;
 862             goto out;
 863         }
 864
 865         if (c == EOF) {
 866             tt = TOK_EOF;
 867             goto out;
 868         }
 869
 870         tokenbuf.clear();
 871         if (JS_ISXMLNSSTART(c)) {
 872             JSBool sawColon = JS_FALSE;
 873
 874             if (!tokenbuf.append(c))
 875                 goto error;
 876             while ((c = getChar()) != EOF && JS_ISXMLNAME(c)) {
 877                 if (c == ':') {
 878                     int nextc;
 879
 880                     if (sawColon ||
 881                         (nextc = peekChar(),
 882                          ((flags & TSF_XMLONLYMODE) || nextc != '{') &&
 883                          !JS_ISXMLNAME(nextc))) {
 884                         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 885                                                  JSMSG_BAD_XML_QNAME);
 886                         goto error;
 887                     }
 888                     sawColon = JS_TRUE;
 889                 }
 890
 891                 if (!tokenbuf.append(c))
 892                     goto error;
 893             }
 894
 895             ungetChar(c);
 896             atom = atomize(cx, tokenbuf);
 897             if (!atom)
 898                 goto error;
 899             tp->t_op = JSOP_STRING;
 900             tp->t_atom = atom;
 901             tt = TOK_XMLNAME;
 902             goto out;
 903         }
 904
 905         switch (c) {
 906           case '{':
 907             if (flags & TSF_XMLONLYMODE)
 908                 goto bad_xml_char;
 909             tt = TOK_LC;
 910             goto out;
 911
 912           case '=':
 913             tt = TOK_ASSIGN;
 914             goto out;
 915
 916           case '"':
 917           case '\'':
 918             qc = c;
 919             while ((c = getChar()) != qc) {
 920                 if (c == EOF) {
 921                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
 922                                              JSMSG_UNTERMINATED_STRING);
 923                     goto error;
 924                 }
 925
 926                 /*
 927                  * XML attribute values are double-quoted when pretty-printed,
 928                  * so escape " if it is expressed directly in a single-quoted
 929                  * attribute value.
 930                  */
 931                 if (c == '"' && !(flags & TSF_XMLONLYMODE)) {
 932                     JS_ASSERT(qc == '\'');
 933                     if (!tokenbuf.append(js_quot_entity_str,
 934                                      strlen(js_quot_entity_str)))
 935                         goto error;
 936                     continue;
 937                 }
 938
 939                 if (c == '&' && (flags & TSF_XMLONLYMODE)) {
 940                     if (!getXMLEntity())
 941                         goto error;
 942                     continue;
 943                 }
 944
 945                 if (!tokenbuf.append(c))
 946                     goto error;
 947             }
 948             atom = atomize(cx, tokenbuf);
 949             if (!atom)
 950                 goto error;
 951             tp->pos.end.lineno = lineno;
 952             tp->t_op = JSOP_STRING;
 953             tp->t_atom = atom;
 954             tt = TOK_XMLATTR;
 955             goto out;
 956
 957           case '>':
 958             tt = TOK_XMLTAGC;
 959             goto out;
 960
 961           case '/':
 962             if (matchChar('>')) {
 963                 tt = TOK_XMLPTAGC;
 964                 goto out;
 965             }
 966             /* FALL THROUGH */
 967
 968           bad_xml_char:
 969           default:
 970             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_CHARACTER);
 971             goto error;
 972         }
 973         /* NOTREACHED */
 974     }
 975 #endif /* JS_HAS_XML_SUPPORT */
 976
 977   retry:
 978     /*
 979      * This gets the next non-space char and starts the token.
 980      */
 981     do {
 982         c = getChar();
 983         if (c == '\n') {
 984             flags &= ~TSF_DIRTYLINE;
 985             if (flags & TSF_NEWLINES)
 986                 break;
 987         }
 988     } while (ScanAsSpace((jschar)c));
 989
 990     tp = newToken(-1);
 991     if (c == EOF) {
 992         tt = TOK_EOF;
 993         goto out;
 994     }
 995
 996     /*
 997      * Look for an identifier.
 998      */
 999
1000     hadUnicodeEscape = false;
1001     if (JS_ISIDSTART(c) ||
1002         (c == '\\' && (hadUnicodeEscape = matchUnicodeEscapeIdStart(&qc))))
1003     {
1004         if (hadUnicodeEscape)
1005             c = qc;
1006         tokenbuf.clear();
1007         for (;;) {
1008             if (!tokenbuf.append(c))
1009                 goto error;
1010             c = getChar();
1011             if (c == '\\') {
1012                 if (!matchUnicodeEscapeIdent(&qc))
1013                     break;
1014                 c = qc;
1015                 hadUnicodeEscape = true;
1016             } else {
1017                 if (!JS_ISIDENT(c))
1018                     break;
1019             }
1020         }
1021         ungetChar(c);
1022
1023         /*
1024          * Check for keywords unless we saw Unicode escape or parser asks
1025          * to ignore keywords.
1026          */
1027         const KeywordInfo *kw;
1028         if (!hadUnicodeEscape &&
1029             !(flags & TSF_KEYWORD_IS_NAME) &&
1030             (kw = FindKeyword(tokenbuf.begin(), tokenbuf.length()))) {
1031             if (kw->tokentype == TOK_RESERVED) {
1032                 if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1033                                               JSMSG_RESERVED_ID, kw->chars)) {
1034                     goto error;
1035                 }
1036             } else if (kw->tokentype == TOK_STRICT_RESERVED) {
1037                 if (isStrictMode()
1038                     ? !ReportStrictModeError(cx, this, NULL, NULL, JSMSG_RESERVED_ID, kw->chars)
1039                     : !ReportCompileErrorNumber(cx, this, NULL,
1040                                                 JSREPORT_STRICT | JSREPORT_WARNING,
1041                                                 JSMSG_RESERVED_ID, kw->chars)) {
1042                     goto error;
1043                 }
1044             } else {
1045                 if (kw->version <= versionNumber()) {
1046                     tt = kw->tokentype;
1047                     tp->t_op = (JSOp) kw->op;
1048                     goto out;
1049                 }
1050
1051                 /*
1052                  * let/yield are a Mozilla extension starting in JS1.7. If we
1053                  * aren't parsing for a version supporting these extensions,
1054                  * conform to ES5 and forbid these names in strict mode.
1055                  */
1056                 if ((kw->tokentype == TOK_LET || kw->tokentype == TOK_YIELD) &&
1057                     !ReportStrictModeError(cx, this, NULL, NULL, JSMSG_RESERVED_ID, kw->chars))
1058                 {
1059                     goto error;
1060                 }
1061             }
1062         }
1063
1064         atom = atomize(cx, tokenbuf);
1065         if (!atom)
1066             goto error;
1067         tp->t_op = JSOP_NAME;
1068         tp->t_atom = atom;
1069         tt = TOK_NAME;
1070         goto out;
1071     }
1072
1073     /*
1074      * Look for a number.
1075      */
1076
1077     if (JS7_ISDEC(c) || (c == '.' && JS7_ISDEC(peekChar()))) {
1078         int radix = 10;
1079         tokenbuf.clear();
1080
1081         if (c == '0') {
1082             c = getChar();
1083             if (JS_TOLOWER(c) == 'x') {
1084                 radix = 16;
1085                 c = getChar();
1086                 if (!JS7_ISHEX(c)) {
1087                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1088                                              JSMSG_MISSING_HEXDIGITS);
1089                     goto error;
1090                 }
1091             } else if (JS7_ISDEC(c)) {
1092                 radix = 8;
1093             }
1094         }
1095
1096         while (JS7_ISHEX(c)) {
1097             if (radix < 16) {
1098                 if (JS7_ISLET(c))
1099                     break;
1100
1101                 if (radix == 8) {
1102                     /* Octal integer literals are not permitted in strict mode code. */
1103                     if (!ReportStrictModeError(cx, this, NULL, NULL, JSMSG_DEPRECATED_OCTAL))
1104                         goto error;
1105
1106                     /*
1107                      * Outside strict mode, we permit 08 and 09 as decimal numbers, which
1108                      * makes our behaviour a superset of the ECMA numeric grammar. We
1109                      * might not always be so permissive, so we warn about it.
1110                      */
1111                     if (c >= '8') {
1112                         if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING,
1113                                                       JSMSG_BAD_OCTAL, c == '8' ? "08" : "09")) {
1114                             goto error;
1115                         }
1116                         radix = 10;
1117                     }
1118                 }
1119             }
1120             if (!tokenbuf.append(c))
1121                 goto error;
1122             c = getChar();
1123         }
1124
1125         if (radix == 10 && (c == '.' || JS_TOLOWER(c) == 'e')) {
1126             if (c == '.') {
1127                 do {
1128                     if (!tokenbuf.append(c))
1129                         goto error;
1130                     c = getChar();
1131                 } while (JS7_ISDEC(c));
1132             }
1133             if (JS_TOLOWER(c) == 'e') {
1134                 if (!tokenbuf.append(c))
1135                     goto error;
1136                 c = getChar();
1137                 if (c == '+' || c == '-') {
1138                     if (!tokenbuf.append(c))
1139                         goto error;
1140                     c = getChar();
1141                 }
1142                 if (!JS7_ISDEC(c)) {
1143                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1144                                              JSMSG_MISSING_EXPONENT);
1145                     goto error;
1146                 }
1147                 do {
1148                     if (!tokenbuf.append(c))
1149                         goto error;
1150                     c = getChar();
1151                 } while (JS7_ISDEC(c));
1152             }
1153         }
1154
1155         if (JS_ISIDSTART(c)) {
1156             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_IDSTART_AFTER_NUMBER);
1157             goto error;
1158         }
1159
1160         /* Put back the next char and NUL-terminate tokenbuf for js_strto*. */
1161         ungetChar(c);
1162         if (!tokenbuf.append(0))
1163             goto error;
1164
1165         jsdouble dval;
1166         const jschar *dummy;
1167         if (radix == 10) {
1168             if (!js_strtod(cx, tokenbuf.begin(), tokenbuf.end(), &dummy, &dval))
1169                 goto error;
1170         } else {
1171             if (!GetPrefixInteger(cx, tokenbuf.begin(), tokenbuf.end(), radix, &dummy, &dval))
1172                 goto error;
1173         }
1174         tp->t_dval = dval;
1175         tt = TOK_NUMBER;
1176         goto out;
1177     }
1178
1179     /*
1180      * Look for a string.
1181      */
1182
1183     if (c == '"' || c == '\'') {
1184         qc = c;
1185         tokenbuf.clear();
1186         while (true) {
1187             /*
1188              * We need to detect any of these chars:  " or ', \n (or its
1189              * equivalents), \\, EOF.  We use maybeStrSpecial[] in a manner
1190              * similar to maybeEOL[], see above.  Because we detect EOL
1191              * sequences here and put them back immediately, we can use
1192              * getCharIgnoreEOL().
1193              */
1194             c = getCharIgnoreEOL();
1195             if (maybeStrSpecial[c & 0xff]) {
1196                 if (c == qc)
1197                     break;
1198                 if (c == '\\') {
1199                     switch (c = getChar()) {
1200                       case 'b': c = '\b'; break;
1201                       case 'f': c = '\f'; break;
1202                       case 'n': c = '\n'; break;
1203                       case 'r': c = '\r'; break;
1204                       case 't': c = '\t'; break;
1205                       case 'v': c = '\v'; break;
1206
1207                       default:
1208                         if ('0' <= c && c < '8') {
1209                             int32 val = JS7_UNDEC(c);
1210
1211                             c = peekChar();
1212                             /* Strict mode code allows only \0, then a non-digit. */
1213                             if (val != 0 || JS7_ISDEC(c)) {
1214                                 if (!ReportStrictModeError(cx, this, NULL, NULL,
1215                                                            JSMSG_DEPRECATED_OCTAL)) {
1216                                     goto error;
1217                                 }
1218                                 setOctalCharacterEscape();
1219                             }
1220                             if ('0' <= c && c < '8') {
1221                                 val = 8 * val + JS7_UNDEC(c);
1222                                 getChar();
1223                                 c = peekChar();
1224                                 if ('0' <= c && c < '8') {
1225                                     int32 save = val;
1226                                     val = 8 * val + JS7_UNDEC(c);
1227                                     if (val <= 0377)
1228                                         getChar();
1229                                     else
1230                                         val = save;
1231                                 }
1232                             }
1233
1234                             c = (jschar)val;
1235                         } else if (c == 'u') {
1236                             jschar cp[4];
1237                             if (peekChars(4, cp) &&
1238                                 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
1239                                 JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
1240                                 c = (((((JS7_UNHEX(cp[0]) << 4)
1241                                         + JS7_UNHEX(cp[1])) << 4)
1242                                       + JS7_UNHEX(cp[2])) << 4)
1243                                     + JS7_UNHEX(cp[3]);
1244                                 skipChars(4);
1245                             }
1246                         } else if (c == 'x') {
1247                             jschar cp[2];
1248                             if (peekChars(2, cp) &&
1249                                 JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1])) {
1250                                 c = (JS7_UNHEX(cp[0]) << 4) + JS7_UNHEX(cp[1]);
1251                                 skipChars(2);
1252                             }
1253                         } else if (c == '\n') {
1254                             /* ECMA follows C by removing escaped newlines. */
1255                             continue;
1256                         }
1257                         break;
1258                     }
1259                 } else if (c == '\n' || c == '\r' || c == LINE_SEPARATOR || c == PARA_SEPARATOR ||
1260                            c == EOF)
1261                 {
1262                     ungetCharIgnoreEOL(c);
1263                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1264                                              JSMSG_UNTERMINATED_STRING);
1265                     goto error;
1266                 }
1267             }
1268             if (!tokenbuf.append(c))
1269                 goto error;
1270         }
1271         atom = atomize(cx, tokenbuf);
1272         if (!atom)
1273             goto error;
1274         tp->pos.end.lineno = lineno;
1275         tp->t_op = JSOP_STRING;
1276         tp->t_atom = atom;
1277         tt = TOK_STRING;
1278         goto out;
1279     }
1280
1281     /*
1282      * This handles everything else.
1283      */
1284
1285     switch (c) {
1286       case '\n': tt = TOK_EOL; goto eol_out;
1287       case ';':  tt = TOK_SEMI; break;
1288       case '[':  tt = TOK_LB; break;
1289       case ']':  tt = TOK_RB; break;
1290       case '{':  tt = TOK_LC; break;
1291       case '}':  tt = TOK_RC; break;
1292       case '(':  tt = TOK_LP; break;
1293       case ')':  tt = TOK_RP; break;
1294       case ',':  tt = TOK_COMMA; break;
1295       case '?':  tt = TOK_HOOK; break;
1296
1297       case '.':
1298 #if JS_HAS_XML_SUPPORT
1299         if (matchChar(c))
1300             tt = TOK_DBLDOT;
1301         else
1302 #endif
1303             tt = TOK_DOT;
1304         break;
1305
1306       case ':':
1307 #if JS_HAS_XML_SUPPORT
1308         if (matchChar(c)) {
1309             tt = TOK_DBLCOLON;
1310             break;
1311         }
1312 #endif
1313         /*
1314          * Default so compiler can modify to JSOP_GETTER if 'p getter: v' in an
1315          * object initializer, likewise for setter.
1316          */
1317         tp->t_op = JSOP_NOP;
1318         tt = TOK_COLON;
1319         break;
1320
1321       case '|':
1322         if (matchChar(c)) {
1323             tt = TOK_OR;
1324         } else if (matchChar('=')) {
1325             tp->t_op = JSOP_BITOR;
1326             tt = TOK_ASSIGN;
1327         } else {
1328             tt = TOK_BITOR;
1329         }
1330         break;
1331
1332       case '^':
1333         if (matchChar('=')) {
1334             tp->t_op = JSOP_BITXOR;
1335             tt = TOK_ASSIGN;
1336         } else {
1337             tt = TOK_BITXOR;
1338         }
1339         break;
1340
1341       case '&':
1342         if (matchChar(c)) {
1343             tt = TOK_AND;
1344         } else if (matchChar('=')) {
1345             tp->t_op = JSOP_BITAND;
1346             tt = TOK_ASSIGN;
1347         } else {
1348             tt = TOK_BITAND;
1349         }
1350         break;
1351
1352       case '=':
1353         if (matchChar(c)) {
1354             tp->t_op = matchChar(c) ? JSOP_STRICTEQ : JSOP_EQ;
1355             tt = TOK_EQOP;
1356         } else {
1357             tp->t_op = JSOP_NOP;
1358             tt = TOK_ASSIGN;
1359         }
1360         break;
1361
1362       case '!':
1363         if (matchChar('=')) {
1364             tp->t_op = matchChar('=') ? JSOP_STRICTNE : JSOP_NE;
1365             tt = TOK_EQOP;
1366         } else {
1367             tp->t_op = JSOP_NOT;
1368             tt = TOK_UNARYOP;
1369         }
1370         break;
1371
1372 #if JS_HAS_XML_SUPPORT
1373       case '@':
1374         tt = TOK_AT;
1375         break;
1376 #endif
1377
1378       case '<':
1379 #if JS_HAS_XML_SUPPORT
1380         /*
1381          * After much testing, it's clear that Postel's advice to protocol
1382          * designers ("be liberal in what you accept, and conservative in what
1383          * you send") invites a natural-law repercussion for JS as "protocol":
1384          *
1385          * "If you are liberal in what you accept, others will utterly fail to
1386          *  be conservative in what they send."
1387          *
1388          * Which means you will get <!-- comments to end of line in the middle
1389          * of .js files, and after if conditions whose then statements are on
1390          * the next line, and other wonders.  See at least the following bugs:
1391          * https://bugzilla.mozilla.org/show_bug.cgi?id=309242
1392          * https://bugzilla.mozilla.org/show_bug.cgi?id=309712
1393          * https://bugzilla.mozilla.org/show_bug.cgi?id=310993
1394          *
1395          * So without JSOPTION_XML, we changed around Firefox 1.5 never to scan
1396          * an XML comment or CDATA literal.  Instead, we always scan <! as the
1397          * start of an HTML comment hack to end of line, used since Netscape 2
1398          * to hide script tag content from script-unaware browsers.
1399          *
1400          * But this still leaves XML resources with certain internal structure
1401          * vulnerable to being loaded as script cross-origin, and some internal
1402          * data stolen, so for Firefox 3.5 and beyond, we reject programs whose
1403          * source consists only of XML literals. See:
1404          *
1405          * https://bugzilla.mozilla.org/show_bug.cgi?id=336551
1406          *
1407          * The check for this is in jsparse.cpp, Compiler::compileScript.
1408          */
1409         if ((flags & TSF_OPERAND) && (hasXML() || peekChar() != '!')) {
1410             /* Check for XML comment or CDATA section. */
1411             if (matchChar('!')) {
1412                 tokenbuf.clear();
1413
1414                 /* Scan XML comment. */
1415                 if (matchChar('-')) {
1416                     if (!matchChar('-'))
1417                         goto bad_xml_markup;
1418                     while ((c = getChar()) != '-' || !matchChar('-')) {
1419                         if (c == EOF)
1420                             goto bad_xml_markup;
1421                         if (!tokenbuf.append(c))
1422                             goto error;
1423                     }
1424                     tt = TOK_XMLCOMMENT;
1425                     tp->t_op = JSOP_XMLCOMMENT;
1426                     goto finish_xml_markup;
1427                 }
1428
1429                 /* Scan CDATA section. */
1430                 if (matchChar('[')) {
1431                     jschar cp[6];
1432                     if (peekChars(6, cp) &&
1433                         cp[0] == 'C' &&
1434                         cp[1] == 'D' &&
1435                         cp[2] == 'A' &&
1436                         cp[3] == 'T' &&
1437                         cp[4] == 'A' &&
1438                         cp[5] == '[') {
1439                         skipChars(6);
1440                         while ((c = getChar()) != ']' ||
1441                                !peekChars(2, cp) ||
1442                                cp[0] != ']' ||
1443                                cp[1] != '>') {
1444                             if (c == EOF)
1445                                 goto bad_xml_markup;
1446                             if (!tokenbuf.append(c))
1447                                 goto error;
1448                         }
1449                         getChar();            /* discard ] but not > */
1450                         tt = TOK_XMLCDATA;
1451                         tp->t_op = JSOP_XMLCDATA;
1452                         goto finish_xml_markup;
1453                     }
1454                     goto bad_xml_markup;
1455                 }
1456             }
1457
1458             /* Check for processing instruction. */
1459             if (matchChar('?')) {
1460                 inTarget = JS_TRUE;
1461                 targetLength = 0;
1462                 contentIndex = -1;
1463
1464                 tokenbuf.clear();
1465                 while ((c = getChar()) != '?' || peekChar() != '>') {
1466                     if (c == EOF)
1467                         goto bad_xml_markup;
1468                     if (inTarget) {
1469                         if (JS_ISXMLSPACE(c)) {
1470                             if (tokenbuf.empty())
1471                                 goto bad_xml_markup;
1472                             inTarget = JS_FALSE;
1473                         } else {
1474                             if (!(tokenbuf.empty()
1475                                   ? JS_ISXMLNSSTART(c)
1476                                   : JS_ISXMLNS(c))) {
1477                                 goto bad_xml_markup;
1478                             }
1479                             ++targetLength;
1480                         }
1481                     } else {
1482                         if (contentIndex < 0 && !JS_ISXMLSPACE(c))
1483                             contentIndex = tokenbuf.length();
1484                     }
1485                     if (!tokenbuf.append(c))
1486                         goto error;
1487                 }
1488                 if (targetLength == 0)
1489                     goto bad_xml_markup;
1490                 if (contentIndex < 0) {
1491                     atom = cx->runtime->atomState.emptyAtom;
1492                 } else {
1493                     atom = js_AtomizeChars(cx, tokenbuf.begin() + contentIndex,
1494                                            tokenbuf.length() - contentIndex, 0);
1495                     if (!atom)
1496                         goto error;
1497                 }
1498                 tokenbuf.shrinkBy(tokenbuf.length() - targetLength);
1499                 tp->t_atom2 = atom;
1500                 tt = TOK_XMLPI;
1501
1502         finish_xml_markup:
1503                 if (!matchChar('>'))
1504                     goto bad_xml_markup;
1505                 atom = atomize(cx, tokenbuf);
1506                 if (!atom)
1507                     goto error;
1508                 tp->t_atom = atom;
1509                 tp->pos.end.lineno = lineno;
1510                 goto out;
1511             }
1512
1513             /* An XML start-of-tag character. */
1514             tt = matchChar('/') ? TOK_XMLETAGO : TOK_XMLSTAGO;
1515             goto out;
1516
1517         bad_xml_markup:
1518             ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_XML_MARKUP);
1519             goto error;
1520         }
1521 #endif /* JS_HAS_XML_SUPPORT */
1522
1523         /* NB: treat HTML begin-comment as comment-till-end-of-line */
1524         if (matchChar('!')) {
1525             if (matchChar('-')) {
1526                 if (matchChar('-')) {
1527                     flags |= TSF_IN_HTML_COMMENT;
1528                     goto skipline;
1529                 }
1530                 ungetChar('-');
1531             }
1532             ungetChar('!');
1533         }
1534         if (matchChar(c)) {
1535             tp->t_op = JSOP_LSH;
1536             tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1537         } else {
1538             tp->t_op = matchChar('=') ? JSOP_LE : JSOP_LT;
1539             tt = TOK_RELOP;
1540         }
1541         break;
1542
1543       case '>':
1544         if (matchChar(c)) {
1545             tp->t_op = matchChar(c) ? JSOP_URSH : JSOP_RSH;
1546             tt = matchChar('=') ? TOK_ASSIGN : TOK_SHOP;
1547         } else {
1548             tp->t_op = matchChar('=') ? JSOP_GE : JSOP_GT;
1549             tt = TOK_RELOP;
1550         }
1551         break;
1552
1553       case '*':
1554         tp->t_op = JSOP_MUL;
1555         tt = matchChar('=') ? TOK_ASSIGN : TOK_STAR;
1556         break;
1557
1558       case '/':
1559         if (matchChar('/')) {
1560             /*
1561              * Hack for source filters such as the Mozilla XUL preprocessor:
1562              * "//@line 123\n" sets the number of the *next* line after the
1563              * comment to 123.
1564              */
1565             if (cx->hasAtLineOption()) {
1566                 jschar cp[5];
1567                 uintN i, line, temp;
1568                 char filenameBuf[1024];
1569
1570                 if (peekChars(5, cp) &&
1571                     cp[0] == '@' &&
1572                     cp[1] == 'l' &&
1573                     cp[2] == 'i' &&
1574                     cp[3] == 'n' &&
1575                     cp[4] == 'e') {
1576                     skipChars(5);
1577                     while ((c = getChar()) != '\n' && ScanAsSpace((jschar)c))
1578                         continue;
1579                     if (JS7_ISDEC(c)) {
1580                         line = JS7_UNDEC(c);
1581                         while ((c = getChar()) != EOF && JS7_ISDEC(c)) {
1582                             temp = 10 * line + JS7_UNDEC(c);
1583                             if (temp < line) {
1584                                 /* Ignore overlarge line numbers. */
1585                                 goto skipline;
1586                             }
1587                             line = temp;
1588                         }
1589                         while (c != '\n' && ScanAsSpace((jschar)c))
1590                             c = getChar();
1591                         i = 0;
1592                         if (c == '"') {
1593                             while ((c = getChar()) != EOF && c != '"') {
1594                                 if (c == '\n') {
1595                                     ungetChar(c);
1596                                     goto skipline;
1597                                 }
1598                                 if ((c >> 8) != 0 || i >= sizeof filenameBuf - 1)
1599                                     goto skipline;
1600                                 filenameBuf[i++] = (char) c;
1601                             }
1602                             if (c == '"') {
1603                                 while ((c = getChar()) != '\n' &&
1604                                        ScanAsSpace((jschar)c)) {
1605                                     continue;
1606                                 }
1607                             }
1608                         }
1609                         filenameBuf[i] = '\0';
1610                         if (c == EOF || c == '\n') {
1611                             if (i > 0) {
1612                                 if (flags & TSF_OWNFILENAME)
1613                                     cx->free((void *) filename);
1614                                 filename = JS_strdup(cx, filenameBuf);
1615                                 if (!filename)
1616                                     goto error;
1617                                 flags |= TSF_OWNFILENAME;
1618                             }
1619                             lineno = line;
1620                         }
1621                     }
1622                     ungetChar(c);
1623                 }
1624             }
1625
1626   skipline:
1627             /* Optimize line skipping if we are not in an HTML comment. */
1628             if (flags & TSF_IN_HTML_COMMENT) {
1629                 while ((c = getChar()) != EOF && c != '\n') {
1630                     if (c == '-' && matchChar('-') && matchChar('>'))
1631                         flags &= ~TSF_IN_HTML_COMMENT;
1632                 }
1633             } else {
1634                 while ((c = getChar()) != EOF && c != '\n')
1635                     continue;
1636             }
1637             ungetChar(c);
1638             cursor = (cursor - 1) & ntokensMask;
1639             goto retry;
1640         }
1641
1642         if (matchChar('*')) {
1643             uintN linenoBefore = lineno;
1644             while ((c = getChar()) != EOF &&
1645                    !(c == '*' && matchChar('/'))) {
1646                 /* Ignore all characters until comment close. */
1647             }
1648             if (c == EOF) {
1649                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1650                                          JSMSG_UNTERMINATED_COMMENT);
1651                 goto error;
1652             }
1653             if ((flags & TSF_NEWLINES) && linenoBefore != lineno) {
1654                 flags &= ~TSF_DIRTYLINE;
1655                 tt = TOK_EOL;
1656                 goto eol_out;
1657             }
1658             cursor = (cursor - 1) & ntokensMask;
1659             goto retry;
1660         }
1661
1662         if (flags & TSF_OPERAND) {
1663             uintN reflags, length;
1664             JSBool inCharClass = JS_FALSE;
1665
1666             tokenbuf.clear();
1667             for (;;) {
1668                 c = getChar();
1669                 if (c == '\\') {
1670                     if (!tokenbuf.append(c))
1671                         goto error;
1672                     c = getChar();
1673                 } else if (c == '[') {
1674                     inCharClass = JS_TRUE;
1675                 } else if (c == ']') {
1676                     inCharClass = JS_FALSE;
1677                 } else if (c == '/' && !inCharClass) {
1678                     /* For compat with IE, allow unescaped / in char classes. */
1679                     break;
1680                 }
1681                 if (c == '\n' || c == EOF) {
1682                     ungetChar(c);
1683                     ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR,
1684                                              JSMSG_UNTERMINATED_REGEXP);
1685                     goto error;
1686                 }
1687                 if (!tokenbuf.append(c))
1688                     goto error;
1689             }
1690             for (reflags = 0, length = tokenbuf.length() + 1; ; length++) {
1691                 c = peekChar();
1692                 if (c == 'g' && !(reflags & JSREG_GLOB))
1693                     reflags |= JSREG_GLOB;
1694                 else if (c == 'i' && !(reflags & JSREG_FOLD))
1695                     reflags |= JSREG_FOLD;
1696                 else if (c == 'm' && !(reflags & JSREG_MULTILINE))
1697                     reflags |= JSREG_MULTILINE;
1698                 else if (c == 'y' && !(reflags & JSREG_STICKY))
1699                     reflags |= JSREG_STICKY;
1700                 else
1701                     break;
1702                 getChar();
1703             }
1704             c = peekChar();
1705             if (JS7_ISLET(c)) {
1706                 char buf[2] = { '\0' };
1707                 tp->pos.begin.index += length + 1;
1708                 buf[0] = (char)c;
1709                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_BAD_REGEXP_FLAG,
1710                                          buf);
1711                 (void) getChar();
1712                 goto error;
1713             }
1714             tp->t_reflags = reflags;
1715             tt = TOK_REGEXP;
1716             break;
1717         }
1718
1719         tp->t_op = JSOP_DIV;
1720         tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1721         break;
1722
1723       case '%':
1724         tp->t_op = JSOP_MOD;
1725         tt = matchChar('=') ? TOK_ASSIGN : TOK_DIVOP;
1726         break;
1727
1728       case '~':
1729         tp->t_op = JSOP_BITNOT;
1730         tt = TOK_UNARYOP;
1731         break;
1732
1733       case '+':
1734         if (matchChar('=')) {
1735             tp->t_op = JSOP_ADD;
1736             tt = TOK_ASSIGN;
1737         } else if (matchChar(c)) {
1738             tt = TOK_INC;
1739         } else {
1740             tp->t_op = JSOP_POS;
1741             tt = TOK_PLUS;
1742         }
1743         break;
1744
1745       case '-':
1746         if (matchChar('=')) {
1747             tp->t_op = JSOP_SUB;
1748             tt = TOK_ASSIGN;
1749         } else if (matchChar(c)) {
1750             if (peekChar() == '>' && !(flags & TSF_DIRTYLINE)) {
1751                 flags &= ~TSF_IN_HTML_COMMENT;
1752                 goto skipline;
1753             }
1754             tt = TOK_DEC;
1755         } else {
1756             tp->t_op = JSOP_NEG;
1757             tt = TOK_MINUS;
1758         }
1759         break;
1760
1761 #if JS_HAS_SHARP_VARS
1762       case '#':
1763       {
1764         uint32 n;
1765
1766         c = getChar();
1767         if (!JS7_ISDEC(c)) {
1768             ungetChar(c);
1769             goto badchar;
1770         }
1771         n = (uint32)JS7_UNDEC(c);
1772         for (;;) {
1773             c = getChar();
1774             if (!JS7_ISDEC(c))
1775                 break;
1776             n = 10 * n + JS7_UNDEC(c);
1777             if (n >= UINT16_LIMIT) {
1778                 ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_SHARPVAR_TOO_BIG);
1779                 goto error;
1780             }
1781         }
1782         tp->t_dval = (jsdouble) n;
1783         if (cx->hasStrictOption() &&
1784             (c == '=' || c == '#')) {
1785             char buf[20];
1786             JS_snprintf(buf, sizeof buf, "#%u%c", n, c);
1787             if (!ReportCompileErrorNumber(cx, this, NULL, JSREPORT_WARNING | JSREPORT_STRICT,
1788                                           JSMSG_DEPRECATED_USAGE, buf)) {
1789                 goto error;
1790             }
1791         }
1792         if (c == '=')
1793             tt = TOK_DEFSHARP;
1794         else if (c == '#')
1795             tt = TOK_USESHARP;
1796         else
1797             goto badchar;
1798         break;
1799       }
1800 #endif /* JS_HAS_SHARP_VARS */
1801
1802 #if JS_HAS_SHARP_VARS || JS_HAS_XML_SUPPORT
1803       badchar:
1804 #endif
1805
1806       default:
1807         ReportCompileErrorNumber(cx, this, NULL, JSREPORT_ERROR, JSMSG_ILLEGAL_CHARACTER);
1808         goto error;
1809     }
1810
1811   out:
1812     JS_ASSERT(tt != TOK_EOL);
1813     flags |= TSF_DIRTYLINE;
1814
1815   eol_out:
1816     JS_ASSERT(tt < TOK_LIMIT);
1817     tp->pos.end.index = userbuf.ptr - linebase;
1818     tp->type = tt;
1819     return tt;
1820
1821   error:
1822     tt = TOK_ERROR;
1823     flags |= TSF_ERROR;
1824     goto out;
1825 }
1826