src/v8/test/cctest/test-parsing.cc

   1 // Copyright 2012 the V8 project authors. All rights reserved.
   2 // Redistribution and use in source and binary forms, with or without
   3 // modification, are permitted provided that the following conditions are
   4 // met:
   5 //
   6 //     * Redistributions of source code must retain the above copyright
   7 //       notice, this list of conditions and the following disclaimer.
   8 //     * Redistributions in binary form must reproduce the above
   9 //       copyright notice, this list of conditions and the following
  10 //       disclaimer in the documentation and/or other materials provided
  11 //       with the distribution.
  12 //     * Neither the name of Google Inc. nor the names of its
  13 //       contributors may be used to endorse or promote products derived
  14 //       from this software without specific prior written permission.
  15 //
  16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  27
  28 #include <stdlib.h>
  29 #include <stdio.h>
  30 #include <string.h>
  31
  32 #include "v8.h"
  33
  34 #include "cctest.h"
  35 #include "compiler.h"
  36 #include "execution.h"
  37 #include "isolate.h"
  38 #include "parser.h"
  39 #include "preparser.h"
  40 #include "scanner-character-streams.h"
  41 #include "token.h"
  42 #include "utils.h"
  43
  44 TEST(ScanKeywords) {
  45   struct KeywordToken {
  46     const char* keyword;
  47     i::Token::Value token;
  48   };
  49
  50   static const KeywordToken keywords[] = {
  51 #define KEYWORD(t, s, d) { s, i::Token::t },
  52       TOKEN_LIST(IGNORE_TOKEN, KEYWORD)
  53 #undef KEYWORD
  54       { NULL, i::Token::IDENTIFIER }
  55   };
  56
  57   KeywordToken key_token;
  58   i::UnicodeCache unicode_cache;
  59   i::byte buffer[32];
  60   for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) {
  61     const i::byte* keyword =
  62         reinterpret_cast<const i::byte*>(key_token.keyword);
  63     int length = i::StrLength(key_token.keyword);
  64     CHECK(static_cast<int>(sizeof(buffer)) >= length);
  65     {
  66       i::Utf8ToUtf16CharacterStream stream(keyword, length);
  67       i::Scanner scanner(&unicode_cache);
  68       // The scanner should parse Harmony keywords for this test.
  69       scanner.SetHarmonyScoping(true);
  70       scanner.SetHarmonyModules(true);
  71       scanner.Initialize(&stream);
  72       CHECK_EQ(key_token.token, scanner.Next());
  73       CHECK_EQ(i::Token::EOS, scanner.Next());
  74     }
  75     // Removing characters will make keyword matching fail.
  76     {
  77       i::Utf8ToUtf16CharacterStream stream(keyword, length - 1);
  78       i::Scanner scanner(&unicode_cache);
  79       scanner.Initialize(&stream);
  80       CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
  81       CHECK_EQ(i::Token::EOS, scanner.Next());
  82     }
  83     // Adding characters will make keyword matching fail.
  84     static const char chars_to_append[] = { 'z', '0', '_' };
  85     for (int j = 0; j < static_cast<int>(ARRAY_SIZE(chars_to_append)); ++j) {
  86       i::OS::MemMove(buffer, keyword, length);
  87       buffer[length] = chars_to_append[j];
  88       i::Utf8ToUtf16CharacterStream stream(buffer, length + 1);
  89       i::Scanner scanner(&unicode_cache);
  90       scanner.Initialize(&stream);
  91       CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
  92       CHECK_EQ(i::Token::EOS, scanner.Next());
  93     }
  94     // Replacing characters will make keyword matching fail.
  95     {
  96       i::OS::MemMove(buffer, keyword, length);
  97       buffer[length - 1] = '_';
  98       i::Utf8ToUtf16CharacterStream stream(buffer, length);
  99       i::Scanner scanner(&unicode_cache);
 100       scanner.Initialize(&stream);
 101       CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
 102       CHECK_EQ(i::Token::EOS, scanner.Next());
 103     }
 104   }
 105 }
 106
 107
 108 TEST(ScanHTMLEndComments) {
 109   v8::V8::Initialize();
 110   v8::Isolate* isolate = CcTest::isolate();
 111   v8::HandleScope handles(isolate);
 112
 113   // Regression test. See:
 114   //    http://code.google.com/p/chromium/issues/detail?id=53548
 115   // Tests that --> is correctly interpreted as comment-to-end-of-line if there
 116   // is only whitespace before it on the line (with comments considered as
 117   // whitespace, even a multiline-comment containing a newline).
 118   // This was not the case if it occurred before the first real token
 119   // in the input.
 120   const char* tests[] = {
 121       // Before first real token.
 122       "--> is eol-comment\nvar y = 37;\n",
 123       "\n --> is eol-comment\nvar y = 37;\n",
 124       "/* precomment */ --> is eol-comment\nvar y = 37;\n",
 125       "\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
 126       // After first real token.
 127       "var x = 42;\n--> is eol-comment\nvar y = 37;\n",
 128       "var x = 42;\n/* precomment */ --> is eol-comment\nvar y = 37;\n",
 129       NULL
 130   };
 131
 132   const char* fail_tests[] = {
 133       "x --> is eol-comment\nvar y = 37;\n",
 134       "\"\\n\" --> is eol-comment\nvar y = 37;\n",
 135       "x/* precomment */ --> is eol-comment\nvar y = 37;\n",
 136       "x/* precomment\n */ --> is eol-comment\nvar y = 37;\n",
 137       "var x = 42; --> is eol-comment\nvar y = 37;\n",
 138       "var x = 42; /* precomment\n */ --> is eol-comment\nvar y = 37;\n",
 139       NULL
 140   };
 141
 142   // Parser/Scanner needs a stack limit.
 143   int marker;
 144   CcTest::i_isolate()->stack_guard()->SetStackLimit(
 145       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
 146
 147   for (int i = 0; tests[i]; i++) {
 148     v8::Handle<v8::String> source = v8::String::NewFromUtf8(
 149         isolate, tests[i], v8::String::kNormalString, i::StrLength(tests[i]));
 150     v8::ScriptData* data = v8::ScriptData::PreCompile(source);
 151     CHECK(data != NULL && !data->HasError());
 152     delete data;
 153   }
 154
 155   for (int i = 0; fail_tests[i]; i++) {
 156     v8::Handle<v8::String> source =
 157         v8::String::NewFromUtf8(isolate,
 158                                 fail_tests[i],
 159                                 v8::String::kNormalString,
 160                                 i::StrLength(fail_tests[i]));
 161     v8::ScriptData* data = v8::ScriptData::PreCompile(source);
 162     CHECK(data == NULL || data->HasError());
 163     delete data;
 164   }
 165 }
 166
 167
 168 class ScriptResource : public v8::String::ExternalAsciiStringResource {
 169  public:
 170   ScriptResource(const char* data, size_t length)
 171       : data_(data), length_(length) { }
 172
 173   const char* data() const { return data_; }
 174   size_t length() const { return length_; }
 175
 176  private:
 177   const char* data_;
 178   size_t length_;
 179 };
 180
 181
 182 TEST(Preparsing) {
 183   v8::Isolate* isolate = CcTest::isolate();
 184   v8::HandleScope handles(isolate);
 185   v8::Local<v8::Context> context = v8::Context::New(isolate);
 186   v8::Context::Scope context_scope(context);
 187   int marker;
 188   CcTest::i_isolate()->stack_guard()->SetStackLimit(
 189       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
 190
 191   // Source containing functions that might be lazily compiled  and all types
 192   // of symbols (string, propertyName, regexp).
 193   const char* source =
 194       "var x = 42;"
 195       "function foo(a) { return function nolazy(b) { return a + b; } }"
 196       "function bar(a) { if (a) return function lazy(b) { return b; } }"
 197       "var z = {'string': 'string literal', bareword: 'propertyName', "
 198       "         42: 'number literal', for: 'keyword as propertyName', "
 199       "         f\\u006fr: 'keyword propertyname with escape'};"
 200       "var v = /RegExp Literal/;"
 201       "var w = /RegExp Literal\\u0020With Escape/gin;"
 202       "var y = { get getter() { return 42; }, "
 203       "          set setter(v) { this.value = v; }};";
 204   int source_length = i::StrLength(source);
 205   const char* error_source = "var x = y z;";
 206   int error_source_length = i::StrLength(error_source);
 207
 208   v8::ScriptData* preparse = v8::ScriptData::PreCompile(v8::String::NewFromUtf8(
 209       isolate, source, v8::String::kNormalString, source_length));
 210   CHECK(!preparse->HasError());
 211   bool lazy_flag = i::FLAG_lazy;
 212   {
 213     i::FLAG_lazy = true;
 214     ScriptResource* resource = new ScriptResource(source, source_length);
 215     v8::Local<v8::String> script_source =
 216         v8::String::NewExternal(isolate, resource);
 217     v8::Script::Compile(script_source, NULL, preparse);
 218   }
 219
 220   {
 221     i::FLAG_lazy = false;
 222
 223     ScriptResource* resource = new ScriptResource(source, source_length);
 224     v8::Local<v8::String> script_source =
 225         v8::String::NewExternal(isolate, resource);
 226     v8::Script::New(script_source, NULL, preparse, v8::Local<v8::String>());
 227   }
 228   delete preparse;
 229   i::FLAG_lazy = lazy_flag;
 230
 231   // Syntax error.
 232   v8::ScriptData* error_preparse = v8::ScriptData::PreCompile(
 233       v8::String::NewFromUtf8(isolate,
 234                               error_source,
 235                               v8::String::kNormalString,
 236                               error_source_length));
 237   CHECK(error_preparse->HasError());
 238   i::ScriptDataImpl *pre_impl =
 239       reinterpret_cast<i::ScriptDataImpl*>(error_preparse);
 240   i::Scanner::Location error_location =
 241       pre_impl->MessageLocation();
 242   // Error is at "z" in source, location 10..11.
 243   CHECK_EQ(10, error_location.beg_pos);
 244   CHECK_EQ(11, error_location.end_pos);
 245   // Should not crash.
 246   const char* message = pre_impl->BuildMessage();
 247   pre_impl->BuildArgs();
 248   CHECK_GT(strlen(message), 0);
 249 }
 250
 251
 252 TEST(StandAlonePreParser) {
 253   v8::V8::Initialize();
 254
 255   int marker;
 256   CcTest::i_isolate()->stack_guard()->SetStackLimit(
 257       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
 258
 259   const char* programs[] = {
 260       "{label: 42}",
 261       "var x = 42;",
 262       "function foo(x, y) { return x + y; }",
 263       "%ArgleBargle(glop);",
 264       "var x = new new Function('this.x = 42');",
 265       NULL
 266   };
 267
 268   uintptr_t stack_limit = CcTest::i_isolate()->stack_guard()->real_climit();
 269   for (int i = 0; programs[i]; i++) {
 270     const char* program = programs[i];
 271     i::Utf8ToUtf16CharacterStream stream(
 272         reinterpret_cast<const i::byte*>(program),
 273         static_cast<unsigned>(strlen(program)));
 274     i::CompleteParserRecorder log;
 275     i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
 276     scanner.Initialize(&stream);
 277
 278     i::PreParser preparser(&scanner, &log, stack_limit);
 279     preparser.set_allow_lazy(true);
 280     preparser.set_allow_natives_syntax(true);
 281     i::PreParser::PreParseResult result = preparser.PreParseProgram();
 282     CHECK_EQ(i::PreParser::kPreParseSuccess, result);
 283     i::ScriptDataImpl data(log.ExtractData());
 284     CHECK(!data.has_error());
 285   }
 286 }
 287
 288
 289 TEST(StandAlonePreParserNoNatives) {
 290   v8::V8::Initialize();
 291
 292   int marker;
 293   CcTest::i_isolate()->stack_guard()->SetStackLimit(
 294       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
 295
 296   const char* programs[] = {
 297       "%ArgleBargle(glop);",
 298       "var x = %_IsSmi(42);",
 299       NULL
 300   };
 301
 302   uintptr_t stack_limit = CcTest::i_isolate()->stack_guard()->real_climit();
 303   for (int i = 0; programs[i]; i++) {
 304     const char* program = programs[i];
 305     i::Utf8ToUtf16CharacterStream stream(
 306         reinterpret_cast<const i::byte*>(program),
 307         static_cast<unsigned>(strlen(program)));
 308     i::CompleteParserRecorder log;
 309     i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
 310     scanner.Initialize(&stream);
 311
 312     // Preparser defaults to disallowing natives syntax.
 313     i::PreParser preparser(&scanner, &log, stack_limit);
 314     preparser.set_allow_lazy(true);
 315     i::PreParser::PreParseResult result = preparser.PreParseProgram();
 316     CHECK_EQ(i::PreParser::kPreParseSuccess, result);
 317     i::ScriptDataImpl data(log.ExtractData());
 318     // Data contains syntax error.
 319     CHECK(data.has_error());
 320   }
 321 }
 322
 323
 324 TEST(RegressChromium62639) {
 325   v8::V8::Initialize();
 326   i::Isolate* isolate = CcTest::i_isolate();
 327
 328   int marker;
 329   isolate->stack_guard()->SetStackLimit(
 330       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
 331
 332   const char* program = "var x = 'something';\n"
 333                         "escape: function() {}";
 334   // Fails parsing expecting an identifier after "function".
 335   // Before fix, didn't check *ok after Expect(Token::Identifier, ok),
 336   // and then used the invalid currently scanned literal. This always
 337   // failed in debug mode, and sometimes crashed in release mode.
 338
 339   i::Utf8ToUtf16CharacterStream stream(
 340       reinterpret_cast<const i::byte*>(program),
 341       static_cast<unsigned>(strlen(program)));
 342   i::ScriptDataImpl* data = i::PreParserApi::PreParse(isolate, &stream);
 343   CHECK(data->HasError());
 344   delete data;
 345 }
 346
 347
 348 TEST(Regress928) {
 349   v8::V8::Initialize();
 350   i::Isolate* isolate = CcTest::i_isolate();
 351   i::Factory* factory = isolate->factory();
 352
 353   // Preparsing didn't consider the catch clause of a try statement
 354   // as with-content, which made it assume that a function inside
 355   // the block could be lazily compiled, and an extra, unexpected,
 356   // entry was added to the data.
 357   int marker;
 358   isolate->stack_guard()->SetStackLimit(
 359       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
 360
 361   const char* program =
 362       "try { } catch (e) { var foo = function () { /* first */ } }"
 363       "var bar = function () { /* second */ }";
 364
 365   v8::HandleScope handles(CcTest::isolate());
 366   i::Handle<i::String> source(
 367       factory->NewStringFromAscii(i::CStrVector(program)));
 368   i::GenericStringUtf16CharacterStream stream(source, 0, source->length());
 369   i::ScriptDataImpl* data = i::PreParserApi::PreParse(isolate, &stream);
 370   CHECK(!data->HasError());
 371
 372   data->Initialize();
 373
 374   int first_function =
 375       static_cast<int>(strstr(program, "function") - program);
 376   int first_lbrace = first_function + i::StrLength("function () ");
 377   CHECK_EQ('{', program[first_lbrace]);
 378   i::FunctionEntry entry1 = data->GetFunctionEntry(first_lbrace);
 379   CHECK(!entry1.is_valid());
 380
 381   int second_function =
 382       static_cast<int>(strstr(program + first_lbrace, "function") - program);
 383   int second_lbrace =
 384       second_function + i::StrLength("function () ");
 385   CHECK_EQ('{', program[second_lbrace]);
 386   i::FunctionEntry entry2 = data->GetFunctionEntry(second_lbrace);
 387   CHECK(entry2.is_valid());
 388   CHECK_EQ('}', program[entry2.end_pos() - 1]);
 389   delete data;
 390 }
 391
 392
 393 TEST(PreParseOverflow) {
 394   v8::V8::Initialize();
 395
 396   int marker;
 397   CcTest::i_isolate()->stack_guard()->SetStackLimit(
 398       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
 399
 400   size_t kProgramSize = 1024 * 1024;
 401   i::SmartArrayPointer<char> program(i::NewArray<char>(kProgramSize + 1));
 402   memset(program.get(), '(', kProgramSize);
 403   program[kProgramSize] = '\0';
 404
 405   uintptr_t stack_limit = CcTest::i_isolate()->stack_guard()->real_climit();
 406
 407   i::Utf8ToUtf16CharacterStream stream(
 408       reinterpret_cast<const i::byte*>(program.get()),
 409       static_cast<unsigned>(kProgramSize));
 410   i::CompleteParserRecorder log;
 411   i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
 412   scanner.Initialize(&stream);
 413
 414   i::PreParser preparser(&scanner, &log, stack_limit);
 415   preparser.set_allow_lazy(true);
 416   i::PreParser::PreParseResult result = preparser.PreParseProgram();
 417   CHECK_EQ(i::PreParser::kPreParseStackOverflow, result);
 418 }
 419
 420
 421 class TestExternalResource: public v8::String::ExternalStringResource {
 422  public:
 423   explicit TestExternalResource(uint16_t* data, int length)
 424       : data_(data), length_(static_cast<size_t>(length)) { }
 425
 426   ~TestExternalResource() { }
 427
 428   const uint16_t* data() const {
 429     return data_;
 430   }
 431
 432   size_t length() const {
 433     return length_;
 434   }
 435  private:
 436   uint16_t* data_;
 437   size_t length_;
 438 };
 439
 440
// CHECK_EQ with both operands converted to int first. Used below to compare
// loop counters against stream positions.
#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))
 442
// Reads the same characters through three stream implementations (external
// two-byte string, generic string and UTF-8 buffer) and checks that they
// agree on every character and position while advancing, pushing back and
// seeking forward.
//
// ascii_source: the characters to stream; may contain embedded '\0'.
// length:       number of characters in ascii_source.
// start, end:   sub-range [start, end) to stream. end == 0 means "up to
//               length".
void TestCharacterStream(const char* ascii_source,
                         unsigned length,
                         unsigned start = 0,
                         unsigned end = 0) {
  if (end == 0) end = length;
  unsigned sub_length = end - start;
  i::Isolate* isolate = CcTest::i_isolate();
  i::Factory* factory = isolate->factory();
  i::HandleScope test_scope(isolate);
  // Widened copy of the source that backs the external two-byte string.
  i::SmartArrayPointer<i::uc16> uc16_buffer(new i::uc16[length]);
  for (unsigned i = 0; i < length; i++) {
    uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
  }
  i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
  i::Handle<i::String> ascii_string(
      factory->NewStringFromAscii(ascii_vector));
  TestExternalResource resource(uc16_buffer.get(), length);
  i::Handle<i::String> uc16_string(
      factory->NewExternalStringFromTwoByte(&resource));

  i::ExternalTwoByteStringUtf16CharacterStream uc16_stream(
      i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
  i::GenericStringUtf16CharacterStream string_stream(ascii_string, start, end);
  // The UTF-8 stream is created over the first |end| bytes and then moved
  // forward to |start| explicitly.
  i::Utf8ToUtf16CharacterStream utf8_stream(
      reinterpret_cast<const i::byte*>(ascii_source), end);
  utf8_stream.SeekForward(start);

  // Phase 1: read everything from start to end.
  unsigned i = start;
  while (i < end) {
    // Read streams one char at a time
    CHECK_EQU(i, uc16_stream.pos());
    CHECK_EQU(i, string_stream.pos());
    CHECK_EQU(i, utf8_stream.pos());
    int32_t c0 = ascii_source[i];
    int32_t c1 = uc16_stream.Advance();
    int32_t c2 = string_stream.Advance();
    int32_t c3 = utf8_stream.Advance();
    i++;
    CHECK_EQ(c0, c1);
    CHECK_EQ(c0, c2);
    CHECK_EQ(c0, c3);
    CHECK_EQU(i, uc16_stream.pos());
    CHECK_EQU(i, string_stream.pos());
    CHECK_EQU(i, utf8_stream.pos());
  }
  // Phase 2: walk backwards until a quarter of the way into the sub-range.
  // Each iteration ends one position further back.
  while (i > start + sub_length / 4) {
    // Pushback, re-read, pushback again.
    int32_t c0 = ascii_source[i - 1];
    CHECK_EQU(i, uc16_stream.pos());
    CHECK_EQU(i, string_stream.pos());
    CHECK_EQU(i, utf8_stream.pos());
    uc16_stream.PushBack(c0);
    string_stream.PushBack(c0);
    utf8_stream.PushBack(c0);
    i--;
    CHECK_EQU(i, uc16_stream.pos());
    CHECK_EQU(i, string_stream.pos());
    CHECK_EQU(i, utf8_stream.pos());
    int32_t c1 = uc16_stream.Advance();
    int32_t c2 = string_stream.Advance();
    int32_t c3 = utf8_stream.Advance();
    i++;
    CHECK_EQU(i, uc16_stream.pos());
    CHECK_EQU(i, string_stream.pos());
    CHECK_EQU(i, utf8_stream.pos());
    CHECK_EQ(c0, c1);
    CHECK_EQ(c0, c2);
    CHECK_EQ(c0, c3);
    uc16_stream.PushBack(c0);
    string_stream.PushBack(c0);
    utf8_stream.PushBack(c0);
    i--;
    CHECK_EQU(i, uc16_stream.pos());
    CHECK_EQU(i, string_stream.pos());
    CHECK_EQU(i, utf8_stream.pos());
  }
  // Phase 3: seek forward from the quarter point to the middle of the
  // sub-range.
  unsigned halfway = start + sub_length / 2;
  uc16_stream.SeekForward(halfway - i);
  string_stream.SeekForward(halfway - i);
  utf8_stream.SeekForward(halfway - i);
  i = halfway;
  CHECK_EQU(i, uc16_stream.pos());
  CHECK_EQU(i, string_stream.pos());
  CHECK_EQU(i, utf8_stream.pos());

  // Phase 4: read the remainder, from the middle to end.
  while (i < end) {
    // Read streams one char at a time
    CHECK_EQU(i, uc16_stream.pos());
    CHECK_EQU(i, string_stream.pos());
    CHECK_EQU(i, utf8_stream.pos());
    int32_t c0 = ascii_source[i];
    int32_t c1 = uc16_stream.Advance();
    int32_t c2 = string_stream.Advance();
    int32_t c3 = utf8_stream.Advance();
    i++;
    CHECK_EQ(c0, c1);
    CHECK_EQ(c0, c2);
    CHECK_EQ(c0, c3);
    CHECK_EQU(i, uc16_stream.pos());
    CHECK_EQU(i, string_stream.pos());
    CHECK_EQU(i, utf8_stream.pos());
  }

  // Advancing past the end must yield a negative value on every stream.
  int32_t c1 = uc16_stream.Advance();
  int32_t c2 = string_stream.Advance();
  int32_t c3 = utf8_stream.Advance();
  CHECK_LT(c1, 0);
  CHECK_LT(c2, 0);
  CHECK_LT(c3, 0);
}
 553
 554
 555 TEST(CharacterStreams) {
 556   v8::Isolate* isolate = CcTest::isolate();
 557   v8::HandleScope handles(isolate);
 558   v8::Local<v8::Context> context = v8::Context::New(isolate);
 559   v8::Context::Scope context_scope(context);
 560
 561   TestCharacterStream("abc\0\n\r\x7f", 7);
 562   static const unsigned kBigStringSize = 4096;
 563   char buffer[kBigStringSize + 1];
 564   for (unsigned i = 0; i < kBigStringSize; i++) {
 565     buffer[i] = static_cast<char>(i & 0x7f);
 566   }
 567   TestCharacterStream(buffer, kBigStringSize);
 568
 569   TestCharacterStream(buffer, kBigStringSize, 576, 3298);
 570
 571   TestCharacterStream("\0", 1);
 572   TestCharacterStream("", 0);
 573 }
 574
 575
 576 TEST(Utf8CharacterStream) {
 577   static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
 578   static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);
 579
 580   static const int kAllUtf8CharsSize =
 581       (unibrow::Utf8::kMaxOneByteChar + 1) +
 582       (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +
 583       (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;
 584   static const unsigned kAllUtf8CharsSizeU =
 585       static_cast<unsigned>(kAllUtf8CharsSize);
 586
 587   char buffer[kAllUtf8CharsSizeU];
 588   unsigned cursor = 0;
 589   for (int i = 0; i <= kMaxUC16Char; i++) {
 590     cursor += unibrow::Utf8::Encode(buffer + cursor,
 591                                     i,
 592                                     unibrow::Utf16::kNoPreviousCharacter);
 593   }
 594   ASSERT(cursor == kAllUtf8CharsSizeU);
 595
 596   i::Utf8ToUtf16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
 597                                        kAllUtf8CharsSizeU);
 598   for (int i = 0; i <= kMaxUC16Char; i++) {
 599     CHECK_EQU(i, stream.pos());
 600     int32_t c = stream.Advance();
 601     CHECK_EQ(i, c);
 602     CHECK_EQU(i + 1, stream.pos());
 603   }
 604   for (int i = kMaxUC16Char; i >= 0; i--) {
 605     CHECK_EQU(i + 1, stream.pos());
 606     stream.PushBack(i);
 607     CHECK_EQU(i, stream.pos());
 608   }
 609   int i = 0;
 610   while (stream.pos() < kMaxUC16CharU) {
 611     CHECK_EQU(i, stream.pos());
 612     unsigned progress = stream.SeekForward(12);
 613     i += progress;
 614     int32_t c = stream.Advance();
 615     if (i <= kMaxUC16Char) {
 616       CHECK_EQ(i, c);
 617     } else {
 618       CHECK_EQ(-1, c);
 619     }
 620     i += 1;
 621     CHECK_EQU(i, stream.pos());
 622   }
 623 }
 624
 625 #undef CHECK_EQU
 626
 627 void TestStreamScanner(i::Utf16CharacterStream* stream,
 628                        i::Token::Value* expected_tokens,
 629                        int skip_pos = 0,  // Zero means not skipping.
 630                        int skip_to = 0) {
 631   i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
 632   scanner.Initialize(stream);
 633
 634   int i = 0;
 635   do {
 636     i::Token::Value expected = expected_tokens[i];
 637     i::Token::Value actual = scanner.Next();
 638     CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
 639     if (scanner.location().end_pos == skip_pos) {
 640       scanner.SeekForward(skip_to);
 641     }
 642     i++;
 643   } while (expected_tokens[i] != i::Token::ILLEGAL);
 644 }
 645
 646
 647 TEST(StreamScanner) {
 648   v8::V8::Initialize();
 649
 650   const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
 651   i::Utf8ToUtf16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
 652                                         static_cast<unsigned>(strlen(str1)));
 653   i::Token::Value expectations1[] = {
 654       i::Token::LBRACE,
 655       i::Token::IDENTIFIER,
 656       i::Token::IDENTIFIER,
 657       i::Token::FOR,
 658       i::Token::COLON,
 659       i::Token::MUL,
 660       i::Token::DIV,
 661       i::Token::LT,
 662       i::Token::SUB,
 663       i::Token::IDENTIFIER,
 664       i::Token::EOS,
 665       i::Token::ILLEGAL
 666   };
 667   TestStreamScanner(&stream1, expectations1, 0, 0);
 668
 669   const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
 670   i::Utf8ToUtf16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
 671                                         static_cast<unsigned>(strlen(str2)));
 672   i::Token::Value expectations2[] = {
 673       i::Token::CASE,
 674       i::Token::DEFAULT,
 675       i::Token::CONST,
 676       i::Token::LBRACE,
 677       // Skipped part here
 678       i::Token::RBRACE,
 679       i::Token::DO,
 680       i::Token::EOS,
 681       i::Token::ILLEGAL
 682   };
 683   ASSERT_EQ('{', str2[19]);
 684   ASSERT_EQ('}', str2[37]);
 685   TestStreamScanner(&stream2, expectations2, 20, 37);
 686
 687   const char* str3 = "{}}}}";
 688   i::Token::Value expectations3[] = {
 689       i::Token::LBRACE,
 690       i::Token::RBRACE,
 691       i::Token::RBRACE,
 692       i::Token::RBRACE,
 693       i::Token::RBRACE,
 694       i::Token::EOS,
 695       i::Token::ILLEGAL
 696   };
 697   // Skip zero-four RBRACEs.
 698   for (int i = 0; i <= 4; i++) {
 699      expectations3[6 - i] = i::Token::ILLEGAL;
 700      expectations3[5 - i] = i::Token::EOS;
 701      i::Utf8ToUtf16CharacterStream stream3(
 702          reinterpret_cast<const i::byte*>(str3),
 703          static_cast<unsigned>(strlen(str3)));
 704      TestStreamScanner(&stream3, expectations3, 1, 1 + i);
 705   }
 706 }
 707
 708
 709 void TestScanRegExp(const char* re_source, const char* expected) {
 710   i::Utf8ToUtf16CharacterStream stream(
 711        reinterpret_cast<const i::byte*>(re_source),
 712        static_cast<unsigned>(strlen(re_source)));
 713   i::Scanner scanner(CcTest::i_isolate()->unicode_cache());
 714   scanner.Initialize(&stream);
 715
 716   i::Token::Value start = scanner.peek();
 717   CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV);
 718   CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV));
 719   scanner.Next();  // Current token is now the regexp literal.
 720   CHECK(scanner.is_literal_ascii());
 721   i::Vector<const char> actual = scanner.literal_ascii_string();
 722   for (int i = 0; i < actual.length(); i++) {
 723     CHECK_NE('\0', expected[i]);
 724     CHECK_EQ(expected[i], actual[i]);
 725   }
 726 }
 727
 728
 729 TEST(RegExpScanning) {
 730   v8::V8::Initialize();
 731
 732   // RegExp token with added garbage at the end. The scanner should only
 733   // scan the RegExp until the terminating slash just before "flipperwald".
 734   TestScanRegExp("/b/flipperwald", "b");
 735   // Incomplete escape sequences doesn't hide the terminating slash.
 736   TestScanRegExp("/\\x/flipperwald", "\\x");
 737   TestScanRegExp("/\\u/flipperwald", "\\u");
 738   TestScanRegExp("/\\u1/flipperwald", "\\u1");
 739   TestScanRegExp("/\\u12/flipperwald", "\\u12");
 740   TestScanRegExp("/\\u123/flipperwald", "\\u123");
 741   TestScanRegExp("/\\c/flipperwald", "\\c");
 742   TestScanRegExp("/\\c//flipperwald", "\\c");
 743   // Slashes inside character classes are not terminating.
 744   TestScanRegExp("/[/]/flipperwald", "[/]");
 745   TestScanRegExp("/[\\s-/]/flipperwald", "[\\s-/]");
 746   // Incomplete escape sequences inside a character class doesn't hide
 747   // the end of the character class.
 748   TestScanRegExp("/[\\c/]/flipperwald", "[\\c/]");
 749   TestScanRegExp("/[\\c]/flipperwald", "[\\c]");
 750   TestScanRegExp("/[\\x]/flipperwald", "[\\x]");
 751   TestScanRegExp("/[\\x1]/flipperwald", "[\\x1]");
 752   TestScanRegExp("/[\\u]/flipperwald", "[\\u]");
 753   TestScanRegExp("/[\\u1]/flipperwald", "[\\u1]");
 754   TestScanRegExp("/[\\u12]/flipperwald", "[\\u12]");
 755   TestScanRegExp("/[\\u123]/flipperwald", "[\\u123]");
 756   // Escaped ']'s wont end the character class.
 757   TestScanRegExp("/[\\]/]/flipperwald", "[\\]/]");
 758   // Escaped slashes are not terminating.
 759   TestScanRegExp("/\\//flipperwald", "\\/");
 760   // Starting with '=' works too.
 761   TestScanRegExp("/=/", "=");
 762   TestScanRegExp("/=?/", "=?");
 763 }
 764
 765
 766 static int Utf8LengthHelper(const char* s) {
 767   int len = i::StrLength(s);
 768   int character_length = len;
 769   for (int i = 0; i < len; i++) {
 770     unsigned char c = s[i];
 771     int input_offset = 0;
 772     int output_adjust = 0;
 773     if (c > 0x7f) {
 774       if (c < 0xc0) continue;
 775       if (c >= 0xf0) {
 776         if (c >= 0xf8) {
 777           // 5 and 6 byte UTF-8 sequences turn into a kBadChar for each UTF-8
 778           // byte.
 779           continue;  // Handle first UTF-8 byte.
 780         }
 781         if ((c & 7) == 0 && ((s[i + 1] & 0x30) == 0)) {
 782           // This 4 byte sequence could have been coded as a 3 byte sequence.
 783           // Record a single kBadChar for the first byte and continue.
 784           continue;
 785         }
 786         input_offset = 3;
 787         // 4 bytes of UTF-8 turn into 2 UTF-16 code units.
 788         character_length -= 2;
 789       } else if (c >= 0xe0) {
 790         if ((c & 0xf) == 0 && ((s[i + 1] & 0x20) == 0)) {
 791           // This 3 byte sequence could have been coded as a 2 byte sequence.
 792           // Record a single kBadChar for the first byte and continue.
 793           continue;
 794         }
 795         input_offset = 2;
 796         // 3 bytes of UTF-8 turn into 1 UTF-16 code unit.
 797         output_adjust = 2;
 798       } else {
 799         if ((c & 0x1e) == 0) {
 800           // This 2 byte sequence could have been coded as a 1 byte sequence.
 801           // Record a single kBadChar for the first byte and continue.
 802           continue;
 803         }
 804         input_offset = 1;
 805         // 2 bytes of UTF-8 turn into 1 UTF-16 code unit.
 806         output_adjust = 1;
 807       }
 808       bool bad = false;
 809       for (int j = 1; j <= input_offset; j++) {
 810         if ((s[i + j] & 0xc0) != 0x80) {
 811           // Bad UTF-8 sequence turns the first in the sequence into kBadChar,
 812           // which is a single UTF-16 code unit.
 813           bad = true;
 814           break;
 815         }
 816       }
 817       if (!bad) {
 818         i += input_offset;
 819         character_length -= output_adjust;
 820       }
 821     }
 822   }
 823   return character_length;
 824 }
 825
 826
 827 TEST(ScopePositions) {
 828   // Test the parser for correctly setting the start and end positions
 829   // of a scope. We check the scope positions of exactly one scope
 830   // nested in the global scope of a program. 'inner source' is the
 831   // source code that determines the part of the source belonging
 832   // to the nested scope. 'outer_prefix' and 'outer_suffix' are
 833   // parts of the source that belong to the global scope.
 834   struct SourceData {
 835     const char* outer_prefix;
 836     const char* inner_source;
 837     const char* outer_suffix;
 838     i::ScopeType scope_type;
 839     i::LanguageMode language_mode;
 840   };
 841
 842   const SourceData source_data[] = {
 843     { "  with ({}) ", "{ block; }", " more;", i::WITH_SCOPE, i::CLASSIC_MODE },
 844     { "  with ({}) ", "{ block; }", "; more;", i::WITH_SCOPE, i::CLASSIC_MODE },
 845     { "  with ({}) ", "{\n"
 846       "    block;\n"
 847       "  }", "\n"
 848       "  more;", i::WITH_SCOPE, i::CLASSIC_MODE },
 849     { "  with ({}) ", "statement;", " more;", i::WITH_SCOPE, i::CLASSIC_MODE },
 850     { "  with ({}) ", "statement", "\n"
 851       "  more;", i::WITH_SCOPE, i::CLASSIC_MODE },
 852     { "  with ({})\n"
 853       "    ", "statement;", "\n"
 854       "  more;", i::WITH_SCOPE, i::CLASSIC_MODE },
 855     { "  try {} catch ", "(e) { block; }", " more;",
 856       i::CATCH_SCOPE, i::CLASSIC_MODE },
 857     { "  try {} catch ", "(e) { block; }", "; more;",
 858       i::CATCH_SCOPE, i::CLASSIC_MODE },
 859     { "  try {} catch ", "(e) {\n"
 860       "    block;\n"
 861       "  }", "\n"
 862       "  more;", i::CATCH_SCOPE, i::CLASSIC_MODE },
 863     { "  try {} catch ", "(e) { block; }", " finally { block; } more;",
 864       i::CATCH_SCOPE, i::CLASSIC_MODE },
 865     { "  start;\n"
 866       "  ", "{ let block; }", " more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
 867     { "  start;\n"
 868       "  ", "{ let block; }", "; more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
 869     { "  start;\n"
 870       "  ", "{\n"
 871       "    let block;\n"
 872       "  }", "\n"
 873       "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
 874     { "  start;\n"
 875       "  function fun", "(a,b) { infunction; }", " more;",
 876       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 877     { "  start;\n"
 878       "  function fun", "(a,b) {\n"
 879       "    infunction;\n"
 880       "  }", "\n"
 881       "  more;", i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 882     { "  (function fun", "(a,b) { infunction; }", ")();",
 883       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 884     { "  for ", "(let x = 1 ; x < 10; ++ x) { block; }", " more;",
 885       i::BLOCK_SCOPE, i::EXTENDED_MODE },
 886     { "  for ", "(let x = 1 ; x < 10; ++ x) { block; }", "; more;",
 887       i::BLOCK_SCOPE, i::EXTENDED_MODE },
 888     { "  for ", "(let x = 1 ; x < 10; ++ x) {\n"
 889       "    block;\n"
 890       "  }", "\n"
 891       "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
 892     { "  for ", "(let x = 1 ; x < 10; ++ x) statement;", " more;",
 893       i::BLOCK_SCOPE, i::EXTENDED_MODE },
 894     { "  for ", "(let x = 1 ; x < 10; ++ x) statement", "\n"
 895       "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
 896     { "  for ", "(let x = 1 ; x < 10; ++ x)\n"
 897       "    statement;", "\n"
 898       "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
 899     { "  for ", "(let x in {}) { block; }", " more;",
 900       i::BLOCK_SCOPE, i::EXTENDED_MODE },
 901     { "  for ", "(let x in {}) { block; }", "; more;",
 902       i::BLOCK_SCOPE, i::EXTENDED_MODE },
 903     { "  for ", "(let x in {}) {\n"
 904       "    block;\n"
 905       "  }", "\n"
 906       "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
 907     { "  for ", "(let x in {}) statement;", " more;",
 908       i::BLOCK_SCOPE, i::EXTENDED_MODE },
 909     { "  for ", "(let x in {}) statement", "\n"
 910       "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
 911     { "  for ", "(let x in {})\n"
 912       "    statement;", "\n"
 913       "  more;", i::BLOCK_SCOPE, i::EXTENDED_MODE },
 914     // Check that 6-byte and 4-byte encodings of UTF-8 strings do not throw
 915     // the preparser off in terms of byte offsets.
 916     // 6 byte encoding.
 917     { "  'foo\355\240\201\355\260\211';\n"
 918       "  (function fun", "(a,b) { infunction; }", ")();",
 919       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 920     // 4 byte encoding.
 921     { "  'foo\360\220\220\212';\n"
 922       "  (function fun", "(a,b) { infunction; }", ")();",
 923       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 924     // 3 byte encoding of \u0fff.
 925     { "  'foo\340\277\277';\n"
 926       "  (function fun", "(a,b) { infunction; }", ")();",
 927       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 928     // Broken 6 byte encoding with missing last byte.
 929     { "  'foo\355\240\201\355\211';\n"
 930       "  (function fun", "(a,b) { infunction; }", ")();",
 931       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 932     // Broken 3 byte encoding of \u0fff with missing last byte.
 933     { "  'foo\340\277';\n"
 934       "  (function fun", "(a,b) { infunction; }", ")();",
 935       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 936     // Broken 3 byte encoding of \u0fff with missing 2 last bytes.
 937     { "  'foo\340';\n"
 938       "  (function fun", "(a,b) { infunction; }", ")();",
 939       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 940     // Broken 3 byte encoding of \u00ff should be a 2 byte encoding.
 941     { "  'foo\340\203\277';\n"
 942       "  (function fun", "(a,b) { infunction; }", ")();",
 943       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 944     // Broken 3 byte encoding of \u007f should be a 2 byte encoding.
 945     { "  'foo\340\201\277';\n"
 946       "  (function fun", "(a,b) { infunction; }", ")();",
 947       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 948     // Unpaired lead surrogate.
 949     { "  'foo\355\240\201';\n"
 950       "  (function fun", "(a,b) { infunction; }", ")();",
 951       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 952     // Unpaired lead surrogate where following code point is a 3 byte sequence.
 953     { "  'foo\355\240\201\340\277\277';\n"
 954       "  (function fun", "(a,b) { infunction; }", ")();",
 955       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 956     // Unpaired lead surrogate where following code point is a 4 byte encoding
 957     // of a trail surrogate.
 958     { "  'foo\355\240\201\360\215\260\211';\n"
 959       "  (function fun", "(a,b) { infunction; }", ")();",
 960       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 961     // Unpaired trail surrogate.
 962     { "  'foo\355\260\211';\n"
 963       "  (function fun", "(a,b) { infunction; }", ")();",
 964       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 965     // 2 byte encoding of \u00ff.
 966     { "  'foo\303\277';\n"
 967       "  (function fun", "(a,b) { infunction; }", ")();",
 968       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 969     // Broken 2 byte encoding of \u00ff with missing last byte.
 970     { "  'foo\303';\n"
 971       "  (function fun", "(a,b) { infunction; }", ")();",
 972       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 973     // Broken 2 byte encoding of \u007f should be a 1 byte encoding.
 974     { "  'foo\301\277';\n"
 975       "  (function fun", "(a,b) { infunction; }", ")();",
 976       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 977     // Illegal 5 byte encoding.
 978     { "  'foo\370\277\277\277\277';\n"
 979       "  (function fun", "(a,b) { infunction; }", ")();",
 980       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 981     // Illegal 6 byte encoding.
 982     { "  'foo\374\277\277\277\277\277';\n"
 983       "  (function fun", "(a,b) { infunction; }", ")();",
 984       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 985     // Illegal 0xfe byte
 986     { "  'foo\376\277\277\277\277\277\277';\n"
 987       "  (function fun", "(a,b) { infunction; }", ")();",
 988       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 989     // Illegal 0xff byte
 990     { "  'foo\377\277\277\277\277\277\277\277';\n"
 991       "  (function fun", "(a,b) { infunction; }", ")();",
 992       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 993     { "  'foo';\n"
 994       "  (function fun", "(a,b) { 'bar\355\240\201\355\260\213'; }", ")();",
 995       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 996     { "  'foo';\n"
 997       "  (function fun", "(a,b) { 'bar\360\220\220\214'; }", ")();",
 998       i::FUNCTION_SCOPE, i::CLASSIC_MODE },
 999     { NULL, NULL, NULL, i::EVAL_SCOPE, i::CLASSIC_MODE }
1000   };
1001
1002   i::Isolate* isolate = CcTest::i_isolate();
1003   i::Factory* factory = isolate->factory();
1004
1005   v8::HandleScope handles(CcTest::isolate());
1006   v8::Handle<v8::Context> context = v8::Context::New(CcTest::isolate());
1007   v8::Context::Scope context_scope(context);
1008
1009   int marker;
1010   isolate->stack_guard()->SetStackLimit(
1011       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
1012
1013   for (int i = 0; source_data[i].outer_prefix; i++) {
1014     int kPrefixLen = Utf8LengthHelper(source_data[i].outer_prefix);
1015     int kInnerLen = Utf8LengthHelper(source_data[i].inner_source);
1016     int kSuffixLen = Utf8LengthHelper(source_data[i].outer_suffix);
1017     int kPrefixByteLen = i::StrLength(source_data[i].outer_prefix);
1018     int kInnerByteLen = i::StrLength(source_data[i].inner_source);
1019     int kSuffixByteLen = i::StrLength(source_data[i].outer_suffix);
1020     int kProgramSize = kPrefixLen + kInnerLen + kSuffixLen;
1021     int kProgramByteSize = kPrefixByteLen + kInnerByteLen + kSuffixByteLen;
1022     i::Vector<char> program = i::Vector<char>::New(kProgramByteSize + 1);
1023     i::OS::SNPrintF(program, "%s%s%s",
1024                              source_data[i].outer_prefix,
1025                              source_data[i].inner_source,
1026                              source_data[i].outer_suffix);
1027
1028     // Parse program source.
1029     i::Handle<i::String> source(
1030         factory->NewStringFromUtf8(i::CStrVector(program.start())));
1031     CHECK_EQ(source->length(), kProgramSize);
1032     i::Handle<i::Script> script = factory->NewScript(source);
1033     i::CompilationInfoWithZone info(script);
1034     i::Parser parser(&info);
1035     parser.set_allow_lazy(true);
1036     parser.set_allow_harmony_scoping(true);
1037     info.MarkAsGlobal();
1038     info.SetLanguageMode(source_data[i].language_mode);
1039     parser.Parse();
1040     CHECK(info.function() != NULL);
1041
1042     // Check scope types and positions.
1043     i::Scope* scope = info.function()->scope();
1044     CHECK(scope->is_global_scope());
1045     CHECK_EQ(scope->start_position(), 0);
1046     CHECK_EQ(scope->end_position(), kProgramSize);
1047     CHECK_EQ(scope->inner_scopes()->length(), 1);
1048
1049     i::Scope* inner_scope = scope->inner_scopes()->at(0);
1050     CHECK_EQ(inner_scope->scope_type(), source_data[i].scope_type);
1051     CHECK_EQ(inner_scope->start_position(), kPrefixLen);
1052     // The end position of a token is one position after the last
1053     // character belonging to that token.
1054     CHECK_EQ(inner_scope->end_position(), kPrefixLen + kInnerLen);
1055   }
1056 }
1057
1058
1059 i::Handle<i::String> FormatMessage(i::ScriptDataImpl* data) {
1060   i::Isolate* isolate = CcTest::i_isolate();
1061   i::Factory* factory = isolate->factory();
1062   const char* message = data->BuildMessage();
1063   i::Handle<i::String> format = v8::Utils::OpenHandle(
1064       *v8::String::NewFromUtf8(CcTest::isolate(), message));
1065   i::Vector<const char*> args = data->BuildArgs();
1066   i::Handle<i::JSArray> args_array = factory->NewJSArray(args.length());
1067   for (int i = 0; i < args.length(); i++) {
1068     i::JSArray::SetElement(
1069         args_array, i, v8::Utils::OpenHandle(*v8::String::NewFromUtf8(
1070                                                   CcTest::isolate(), args[i])),
1071         NONE, i::kNonStrictMode);
1072   }
1073   i::Handle<i::JSObject> builtins(isolate->js_builtins_object());
1074   i::Handle<i::Object> format_fun =
1075       i::GetProperty(builtins, "FormatMessage");
1076   i::Handle<i::Object> arg_handles[] = { format, args_array };
1077   bool has_exception = false;
1078   i::Handle<i::Object> result = i::Execution::Call(
1079       isolate, format_fun, builtins, 2, arg_handles, &has_exception);
1080   CHECK(!has_exception);
1081   CHECK(result->IsString());
1082   for (int i = 0; i < args.length(); i++) {
1083     i::DeleteArray(args[i]);
1084   }
1085   i::DeleteArray(args.start());
1086   i::DeleteArray(message);
1087   return i::Handle<i::String>::cast(result);
1088 }
1089
1090
// Switches that SetParserFlags() applies to a parser. Each enumerator selects
// the argument passed to the setter named in its trailing comment.
enum ParserFlag {
  kAllowLazy,                   // set_allow_lazy
  kAllowNativesSyntax,          // set_allow_natives_syntax
  kAllowHarmonyScoping,         // set_allow_harmony_scoping
  kAllowModules,                // set_allow_modules
  kAllowGenerators,             // set_allow_generators
  kAllowForOf,                  // set_allow_for_of
  kAllowHarmonyNumericLiterals  // set_allow_harmony_numeric_literals
};
1100
1101
1102 void SetParserFlags(i::ParserBase* parser, i::EnumSet<ParserFlag> flags) {
1103   parser->set_allow_lazy(flags.Contains(kAllowLazy));
1104   parser->set_allow_natives_syntax(flags.Contains(kAllowNativesSyntax));
1105   parser->set_allow_harmony_scoping(flags.Contains(kAllowHarmonyScoping));
1106   parser->set_allow_modules(flags.Contains(kAllowModules));
1107   parser->set_allow_generators(flags.Contains(kAllowGenerators));
1108   parser->set_allow_for_of(flags.Contains(kAllowForOf));
1109   parser->set_allow_harmony_numeric_literals(
1110       flags.Contains(kAllowHarmonyNumericLiterals));
1111 }
1112
1113
1114 void TestParserSyncWithFlags(i::Handle<i::String> source,
1115                              i::EnumSet<ParserFlag> flags) {
1116   i::Isolate* isolate = CcTest::i_isolate();
1117   i::Factory* factory = isolate->factory();
1118
1119   uintptr_t stack_limit = isolate->stack_guard()->real_climit();
1120
1121   // Preparse the data.
1122   i::CompleteParserRecorder log;
1123   {
1124     i::Scanner scanner(isolate->unicode_cache());
1125     i::GenericStringUtf16CharacterStream stream(source, 0, source->length());
1126     i::PreParser preparser(&scanner, &log, stack_limit);
1127     SetParserFlags(&preparser, flags);
1128     scanner.Initialize(&stream);
1129     i::PreParser::PreParseResult result = preparser.PreParseProgram();
1130     CHECK_EQ(i::PreParser::kPreParseSuccess, result);
1131   }
1132   i::ScriptDataImpl data(log.ExtractData());
1133
1134   // Parse the data
1135   i::FunctionLiteral* function;
1136   {
1137     i::Handle<i::Script> script = factory->NewScript(source);
1138     i::CompilationInfoWithZone info(script);
1139     i::Parser parser(&info);
1140     SetParserFlags(&parser, flags);
1141     info.MarkAsGlobal();
1142     parser.Parse();
1143     function = info.function();
1144   }
1145
1146   // Check that preparsing fails iff parsing fails.
1147   if (function == NULL) {
1148     // Extract exception from the parser.
1149     CHECK(isolate->has_pending_exception());
1150     i::MaybeObject* maybe_object = isolate->pending_exception();
1151     i::JSObject* exception = NULL;
1152     CHECK(maybe_object->To(&exception));
1153     i::Handle<i::JSObject> exception_handle(exception);
1154     i::Handle<i::String> message_string =
1155         i::Handle<i::String>::cast(i::GetProperty(exception_handle, "message"));
1156
1157     if (!data.has_error()) {
1158       i::OS::Print(
1159           "Parser failed on:\n"
1160           "\t%s\n"
1161           "with error:\n"
1162           "\t%s\n"
1163           "However, the preparser succeeded",
1164           source->ToCString().get(), message_string->ToCString().get());
1165       CHECK(false);
1166     }
1167     // Check that preparser and parser produce the same error.
1168     i::Handle<i::String> preparser_message = FormatMessage(&data);
1169     if (!message_string->Equals(*preparser_message)) {
1170       i::OS::Print(
1171           "Expected parser and preparser to produce the same error on:\n"
1172           "\t%s\n"
1173           "However, found the following error messages\n"
1174           "\tparser:    %s\n"
1175           "\tpreparser: %s\n",
1176           source->ToCString().get(),
1177           message_string->ToCString().get(),
1178           preparser_message->ToCString().get());
1179       CHECK(false);
1180     }
1181   } else if (data.has_error()) {
1182     i::OS::Print(
1183         "Preparser failed on:\n"
1184         "\t%s\n"
1185         "with error:\n"
1186         "\t%s\n"
1187         "However, the parser succeeded",
1188         source->ToCString().get(), FormatMessage(&data)->ToCString().get());
1189     CHECK(false);
1190   }
1191 }
1192
1193
1194 void TestParserSync(const char* source,
1195                     const ParserFlag* flag_list,
1196                     size_t flag_list_length) {
1197   i::Handle<i::String> str =
1198       CcTest::i_isolate()->factory()->NewStringFromAscii(i::CStrVector(source));
1199   for (int bits = 0; bits < (1 << flag_list_length); bits++) {
1200     i::EnumSet<ParserFlag> flags;
1201     for (size_t flag_index = 0; flag_index < flag_list_length; flag_index++) {
1202       if ((bits & (1 << flag_index)) != 0) flags.Add(flag_list[flag_index]);
1203     }
1204     TestParserSyncWithFlags(str, flags);
1205   }
1206 }
1207
1208
1209 TEST(ParserSync) {
1210   const char* context_data[][2] = {
1211     { "", "" },
1212     { "{", "}" },
1213     { "if (true) ", " else {}" },
1214     { "if (true) {} else ", "" },
1215     { "if (true) ", "" },
1216     { "do ", " while (false)" },
1217     { "while (false) ", "" },
1218     { "for (;;) ", "" },
1219     { "with ({})", "" },
1220     { "switch (12) { case 12: ", "}" },
1221     { "switch (12) { default: ", "}" },
1222     { "switch (12) { ", "case 12: }" },
1223     { "label2: ", "" },
1224     { NULL, NULL }
1225   };
1226
1227   const char* statement_data[] = {
1228     "{}",
1229     "var x",
1230     "var x = 1",
1231     "const x",
1232     "const x = 1",
1233     ";",
1234     "12",
1235     "if (false) {} else ;",
1236     "if (false) {} else {}",
1237     "if (false) {} else 12",
1238     "if (false) ;"
1239     "if (false) {}",
1240     "if (false) 12",
1241     "do {} while (false)",
1242     "for (;;) ;",
1243     "for (;;) {}",
1244     "for (;;) 12",
1245     "continue",
1246     "continue label",
1247     "continue\nlabel",
1248     "break",
1249     "break label",
1250     "break\nlabel",
1251     "return",
1252     "return  12",
1253     "return\n12",
1254     "with ({}) ;",
1255     "with ({}) {}",
1256     "with ({}) 12",
1257     "switch ({}) { default: }"
1258     "label3: "
1259     "throw",
1260     "throw  12",
1261     "throw\n12",
1262     "try {} catch(e) {}",
1263     "try {} finally {}",
1264     "try {} catch(e) {} finally {}",
1265     "debugger",
1266     NULL
1267   };
1268
1269   const char* termination_data[] = {
1270     "",
1271     ";",
1272     "\n",
1273     ";\n",
1274     "\n;",
1275     NULL
1276   };
1277
1278   v8::HandleScope handles(CcTest::isolate());
1279   v8::Handle<v8::Context> context = v8::Context::New(CcTest::isolate());
1280   v8::Context::Scope context_scope(context);
1281
1282   int marker;
1283   CcTest::i_isolate()->stack_guard()->SetStackLimit(
1284       reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
1285
1286   static const ParserFlag flags1[] = {
1287     kAllowLazy, kAllowHarmonyScoping, kAllowModules, kAllowGenerators,
1288     kAllowForOf
1289   };
1290   for (int i = 0; context_data[i][0] != NULL; ++i) {
1291     for (int j = 0; statement_data[j] != NULL; ++j) {
1292       for (int k = 0; termination_data[k] != NULL; ++k) {
1293         int kPrefixLen = i::StrLength(context_data[i][0]);
1294         int kStatementLen = i::StrLength(statement_data[j]);
1295         int kTerminationLen = i::StrLength(termination_data[k]);
1296         int kSuffixLen = i::StrLength(context_data[i][1]);
1297         int kProgramSize = kPrefixLen + kStatementLen + kTerminationLen
1298             + kSuffixLen + i::StrLength("label: for (;;) {  }");
1299
1300         // Plug the source code pieces together.
1301         i::ScopedVector<char> program(kProgramSize + 1);
1302         int length = i::OS::SNPrintF(program,
1303             "label: for (;;) { %s%s%s%s }",
1304             context_data[i][0],
1305             statement_data[j],
1306             termination_data[k],
1307             context_data[i][1]);
1308         CHECK(length == kProgramSize);
1309         TestParserSync(program.start(), flags1, ARRAY_SIZE(flags1));
1310       }
1311     }
1312   }
1313
1314   // Neither Harmony numeric literals nor our natives syntax have any
1315   // interaction with the flags above, so test these separately to reduce
1316   // the combinatorial explosion.
1317   static const ParserFlag flags2[] = { kAllowHarmonyNumericLiterals };
1318   TestParserSync("0o1234", flags2, ARRAY_SIZE(flags2));
1319   TestParserSync("0b1011", flags2, ARRAY_SIZE(flags2));
1320
1321   static const ParserFlag flags3[] = { kAllowNativesSyntax };
1322   TestParserSync("%DebugPrint(123)", flags3, ARRAY_SIZE(flags3));
1323 }
1324
1325
1326 TEST(PreparserStrictOctal) {
1327   // Test that syntax error caused by octal literal is reported correctly as
1328   // such (issue 2220).
1329   v8::internal::FLAG_min_preparse_length = 1;  // Force preparsing.
1330   v8::V8::Initialize();
1331   v8::HandleScope scope(CcTest::isolate());
1332   v8::Context::Scope context_scope(
1333       v8::Context::New(CcTest::isolate()));
1334   v8::TryCatch try_catch;
1335   const char* script =
1336       "\"use strict\";       \n"
1337       "a = function() {      \n"
1338       "  b = function() {    \n"
1339       "    01;               \n"
1340       "  };                  \n"
1341       "};                    \n";
1342   v8::Script::Compile(v8::String::NewFromUtf8(CcTest::isolate(), script));
1343   CHECK(try_catch.HasCaught());
1344   v8::String::Utf8Value exception(try_catch.Exception());
1345   CHECK_EQ("SyntaxError: Octal literals are not allowed in strict mode.",
1346            *exception);
1347 }