1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #ifndef V8_PREPARSER_H
29 #define V8_PREPARSER_H
45 // Preparsing checks a JavaScript program and emits preparse-data that helps
46 // a later parsing to be faster.
47 // See preparse-data-format.h for the data format.
49 // The PreParser checks that the syntax follows the grammar for JavaScript,
50 // and collects some information about the program along the way.
51 // The grammar check is only performed in order to understand the program
52 // sufficiently to deduce some information about it, that can be used
53 // to speed up later parsing. Finding errors is not the goal of pre-parsing,
54 // rather it is to speed up properly written and correct programs.
55 // That means that contextual checks (like a label being declared where
56 // it is used) are generally omitted.
58 namespace i = v8::internal;
60 class DuplicateFinder {
62 explicit DuplicateFinder(i::UnicodeCache* constants)
63 : unicode_constants_(constants),
67 int AddAsciiSymbol(i::Vector<const char> key, int value);
68 int AddUtf16Symbol(i::Vector<const uint16_t> key, int value);
69 // Add a number literal by converting it (if necessary)
70 // to the string that ToString(ToNumber(literal)) would generate,
71 // and then adding that string with AddAsciiSymbol.
72 // This string is the actual value used as key in an object literal,
73 // and the one that must be different from the other keys.
74 int AddNumber(i::Vector<const char> key, int value);
77 int AddSymbol(i::Vector<const byte> key, bool is_ascii, int value);
78 // Backs up the key and its length in the backing store.
79 // The backup is stored with a base 127 encoding of the
80 // length (plus a bit saying whether the string is ASCII),
81 // followed by the bytes of the key.
82 byte* BackupKey(i::Vector<const byte> key, bool is_ascii);
84 // Compare two encoded keys (both pointing into the backing store)
85 // for having the same base-127 encoded lengths and ASCII-ness,
86 // and then having the same 'length' bytes following.
87 static bool Match(void* first, void* second);
88 // Creates a hash from a sequence of bytes.
89 static uint32_t Hash(i::Vector<const byte> key, bool is_ascii);
90 // Checks whether a string containing a JS number is its canonical
92 static bool IsNumberCanonical(i::Vector<const char> key);
94 // Size of buffer. Sufficient for using it to call DoubleToCString
95 // from conversions.h.
96 static const int kBufferSize = 100;
98 i::UnicodeCache* unicode_constants_;
99 // Backing store used to store strings used as hashmap keys.
100 i::SequenceCollector<unsigned char> backing_store_;
102 // Buffer used for string->number->canonical string conversions.
103 char number_buffer_[kBufferSize];
109 enum PreParseResult {
110 kPreParseStackOverflow,
115 PreParser(i::Scanner* scanner,
116 i::ParserRecorder* log,
117 uintptr_t stack_limit,
119 bool allow_natives_syntax,
124 stack_limit_(stack_limit),
125 strict_mode_violation_location_(i::Scanner::Location::invalid()),
126 strict_mode_violation_type_(NULL),
127 stack_overflow_(false),
128 allow_lazy_(allow_lazy),
129 allow_modules_(allow_modules),
130 allow_natives_syntax_(allow_natives_syntax),
131 parenthesized_function_(false),
132 harmony_scoping_(scanner->HarmonyScoping()) { }
136 // Pre-parse the program from the character stream; returns kPreParseSuccess
137 // on success (even if parsing failed, the pre-parse data successfully
138 // captured the syntax error), and kPreParseStackOverflow if a stack-overflow happened
140 static PreParseResult PreParseProgram(i::Scanner* scanner,
141 i::ParserRecorder* log,
143 uintptr_t stack_limit) {
144 bool allow_lazy = (flags & i::kAllowLazy) != 0;
145 bool allow_natives_syntax = (flags & i::kAllowNativesSyntax) != 0;
146 bool allow_modules = (flags & i::kAllowModules) != 0;
147 return PreParser(scanner, log, stack_limit, allow_lazy,
148 allow_natives_syntax, allow_modules).PreParse();
151 // Parses a single function literal, from the opening parentheses before
152 // parameters to the closing brace after the body.
153 // Returns a FunctionEntry describing the body of the function in enough
154 // detail that it can be lazily compiled.
155 // The scanner is expected to have matched the "function" keyword and
156 // parameters, and have consumed the initial '{'.
157 // At return, unless an error occurred, the scanner is positioned before the
159 PreParseResult PreParseLazyFunction(i::LanguageMode mode,
160 i::ParserRecorder* log);
163 // Used to detect duplicates in object literals. Each of the values
164 // kGetterProperty, kSetterProperty and kValueProperty represents
165 // a type of object literal property. When parsing a property, its
166 // type value is stored in the DuplicateFinder for the property name.
167 // Values are chosen so that having intersection bits means that there is
168 // an incompatibility.
169 // I.e., you can add a getter to a property that already has a setter, since
170 // kGetterProperty and kSetterProperty don't intersect, but not if it
171 // already has a getter or a value. Adding the getter to an existing
172 // setter will store the value (kGetterProperty | kSetterProperty), which
173 // is incompatible with adding any further properties.
176 // Bit patterns representing different object literal property types.
184 // Checks the type of conflict based on values coming from PropertyType.
// Reports whether two PropertyType bit patterns have at least one bit in
// common; intersecting bits mean the two property kinds are incompatible.
bool HasConflict(int type1, int type2) {
  const int shared_bits = type1 & type2;
  return shared_bits != 0;
}
// True when both type1 and type2 have a kValueFlag bit set.
// NOTE(review): kValueFlag is declared outside this view; presumably it marks
// value (data) properties, so this identifies a data/data clash - confirm.
186 bool IsDataDataConflict(int type1, int type2) {
187 return ((type1 & type2) & kValueFlag) != 0;
// True when type1 and type2 differ in the kValueFlag bit(s), i.e. the flag is
// set in one of them but not in the other.
// NOTE(review): kValueFlag is declared outside this view; presumably it marks
// value (data) properties, making this a data-versus-accessor clash - confirm.
189 bool IsDataAccessorConflict(int type1, int type2) {
190 return ((type1 ^ type2) & kValueFlag) != 0;
// True when neither type1 nor type2 has a kValueFlag bit set.
// Note that this does not itself test for overlapping bits; it only
// classifies a pair, so callers presumably establish the conflict first
// (e.g. with HasConflict) - confirm against the call sites.
192 bool IsAccessorAccessorConflict(int type1, int type2) {
193 return ((type1 | type2) & kValueFlag) == 0;
197 void CheckDuplicate(DuplicateFinder* finder,
198 i::Token::Value property,
202 // These types form an algebra over syntactic categories that is just
203 // rich enough to let us recognize and propagate the constructs that
204 // are either being counted in the preparser data, or are important
205 // to throw the correct syntax error exceptions.
212 enum VariableDeclarationContext {
218 // If a list of variable declarations includes any initializers.
219 enum VariableDeclarationProperties {
228 static Identifier Default() {
229 return Identifier(kUnknownIdentifier);
231 static Identifier Eval() {
232 return Identifier(kEvalIdentifier);
234 static Identifier Arguments() {
235 return Identifier(kArgumentsIdentifier);
237 static Identifier FutureReserved() {
238 return Identifier(kFutureReservedIdentifier);
240 static Identifier FutureStrictReserved() {
241 return Identifier(kFutureStrictReservedIdentifier);
243 bool IsEval() { return type_ == kEvalIdentifier; }
244 bool IsArguments() { return type_ == kArgumentsIdentifier; }
245 bool IsEvalOrArguments() { return type_ >= kEvalIdentifier; }
246 bool IsFutureReserved() { return type_ == kFutureReservedIdentifier; }
247 bool IsFutureStrictReserved() {
248 return type_ == kFutureStrictReservedIdentifier;
250 bool IsValidStrictVariable() { return type_ == kUnknownIdentifier; }
255 kFutureReservedIdentifier,
256 kFutureStrictReservedIdentifier,
// Builds an identifier carrying the given classification tag. Declared
// explicit so that a bare Type value never converts to an Identifier
// implicitly; the static factory functions are the visible callers.
260 explicit Identifier(Type type) : type_(type) { }
263 friend class Expression;
266 // Bits 0 and 1 are used to identify the type of expression:
267 // If bit 0 is set, it's an identifier.
268 // If bit 1 is set, it's a string literal.
269 // If neither is set, it's no particular type, and both set isn't
271 // Bit 2 is used to mark the expression as being parenthesized,
272 // so "(foo)" isn't recognized as a pure identifier (and possible label).
275 static Expression Default() {
276 return Expression(kUnknownExpression);
279 static Expression FromIdentifier(Identifier id) {
280 return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift));
283 static Expression StringLiteral() {
284 return Expression(kUnknownStringLiteral);
287 static Expression UseStrictStringLiteral() {
288 return Expression(kUseStrictString);
291 static Expression This() {
292 return Expression(kThisExpression);
295 static Expression ThisProperty() {
296 return Expression(kThisPropertyExpression);
299 static Expression StrictFunction() {
300 return Expression(kStrictFunctionExpression);
303 bool IsIdentifier() {
304 return (code_ & kIdentifierFlag) != 0;
307 // Only works correctly if it is actually an identifier expression.
308 PreParser::Identifier AsIdentifier() {
309 return PreParser::Identifier(
310 static_cast<PreParser::Identifier::Type>(code_ >> kIdentifierShift));
313 bool IsParenthesized() {
314 // If bit 0 or 1 is set, we interpret bit 2 as meaning parenthesized.
315 return (code_ & 7) > 4;
318 bool IsRawIdentifier() {
319 return !IsParenthesized() && IsIdentifier();
322 bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; }
324 bool IsRawStringLiteral() {
325 return !IsParenthesized() && IsStringLiteral();
328 bool IsUseStrictLiteral() {
329 return (code_ & kStringLiteralMask) == kUseStrictString;
333 return code_ == kThisExpression;
336 bool IsThisProperty() {
337 return code_ == kThisPropertyExpression;
340 bool IsStrictFunction() {
341 return code_ == kStrictFunctionExpression;
344 Expression Parenthesize() {
345 int type = code_ & 3;
347 // Identifiers and string literals can be parenthesized.
348 // They no longer work as labels or directive prologues,
349 // but are still recognized in other contexts.
350 return Expression(code_ | kParentesizedExpressionFlag);
352 // For other types of expressions, it's not important to remember
358 // First two/three bits are used as flags.
359 // Bit 0 and 1 represent identifiers or string literals, and are
360 // mutually exclusive, but can both be absent.
361 // If bit 0 or 1 is set, bit 2 marks that the expression has
362 // been wrapped in parentheses (a string literal can no longer
363 // be a directive prologue, and an identifier can no longer be
366 kUnknownExpression = 0,
368 kIdentifierFlag = 1, // Used to detect labels.
369 kIdentifierShift = 3,
371 kStringLiteralFlag = 2, // Used to detect directive prologue.
372 kUnknownStringLiteral = kStringLiteralFlag,
373 kUseStrictString = kStringLiteralFlag | 8,
374 kStringLiteralMask = kUseStrictString,
376 kParentesizedExpressionFlag = 4, // Only if identifier or string literal.
378 // Below here applies if neither identifier nor string literal.
380 kThisPropertyExpression = 8,
381 kStrictFunctionExpression = 12
// Wraps a raw expression code: a combination of the flag and kind constants
// declared for this class (e.g. kIdentifierFlag, kStringLiteralFlag,
// kParentesizedExpressionFlag). Explicit, so a plain int never converts to an
// Expression implicitly.
384 explicit Expression(int expression_code) : code_(expression_code) { }
391 static Statement Default() {
392 return Statement(kUnknownStatement);
395 static Statement FunctionDeclaration() {
396 return Statement(kFunctionDeclaration);
399 // Creates expression statement from expression.
400 // Preserves being an unparenthesized string literal, possibly
402 static Statement ExpressionStatement(Expression expression) {
403 if (!expression.IsParenthesized()) {
404 if (expression.IsUseStrictLiteral()) {
405 return Statement(kUseStrictExpressionStatement);
407 if (expression.IsStringLiteral()) {
408 return Statement(kStringLiteralExpressionStatement);
414 bool IsStringLiteral() {
415 return code_ != kUnknownStatement;
418 bool IsUseStrictLiteral() {
419 return code_ == kUseStrictExpressionStatement;
422 bool IsFunctionDeclaration() {
423 return code_ == kFunctionDeclaration;
429 kStringLiteralExpressionStatement,
430 kUseStrictExpressionStatement,
// Builds a statement of the given kind. Explicit, so a bare Type value never
// converts to a Statement implicitly; the static factory functions are the
// visible callers.
434 explicit Statement(Type code) : code_(code) {}
438 enum SourceElements {
439 kUnknownSourceElements
442 typedef int Arguments;
446 Scope(Scope** variable, ScopeType type)
447 : variable_(variable),
450 materialized_literal_count_(0),
451 expected_properties_(0),
452 with_nesting_count_(0),
454 (prev_ != NULL) ? prev_->language_mode() : i::CLASSIC_MODE) {
457 ~Scope() { *variable_ = prev_; }
458 void NextMaterializedLiteralIndex() { materialized_literal_count_++; }
459 void AddProperty() { expected_properties_++; }
460 ScopeType type() { return type_; }
461 int expected_properties() { return expected_properties_; }
462 int materialized_literal_count() { return materialized_literal_count_; }
463 bool IsInsideWith() { return with_nesting_count_ != 0; }
464 bool is_classic_mode() {
465 return language_mode_ == i::CLASSIC_MODE;
467 i::LanguageMode language_mode() {
468 return language_mode_;
470 void set_language_mode(i::LanguageMode language_mode) {
471 language_mode_ = language_mode;
473 void EnterWith() { with_nesting_count_++; }
474 void LeaveWith() { with_nesting_count_--; }
477 Scope** const variable_;
479 const ScopeType type_;
480 int materialized_literal_count_;
481 int expected_properties_;
482 int with_nesting_count_;
483 i::LanguageMode language_mode_;
486 // Preparse the program. Only called in PreParseProgram after creating
488 PreParseResult PreParse() {
489 Scope top_scope(&scope_, kTopLevelScope);
491 int start_position = scanner_->peek_location().beg_pos;
492 ParseSourceElements(i::Token::EOS, &ok);
493 if (stack_overflow_) return kPreParseStackOverflow;
495 ReportUnexpectedToken(scanner_->current_token());
496 } else if (!scope_->is_classic_mode()) {
497 CheckOctalLiteral(start_position, scanner_->location().end_pos, &ok);
499 return kPreParseSuccess;
502 // Report syntax error
503 void ReportUnexpectedToken(i::Token::Value token);
504 void ReportMessageAt(i::Scanner::Location location,
506 const char* name_opt) {
507 log_->LogMessage(location.beg_pos, location.end_pos, type, name_opt);
509 void ReportMessageAt(int start_pos,
512 const char* name_opt) {
513 log_->LogMessage(start_pos, end_pos, type, name_opt);
516 void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok);
518 // All ParseXXX functions take as the last argument an *ok parameter
519 // which is set to false if parsing failed; it is unchanged otherwise.
520 // By making the 'exception handling' explicit, we are forced to check
521 // for failure at the call sites.
522 Statement ParseSourceElement(bool* ok);
523 SourceElements ParseSourceElements(int end_token, bool* ok);
524 Statement ParseStatement(bool* ok);
525 Statement ParseFunctionDeclaration(bool* ok);
526 Statement ParseBlock(bool* ok);
527 Statement ParseVariableStatement(VariableDeclarationContext var_context,
529 Statement ParseVariableDeclarations(VariableDeclarationContext var_context,
530 VariableDeclarationProperties* decl_props,
533 Statement ParseExpressionOrLabelledStatement(bool* ok);
534 Statement ParseIfStatement(bool* ok);
535 Statement ParseContinueStatement(bool* ok);
536 Statement ParseBreakStatement(bool* ok);
537 Statement ParseReturnStatement(bool* ok);
538 Statement ParseWithStatement(bool* ok);
539 Statement ParseSwitchStatement(bool* ok);
540 Statement ParseDoWhileStatement(bool* ok);
541 Statement ParseWhileStatement(bool* ok);
542 Statement ParseForStatement(bool* ok);
543 Statement ParseThrowStatement(bool* ok);
544 Statement ParseTryStatement(bool* ok);
545 Statement ParseDebuggerStatement(bool* ok);
547 Expression ParseExpression(bool accept_IN, bool* ok);
548 Expression ParseAssignmentExpression(bool accept_IN, bool* ok);
549 Expression ParseConditionalExpression(bool accept_IN, bool* ok);
550 Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
551 Expression ParseUnaryExpression(bool* ok);
552 Expression ParsePostfixExpression(bool* ok);
553 Expression ParseLeftHandSideExpression(bool* ok);
554 Expression ParseNewExpression(bool* ok);
555 Expression ParseMemberExpression(bool* ok);
556 Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok);
557 Expression ParsePrimaryExpression(bool* ok);
558 Expression ParseArrayLiteral(bool* ok);
559 Expression ParseObjectLiteral(bool* ok);
560 Expression ParseRegExpLiteral(bool seen_equal, bool* ok);
561 Expression ParseV8Intrinsic(bool* ok);
563 Arguments ParseArguments(bool* ok);
564 Expression ParseFunctionLiteral(bool* ok);
565 void ParseLazyFunctionLiteralBody(bool* ok);
567 Identifier ParseIdentifier(bool* ok);
568 Identifier ParseIdentifierName(bool* ok);
569 Identifier ParseIdentifierNameOrGetOrSet(bool* is_get,
573 // Logs the currently parsed literal as a symbol in the preparser data.
575 // Log the currently parsed identifier.
576 Identifier GetIdentifierSymbol();
577 // Log the currently parsed string literal.
578 Expression GetStringSymbol();
580 i::Token::Value peek() {
581 if (stack_overflow_) return i::Token::ILLEGAL;
582 return scanner_->peek();
585 i::Token::Value Next() {
586 if (stack_overflow_) return i::Token::ILLEGAL;
589 if (reinterpret_cast<uintptr_t>(&marker) < stack_limit_) {
590 // Further calls to peek/Next will return illegal token.
591 // The current one will still be returned. It might already
592 // have been seen using peek.
593 stack_overflow_ = true;
596 return scanner_->Next();
599 bool peek_any_identifier();
601 void set_language_mode(i::LanguageMode language_mode) {
602 scope_->set_language_mode(language_mode);
605 bool is_classic_mode() {
606 return scope_->language_mode() == i::CLASSIC_MODE;
609 bool is_extended_mode() {
610 return scope_->language_mode() == i::EXTENDED_MODE;
613 i::LanguageMode language_mode() { return scope_->language_mode(); }
615 void Consume(i::Token::Value token) { Next(); }
617 void Expect(i::Token::Value token, bool* ok) {
618 if (Next() != token) {
623 bool Check(i::Token::Value token) {
624 i::Token::Value next = peek();
631 void ExpectSemicolon(bool* ok);
633 static int Precedence(i::Token::Value tok, bool accept_IN);
635 void SetStrictModeViolation(i::Scanner::Location,
639 void CheckDelayedStrictModeViolation(int beg_pos, int end_pos, bool* ok);
641 void StrictModeIdentifierViolation(i::Scanner::Location,
642 const char* eval_args_type,
643 Identifier identifier,
646 i::Scanner* scanner_;
647 i::ParserRecorder* log_;
649 uintptr_t stack_limit_;
650 i::Scanner::Location strict_mode_violation_location_;
651 const char* strict_mode_violation_type_;
652 bool stack_overflow_;
655 bool allow_natives_syntax_;
656 bool parenthesized_function_;
657 bool harmony_scoping_;
661 #endif // V8_PREPARSER_H