1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Redistribution and use in source and binary forms, with or without
3 // modification, are permitted provided that the following conditions are
6 // * Redistributions of source code must retain the above copyright
7 // notice, this list of conditions and the following disclaimer.
8 // * Redistributions in binary form must reproduce the above
9 // copyright notice, this list of conditions and the following
10 // disclaimer in the documentation and/or other materials provided
11 // with the distribution.
12 // * Neither the name of Google Inc. nor the names of its
13 // contributors may be used to endorse or promote products derived
14 // from this software without specific prior written permission.
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #ifndef V8_PREPARSER_H
29 #define V8_PREPARSER_H
38 // Common base class shared between parser and pre-parser.
41 ParserBase(Scanner* scanner, uintptr_t stack_limit)
43 stack_limit_(stack_limit),
44 stack_overflow_(false),
46 allow_natives_syntax_(false),
47 allow_generators_(false),
48 allow_for_of_(false) { }
49 // TODO(mstarzinger): Only virtual until message reporting has been unified.
50 virtual ~ParserBase() { }
52 // Getters that indicate whether certain syntactical constructs are
53 // allowed to be parsed by this instance of the parser.
// Flags held directly in parser member fields.
54 bool allow_lazy() const { return allow_lazy_; }
55 bool allow_natives_syntax() const { return allow_natives_syntax_; }
56 bool allow_generators() const { return allow_generators_; }
57 bool allow_for_of() const { return allow_for_of_; }
// The Harmony flags below are not cached in the parser; each call queries the
// scanner.
58 bool allow_modules() const { return scanner()->HarmonyModules(); }
59 bool allow_harmony_scoping() const { return scanner()->HarmonyScoping(); }
60 bool allow_harmony_numeric_literals() const {
61 return scanner()->HarmonyNumericLiterals();
64 // Setters that determine whether certain syntactical constructs are
65 // allowed to be parsed by this instance of the parser.
// These four setters write parser member fields.
66 void set_allow_lazy(bool allow) { allow_lazy_ = allow; }
67 void set_allow_natives_syntax(bool allow) { allow_natives_syntax_ = allow; }
68 void set_allow_generators(bool allow) { allow_generators_ = allow; }
69 void set_allow_for_of(bool allow) { allow_for_of_ = allow; }
// Forwarded to the scanner instead of being stored in the parser, so the
// matching getter (allow_modules) reads it back from the scanner.
70 void set_allow_modules(bool allow) { scanner()->SetHarmonyModules(allow); }
71 void set_allow_harmony_scoping(bool allow) {
72 scanner()->SetHarmonyScoping(allow);
74 void set_allow_harmony_numeric_literals(bool allow) {
75 scanner()->SetHarmonyNumericLiterals(allow);
79 enum AllowEvalOrArgumentsAsIdentifier {
80 kAllowEvalOrArguments,
81 kDontAllowEvalOrArguments
// The scanner this parser pulls tokens from (see peek() and Next()).
84 Scanner* scanner() const { return scanner_; }
// Start offset (beg_pos) of the scanner's current location.
85 int position() { return scanner_->location().beg_pos; }
// Start offset (beg_pos) of the scanner's peek location.
86 int peek_position() { return scanner_->peek_location().beg_pos; }
// Whether a stack overflow has been recorded. While the flag is set, peek()
// and Next() return Token::ILLEGAL instead of consulting the scanner.
87 bool stack_overflow() const { return stack_overflow_; }
88 void set_stack_overflow() { stack_overflow_ = true; }
// Must be provided by subclasses. PreParser's override answers from the
// language mode of its current scope.
90 virtual bool is_classic_mode() = 0;
92 INLINE(Token::Value peek()) {
93 if (stack_overflow_) return Token::ILLEGAL;
94 return scanner()->peek();
97 INLINE(Token::Value Next()) {
98 if (stack_overflow_) return Token::ILLEGAL;
101 if (reinterpret_cast<uintptr_t>(&marker) < stack_limit_) {
102 // Any further calls to Next or peek will return the illegal token.
103 // The current call must return the next token, which might already
104 // have been peek'ed.
105 stack_overflow_ = true;
108 return scanner()->Next();
111 void Consume(Token::Value token) {
112 Token::Value next = Next();
115 ASSERT(next == token);
118 bool Check(Token::Value token) {
119 Token::Value next = peek();
127 void Expect(Token::Value token, bool* ok) {
128 Token::Value next = Next();
130 ReportUnexpectedToken(next);
135 bool peek_any_identifier();
136 void ExpectSemicolon(bool* ok);
137 bool CheckContextualKeyword(Vector<const char> keyword);
138 void ExpectContextualKeyword(Vector<const char> keyword, bool* ok);
140 // Strict mode octal literal validation.
141 void CheckOctalLiteral(int beg_pos, int end_pos, bool* ok);
143 // Determine precedence of given token.
144 static int Precedence(Token::Value token, bool accept_IN);
146 // Report syntax errors.
147 void ReportUnexpectedToken(Token::Value token);
148 void ReportMessageAt(Scanner::Location location, const char* type) {
149 ReportMessageAt(location, type, Vector<const char*>::empty());
151 virtual void ReportMessageAt(Scanner::Location source_location,
153 Vector<const char*> args) = 0;
155 // Used to detect duplicates in object literals. Each of the values
156 // kGetterProperty, kSetterProperty and kValueProperty represents
157 // a type of object literal property. When parsing a property, its
158 // type value is stored in the DuplicateFinder for the property name.
159 // Values are chosen so that having intersecting bits means there is
160 // an incompatibility.
161 // I.e., you can add a getter to a property that already has a setter, since
162 // kGetterProperty and kSetterProperty don't intersect, but not if it
163 // already has a getter or a value. Adding the getter to an existing
164 // setter will store the value (kGetterProperty | kSetterProperty), which
165 // is incompatible with adding any further properties.
168 // Bit patterns representing different object literal property types.
176 // Validation per ECMA 262 - 11.1.5 "Object Initialiser".
177 class ObjectLiteralChecker {
179 ObjectLiteralChecker(ParserBase* parser, LanguageMode mode)
181 finder_(scanner()->unicode_cache()),
182 language_mode_(mode) { }
184 void CheckProperty(Token::Value property, PropertyKind type, bool* ok);
// The parser associated with this checker.
187 ParserBase* parser() const { return parser_; }
// Convenience forwarder: the scanner of the associated parser.
188 Scanner* scanner() const { return parser_->scanner(); }
190 // Checks the type of conflict based on values coming from PropertyKind.
191 bool HasConflict(PropertyKind type1, PropertyKind type2) {
192 return (type1 & type2) != 0;
194 bool IsDataDataConflict(PropertyKind type1, PropertyKind type2) {
195 return ((type1 & type2) & kValueFlag) != 0;
197 bool IsDataAccessorConflict(PropertyKind type1, PropertyKind type2) {
198 return ((type1 ^ type2) & kValueFlag) != 0;
200 bool IsAccessorAccessorConflict(PropertyKind type1, PropertyKind type2) {
201 return ((type1 | type2) & kValueFlag) == 0;
205 DuplicateFinder finder_;
206 LanguageMode language_mode_;
211 uintptr_t stack_limit_;
212 bool stack_overflow_;
215 bool allow_natives_syntax_;
216 bool allow_generators_;
221 // Preparsing checks a JavaScript program and emits preparse-data that helps
222 // a later parsing to be faster.
223 // See preparse-data-format.h for the data format.
225 // The PreParser checks that the syntax follows the grammar for JavaScript,
226 // and collects some information about the program along the way.
227 // The grammar check is only performed in order to understand the program
228 // sufficiently to deduce some information about it, that can be used
229 // to speed up later parsing. Finding errors is not the goal of pre-parsing,
230 // rather it is to speed up properly written and correct programs.
231 // That means that contextual checks (like a label being declared where
232 // it is used) are generally omitted.
233 class PreParser : public ParserBase {
235 enum PreParseResult {
236 kPreParseStackOverflow,
240 PreParser(Scanner* scanner,
242 uintptr_t stack_limit)
243 : ParserBase(scanner, stack_limit),
246 parenthesized_function_(false) { }
250 // Pre-parse the program from the character stream; returns kPreParseSuccess
251 // on success (even if parsing failed, the pre-parse data successfully captured
252 // the syntax error), and kPreParseStackOverflow if a stack-overflow happened
254 PreParseResult PreParseProgram() {
255 Scope top_scope(&scope_, kTopLevelScope);
257 int start_position = scanner()->peek_location().beg_pos;
258 ParseSourceElements(Token::EOS, &ok);
259 if (stack_overflow()) return kPreParseStackOverflow;
261 ReportUnexpectedToken(scanner()->current_token());
262 } else if (!scope_->is_classic_mode()) {
263 CheckOctalLiteral(start_position, scanner()->location().end_pos, &ok);
265 return kPreParseSuccess;
268 // Parses a single function literal, from the opening parentheses before
269 // parameters to the closing brace after the body.
270 // Returns a FunctionEntry describing the body of the function in enough
271 // detail that it can be lazily compiled.
272 // The scanner is expected to have matched the "function" or "function*"
273 // keyword and parameters, and have consumed the initial '{'.
274 // At return, unless an error occurred, the scanner is positioned before the
276 PreParseResult PreParseLazyFunction(LanguageMode mode,
278 ParserRecorder* log);
281 // These types form an algebra over syntactic categories that is just
282 // rich enough to let us recognize and propagate the constructs that
283 // are either being counted in the preparser data, or are important
284 // to throw the correct syntax error exceptions.
291 enum VariableDeclarationContext {
297 // If a list of variable declarations includes any initializers.
298 enum VariableDeclarationProperties {
307 static Identifier Default() {
308 return Identifier(kUnknownIdentifier);
310 static Identifier Eval() {
311 return Identifier(kEvalIdentifier);
313 static Identifier Arguments() {
314 return Identifier(kArgumentsIdentifier);
316 static Identifier FutureReserved() {
317 return Identifier(kFutureReservedIdentifier);
319 static Identifier FutureStrictReserved() {
320 return Identifier(kFutureStrictReservedIdentifier);
322 static Identifier Yield() {
323 return Identifier(kYieldIdentifier);
// Predicates on this identifier's classification (type_).
325 bool IsEval() { return type_ == kEvalIdentifier; }
326 bool IsArguments() { return type_ == kArgumentsIdentifier; }
// Range test rather than two equality tests. NOTE(review): this presumably
// relies on kEvalIdentifier and kArgumentsIdentifier being the last
// enumerators of Type; the full enum is not visible in this excerpt, so
// confirm before reordering it.
327 bool IsEvalOrArguments() { return type_ >= kEvalIdentifier; }
328 bool IsYield() { return type_ == kYieldIdentifier; }
329 bool IsFutureReserved() { return type_ == kFutureReservedIdentifier; }
330 bool IsFutureStrictReserved() {
331 return type_ == kFutureStrictReservedIdentifier;
// True only for kUnknownIdentifier; every other classification (eval,
// arguments, yield, the reserved-word kinds) yields false.
333 bool IsValidStrictVariable() { return type_ == kUnknownIdentifier; }
338 kFutureReservedIdentifier,
339 kFutureStrictReservedIdentifier,
344 explicit Identifier(Type type) : type_(type) { }
347 friend class Expression;
350 // Bits 0 and 1 are used to identify the type of expression:
351 // If bit 0 is set, it's an identifier.
352 // If bit 1 is set, it's a string literal.
353 // If neither is set, it's no particular type, and both set isn't
357 static Expression Default() {
358 return Expression(kUnknownExpression);
361 static Expression FromIdentifier(Identifier id) {
362 return Expression(kIdentifierFlag | (id.type_ << kIdentifierShift));
365 static Expression StringLiteral() {
366 return Expression(kUnknownStringLiteral);
369 static Expression UseStrictStringLiteral() {
370 return Expression(kUseStrictString);
373 static Expression This() {
374 return Expression(kThisExpression);
377 static Expression ThisProperty() {
378 return Expression(kThisPropertyExpression);
381 static Expression StrictFunction() {
382 return Expression(kStrictFunctionExpression);
385 bool IsIdentifier() {
386 return (code_ & kIdentifierFlag) != 0;
389 // Only works correctly if it is actually an identifier expression.
390 PreParser::Identifier AsIdentifier() {
391 return PreParser::Identifier(
392 static_cast<PreParser::Identifier::Type>(code_ >> kIdentifierShift));
// True when the string-literal flag bit is set in code_. This also holds for
// the "use strict" literal, since kUseStrictString includes kStringLiteralFlag.
395 bool IsStringLiteral() { return (code_ & kStringLiteralFlag) != 0; }
397 bool IsUseStrictLiteral() {
398 return (code_ & kStringLiteralMask) == kUseStrictString;
402 return code_ == kThisExpression;
405 bool IsThisProperty() {
406 return code_ == kThisPropertyExpression;
409 bool IsStrictFunction() {
410 return code_ == kStrictFunctionExpression;
414 // First two/three bits are used as flags.
415 // Bits 0 and 1 represent identifiers or string literals, and are
416 // mutually exclusive, but can both be absent.
418 kUnknownExpression = 0,
420 kIdentifierFlag = 1, // Used to detect labels.
421 kIdentifierShift = 3,
423 kStringLiteralFlag = 2, // Used to detect directive prologue.
424 kUnknownStringLiteral = kStringLiteralFlag,
425 kUseStrictString = kStringLiteralFlag | 8,
426 kStringLiteralMask = kUseStrictString,
428 // Below here applies if neither identifier nor string literal.
430 kThisPropertyExpression = 8,
431 kStrictFunctionExpression = 12
434 explicit Expression(int expression_code) : code_(expression_code) { }
441 static Statement Default() {
442 return Statement(kUnknownStatement);
445 static Statement FunctionDeclaration() {
446 return Statement(kFunctionDeclaration);
449 // Creates expression statement from expression.
450 // Preserves being an unparenthesized string literal, possibly
452 static Statement ExpressionStatement(Expression expression) {
453 if (expression.IsUseStrictLiteral()) {
454 return Statement(kUseStrictExpressionStatement);
456 if (expression.IsStringLiteral()) {
457 return Statement(kStringLiteralExpressionStatement);
462 bool IsStringLiteral() {
463 return code_ == kStringLiteralExpressionStatement;
466 bool IsUseStrictLiteral() {
467 return code_ == kUseStrictExpressionStatement;
470 bool IsFunctionDeclaration() {
471 return code_ == kFunctionDeclaration;
477 kStringLiteralExpressionStatement,
478 kUseStrictExpressionStatement,
482 explicit Statement(Type code) : code_(code) {}
486 enum SourceElements {
487 kUnknownSourceElements
490 typedef int Arguments;
494 Scope(Scope** variable, ScopeType type)
495 : variable_(variable),
498 materialized_literal_count_(0),
499 expected_properties_(0),
500 with_nesting_count_(0),
502 (prev_ != NULL) ? prev_->language_mode() : CLASSIC_MODE),
503 is_generator_(false) {
// On destruction, writes prev_ back through variable_ (presumably the scope
// that was current when this one was created; the constructor's
// initialisation of prev_ is not visible in this excerpt).
506 ~Scope() { *variable_ = prev_; }
// Per-scope counters; they start at zero and are only ever incremented here.
507 void NextMaterializedLiteralIndex() { materialized_literal_count_++; }
508 void AddProperty() { expected_properties_++; }
509 ScopeType type() { return type_; }
510 int expected_properties() { return expected_properties_; }
511 int materialized_literal_count() { return materialized_literal_count_; }
// with_nesting_count_ is raised and lowered by InsideWith objects, so this is
// true while at least one of them is alive for this scope.
512 bool IsInsideWith() { return with_nesting_count_ != 0; }
513 bool is_generator() { return is_generator_; }
514 void set_is_generator(bool is_generator) { is_generator_ = is_generator; }
515 bool is_classic_mode() {
516 return language_mode_ == CLASSIC_MODE;
518 LanguageMode language_mode() {
519 return language_mode_;
521 void set_language_mode(LanguageMode language_mode) {
522 language_mode_ = language_mode;
527 explicit InsideWith(Scope* scope) : scope_(scope) {
528 scope->with_nesting_count_++;
531 ~InsideWith() { scope_->with_nesting_count_--; }
535 DISALLOW_COPY_AND_ASSIGN(InsideWith);
539 Scope** const variable_;
541 const ScopeType type_;
542 int materialized_literal_count_;
543 int expected_properties_;
544 int with_nesting_count_;
545 LanguageMode language_mode_;
549 // Report syntax error
550 void ReportMessageAt(Scanner::Location location,
552 Vector<const char*> args) {
553 ReportMessageAt(location.beg_pos,
556 args.length() > 0 ? args[0] : NULL);
558 void ReportMessageAt(Scanner::Location location,
560 const char* name_opt) {
561 log_->LogMessage(location.beg_pos, location.end_pos, type, name_opt);
563 void ReportMessageAt(int start_pos,
566 const char* name_opt) {
567 log_->LogMessage(start_pos, end_pos, type, name_opt);
570 // All ParseXXX functions take as the last argument an *ok parameter
571 // which is set to false if parsing failed; it is unchanged otherwise.
572 // By making the 'exception handling' explicit, we are forced to check
573 // for failure at the call sites.
574 Statement ParseSourceElement(bool* ok);
575 SourceElements ParseSourceElements(int end_token, bool* ok);
576 Statement ParseStatement(bool* ok);
577 Statement ParseFunctionDeclaration(bool* ok);
578 Statement ParseBlock(bool* ok);
579 Statement ParseVariableStatement(VariableDeclarationContext var_context,
581 Statement ParseVariableDeclarations(VariableDeclarationContext var_context,
582 VariableDeclarationProperties* decl_props,
585 Statement ParseExpressionOrLabelledStatement(bool* ok);
586 Statement ParseIfStatement(bool* ok);
587 Statement ParseContinueStatement(bool* ok);
588 Statement ParseBreakStatement(bool* ok);
589 Statement ParseReturnStatement(bool* ok);
590 Statement ParseWithStatement(bool* ok);
591 Statement ParseSwitchStatement(bool* ok);
592 Statement ParseDoWhileStatement(bool* ok);
593 Statement ParseWhileStatement(bool* ok);
594 Statement ParseForStatement(bool* ok);
595 Statement ParseThrowStatement(bool* ok);
596 Statement ParseTryStatement(bool* ok);
597 Statement ParseDebuggerStatement(bool* ok);
599 Expression ParseExpression(bool accept_IN, bool* ok);
600 Expression ParseAssignmentExpression(bool accept_IN, bool* ok);
601 Expression ParseYieldExpression(bool* ok);
602 Expression ParseConditionalExpression(bool accept_IN, bool* ok);
603 Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
604 Expression ParseUnaryExpression(bool* ok);
605 Expression ParsePostfixExpression(bool* ok);
606 Expression ParseLeftHandSideExpression(bool* ok);
607 Expression ParseNewExpression(bool* ok);
608 Expression ParseMemberExpression(bool* ok);
609 Expression ParseMemberWithNewPrefixesExpression(unsigned new_count, bool* ok);
610 Expression ParsePrimaryExpression(bool* ok);
611 Expression ParseArrayLiteral(bool* ok);
612 Expression ParseObjectLiteral(bool* ok);
613 Expression ParseRegExpLiteral(bool seen_equal, bool* ok);
614 Expression ParseV8Intrinsic(bool* ok);
616 Arguments ParseArguments(bool* ok);
617 Expression ParseFunctionLiteral(
619 Scanner::Location function_name_location,
620 bool name_is_strict_reserved,
623 void ParseLazyFunctionLiteralBody(bool* ok);
625 Identifier ParseIdentifier(AllowEvalOrArgumentsAsIdentifier, bool* ok);
626 Identifier ParseIdentifierOrStrictReservedWord(bool* is_strict_reserved,
628 Identifier ParseIdentifierName(bool* ok);
629 Identifier ParseIdentifierNameOrGetOrSet(bool* is_get,
633 // Logs the currently parsed literal as a symbol in the preparser data.
635 // Log the currently parsed identifier.
636 Identifier GetIdentifierSymbol();
637 // Log the currently parsed string literal.
638 Expression GetStringSymbol();
640 void set_language_mode(LanguageMode language_mode) {
641 scope_->set_language_mode(language_mode);
644 virtual bool is_classic_mode() {
645 return scope_->language_mode() == CLASSIC_MODE;
648 bool is_extended_mode() {
649 return scope_->language_mode() == EXTENDED_MODE;
// Language mode of the current scope (scope_).
652 LanguageMode language_mode() { return scope_->language_mode(); }
654 bool CheckInOrOf(bool accept_OF);
656 ParserRecorder* log_;
658 bool parenthesized_function_;
663 #endif // V8_PREPARSER_H