From 3ed8c2f5208468ed8659c73d2088fbb18605f899 Mon Sep 17 00:00:00 2001 From: "sandholm@chromium.org" Date: Wed, 1 Jun 2011 14:06:30 +0000 Subject: [PATCH] Remove scanner abstraction layer from JSON parsing. Review URL: http://codereview.chromium.org/7020018 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@8147 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/json-parser.cc | 350 +++++++++++++++++++---------------------------------- src/json-parser.h | 93 +++++++------- 2 files changed, 173 insertions(+), 270 deletions(-) diff --git a/src/json-parser.cc b/src/json-parser.cc index b7f57c2..7e24310 100644 --- a/src/json-parser.cc +++ b/src/json-parser.cc @@ -53,50 +53,50 @@ Handle JsonParser::ParseJson(Handle source) { // Set initial position right before the string. position_ = -1; // Advance to the first character (posibly EOS) - Advance(); - Next(); + AdvanceSkipWhitespace(); Handle result = ParseJsonValue(); - if (result.is_null() || Next() != Token::EOS) { - // Parse failed. Scanner's current token is the unexpected token. - Token::Value token = current_.token; + if (result.is_null() || c0_ != kEndOfString) { + // Parse failed. Current character is the unexpected token. const char* message; - const char* name_opt = NULL; + Factory* factory = isolate()->factory(); + Handle array; - switch (token) { - case Token::EOS: + switch (c0_) { + case kEndOfString: message = "unexpected_eos"; + array = factory->NewJSArray(0); break; - case Token::NUMBER: + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': message = "unexpected_token_number"; + array = factory->NewJSArray(0); break; - case Token::STRING: + case '"': message = "unexpected_token_string"; - break; - case Token::IDENTIFIER: - case Token::FUTURE_RESERVED_WORD: - message = "unexpected_token_identifier"; + array = factory->NewJSArray(0); break; default: message = "unexpected_token"; - name_opt = Token::String(token); - ASSERT(name_opt != NULL); + Handle name = LookupSingleCharacterStringFromCode(c0_); + Handle element = factory->NewFixedArray(1); + element->set(0, *name); + array = factory->NewJSArrayWithElements(element); break; } - Factory* factory = isolate()->factory(); MessageLocation location(factory->NewScript(source), - current_.beg_pos, - current_.end_pos); - Handle array; - if (name_opt == NULL) { - array = factory->NewJSArray(0); - } else { - Handle name = factory->NewStringFromUtf8(CStrVector(name_opt)); - Handle element = factory->NewFixedArray(1); - element->set(0, *name); - array = factory->NewJSArrayWithElements(element); - } + position_, + position_ + 1); Handle result = factory->NewSyntaxError(message, array); isolate()->Throw(*result, &location); return Handle::null(); @@ -107,49 +107,71 @@ Handle JsonParser::ParseJson(Handle source) { // Parse any JSON value. Handle JsonParser::ParseJsonValue() { - Token::Value token = Next(); - switch (token) { - case Token::STRING: - return GetString(false); - case Token::NUMBER: - return isolate()->factory()->NewNumber(number_); - case Token::FALSE_LITERAL: - return isolate()->factory()->false_value(); - case Token::TRUE_LITERAL: - return isolate()->factory()->true_value(); - case Token::NULL_LITERAL: - return isolate()->factory()->null_value(); - case Token::LBRACE: + switch (c0_) { + case '"': + return ParseJsonString(); + case '-': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return ParseJsonNumber(); + case 'f': + if (AdvanceGetChar() == 'a' && AdvanceGetChar() == 'l' && + AdvanceGetChar() == 's' && AdvanceGetChar() == 'e') { + AdvanceSkipWhitespace(); + return isolate()->factory()->false_value(); + } else { + return ReportUnexpectedCharacter(); + } + case 't': + if (AdvanceGetChar() == 'r' && AdvanceGetChar() == 'u' && + AdvanceGetChar() == 'e') { + AdvanceSkipWhitespace(); + return isolate()->factory()->true_value(); + } else { + return ReportUnexpectedCharacter(); + } + case 'n': + if (AdvanceGetChar() == 'u' && AdvanceGetChar() == 'l' && + AdvanceGetChar() == 'l') { + AdvanceSkipWhitespace(); + return isolate()->factory()->null_value(); + } else { + return ReportUnexpectedCharacter(); + } + case '{': return ParseJsonObject(); - case Token::LBRACK: + case '[': return ParseJsonArray(); default: - return ReportUnexpectedToken(); + return ReportUnexpectedCharacter(); } } -// Parse a JSON object. Scanner must be right after '{' token. +// Parse a JSON object. Position must be right at '{'. Handle JsonParser::ParseJsonObject() { Handle object_constructor( isolate()->global_context()->object_function()); Handle json_object = isolate()->factory()->NewJSObject(object_constructor); + ASSERT_EQ(c0_, '{'); - if (Peek() == Token::RBRACE) { - Next(); - } else { + AdvanceSkipWhitespace(); + if (c0_ != '}') { do { - if (Next() != Token::STRING) { - return ReportUnexpectedToken(); - } - Handle key = GetString(true); - if (Next() != Token::COLON) { - return ReportUnexpectedToken(); - } - + Handle key = ParseJsonSymbol(); + if (key.is_null() || c0_ != ':') return ReportUnexpectedCharacter(); + AdvanceSkipWhitespace(); Handle value = ParseJsonValue(); - if (value.is_null()) return Handle::null(); + if (value.is_null()) return ReportUnexpectedCharacter(); uint32_t index; if (key->AsArrayIndex(&index)) { @@ -159,150 +181,46 @@ Handle JsonParser::ParseJsonObject() { } else { SetLocalPropertyIgnoreAttributes(json_object, key, value, NONE); } - } while (Next() == Token::COMMA); - if (current_.token != Token::RBRACE) { - return ReportUnexpectedToken(); + } while (MatchSkipWhiteSpace(',')); + if (c0_ != '}') { + return ReportUnexpectedCharacter(); } } + AdvanceSkipWhitespace(); return json_object; } -// Parse a JSON array. Scanner must be right after '[' token. +// Parse a JSON array. Position must be right at '['. Handle JsonParser::ParseJsonArray() { ZoneScope zone_scope(isolate(), DELETE_ON_EXIT); ZoneList > elements(4); + ASSERT_EQ(c0_, '['); - Token::Value token = Peek(); - if (token == Token::RBRACK) { - Next(); - } else { + AdvanceSkipWhitespace(); + if (c0_ != ']') { do { Handle element = ParseJsonValue(); - if (element.is_null()) return Handle::null(); + if (element.is_null()) return ReportUnexpectedCharacter(); elements.Add(element); - token = Next(); - } while (token == Token::COMMA); - if (token != Token::RBRACK) { - return ReportUnexpectedToken(); + } while (MatchSkipWhiteSpace(',')); + if (c0_ != ']') { + return ReportUnexpectedCharacter(); } } - + AdvanceSkipWhitespace(); // Allocate a fixed array with all the elements. Handle fast_elements = isolate()->factory()->NewFixedArray(elements.length()); - for (int i = 0, n = elements.length(); i < n; i++) { fast_elements->set(i, *elements[i]); } - return isolate()->factory()->NewJSArrayWithElements(fast_elements); } -Token::Value JsonParser::Next() { - current_ = next_; - ScanJson(); - return current_.token; -} - -void JsonParser::ScanJson() { - if (source_->IsSeqAsciiString()) { - is_sequential_ascii_ = true; - } else { - is_sequential_ascii_ = false; - } - - Token::Value token; - do { - // Remember the position of the next token - next_.beg_pos = position_; - switch (c0_) { - case '\t': - case '\r': - case '\n': - case ' ': - Advance(); - token = Token::WHITESPACE; - break; - case '{': - Advance(); - token = Token::LBRACE; - break; - case '}': - Advance(); - token = Token::RBRACE; - break; - case '[': - Advance(); - token = Token::LBRACK; - break; - case ']': - Advance(); - token = Token::RBRACK; - break; - case ':': - Advance(); - token = Token::COLON; - break; - case ',': - Advance(); - token = Token::COMMA; - break; - case '"': - token = ScanJsonString(); - break; - case '-': - case '0': - case '1': - case '2': - case '3': - case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - token = ScanJsonNumber(); - break; - case 't': - token = ScanJsonIdentifier("true", Token::TRUE_LITERAL); - break; - case 'f': - token = ScanJsonIdentifier("false", Token::FALSE_LITERAL); - break; - case 'n': - token = ScanJsonIdentifier("null", Token::NULL_LITERAL); - break; - default: - if (c0_ < 0) { - Advance(); - token = Token::EOS; - } else { - Advance(); - token = Token::ILLEGAL; - } - } - } while (token == Token::WHITESPACE); - - next_.end_pos = position_; - next_.token = token; -} - - -Token::Value JsonParser::ScanJsonIdentifier(const char* text, - Token::Value token) { - while (*text != '\0') { - if (c0_ != *text) return Token::ILLEGAL; - Advance(); - text++; - } - return token; -} - - -Token::Value JsonParser::ScanJsonNumber() { +Handle JsonParser::ParseJsonNumber() { bool negative = false; - + beg_pos_ = position_; if (c0_ == '-') { Advance(); negative = true; @@ -311,11 +229,11 @@ Token::Value JsonParser::ScanJsonNumber() { Advance(); // Prefix zero is only allowed if it's the only digit before // a decimal point or exponent. - if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL; + if ('0' <= c0_ && c0_ <= '9') return ReportUnexpectedCharacter(); } else { int i = 0; int digits = 0; - if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL; + if (c0_ < '1' || c0_ > '9') return ReportUnexpectedCharacter(); do { i = i * 10 + c0_ - '0'; digits++; @@ -323,12 +241,13 @@ Token::Value JsonParser::ScanJsonNumber() { } while (c0_ >= '0' && c0_ <= '9'); if (c0_ != '.' && c0_ != 'e' && c0_ != 'E' && digits < 10) { number_ = (negative ? -i : i); - return Token::NUMBER; + SkipWhitespace(); + return isolate()->factory()->NewNumber(number_); } } if (c0_ == '.') { Advance(); - if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; + if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); do { Advance(); } while (c0_ >= '0' && c0_ <= '9'); @@ -336,37 +255,38 @@ Token::Value JsonParser::ScanJsonNumber() { if (AsciiAlphaToLower(c0_) == 'e') { Advance(); if (c0_ == '-' || c0_ == '+') Advance(); - if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL; + if (c0_ < '0' || c0_ > '9') return ReportUnexpectedCharacter(); do { Advance(); } while (c0_ >= '0' && c0_ <= '9'); } + int length = position_ - beg_pos_; if (is_sequential_ascii_) { - Vector chars(seq_source_->GetChars() + next_.beg_pos, - position_ - next_.beg_pos); + Vector chars(seq_source_->GetChars() + beg_pos_, length); number_ = StringToDouble(isolate()->unicode_cache(), chars, NO_FLAGS, // Hex, octal or trailing junk. OS::nan_value()); } else { - Vector buffer = Vector::New(position_ - next_.beg_pos); - String::WriteToFlat(*source_, buffer.start(), next_.beg_pos, position_); + Vector buffer = Vector::New(length); + String::WriteToFlat(*source_, buffer.start(), beg_pos_, position_); Vector result = Vector(reinterpret_cast(buffer.start()), - position_ - next_.beg_pos); + length); number_ = StringToDouble(isolate()->unicode_cache(), result, NO_FLAGS, // Hex, octal or trailing junk. 0.0); buffer.Dispose(); } - return Token::NUMBER; + SkipWhitespace(); + return isolate()->factory()->NewNumber(number_); } -Token::Value JsonParser::SlowScanJsonString() { +Handle JsonParser::SlowScanJsonString() { // The currently scanned ascii characters. Handle ascii(isolate()->factory()->NewSubString(source_, - next_.beg_pos + 1, + beg_pos_, position_)); Handle two_byte = isolate()->factory()->NewRawTwoByteString(kInitialSpecialStringSize, @@ -392,7 +312,7 @@ Token::Value JsonParser::SlowScanJsonString() { } // Check for control character (0x00-0x1f) or unterminated string (<0). - if (c0_ < 0x20) return Token::ILLEGAL; + if (c0_ < 0x20) return ReportUnexpectedCharacter(); if (c0_ != '\\') { seq_two_byte->SeqTwoByteStringSet(count++, c0_); Advance(); @@ -425,7 +345,7 @@ Token::Value JsonParser::SlowScanJsonString() { Advance(); int digit = HexValue(c0_); if (digit < 0) { - return Token::ILLEGAL; + return ReportUnexpectedCharacter(); } value = value * 16 + digit; } @@ -433,14 +353,14 @@ Token::Value JsonParser::SlowScanJsonString() { break; } default: - return Token::ILLEGAL; + return ReportUnexpectedCharacter(); } Advance(); } } // Advance past the last '"'. ASSERT_EQ('"', c0_); - Advance(); + AdvanceSkipWhitespace(); // Shrink the the string to our length. if (isolate()->heap()->InNewSpace(*seq_two_byte)) { @@ -456,21 +376,19 @@ Token::Value JsonParser::SlowScanJsonString() { seq_two_byte->set_length(count); isolate()->heap()->CreateFillerObjectAt(start_filler_object, delta); } - string_val_ = isolate()->factory()->NewConsString(ascii, seq_two_byte); - return Token::STRING; + return isolate()->factory()->NewConsString(ascii, seq_two_byte); } -Token::Value JsonParser::ScanJsonString() { +template +Handle JsonParser::ScanJsonString() { ASSERT_EQ('"', c0_); - // Set string_val to null. If string_val is not set we assume an - // ascii string begining at next_.beg_pos + 1 to next_.end_pos - 1. - string_val_ = Handle::null(); Advance(); + beg_pos_ = position_; // Fast case for ascii only without escape characters. while (c0_ != '"') { // Check for control character (0x00-0x1f) or unterminated string (<0). - if (c0_ < 0x20) return Token::ILLEGAL; + if (c0_ < 0x20) return ReportUnexpectedCharacter(); if (c0_ != '\\' && c0_ < kMaxAsciiCharCode) { Advance(); } else { @@ -478,36 +396,16 @@ Token::Value JsonParser::ScanJsonString() { } } ASSERT_EQ('"', c0_); + end_pos_ = position_; // Advance past the last '"'. - Advance(); - return Token::STRING; -} - -Handle JsonParser::GetString() { - return GetString(false); -} - -Handle JsonParser::GetSymbol() { - Handle result = GetString(true); - if (result->IsSymbol()) return result; - return isolate()->factory()->LookupSymbol(result); -} - -Handle JsonParser::GetString(bool hint_symbol) { - // We have a non ascii string, return that. - if (!string_val_.is_null()) return string_val_; - - if (is_sequential_ascii_ && hint_symbol) { - Handle seq = Handle::cast(source_); - // The current token includes the '"' in both ends. - int length = current_.end_pos - current_.beg_pos - 2; + AdvanceSkipWhitespace(); + if (is_sequential_ascii_ && is_symbol) { return isolate()->factory()->LookupAsciiSymbol(seq_source_, - current_.beg_pos + 1, - length); + beg_pos_, + end_pos_ - beg_pos_); + } else { + return isolate()->factory()->NewSubString(source_, beg_pos_, end_pos_); } - // The current token includes the '"' in both ends. - return isolate()->factory()->NewSubString( - source_, current_.beg_pos + 1, current_.end_pos - 1); } } } // namespace v8::internal diff --git a/src/json-parser.h b/src/json-parser.h index 5903d21..07f00f5 100644 --- a/src/json-parser.h +++ b/src/json-parser.h @@ -47,41 +47,62 @@ class JsonParser BASE_EMBEDDED { Handle ParseJson(Handle source); inline void Advance() { - if (position_ >= source_length_) { - position_++; + position_++; + if (position_ > source_length_) { c0_ = kEndOfString; } else if (is_sequential_ascii_) { - position_++; c0_ = seq_source_->SeqAsciiStringGet(position_); } else { - position_++; c0_ = source_->Get(position_); } } - inline Isolate* isolate() { return isolate_; } + // The JSON lexical grammar is specified in the ECMAScript 5 standard, + // section 15.12.1.1. The only allowed whitespace characters between tokens + // are tab, carriage-return, newline and space. + + inline void AdvanceSkipWhitespace() { + do { + Advance(); + } while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' '); + } - // Get the string for the current string token. - Handle GetString(bool hint_symbol); - Handle GetString(); - Handle GetSymbol(); + inline void SkipWhitespace() { + while (c0_ == '\t' || c0_ == '\r' || c0_ == '\n' || c0_ == ' ') { + Advance(); + } + } - // Scan a single JSON token. The JSON lexical grammar is specified in the - // ECMAScript 5 standard, section 15.12.1.1. - // Recognizes all of the single-character tokens directly, or calls a function - // to scan a number, string or identifier literal. - // The only allowed whitespace characters between tokens are tab, - // carriage-return, newline and space. - void ScanJson(); + inline uc32 AdvanceGetChar() { + Advance(); + return c0_; + } + + // Checks that current charater is c. + // If so, then consume c and skip whitespace. + inline bool MatchSkipWhiteSpace(uc32 c) { + if (c0_ == c) { + AdvanceSkipWhitespace(); + return true; + } + return false; + } // A JSON string (production JSONString) is subset of valid JavaScript string // literals. The string must only be double-quoted (not single-quoted), and // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and // four-digit hex escapes (uXXXX). Any other use of backslashes is invalid. - Token::Value ScanJsonString(); + Handle ParseJsonString() { + return ScanJsonString(); + } + Handle ParseJsonSymbol() { + return ScanJsonString(); + } + template + Handle ScanJsonString(); // Slow version for unicode support, uses the first ascii_count characters, // as first part of a ConsString - Token::Value SlowScanJsonString(); + Handle SlowScanJsonString(); // A JSON number (production JSONNumber) is a subset of the valid JavaScript // decimal number literals. @@ -89,12 +110,7 @@ class JsonParser BASE_EMBEDDED { // digit before and after a decimal point, may not have prefixed zeros (unless // the integer part is zero), and may include an exponent part (e.g., "e-10"). // Hexadecimal and octal numbers are not allowed. - Token::Value ScanJsonNumber(); - - // Used to recognizes one of the literals "true", "false", or "null". These - // are the only valid JSON identifiers (productions JSONBooleanLiteral, - // JSONNullLiteral). - Token::Value ScanJsonIdentifier(const char* text, Token::Value token); + Handle ParseJsonNumber(); // Parse a single JSON value from input (grammar production JSONValue). // A JSON value is either a (double-quoted) string literal, a number literal, @@ -119,21 +135,11 @@ class JsonParser BASE_EMBEDDED { // Mark that a parsing error has happened at the current token, and // return a null handle. Primarily for readability. - Handle ReportUnexpectedToken() { return Handle::null(); } - - // Peek at the next token. - Token::Value Peek() { return next_.token; } - // Scan the next token and return the token scanned on the last call. - Token::Value Next(); - - struct TokenInfo { - TokenInfo() : token(Token::ILLEGAL), - beg_pos(0), - end_pos(0) { } - Token::Value token; - int beg_pos; - int end_pos; - }; + inline Handle ReportUnexpectedCharacter() { + return Handle::null(); + } + + inline Isolate* isolate() { return isolate_; } static const int kInitialSpecialStringSize = 1024; @@ -144,15 +150,14 @@ class JsonParser BASE_EMBEDDED { Handle seq_source_; bool is_sequential_ascii_; - // Current and next token - TokenInfo current_; - TokenInfo next_; + // begin and end position of scanned string or number + int beg_pos_; + int end_pos_; + Isolate* isolate_; uc32 c0_; int position_; - - Handle string_val_; double number_; }; -- 2.7.4