From 87e77085a6380efd34831601956f559ddfb58cd1 Mon Sep 17 00:00:00 2001 From: "dcarney@chromium.org" Date: Wed, 12 Mar 2014 14:03:25 +0000 Subject: [PATCH] Move most scanner buffer accesses into scanner. R=marja@chromium.org BUG= Review URL: https://codereview.chromium.org/197103002 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@19849 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/parser.cc | 30 ++----------- src/parser.h | 10 ----- src/preparser.cc | 33 +++++--------- src/preparser.h | 13 ++---- src/scanner.cc | 88 +++++++++++++++++++++++++++---------- src/scanner.h | 70 +++++++++++++++++++---------- test/cctest/test-parsing.cc | 4 +- 7 files changed, 134 insertions(+), 114 deletions(-) diff --git a/src/parser.cc b/src/parser.cc index de46bb8d1..c401257c4 100644 --- a/src/parser.cc +++ b/src/parser.cc @@ -212,13 +212,7 @@ Handle Parser::LookupSymbol(int symbol_id) { // count. if (symbol_id < 0 || (pre_parse_data_ && symbol_id >= pre_parse_data_->symbol_count())) { - if (scanner()->is_literal_ascii()) { - return isolate()->factory()->InternalizeOneByteString( - Vector::cast(scanner()->literal_ascii_string())); - } else { - return isolate()->factory()->InternalizeTwoByteString( - scanner()->literal_utf16_string()); - } + return scanner()->AllocateInternalizedString(isolate_); } return LookupCachedSymbol(symbol_id); } @@ -233,13 +227,7 @@ Handle Parser::LookupCachedSymbol(int symbol_id) { } Handle result = symbol_cache_.at(symbol_id); if (result.is_null()) { - if (scanner()->is_literal_ascii()) { - result = isolate()->factory()->InternalizeOneByteString( - Vector::cast(scanner()->literal_ascii_string())); - } else { - result = isolate()->factory()->InternalizeTwoByteString( - scanner()->literal_utf16_string()); - } + result = scanner()->AllocateInternalizedString(isolate_); symbol_cache_.at(symbol_id) = result; return result; } @@ -514,13 +502,7 @@ Handle ParserTraits::GetSymbol(Scanner* scanner) { Handle ParserTraits::NextLiteralString(Scanner* scanner, PretenureFlag tenured) { - if (scanner->is_next_literal_ascii()) { - return parser_->isolate_->factory()->NewStringFromAscii( - scanner->next_literal_ascii_string(), tenured); - } else { - return parser_->isolate_->factory()->NewStringFromTwoByte( - scanner->next_literal_utf16_string(), tenured); - } + return scanner->AllocateNextLiteralString(parser_->isolate(), tenured); } @@ -544,11 +526,7 @@ Literal* ParserTraits::ExpressionFromLiteral( case Token::FALSE_LITERAL: return factory->NewLiteral(isolate_factory->false_value(), pos); case Token::NUMBER: { - ASSERT(scanner->is_literal_ascii()); - double value = StringToDouble(parser_->isolate()->unicode_cache(), - scanner->literal_ascii_string(), - ALLOW_HEX | ALLOW_OCTAL | - ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); + double value = scanner->DoubleValue(); return factory->NewNumberLiteral(value, pos); } default: diff --git a/src/parser.h b/src/parser.h index 799ec5860..4bdf65957 100644 --- a/src/parser.h +++ b/src/parser.h @@ -704,16 +704,6 @@ class Parser : public ParserBase { bool CheckInOrOf(bool accept_OF, ForEachStatement::VisitMode* visit_mode); - Handle LiteralString(PretenureFlag tenured) { - if (scanner()->is_literal_ascii()) { - return isolate_->factory()->NewStringFromAscii( - scanner()->literal_ascii_string(), tenured); - } else { - return isolate_->factory()->NewStringFromTwoByte( - scanner()->literal_utf16_string(), tenured); - } - } - // Get odd-ball literals. Literal* GetLiteralUndefined(int position); diff --git a/src/preparser.cc b/src/preparser.cc index a111fe7cd..8865c505b 100644 --- a/src/preparser.cc +++ b/src/preparser.cc @@ -91,16 +91,11 @@ PreParserIdentifier PreParserTraits::GetSymbol(Scanner* scanner) { } else if (scanner->current_token() == Token::YIELD) { return PreParserIdentifier::Yield(); } - if (scanner->is_literal_ascii()) { - // Detect strict-mode poison words. - if (scanner->literal_length() == 4 && - !strncmp(scanner->literal_ascii_string().start(), "eval", 4)) { - return PreParserIdentifier::Eval(); - } - if (scanner->literal_length() == 9 && - !strncmp(scanner->literal_ascii_string().start(), "arguments", 9)) { - return PreParserIdentifier::Arguments(); - } + if (scanner->UnescapedLiteralMatches("eval", 4)) { + return PreParserIdentifier::Eval(); + } + if (scanner->UnescapedLiteralMatches("arguments", 9)) { + return PreParserIdentifier::Arguments(); } return PreParserIdentifier::Default(); } @@ -108,14 +103,8 @@ PreParserIdentifier PreParserTraits::GetSymbol(Scanner* scanner) { PreParserExpression PreParserTraits::ExpressionFromString( int pos, Scanner* scanner, PreParserFactory* factory) { - const int kUseStrictLength = 10; - const char* kUseStrictChars = "use strict"; pre_parser_->LogSymbol(); - if (scanner->is_literal_ascii() && - scanner->literal_length() == kUseStrictLength && - !scanner->literal_contains_escapes() && - !strncmp(scanner->literal_ascii_string().start(), kUseStrictChars, - kUseStrictLength)) { + if (scanner->UnescapedLiteralMatches("use strict", 10)) { return PreParserExpression::UseStrictStringLiteral(); } return PreParserExpression::StringLiteral(); @@ -1176,9 +1165,9 @@ PreParser::Expression PreParser::ParseFunctionLiteral( } int prev_value; - if (scanner()->is_literal_ascii()) { - prev_value = - duplicate_finder.AddAsciiSymbol(scanner()->literal_ascii_string(), 1); + if (scanner()->is_literal_one_byte()) { + prev_value = duplicate_finder.AddAsciiSymbol( + scanner()->literal_one_byte_string(), 1); } else { prev_value = duplicate_finder.AddUtf16Symbol(scanner()->literal_utf16_string(), 1); @@ -1285,8 +1274,8 @@ PreParser::Expression PreParser::ParseV8Intrinsic(bool* ok) { void PreParser::LogSymbol() { int identifier_pos = position(); - if (scanner()->is_literal_ascii()) { - log_->LogAsciiSymbol(identifier_pos, scanner()->literal_ascii_string()); + if (scanner()->is_literal_one_byte()) { + log_->LogAsciiSymbol(identifier_pos, scanner()->literal_one_byte_string()); } else { log_->LogUtf16Symbol(identifier_pos, scanner()->literal_utf16_string()); } diff --git a/src/preparser.h b/src/preparser.h index 59a9a6108..55685a124 100644 --- a/src/preparser.h +++ b/src/preparser.h @@ -1114,12 +1114,7 @@ ParserBase::ParseIdentifierNameOrGetOrSet(bool* is_get, bool* ok) { typename Traits::Type::Identifier result = ParseIdentifierName(ok); if (!*ok) return Traits::EmptyIdentifier(); - if (scanner()->is_literal_ascii() && - scanner()->literal_length() == 3) { - const char* token = scanner()->literal_ascii_string().start(); - *is_get = strncmp(token, "get", 3) == 0; - *is_set = !*is_get && strncmp(token, "set", 3) == 0; - } + scanner()->IsGetOrSet(is_get, is_set); return result; } @@ -1517,9 +1512,9 @@ void ParserBase::ObjectLiteralChecker::CheckProperty( bool* ok) { int old; if (property == Token::NUMBER) { - old = finder_.AddNumber(scanner()->literal_ascii_string(), type); - } else if (scanner()->is_literal_ascii()) { - old = finder_.AddAsciiSymbol(scanner()->literal_ascii_string(), type); + old = finder_.AddNumber(scanner()->literal_one_byte_string(), type); + } else if (scanner()->is_literal_one_byte()) { + old = finder_.AddAsciiSymbol(scanner()->literal_one_byte_string(), type); } else { old = finder_.AddUtf16Symbol(scanner()->literal_utf16_string(), type); } diff --git a/src/scanner.cc b/src/scanner.cc index 27768547f..45e0cae87 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -35,6 +35,7 @@ #include "char-predicates-inl.h" #include "conversions-inl.h" #include "list-inl.h" +#include "v8.h" namespace v8 { namespace internal { @@ -982,8 +983,8 @@ Token::Value Scanner::ScanIdentifierOrKeyword() { literal.Complete(); - if (next_.literal_chars->is_ascii()) { - Vector chars = next_.literal_chars->ascii_literal(); + if (next_.literal_chars->is_one_byte()) { + Vector chars = next_.literal_chars->one_byte_literal(); return KeywordOrIdentifierToken(chars.start(), chars.length(), harmony_scoping_, @@ -1114,6 +1115,49 @@ bool Scanner::ScanRegExpFlags() { } +Handle Scanner::AllocateLiteralString(Isolate* isolate, + PretenureFlag tenured) { + if (is_literal_one_byte()) { + return isolate->factory()->NewStringFromOneByte( + Vector::cast(literal_one_byte_string()), tenured); + } else { + return isolate->factory()->NewStringFromTwoByte( + literal_utf16_string(), tenured); + } +} + + +Handle Scanner::AllocateNextLiteralString(Isolate* isolate, + PretenureFlag tenured) { + if (is_next_literal_one_byte()) { + return isolate->factory()->NewStringFromOneByte( + Vector::cast(next_literal_one_byte_string()), tenured); + } else { + return isolate->factory()->NewStringFromTwoByte( + next_literal_utf16_string(), tenured); + } +} + + +Handle Scanner::AllocateInternalizedString(Isolate* isolate) { + if (is_literal_one_byte()) { + return isolate->factory()->InternalizeOneByteString( + Vector::cast(literal_one_byte_string())); + } else { + return isolate->factory()->InternalizeTwoByteString( + literal_utf16_string()); + } +} + + +double Scanner::DoubleValue() { + ASSERT(is_literal_one_byte()); + return StringToDouble( + unicode_cache_, literal_one_byte_string(), + ALLOW_HEX | ALLOW_OCTAL | ALLOW_IMPLICIT_OCTAL | ALLOW_BINARY); +} + + int DuplicateFinder::AddAsciiSymbol(Vector key, int value) { return AddSymbol(Vector::cast(key), true, value); } @@ -1125,10 +1169,10 @@ int DuplicateFinder::AddUtf16Symbol(Vector key, int value) { int DuplicateFinder::AddSymbol(Vector key, - bool is_ascii, + bool is_one_byte, int value) { - uint32_t hash = Hash(key, is_ascii); - byte* encoding = BackupKey(key, is_ascii); + uint32_t hash = Hash(key, is_one_byte); + byte* encoding = BackupKey(key, is_one_byte); HashMap::Entry* entry = map_.Lookup(encoding, hash, true); int old_value = static_cast(reinterpret_cast(entry->value)); entry->value = @@ -1189,11 +1233,11 @@ bool DuplicateFinder::IsNumberCanonical(Vector number) { } -uint32_t DuplicateFinder::Hash(Vector key, bool is_ascii) { +uint32_t DuplicateFinder::Hash(Vector key, bool is_one_byte) { // Primitive hash function, almost identical to the one used // for strings (except that it's seeded by the length and ASCII-ness). int length = key.length(); - uint32_t hash = (length << 1) | (is_ascii ? 1 : 0) ; + uint32_t hash = (length << 1) | (is_one_byte ? 1 : 0) ; for (int i = 0; i < length; i++) { uint32_t c = key[i]; hash = (hash + c) * 1025; @@ -1211,39 +1255,39 @@ bool DuplicateFinder::Match(void* first, void* second) { // was ASCII. byte* s1 = reinterpret_cast(first); byte* s2 = reinterpret_cast(second); - uint32_t length_ascii_field = 0; + uint32_t length_one_byte_field = 0; byte c1; do { c1 = *s1; if (c1 != *s2) return false; - length_ascii_field = (length_ascii_field << 7) | (c1 & 0x7f); + length_one_byte_field = (length_one_byte_field << 7) | (c1 & 0x7f); s1++; s2++; } while ((c1 & 0x80) != 0); - int length = static_cast(length_ascii_field >> 1); + int length = static_cast(length_one_byte_field >> 1); return memcmp(s1, s2, length) == 0; } byte* DuplicateFinder::BackupKey(Vector bytes, - bool is_ascii) { - uint32_t ascii_length = (bytes.length() << 1) | (is_ascii ? 1 : 0); + bool is_one_byte) { + uint32_t one_byte_length = (bytes.length() << 1) | (is_one_byte ? 1 : 0); backing_store_.StartSequence(); - // Emit ascii_length as base-128 encoded number, with the 7th bit set + // Emit one_byte_length as base-128 encoded number, with the 7th bit set // on the byte of every heptet except the last, least significant, one. - if (ascii_length >= (1 << 7)) { - if (ascii_length >= (1 << 14)) { - if (ascii_length >= (1 << 21)) { - if (ascii_length >= (1 << 28)) { - backing_store_.Add(static_cast((ascii_length >> 28) | 0x80)); + if (one_byte_length >= (1 << 7)) { + if (one_byte_length >= (1 << 14)) { + if (one_byte_length >= (1 << 21)) { + if (one_byte_length >= (1 << 28)) { + backing_store_.Add(static_cast((one_byte_length >> 28) | 0x80)); } - backing_store_.Add(static_cast((ascii_length >> 21) | 0x80u)); + backing_store_.Add(static_cast((one_byte_length >> 21) | 0x80u)); } - backing_store_.Add(static_cast((ascii_length >> 14) | 0x80u)); + backing_store_.Add(static_cast((one_byte_length >> 14) | 0x80u)); } - backing_store_.Add(static_cast((ascii_length >> 7) | 0x80u)); + backing_store_.Add(static_cast((one_byte_length >> 7) | 0x80u)); } - backing_store_.Add(static_cast(ascii_length & 0x7f)); + backing_store_.Add(static_cast(one_byte_length & 0x7f)); backing_store_.AddBlock(bytes); return backing_store_.EndSequence().start(); diff --git a/src/scanner.h b/src/scanner.h index b08692b3a..b6a5603ae 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -176,19 +176,19 @@ class DuplicateFinder { int AddNumber(Vector key, int value); private: - int AddSymbol(Vector key, bool is_ascii, int value); + int AddSymbol(Vector key, bool is_one_byte, int value); // Backs up the key and its length in the backing store. // The backup is stored with a base 127 encoding of the // length (plus a bit saying whether the string is ASCII), // followed by the bytes of the key. - byte* BackupKey(Vector key, bool is_ascii); + byte* BackupKey(Vector key, bool is_one_byte); // Compare two encoded keys (both pointing into the backing store) // for having the same base-127 encoded lengths and ASCII-ness, // and then having the same 'length' bytes following. static bool Match(void* first, void* second); // Creates a hash from a sequence of bytes. - static uint32_t Hash(Vector key, bool is_ascii); + static uint32_t Hash(Vector key, bool is_one_byte); // Checks whether a string containing a JS number is its canonical // form. static bool IsNumberCanonical(Vector key); @@ -211,7 +211,7 @@ class DuplicateFinder { class LiteralBuffer { public: - LiteralBuffer() : is_ascii_(true), position_(0), backing_store_() { } + LiteralBuffer() : is_one_byte_(true), position_(0), backing_store_() { } ~LiteralBuffer() { if (backing_store_.length() > 0) { @@ -221,7 +221,7 @@ class LiteralBuffer { INLINE(void AddChar(uint32_t code_unit)) { if (position_ >= backing_store_.length()) ExpandBuffer(); - if (is_ascii_) { + if (is_one_byte_) { if (code_unit <= unibrow::Latin1::kMaxChar) { backing_store_[position_] = static_cast(code_unit); position_ += kOneByteSize; @@ -234,35 +234,35 @@ class LiteralBuffer { position_ += kUC16Size; } - bool is_ascii() { return is_ascii_; } + bool is_one_byte() { return is_one_byte_; } bool is_contextual_keyword(Vector keyword) { - return is_ascii() && keyword.length() == position_ && + return is_one_byte() && keyword.length() == position_ && (memcmp(keyword.start(), backing_store_.start(), position_) == 0); } Vector utf16_literal() { - ASSERT(!is_ascii_); + ASSERT(!is_one_byte_); ASSERT((position_ & 0x1) == 0); return Vector( reinterpret_cast(backing_store_.start()), position_ >> 1); } - Vector ascii_literal() { - ASSERT(is_ascii_); + Vector one_byte_literal() { + ASSERT(is_one_byte_); return Vector( reinterpret_cast(backing_store_.start()), position_); } int length() { - return is_ascii_ ? position_ : (position_ >> 1); + return is_one_byte_ ? position_ : (position_ >> 1); } void Reset() { position_ = 0; - is_ascii_ = true; + is_one_byte_ = true; } private: @@ -284,7 +284,7 @@ class LiteralBuffer { } void ConvertToUtf16() { - ASSERT(is_ascii_); + ASSERT(is_one_byte_); Vector new_store; int new_content_size = position_ * kUC16Size; if (new_content_size >= backing_store_.length()) { @@ -304,10 +304,10 @@ class LiteralBuffer { backing_store_ = new_store; } position_ = new_content_size; - is_ascii_ = false; + is_one_byte_ = false; } - bool is_ascii_; + bool is_one_byte_; int position_; Vector backing_store_; @@ -376,17 +376,17 @@ class Scanner { // numbers. // These functions only give the correct result if the literal // was scanned between calls to StartLiteral() and TerminateLiteral(). - Vector literal_ascii_string() { + Vector literal_one_byte_string() { ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->ascii_literal(); + return current_.literal_chars->one_byte_literal(); } Vector literal_utf16_string() { ASSERT_NOT_NULL(current_.literal_chars); return current_.literal_chars->utf16_literal(); } - bool is_literal_ascii() { + bool is_literal_one_byte() { ASSERT_NOT_NULL(current_.literal_chars); - return current_.literal_chars->is_ascii(); + return current_.literal_chars->is_one_byte(); } bool is_literal_contextual_keyword(Vector keyword) { ASSERT_NOT_NULL(current_.literal_chars); @@ -416,17 +416,17 @@ class Scanner { // Returns the literal string for the next token (the token that // would be returned if Next() were called). - Vector next_literal_ascii_string() { + Vector next_literal_one_byte_string() { ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->ascii_literal(); + return next_.literal_chars->one_byte_literal(); } Vector next_literal_utf16_string() { ASSERT_NOT_NULL(next_.literal_chars); return next_.literal_chars->utf16_literal(); } - bool is_next_literal_ascii() { + bool is_next_literal_one_byte() { ASSERT_NOT_NULL(next_.literal_chars); - return next_.literal_chars->is_ascii(); + return next_.literal_chars->is_one_byte(); } bool is_next_contextual_keyword(Vector keyword) { ASSERT_NOT_NULL(next_.literal_chars); @@ -437,6 +437,30 @@ class Scanner { return next_.literal_chars->length(); } + Handle AllocateLiteralString(Isolate* isolate, PretenureFlag tenured); + Handle AllocateNextLiteralString(Isolate* isolate, + PretenureFlag tenured); + Handle AllocateInternalizedString(Isolate* isolate); + + double DoubleValue(); + bool UnescapedLiteralMatches(const char* data, int length) { + if (is_literal_one_byte() && + literal_length() == length && + !literal_contains_escapes()) { + return !strncmp(literal_one_byte_string().start(), data, length); + } + return false; + } + void IsGetOrSet(bool* is_get, bool* is_set) { + if (is_literal_one_byte() && + literal_length() == 3 && + !literal_contains_escapes()) { + const char* token = literal_one_byte_string().start(); + *is_get = strncmp(token, "get", 3) == 0; + *is_set = !*is_get && strncmp(token, "set", 3) == 0; + } + } + UnicodeCache* unicode_cache() { return unicode_cache_; } static const int kCharacterLookaheadBufferSize = 1; diff --git a/test/cctest/test-parsing.cc b/test/cctest/test-parsing.cc index bd88b2f6c..3a20595db 100644 --- a/test/cctest/test-parsing.cc +++ b/test/cctest/test-parsing.cc @@ -802,8 +802,8 @@ void TestScanRegExp(const char* re_source, const char* expected) { CHECK(start == i::Token::DIV || start == i::Token::ASSIGN_DIV); CHECK(scanner.ScanRegExpPattern(start == i::Token::ASSIGN_DIV)); scanner.Next(); // Current token is now the regexp literal. - CHECK(scanner.is_literal_ascii()); - i::Vector actual = scanner.literal_ascii_string(); + CHECK(scanner.is_literal_one_byte()); + i::Vector actual = scanner.literal_one_byte_string(); for (int i = 0; i < actual.length(); i++) { CHECK_NE('\0', expected[i]); CHECK_EQ(expected[i], actual[i]); -- 2.34.1