From 44149ae751b9cd9addbfbc4920d1ca3ad9448b37 Mon Sep 17 00:00:00 2001 From: arv Date: Thu, 4 Dec 2014 11:24:18 -0800 Subject: [PATCH] Make template scan related function take a template parameter This is for performance. Having to do the test in every Advance was too expensive. BUG=438991, v8:3230 LOG=N R=dslomov@chromium.org, marja Review URL: https://codereview.chromium.org/766193003 Cr-Commit-Position: refs/heads/master@{#25667} --- src/scanner.cc | 56 ++++++++++++++++++++++++++++++++------------------------ src/scanner.h | 31 ++++++++++--------------------- 2 files changed, 42 insertions(+), 45 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 561c30b..96459f3 100644 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -34,7 +34,6 @@ Handle<String> LiteralBuffer::Internalize(Isolate* isolate) const { Scanner::Scanner(UnicodeCache* unicode_cache) : unicode_cache_(unicode_cache), - capturing_raw_literal_(false), octal_pos_(Location::invalid()), harmony_scoping_(false), harmony_modules_(false), @@ -57,6 +56,7 @@ void Scanner::Initialize(Utf16CharacterStream* source) { } +template <bool capture_raw> uc32 Scanner::ScanHexNumber(int expected_length) { DCHECK(expected_length <= 4); // prevent overflow @@ -67,13 +67,14 @@ uc32 Scanner::ScanHexNumber(int expected_length) { return -1; } x = x * 16 + d; - Advance(); + Advance<capture_raw>(); } return x; } +template <bool capture_raw> uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { uc32 x = 0; int d = HexValue(c0_); @@ -83,7 +84,7 @@ uc32 Scanner::ScanUnlimitedLengthHexNumber(int max_value) { while (d >= 0) { x = x * 16 + d; if (x > max_value) return -1; - Advance(); + Advance<capture_raw>(); d = HexValue(c0_); } return x; @@ -696,16 +697,17 @@ void Scanner::SeekForward(int pos) { } +template <bool capture_raw> bool Scanner::ScanEscape() { uc32 c = c0_; - Advance(); + Advance<capture_raw>(); // Skip escaped newlines. if (c0_ >= 0 && unicode_cache_->IsLineTerminator(c)) { // Allow CR+LF newlines in multiline string literals. 
- if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance(); + if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance<capture_raw>(); // Allow LF+CR newlines in multiline string literals. - if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance(); + if (IsLineFeed(c) && IsCarriageReturn(c0_)) Advance<capture_raw>(); return true; } @@ -719,13 +721,13 @@ bool Scanner::ScanEscape() { case 'r' : c = '\r'; break; case 't' : c = '\t'; break; case 'u' : { - c = ScanUnicodeEscape(); + c = ScanUnicodeEscape<capture_raw>(); if (c < 0) return false; break; } case 'v' : c = '\v'; break; case 'x' : { - c = ScanHexNumber(2); + c = ScanHexNumber<capture_raw>(2); if (c < 0) return false; break; } @@ -736,7 +738,9 @@ bool Scanner::ScanEscape() { case '4' : // fall through case '5' : // fall through case '6' : // fall through - case '7' : c = ScanOctalEscape(c, 2); break; + case '7': + c = ScanOctalEscape<capture_raw>(c, 2); + break; } // According to ECMA-262, section 7.8.4, characters not covered by the @@ -749,6 +753,7 @@ bool Scanner::ScanEscape() { // Octal escapes of the forms '\0xx' and '\xxx' are not a part of // ECMA-262. Other JS VMs support them. +template <bool capture_raw> uc32 Scanner::ScanOctalEscape(uc32 c, int length) { uc32 x = c - '0'; int i = 0; @@ -758,7 +763,7 @@ uc32 Scanner::ScanOctalEscape(uc32 c, int length) { int nx = x * 8 + d; if (nx >= 256) break; x = nx; - Advance(); + Advance<capture_raw>(); } // Anything except '\0' is an octal escape sequence, illegal in strict mode. // Remember the position of octal escape sequences so that an error @@ -782,7 +787,7 @@ Token::Value Scanner::ScanString() { uc32 c = c0_; Advance(); if (c == '\\') { - if (c0_ < 0 || !ScanEscape()) return Token::ILLEGAL; + if (c0_ < 0 || !ScanEscape<false>()) return Token::ILLEGAL; } else { AddLiteralChar(c); } @@ -810,17 +815,19 @@ Token::Value Scanner::ScanTemplateSpan() { // followed by an Expression. 
Token::Value result = Token::TEMPLATE_SPAN; - LiteralScope literal(this, true); + LiteralScope literal(this); + StartRawLiteral(); + const bool capture_raw = true; while (true) { uc32 c = c0_; - Advance(); + Advance<capture_raw>(); if (c == '`') { result = Token::TEMPLATE_TAIL; ReduceRawLiteralLength(1); break; } else if (c == '$' && c0_ == '{') { - Advance(); // Consume '{' + Advance<capture_raw>(); // Consume '{' ReduceRawLiteralLength(2); break; } else if (c == '\\') { @@ -828,20 +835,20 @@ Token::Value Scanner::ScanTemplateSpan() { // The TV of LineContinuation :: \ LineTerminatorSequence is the empty // code unit sequence. uc32 lastChar = c0_; - Advance(); + Advance<capture_raw>(); if (lastChar == '\r') { ReduceRawLiteralLength(1); // Remove \r if (c0_ == '\n') { - Advance(); // Adds \n + Advance<capture_raw>(); // Adds \n } else { AddRawLiteralChar('\n'); } } } else if (c0_ == '0') { - Advance(); + Advance<capture_raw>(); AddLiteralChar('0'); } else { - ScanEscape(); + ScanEscape<capture_raw>(); } } else if (c < 0) { // Unterminated template literal @@ -854,7 +861,7 @@ Token::Value Scanner::ScanTemplateSpan() { if (c == '\r') { ReduceRawLiteralLength(1); // Remove \r if (c0_ == '\n') { - Advance(); // Adds \n + Advance<capture_raw>(); // Adds \n } else { AddRawLiteralChar('\n'); } @@ -1002,27 +1009,28 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() { Advance(); if (c0_ != 'u') return -1; Advance(); - return ScanUnicodeEscape(); + return ScanUnicodeEscape<false>(); } +template <bool capture_raw> uc32 Scanner::ScanUnicodeEscape() { // Accept both \uxxxx and \u{xxxxxx} (if harmony unicode escapes are // allowed). In the latter case, the number of hex digits between { } is // arbitrary. \ and u have already been read. 
if (c0_ == '{' && HarmonyUnicode()) { - Advance(); - uc32 cp = ScanUnlimitedLengthHexNumber(0x10ffff); + Advance<capture_raw>(); + uc32 cp = ScanUnlimitedLengthHexNumber<capture_raw>(0x10ffff); if (cp < 0) { return -1; } if (c0_ != '}') { return -1; } - Advance(); + Advance<capture_raw>(); return cp; } - return ScanHexNumber(4); + return ScanHexNumber<capture_raw>(4); } diff --git a/src/scanner.h b/src/scanner.h index 87ff20b..d96ed57 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -322,16 +322,13 @@ class Scanner { // if aborting the scanning before it's complete. class LiteralScope { public: - explicit LiteralScope(Scanner* self, bool capture_raw = false) - : scanner_(self), complete_(false) { + explicit LiteralScope(Scanner* self) : scanner_(self), complete_(false) { scanner_->StartLiteral(); - if (capture_raw) scanner_->StartRawLiteral(); } ~LiteralScope() { if (!complete_) scanner_->DropLiteral(); } void Complete() { - scanner_->TerminateLiteral(); complete_ = true; } @@ -506,6 +503,7 @@ class Scanner { static const int kCharacterLookaheadBufferSize = 1; // Scans octal escape sequence. Also accepts "\0" decimal escape sequence. + template <bool capture_raw> uc32 ScanOctalEscape(uc32 c, int length); // Call this after setting source_ to the input. @@ -529,7 +527,6 @@ class Scanner { inline void StartRawLiteral() { raw_literal_buffer_.Reset(); next_.raw_literal_chars = &raw_literal_buffer_; - capturing_raw_literal_ = true; } INLINE(void AddLiteralChar(uc32 c)) { @@ -538,26 +535,20 @@ class Scanner { } INLINE(void AddRawLiteralChar(uc32 c)) { - DCHECK(capturing_raw_literal_); DCHECK_NOT_NULL(next_.raw_literal_chars); next_.raw_literal_chars->AddChar(c); } INLINE(void ReduceRawLiteralLength(int delta)) { - DCHECK(capturing_raw_literal_); DCHECK_NOT_NULL(next_.raw_literal_chars); next_.raw_literal_chars->ReduceLength(delta); } - // Complete scanning of a literal. 
- inline void TerminateLiteral() { capturing_raw_literal_ = false; } - // Stops scanning of a literal and drop the collected characters, // e.g., due to an encountered error. inline void DropLiteral() { next_.literal_chars = NULL; next_.raw_literal_chars = NULL; - capturing_raw_literal_ = false; } inline void AddLiteralCharAdvance() { @@ -566,8 +557,9 @@ class Scanner { } // Low-level scanning support. + template <bool capture_raw = false> void Advance() { - if (capturing_raw_literal_) { + if (capture_raw) { AddRawLiteralChar(c0_); } c0_ = source_->Advance(); @@ -585,10 +577,8 @@ class Scanner { if (ch > static_cast<uc32>(unibrow::Utf16::kMaxNonSurrogateCharCode)) { source_->PushBack(unibrow::Utf16::TrailSurrogate(c0_)); source_->PushBack(unibrow::Utf16::LeadSurrogate(c0_)); - if (capturing_raw_literal_) ReduceRawLiteralLength(2); } else { source_->PushBack(c0_); - if (capturing_raw_literal_) ReduceRawLiteralLength(1); } c0_ = ch; } @@ -613,8 +603,8 @@ class Scanner { // Literal strings are collected for identifiers, strings, numbers as well // as for template literals. For template literals we also collect the raw // form. - // These functions only give the correct result if the literal - // was scanned between calls to StartLiteral() and TerminateLiteral(). + // These functions only give the correct result if the literal was scanned + // when a LiteralScope object is alive. Vector<const uint8_t> literal_one_byte_string() { DCHECK_NOT_NULL(current_.literal_chars); return current_.literal_chars->one_byte_literal(); @@ -658,11 +648,12 @@ class Scanner { return current_.raw_literal_chars->is_one_byte(); } - + template <bool capture_raw> uc32 ScanHexNumber(int expected_length); // Scan a number of any length but not bigger than max_value. For example, the // number can be 000000001, so it's very long in characters but its value is // small. + template <bool capture_raw> uc32 ScanUnlimitedLengthHexNumber(int max_value); // Scans a single JavaScript token. 
@@ -686,11 +677,13 @@ class Scanner { // Scans an escape-sequence which is part of a string and adds the // decoded character to the current literal. Returns true if a pattern // is scanned. + template <bool capture_raw> bool ScanEscape(); // Decodes a Unicode escape-sequence which is part of an identifier. // If the escape sequence cannot be decoded the result is kBadChar. uc32 ScanIdentifierUnicodeEscape(); // Helper for the above functions. + template <bool capture_raw> uc32 ScanUnicodeEscape(); Token::Value ScanTemplateSpan(); @@ -713,10 +706,6 @@ class Scanner { // Buffer to store raw string values LiteralBuffer raw_literal_buffer_; - // We only need to capture the raw literal when we are scanning template - // literal spans. - bool capturing_raw_literal_; - TokenDesc current_; // desc for current token (as returned by Next()) TokenDesc next_; // desc for next token (one token look-ahead) -- 2.7.4