From 23d0abb040d838adb0e9c505fafa7c2087152454 Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Tue, 24 Aug 2010 12:29:50 +0000 Subject: [PATCH] Ensure that scanner state is correctly reset when an error is encountered. Add a scope object to ensure that leaving literal scanning prematurely cleans up after itself. Also reset the literal buffer if a scanner is reinitialized with new source code. Review URL: http://codereview.chromium.org/3137037 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5327 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/scanner.cc | 93 +++++++++++++++++++++++++++++++++++++--------------------- src/scanner.h | 21 +++++++++++++ src/utils.h | 21 +++++++++++++ 3 files changed, 102 insertions(+), 33 deletions(-) diff --git a/src/scanner.cc b/src/scanner.cc index 7911be0..1a8d721 100755 --- a/src/scanner.cc +++ b/src/scanner.cc @@ -319,6 +319,26 @@ void KeywordMatcher::Step(uc32 input) { } + +// ---------------------------------------------------------------------------- +// Scanner::LiteralScope + +Scanner::LiteralScope::LiteralScope(Scanner* self) + : scanner_(self), complete_(false) { + self->StartLiteral(); +} + + +Scanner::LiteralScope::~LiteralScope() { + if (!complete_) scanner_->DropLiteral(); +} + + +void Scanner::LiteralScope::Complete() { + scanner_->TerminateLiteral(); + complete_ = true; +} + // ---------------------------------------------------------------------------- // Scanner @@ -386,8 +406,10 @@ void Scanner::Init(Handle source, // Set c0_ (one character ahead) ASSERT(kCharacterLookaheadBufferSize == 1); Advance(); - // Initialise current_ to not refer to a literal. + // Initialize current_ to not refer to a literal. current_.literal_chars = Vector(); + // Reset literal buffer. + literal_buffer_.Reset(); // Skip initial whitespace allowing HTML comment ends just like // after a newline and scan first token. 
@@ -423,11 +445,17 @@ void Scanner::AddChar(uc32 c) { literal_buffer_.AddChar(c); } + void Scanner::TerminateLiteral() { next_.literal_chars = literal_buffer_.EndLiteral(); } +void Scanner::DropLiteral() { + literal_buffer_.DropLiteral(); +} + + void Scanner::AddCharAdvance() { AddChar(c0_); Advance(); @@ -636,7 +664,7 @@ void Scanner::ScanJson() { Token::Value Scanner::ScanJsonString() { ASSERT_EQ('"', c0_); Advance(); - StartLiteral(); + LiteralScope literal(this); while (c0_ != '"' && c0_ > 0) { // Check for control character (0x00-0x1f) or unterminated string (<0). if (c0_ < 0x20) return Token::ILLEGAL; @@ -670,7 +698,9 @@ Token::Value Scanner::ScanJsonString() { for (int i = 0; i < 4; i++) { Advance(); int digit = HexValue(c0_); - if (digit < 0) return Token::ILLEGAL; + if (digit < 0) { + return Token::ILLEGAL; + } value = value * 16 + digit; } AddChar(value); @@ -685,14 +715,14 @@ Token::Value Scanner::ScanJsonString() { if (c0_ != '"') { return Token::ILLEGAL; } - TerminateLiteral(); + literal.Complete(); Advance(); return Token::STRING; } Token::Value Scanner::ScanJsonNumber() { - StartLiteral(); + LiteralScope literal(this); if (c0_ == '-') AddCharAdvance(); if (c0_ == '0') { AddCharAdvance(); @@ -720,21 +750,21 @@ Token::Value Scanner::ScanJsonNumber() { AddCharAdvance(); } while (c0_ >= '0' && c0_ <= '9'); } - TerminateLiteral(); + literal.Complete(); return Token::NUMBER; } Token::Value Scanner::ScanJsonIdentifier(const char* text, Token::Value token) { - StartLiteral(); + LiteralScope literal(this); while (*text != '\0') { if (c0_ != *text) return Token::ILLEGAL; Advance(); text++; } if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL; - TerminateLiteral(); + literal.Complete(); return token; } @@ -1077,7 +1107,7 @@ Token::Value Scanner::ScanString() { uc32 quote = c0_; Advance(); // consume quote - StartLiteral(); + LiteralScope literal(this); while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) { uc32 c = c0_; Advance(); @@ -1088,10 
+1118,8 @@ Token::Value Scanner::ScanString() { AddChar(c); } } - if (c0_ != quote) { - return Token::ILLEGAL; - } - TerminateLiteral(); + if (c0_ != quote) return Token::ILLEGAL; + literal.Complete(); Advance(); // consume quote return Token::STRING; @@ -1127,7 +1155,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) { enum { DECIMAL, HEX, OCTAL } kind = DECIMAL; - StartLiteral(); + LiteralScope literal(this); if (seen_period) { // we have already seen a decimal point of the float AddChar('.'); @@ -1143,12 +1171,13 @@ Token::Value Scanner::ScanNumber(bool seen_period) { // hex number kind = HEX; AddCharAdvance(); - if (!IsHexDigit(c0_)) + if (!IsHexDigit(c0_)) { // we must have at least one hex digit after 'x'/'X' return Token::ILLEGAL; - while (IsHexDigit(c0_)) + } + while (IsHexDigit(c0_)) { AddCharAdvance(); - + } } else if ('0' <= c0_ && c0_ <= '7') { // (possible) octal number kind = OCTAL; @@ -1181,12 +1210,12 @@ Token::Value Scanner::ScanNumber(bool seen_period) { AddCharAdvance(); if (c0_ == '+' || c0_ == '-') AddCharAdvance(); - if (!IsDecimalDigit(c0_)) + if (!IsDecimalDigit(c0_)) { // we must have at least one decimal digit after 'e'/'E' return Token::ILLEGAL; + } ScanDecimalDigits(); } - TerminateLiteral(); // The source character immediately following a numeric literal must // not be an identifier start or a decimal digit; see ECMA-262 @@ -1195,6 +1224,8 @@ Token::Value Scanner::ScanNumber(bool seen_period) { if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_)) return Token::ILLEGAL; + literal.Complete(); + return Token::NUMBER; } @@ -1214,7 +1245,7 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() { Token::Value Scanner::ScanIdentifier() { ASSERT(kIsIdentifierStart.get(c0_)); - StartLiteral(); + LiteralScope literal(this); KeywordMatcher keyword_match; // Scan identifier start character. 
@@ -1244,7 +1275,7 @@ Token::Value Scanner::ScanIdentifier() { Advance(); } } - TerminateLiteral(); + literal.Complete(); return keyword_match.token(); } @@ -1274,36 +1305,32 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) { // Scan regular expression body: According to ECMA-262, 3rd, 7.8.5, // the scanner should pass uninterpreted bodies to the RegExp // constructor. - StartLiteral(); + LiteralScope literal(this); if (seen_equal) AddChar('='); while (c0_ != '/' || in_character_class) { - if (kIsLineTerminator.get(c0_) || c0_ < 0) - return false; + if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; if (c0_ == '\\') { // escaped character AddCharAdvance(); - if (kIsLineTerminator.get(c0_) || c0_ < 0) - return false; + if (kIsLineTerminator.get(c0_) || c0_ < 0) return false; AddCharAdvance(); } else { // unescaped character - if (c0_ == '[') - in_character_class = true; - if (c0_ == ']') - in_character_class = false; + if (c0_ == '[') in_character_class = true; + if (c0_ == ']') in_character_class = false; AddCharAdvance(); } } Advance(); // consume '/' - TerminateLiteral(); + literal.Complete(); return true; } bool Scanner::ScanRegExpFlags() { // Scan regular expression flags. - StartLiteral(); + LiteralScope literal(this); while (kIsIdentifierPart.get(c0_)) { if (c0_ == '\\') { uc32 c = ScanIdentifierUnicodeEscape(); @@ -1316,7 +1343,7 @@ bool Scanner::ScanRegExpFlags() { } AddCharAdvance(); } - TerminateLiteral(); + literal.Complete(); next_.location.end_pos = source_pos() - 1; return true; diff --git a/src/scanner.h b/src/scanner.h index 1c256f1..53aaad6 100644 --- a/src/scanner.h +++ b/src/scanner.h @@ -58,6 +58,14 @@ class UTF8Buffer { return Vector(sequence.start(), sequence.length()); } + void DropLiteral() { + buffer_.DropSequence(); + } + + void Reset() { + buffer_.Reset(); + } + // The end marker added after a parsed literal. 
// Using zero allows the usage of strlen and similar functions on // identifiers and numbers (but not strings, since they may contain zero @@ -262,6 +270,17 @@ class Scanner { public: typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder; + class LiteralScope { + public: + explicit LiteralScope(Scanner* self); + ~LiteralScope(); + void Complete(); + + private: + Scanner* scanner_; + bool complete_; + }; + // Construction explicit Scanner(ParserMode parse_mode); @@ -382,6 +401,8 @@ class Scanner { inline void AddChar(uc32 ch); inline void AddCharAdvance(); inline void TerminateLiteral(); + // Stops scanning of a literal, e.g., due to an encountered error. + inline void DropLiteral(); // Low-level scanning support. void Advance() { c0_ = source_->Advance(); } diff --git a/src/utils.h b/src/utils.h index 8eddb13..90fa74f 100644 --- a/src/utils.h +++ b/src/utils.h @@ -560,6 +560,15 @@ class Collector { return Vector(new_store, total_length); } + // Resets the collector to be empty. + virtual void Reset() { + for (int i = chunks_.length() - 1; i >= 0; i--) { + chunks_.at(i).Dispose(); + } + chunks_.Rewind(0); + index_ = 0; + } + protected: static const int kMinCapacity = 16; List > chunks_; @@ -632,6 +641,18 @@ class SequenceCollector : public Collector { this->index_ - sequence_start); } + // Drops the currently added sequence, and all collected elements in it. + void DropSequence() { + ASSERT(sequence_start_ != kNoSequence); + this->index_ = sequence_start_; + sequence_start_ = kNoSequence; + } + + virtual void Reset() { + sequence_start_ = kNoSequence; + this->Collector::Reset(); + } + private: static const int kNoSequence = -1; int sequence_start_; -- 2.7.4