int flags,
ParserRecorder* recorder) {
Isolate* isolate = Isolate::Current();
- JavaScriptScanner scanner(isolate->unicode_cache());
+ Scanner scanner(isolate->unicode_cache());
scanner.SetHarmonyScoping((flags & kHarmonyScoping) != 0);
scanner.Initialize(source);
intptr_t stack_limit = isolate->stack_guard()->real_climit();
void ReportMessage(const char* message, Vector<const char*> args);
bool inside_with() const { return top_scope_->inside_with(); }
- JavaScriptScanner& scanner() { return scanner_; }
+ Scanner& scanner() { return scanner_; }
Mode mode() const { return mode_; }
ScriptDataImpl* pre_data() const { return pre_data_; }
ZoneList<Handle<String> > symbol_cache_;
Handle<Script> script_;
- JavaScriptScanner scanner_;
+ Scanner scanner_;
Scope* top_scope_;
internal::InputStreamUTF16Buffer buffer(input);
uintptr_t stack_limit = reinterpret_cast<uintptr_t>(&buffer) - max_stack;
internal::UnicodeCache unicode_cache;
- internal::JavaScriptScanner scanner(&unicode_cache);
+ internal::Scanner scanner(&unicode_cache);
scanner.Initialize(&buffer);
internal::CompleteParserRecorder recorder;
preparser::PreParser::PreParseResult result =
if (token == i::Token::ILLEGAL && stack_overflow_) {
return;
}
- i::JavaScriptScanner::Location source_location = scanner_->location();
+ i::Scanner::Location source_location = scanner_->location();
// Four of the tokens are treated specially
switch (token) {
Expect(i::Token::THROW, CHECK_OK);
if (scanner_->HasAnyLineTerminatorBeforeNext()) {
- i::JavaScriptScanner::Location pos = scanner_->location();
+ i::Scanner::Location pos = scanner_->location();
ReportMessageAt(pos, "newline_after_throw", NULL);
*ok = false;
return Statement::Default();
// success (even if parsing failed, the pre-parse data successfully
// captured the syntax error), and false if a stack-overflow happened
// during parsing.
- static PreParseResult PreParseProgram(i::JavaScriptScanner* scanner,
+ static PreParseResult PreParseProgram(i::Scanner* scanner,
i::ParserRecorder* log,
int flags,
uintptr_t stack_limit) {
};
// Private constructor only used in PreParseProgram.
- PreParser(i::JavaScriptScanner* scanner,
+ PreParser(i::Scanner* scanner,
i::ParserRecorder* log,
uintptr_t stack_limit,
bool allow_lazy,
Identifier identifier,
bool* ok);
- i::JavaScriptScanner* scanner_;
+ i::Scanner* scanner_;
i::ParserRecorder* log_;
Scope* scope_;
uintptr_t stack_limit_;
namespace internal {
// ----------------------------------------------------------------------------
-// Scanner::LiteralScope
-
-Scanner::LiteralScope::LiteralScope(Scanner* self)
- : scanner_(self), complete_(false) {
- self->StartLiteral();
-}
-
+// Scanner
-Scanner::LiteralScope::~LiteralScope() {
- if (!complete_) scanner_->DropLiteral();
-}
+Scanner::Scanner(UnicodeCache* unicode_cache)
+ : unicode_cache_(unicode_cache),
+ octal_pos_(Location::invalid()),  // same value clear_octal_position() sets
+ harmony_scoping_(false) { }  // stays off until SetHarmonyScoping(true)
-void Scanner::LiteralScope::Complete() {
- scanner_->TerminateLiteral();
- complete_ = true;
+void Scanner::Initialize(UC16CharacterStream* source) {
+ source_ = source;  // input stream that will be scanned
+ // Need to capture identifiers in order to recognize "get" and "set"
+ // in object literals.
+ Init();
+ // Treat the start of input like the start of a line (so an HTML comment
+ // end is allowed), skip leading whitespace, and scan the first token.
+ has_line_terminator_before_next_ = true;
+ SkipWhiteSpace();
+ Scan();
}
-// ----------------------------------------------------------------------------
-// Scanner
-
-Scanner::Scanner(UnicodeCache* unicode_cache)
- : unicode_cache_(unicode_cache) { }
-
uc32 Scanner::ScanHexNumber(int expected_length) {
ASSERT(expected_length <= 4); // prevent overflow
}
-
-// ----------------------------------------------------------------------------
-// JavaScriptScanner
-
-JavaScriptScanner::JavaScriptScanner(UnicodeCache* scanner_contants)
- : Scanner(scanner_contants),
- octal_pos_(Location::invalid()),
- harmony_scoping_(false) { }
-
-
-void JavaScriptScanner::Initialize(UC16CharacterStream* source) {
- source_ = source;
- // Need to capture identifiers in order to recognize "get" and "set"
- // in object literals.
- Init();
- // Skip initial whitespace allowing HTML comment ends just like
- // after a newline and scan first token.
- has_line_terminator_before_next_ = true;
- SkipWhiteSpace();
- Scan();
-}
-
-
// Ensure that tokens can be stored in a byte.
STATIC_ASSERT(Token::NUM_TOKENS <= 0x100);
};
-Token::Value JavaScriptScanner::Next() {
+Token::Value Scanner::Next() {
current_ = next_;
has_line_terminator_before_next_ = false;
has_multiline_comment_before_next_ = false;
}
-bool JavaScriptScanner::SkipWhiteSpace() {
+bool Scanner::SkipWhiteSpace() {
int start_position = source_pos();
while (true) {
}
-Token::Value JavaScriptScanner::SkipSingleLineComment() {
+Token::Value Scanner::SkipSingleLineComment() {
Advance();
// The line terminator at the end of the line is not considered
}
-Token::Value JavaScriptScanner::SkipMultiLineComment() {
+Token::Value Scanner::SkipMultiLineComment() {
ASSERT(c0_ == '*');
Advance();
}
-Token::Value JavaScriptScanner::ScanHtmlComment() {
+Token::Value Scanner::ScanHtmlComment() {
// Check for <!-- comments.
ASSERT(c0_ == '!');
Advance();
}
-void JavaScriptScanner::Scan() {
+void Scanner::Scan() {
next_.literal_chars = NULL;
Token::Value token;
do {
}
-void JavaScriptScanner::SeekForward(int pos) {
+void Scanner::SeekForward(int pos) {
// After this call, we will have the token at the given position as
// the "next" token. The "current" token will be invalid.
if (pos == next_.location.beg_pos) return;
}
-void JavaScriptScanner::ScanEscape() {
+void Scanner::ScanEscape() {
uc32 c = c0_;
Advance();
// Octal escapes of the forms '\0xx' and '\xxx' are not a part of
// ECMA-262. Other JS VMs support them.
-uc32 JavaScriptScanner::ScanOctalEscape(uc32 c, int length) {
+uc32 Scanner::ScanOctalEscape(uc32 c, int length) {
uc32 x = c - '0';
int i = 0;
for (; i < length; i++) {
}
-Token::Value JavaScriptScanner::ScanString() {
+Token::Value Scanner::ScanString() {
uc32 quote = c0_;
Advance(); // consume quote
}
-void JavaScriptScanner::ScanDecimalDigits() {
+void Scanner::ScanDecimalDigits() {
while (IsDecimalDigit(c0_))
AddLiteralCharAdvance();
}
-Token::Value JavaScriptScanner::ScanNumber(bool seen_period) {
+Token::Value Scanner::ScanNumber(bool seen_period) {
ASSERT(IsDecimalDigit(c0_)); // the first digit of the number or the fraction
enum { DECIMAL, HEX, OCTAL } kind = DECIMAL;
}
-uc32 JavaScriptScanner::ScanIdentifierUnicodeEscape() {
+uc32 Scanner::ScanIdentifierUnicodeEscape() {
Advance();
if (c0_ != 'u') return -1;
Advance();
}
-Token::Value JavaScriptScanner::ScanIdentifierOrKeyword() {
+Token::Value Scanner::ScanIdentifierOrKeyword() {
ASSERT(unicode_cache_->IsIdentifierStart(c0_));
LiteralScope literal(this);
// Scan identifier start character.
}
-Token::Value JavaScriptScanner::ScanIdentifierSuffix(LiteralScope* literal) {
+Token::Value Scanner::ScanIdentifierSuffix(LiteralScope* literal) {
// Scan the rest of the identifier characters.
while (unicode_cache_->IsIdentifierPart(c0_)) {
if (c0_ == '\\') {
}
-bool JavaScriptScanner::ScanRegExpPattern(bool seen_equal) {
+bool Scanner::ScanRegExpPattern(bool seen_equal) {
// Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
bool in_character_class = false;
}
-bool JavaScriptScanner::ScanLiteralUnicodeEscape() {
+bool Scanner::ScanLiteralUnicodeEscape() {
ASSERT(c0_ == '\\');
uc32 chars_read[6] = {'\\', 'u', 0, 0, 0, 0};
Advance();
}
-bool JavaScriptScanner::ScanRegExpFlags() {
+bool Scanner::ScanRegExpFlags() {
// Scan regular expression flags.
LiteralScope literal(this);
while (unicode_cache_->IsIdentifierPart(c0_)) {
// ----------------------------------------------------------------------------
-// Scanner base-class.
+// JavaScript Scanner.
-// Generic functionality used by both JSON and JavaScript scanners.
class Scanner {
public:
- // -1 is outside of the range of any real source code.
- static const int kNoOctalLocation = -1;
-
- typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
-
+ // Scoped helper for literal recording. Automatically drops the literal
+ // if aborting the scanning before it's complete.
class LiteralScope {
public:
- explicit LiteralScope(Scanner* self);
- ~LiteralScope();
- void Complete();
+ explicit LiteralScope(Scanner* self)
+ : scanner_(self), complete_(false) {
+ scanner_->StartLiteral();
+ }
+ ~LiteralScope() {
+ if (!complete_) scanner_->DropLiteral();
+ }
+ void Complete() {
+ scanner_->TerminateLiteral();
+ complete_ = true;
+ }
private:
Scanner* scanner_;
bool complete_;
};
- explicit Scanner(UnicodeCache* scanner_contants);
-
- // Returns the current token again.
- Token::Value current_token() { return current_.token; }
-
- // One token look-ahead (past the token returned by Next()).
- Token::Value peek() const { return next_.token; }
-
+ // Representation of an interval of source positions.
struct Location {
Location(int b, int e) : beg_pos(b), end_pos(e) { }
Location() : beg_pos(0), end_pos(0) { }
int end_pos;
};
+ // -1 is outside of the range of any real source code.
+ static const int kNoOctalLocation = -1;
+
+ typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
+
+ // Creates a scanner that keeps |unicode_cache| for classifying identifier
+ // characters. No input is attached yet; call Initialize() before scanning.
+ explicit Scanner(UnicodeCache* unicode_cache);
+
+ void Initialize(UC16CharacterStream* source);
+
+ // Returns the next token and advances input.
+ Token::Value Next();
+ // Returns the current token again.
+ Token::Value current_token() { return current_.token; }
// Returns the location information for the current token
- // (the token returned by Next()).
+ // (the token last returned by Next()).
Location location() const { return current_.location; }
- Location peek_location() const { return next_.location; }
-
// Returns the literal string, if any, for the current token (the
- // token returned by Next()). The string is 0-terminated and in
- // UTF-8 format; they may contain 0-characters. Literal strings are
- // collected for identifiers, strings, and numbers.
+ // token last returned by Next()). The string is 0-terminated.
+ // Literal strings are collected for identifiers, strings, and
+ // numbers.
// These functions only give the correct result if the literal
// was scanned between calls to StartLiteral() and TerminateLiteral().
- bool is_literal_ascii() {
- ASSERT_NOT_NULL(current_.literal_chars);
- return current_.literal_chars->is_ascii();
- }
Vector<const char> literal_ascii_string() {
ASSERT_NOT_NULL(current_.literal_chars);
return current_.literal_chars->ascii_literal();
ASSERT_NOT_NULL(current_.literal_chars);
return current_.literal_chars->uc16_literal();
}
+ bool is_literal_ascii() {
+ ASSERT_NOT_NULL(current_.literal_chars);
+ return current_.literal_chars->is_ascii();
+ }
int literal_length() const {
ASSERT_NOT_NULL(current_.literal_chars);
return current_.literal_chars->length();
return current_.literal_chars->length() != source_length;
}
+ // Similar functions for the upcoming token.
+
+ // One token look-ahead (past the token returned by Next()).
+ Token::Value peek() const { return next_.token; }
+
+ Location peek_location() const { return next_.location; }
+
// Returns the literal string for the next token (the token that
// would be returned if Next() were called).
- bool is_next_literal_ascii() {
- ASSERT_NOT_NULL(next_.literal_chars);
- return next_.literal_chars->is_ascii();
- }
Vector<const char> next_literal_ascii_string() {
ASSERT_NOT_NULL(next_.literal_chars);
return next_.literal_chars->ascii_literal();
ASSERT_NOT_NULL(next_.literal_chars);
return next_.literal_chars->uc16_literal();
}
+ bool is_next_literal_ascii() {
+ ASSERT_NOT_NULL(next_.literal_chars);
+ return next_.literal_chars->is_ascii();
+ }
int next_literal_length() const {
ASSERT_NOT_NULL(next_.literal_chars);
return next_.literal_chars->length();
static const int kCharacterLookaheadBufferSize = 1;
- protected:
+ // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
+ uc32 ScanOctalEscape(uc32 c, int length);
+
+ // Returns the location of the last seen octal literal.
+ Location octal_position() const { return octal_pos_; }
+ void clear_octal_position() { octal_pos_ = Location::invalid(); }
+
+ // Seek forward to the given position. This operation does not
+ // work in general, for instance when there are pushed back
+ // characters, but works for seeking forward until simple delimiter
+ // tokens, which is what it is used for.
+ void SeekForward(int pos);
+
+ bool HarmonyScoping() const {  // current value of the harmony-scoping flag
+ return harmony_scoping_;
+ }
+ void SetHarmonyScoping(bool block_scoping) {  // tests enable it to get Token::LET
+ harmony_scoping_ = block_scoping;
+ }
+
+
+ // Returns true if there was a line terminator before the peek'ed token,
+ // possibly inside a multi-line comment.
+ bool HasAnyLineTerminatorBeforeNext() const {
+ return has_line_terminator_before_next_ ||
+ has_multiline_comment_before_next_;
+ }
+
+ // Scans the input as a regular expression pattern, previous
+ // character(s) must be /(=). Returns true if a pattern is scanned.
+ bool ScanRegExpPattern(bool seen_equal);
+ // Returns true if regexp flags are scanned (always since flags can
+ // be empty).
+ bool ScanRegExpFlags();
+
+ // Tells whether the buffer contains an identifier (no escapes).
+ // Used for checking if a property name is an identifier.
+ static bool IsIdentifier(unibrow::CharacterStream* buffer);
+
+ private:
// The current and look-ahead token.
struct TokenDesc {
Token::Value token;
uc32 ScanHexNumber(int expected_length);
- // Return the current source position.
- int source_pos() {
- return source_->pos() - kCharacterLookaheadBufferSize;
- }
-
- UnicodeCache* unicode_cache_;
-
- // Buffers collecting literal strings, numbers, etc.
- LiteralBuffer literal_buffer1_;
- LiteralBuffer literal_buffer2_;
-
- TokenDesc current_; // desc for current token (as returned by Next())
- TokenDesc next_; // desc for next token (one token look-ahead)
-
- // Input stream. Must be initialized to an UC16CharacterStream.
- UC16CharacterStream* source_;
-
- // One Unicode character look-ahead; c0_ < 0 at the end of the input.
- uc32 c0_;
-};
-
-// ----------------------------------------------------------------------------
-// JavaScriptScanner - base logic for JavaScript scanning.
-
-class JavaScriptScanner : public Scanner {
- public:
- // A LiteralScope that disables recording of some types of JavaScript
- // literals. If the scanner is configured to not record the specific
- // type of literal, the scope will not call StartLiteral.
- class LiteralScope {
- public:
- explicit LiteralScope(JavaScriptScanner* self)
- : scanner_(self), complete_(false) {
- scanner_->StartLiteral();
- }
- ~LiteralScope() {
- if (!complete_) scanner_->DropLiteral();
- }
- void Complete() {
- scanner_->TerminateLiteral();
- complete_ = true;
- }
-
- private:
- JavaScriptScanner* scanner_;
- bool complete_;
- };
-
- explicit JavaScriptScanner(UnicodeCache* scanner_contants);
-
- void Initialize(UC16CharacterStream* source);
-
- // Returns the next token.
- Token::Value Next();
-
- // Returns true if there was a line terminator before the peek'ed token,
- // possibly inside a multi-line comment.
- bool HasAnyLineTerminatorBeforeNext() const {
- return has_line_terminator_before_next_ ||
- has_multiline_comment_before_next_;
- }
-
- // Scans the input as a regular expression pattern, previous
- // character(s) must be /(=). Returns true if a pattern is scanned.
- bool ScanRegExpPattern(bool seen_equal);
- // Returns true if regexp flags are scanned (always since flags can
- // be empty).
- bool ScanRegExpFlags();
-
- // Tells whether the buffer contains an identifier (no escapes).
- // Used for checking if a property name is an identifier.
- static bool IsIdentifier(unibrow::CharacterStream* buffer);
-
- // Scans octal escape sequence. Also accepts "\0" decimal escape sequence.
- uc32 ScanOctalEscape(uc32 c, int length);
-
- // Returns the location of the last seen octal literal
- Location octal_position() const { return octal_pos_; }
- void clear_octal_position() { octal_pos_ = Location::invalid(); }
-
- // Seek forward to the given position. This operation does not
- // work in general, for instance when there are pushed back
- // characters, but works for seeking forward until simple delimiter
- // tokens, which is what it is used for.
- void SeekForward(int pos);
-
- bool HarmonyScoping() const {
- return harmony_scoping_;
- }
- void SetHarmonyScoping(bool block_scoping) {
- harmony_scoping_ = block_scoping;
- }
-
+ // Scans a single JavaScript token.
+ void Scan();
- protected:
bool SkipWhiteSpace();
Token::Value SkipSingleLineComment();
Token::Value SkipMultiLineComment();
-
- // Scans a single JavaScript token.
- void Scan();
+ // Scans a possible HTML comment -- begins with '<!'.
+ Token::Value ScanHtmlComment();
void ScanDecimalDigits();
Token::Value ScanNumber(bool seen_period);
void ScanEscape();
Token::Value ScanString();
- // Scans a possible HTML comment -- begins with '<!'.
- Token::Value ScanHtmlComment();
-
// Decodes a unicode escape-sequence which is part of an identifier.
// If the escape sequence cannot be decoded the result is kBadChar.
uc32 ScanIdentifierUnicodeEscape();
// flags.
bool ScanLiteralUnicodeEscape();
+ // Return the current source position.
+ int source_pos() {
+ return source_->pos() - kCharacterLookaheadBufferSize;
+ }
+
+ UnicodeCache* unicode_cache_;
+
+ // Buffers collecting literal strings, numbers, etc.
+ LiteralBuffer literal_buffer1_;
+ LiteralBuffer literal_buffer2_;
+
+ TokenDesc current_; // desc for current token (as returned by Next())
+ TokenDesc next_; // desc for next token (one token look-ahead)
+
+ // Input stream. Must be initialized to a UC16CharacterStream.
+ UC16CharacterStream* source_;
+
+
// Start position of the octal literal last scanned.
Location octal_pos_;
+ // One Unicode character look-ahead; c0_ < 0 at the end of the input.
+ uc32 c0_;
+
// Whether there is a line terminator whitespace character after
// the current token, and before the next. Does not count newlines
// inside multiline comments.
CHECK(static_cast<int>(sizeof(buffer)) >= length);
{
i::Utf8ToUC16CharacterStream stream(keyword, length);
- i::JavaScriptScanner scanner(&unicode_cache);
+ i::Scanner scanner(&unicode_cache);
// The scanner should parse 'let' as Token::LET for this test.
scanner.SetHarmonyScoping(true);
scanner.Initialize(&stream);
// Removing characters will make keyword matching fail.
{
i::Utf8ToUC16CharacterStream stream(keyword, length - 1);
- i::JavaScriptScanner scanner(&unicode_cache);
+ i::Scanner scanner(&unicode_cache);
scanner.Initialize(&stream);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
memmove(buffer, keyword, length);
buffer[length] = chars_to_append[j];
i::Utf8ToUC16CharacterStream stream(buffer, length + 1);
- i::JavaScriptScanner scanner(&unicode_cache);
+ i::Scanner scanner(&unicode_cache);
scanner.Initialize(&stream);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
memmove(buffer, keyword, length);
buffer[length - 1] = '_';
i::Utf8ToUC16CharacterStream stream(buffer, length);
- i::JavaScriptScanner scanner(&unicode_cache);
+ i::Scanner scanner(&unicode_cache);
scanner.Initialize(&stream);
CHECK_EQ(i::Token::IDENTIFIER, scanner.Next());
CHECK_EQ(i::Token::EOS, scanner.Next());
reinterpret_cast<const i::byte*>(program),
static_cast<unsigned>(strlen(program)));
i::CompleteParserRecorder log;
- i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+ i::Scanner scanner(i::Isolate::Current()->unicode_cache());
scanner.Initialize(&stream);
int flags = i::kAllowLazy | i::kAllowNativesSyntax;
reinterpret_cast<const i::byte*>(program),
static_cast<unsigned>(strlen(program)));
i::CompleteParserRecorder log;
- i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+ i::Scanner scanner(i::Isolate::Current()->unicode_cache());
scanner.Initialize(&stream);
// Flags don't allow natives syntax.
reinterpret_cast<const i::byte*>(*program),
static_cast<unsigned>(kProgramSize));
i::CompleteParserRecorder log;
- i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+ i::Scanner scanner(i::Isolate::Current()->unicode_cache());
scanner.Initialize(&stream);
i::Token::Value* expected_tokens,
int skip_pos = 0, // Zero means not skipping.
int skip_to = 0) {
- i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+ i::Scanner scanner(i::Isolate::Current()->unicode_cache());
scanner.Initialize(stream);
int i = 0;
i::Utf8ToUC16CharacterStream stream(
reinterpret_cast<const i::byte*>(re_source),
static_cast<unsigned>(strlen(re_source)));
- i::JavaScriptScanner scanner(i::Isolate::Current()->unicode_cache());
+ i::Scanner scanner(i::Isolate::Current()->unicode_cache());
scanner.Initialize(&stream);
i::Token::Value start = scanner.peek();