// ----------------------------------------------------------------------------
+// Keyword Matcher
+KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
+ { "break", KEYWORD_PREFIX, Token::BREAK },
+ { NULL, C, Token::ILLEGAL },
+ { NULL, D, Token::ILLEGAL },
+ { "else", KEYWORD_PREFIX, Token::ELSE },
+ { NULL, F, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { NULL, I, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { NULL, N, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { "return", KEYWORD_PREFIX, Token::RETURN },
+ { "switch", KEYWORD_PREFIX, Token::SWITCH },
+ { NULL, T, Token::ILLEGAL },
+ { NULL, UNMATCHABLE, Token::ILLEGAL },
+ { NULL, V, Token::ILLEGAL },
+ { NULL, W, Token::ILLEGAL }
+};
+
+
+void KeywordMatcher::Step(uc32 input) {
+ switch (state_) {
+ case INITIAL: {
+ // matching the first character is the only state with significant fanout.
+ // Match only lower-case letters in range 'b'..'w'.
+ unsigned int offset = input - kFirstCharRangeMin;
+ if (offset < kFirstCharRangeLength) {
+ state_ = first_states_[offset].state;
+ if (state_ == KEYWORD_PREFIX) {
+ keyword_ = first_states_[offset].keyword;
+ counter_ = 1;
+ keyword_token_ = first_states_[offset].token;
+ }
+ return;
+ }
+ break;
+ }
+ case KEYWORD_PREFIX:
+ if (keyword_[counter_] == input) {
+ ASSERT_NE(input, '\0');
+ counter_++;
+ if (keyword_[counter_] == '\0') {
+ state_ = KEYWORD_MATCHED;
+ token_ = keyword_token_;
+ }
+ return;
+ }
+ break;
+ case KEYWORD_MATCHED:
+ token_ = Token::IDENTIFIER;
+ break;
+ case C:
+ if (MatchState(input, 'a', CA)) return;
+ if (MatchState(input, 'o', CO)) return;
+ break;
+ case CA:
+ if (MatchKeywordStart(input, "case", 2, Token::CASE)) return;
+ if (MatchKeywordStart(input, "catch", 2, Token::CATCH)) return;
+ break;
+ case CO:
+ if (MatchState(input, 'n', CON)) return;
+ break;
+ case CON:
+ if (MatchKeywordStart(input, "const", 3, Token::CONST)) return;
+ if (MatchKeywordStart(input, "continue", 3, Token::CONTINUE)) return;
+ break;
+ case D:
+ if (MatchState(input, 'e', DE)) return;
+ if (MatchKeyword(input, 'o', KEYWORD_MATCHED, Token::DO)) return;
+ break;
+ case DE:
+ if (MatchKeywordStart(input, "debugger", 2, Token::DEBUGGER)) return;
+ if (MatchKeywordStart(input, "default", 2, Token::DEFAULT)) return;
+ if (MatchKeywordStart(input, "delete", 2, Token::DELETE)) return;
+ break;
+ case F:
+ if (MatchKeywordStart(input, "false", 1, Token::FALSE_LITERAL)) return;
+ if (MatchKeywordStart(input, "finally", 1, Token::FINALLY)) return;
+ if (MatchKeywordStart(input, "for", 1, Token::FOR)) return;
+ if (MatchKeywordStart(input, "function", 1, Token::FUNCTION)) return;
+ break;
+ case I:
+ if (MatchKeyword(input, 'f', KEYWORD_MATCHED, Token::IF)) return;
+ if (MatchKeyword(input, 'n', IN, Token::IN)) return;
+ break;
+ case IN:
+ token_ = Token::IDENTIFIER;
+ if (MatchKeywordStart(input, "instanceof", 2, Token::INSTANCEOF)) {
+ return;
+ }
+ break;
+ case N:
+ if (MatchKeywordStart(input, "native", 1, Token::NATIVE)) return;
+ if (MatchKeywordStart(input, "new", 1, Token::NEW)) return;
+ if (MatchKeywordStart(input, "null", 1, Token::NULL_LITERAL)) return;
+ break;
+ case T:
+ if (MatchState(input, 'h', TH)) return;
+ if (MatchState(input, 'r', TR)) return;
+ if (MatchKeywordStart(input, "typeof", 1, Token::TYPEOF)) return;
+ break;
+ case TH:
+ if (MatchKeywordStart(input, "this", 2, Token::THIS)) return;
+ if (MatchKeywordStart(input, "throw", 2, Token::THROW)) return;
+ break;
+ case TR:
+ if (MatchKeywordStart(input, "true", 2, Token::TRUE_LITERAL)) return;
+ if (MatchKeyword(input, 'y', KEYWORD_MATCHED, Token::TRY)) return;
+ break;
+ case V:
+ if (MatchKeywordStart(input, "var", 1, Token::VAR)) return;
+ if (MatchKeywordStart(input, "void", 1, Token::VOID)) return;
+ break;
+ case W:
+ if (MatchKeywordStart(input, "while", 1, Token::WHILE)) return;
+ if (MatchKeywordStart(input, "with", 1, Token::WITH)) return;
+ break;
+ default:
+ UNREACHABLE();
+ }
+ // On fallthrough, it's a failure.
+ state_ = UNMATCHABLE;
+}
+
+
+// ----------------------------------------------------------------------------
// Scanner
Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre) {
Token::Value Scanner::ScanIdentifier() {
ASSERT(kIsIdentifierStart.get(c0_));
- bool has_escapes = false;
StartLiteral();
+ KeywordMatcher keyword_match;
+
// Scan identifier start character.
if (c0_ == '\\') {
- has_escapes = true;
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier start characters.
if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
AddChar(c);
+ keyword_match.Fail();
} else {
AddChar(c0_);
+ keyword_match.AddChar(c0_);
Advance();
}
// Scan the rest of the identifier characters.
while (kIsIdentifierPart.get(c0_)) {
if (c0_ == '\\') {
- has_escapes = true;
uc32 c = ScanIdentifierUnicodeEscape();
// Only allow legal identifier part characters.
if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
AddChar(c);
+ keyword_match.Fail();
} else {
AddChar(c0_);
+ keyword_match.AddChar(c0_);
Advance();
}
}
TerminateLiteral();
- // We don't have any 1-letter keywords (this is probably a common case).
- if ((next_.literal_end - next_.literal_pos) == 1) {
- return Token::IDENTIFIER;
- }
-
- // If the identifier contains unicode escapes, it must not be
- // resolved to a keyword.
- if (has_escapes) {
- return Token::IDENTIFIER;
- }
-
- return Token::Lookup(&literals_.data()[next_.literal_pos]);
+ return keyword_match.token();
}
};
+class KeywordMatcher {
+// Incrementally recognize keywords.
+//
+// Recognized keywords:
+// break case catch const* continue debugger* default delete do else
+// finally false for function if in instanceof native* new null
+// return switch this throw true try typeof var void while with
+//
+// *: Actually "future reserved keywords". These are the only ones we
+// recognized, the remaining are allowed as identifiers.
+ public:
+ KeywordMatcher() : state_(INITIAL), token_(Token::IDENTIFIER) {}
+
+ Token::Value token() { return token_; }
+
+ inline void AddChar(uc32 input) {
+ if (state_ != UNMATCHABLE) {
+ Step(input);
+ }
+ }
+
+ void Fail() {
+ token_ = Token::IDENTIFIER;
+ state_ = UNMATCHABLE;
+ }
+
+ private:
+ enum State {
+ UNMATCHABLE,
+ INITIAL,
+ KEYWORD_PREFIX,
+ KEYWORD_MATCHED,
+ C,
+ CA,
+ CO,
+ CON,
+ D,
+ DE,
+ F,
+ I,
+ IN,
+ N,
+ T,
+ TH,
+ TR,
+ V,
+ W
+ };
+
+ struct FirstState {
+ const char* keyword;
+ State state;
+ Token::Value token;
+ };
+
+ // Range of possible first characters of a keyword.
+ static const unsigned int kFirstCharRangeMin = 'b';
+ static const unsigned int kFirstCharRangeMax = 'w';
+ static const unsigned int kFirstCharRangeLength =
+ kFirstCharRangeMax - kFirstCharRangeMin + 1;
+ // State map for first keyword character range.
+ static FirstState first_states_[kFirstCharRangeLength];
+
+ // Current state.
+ State state_;
+ // Token for currently added characters.
+ Token::Value token_;
+
+ // Matching a specific keyword string (there is only one possible valid
+ // keyword with the current prefix).
+ const char* keyword_;
+ int counter_;
+ Token::Value keyword_token_;
+
+ // If input equals keyword's character at position, continue matching keyword
+ // from that position.
+ inline bool MatchKeywordStart(uc32 input,
+ const char* keyword,
+ int position,
+ Token::Value token_if_match) {
+ if (input == keyword[position]) {
+ state_ = KEYWORD_PREFIX;
+ this->keyword_ = keyword;
+ this->counter_ = position + 1;
+ this->keyword_token_ = token_if_match;
+ return true;
+ }
+ return false;
+ }
+
+ // If input equals match character, transition to new state and return true.
+ inline bool MatchState(uc32 input, char match, State new_state) {
+ if (input == match) {
+ state_ = new_state;
+ return true;
+ }
+ return false;
+ }
+
+ inline bool MatchKeyword(uc32 input,
+ char match,
+ State new_state,
+ Token::Value keyword_token) {
+ if (input == match) { // Matched "do".
+ state_ = new_state;
+ token_ = keyword_token;
+ return true;
+ }
+ return false;
+ }
+
+ void Step(uc32 input);
+};
+
+
class Scanner {
public:
--- /dev/null
+// Copyright 2006-2009 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <stdlib.h>
+
+#include "v8.h"
+
+#include "token.h"
+#include "scanner.h"
+
+#include "cctest.h"
+
+namespace i = ::v8::internal;
+
+TEST(KeywordMatcher) {
+ struct KeywordToken {
+ const char* keyword;
+ i::Token::Value token;
+ };
+
+ static const KeywordToken keywords[] = {
+#define KEYWORD(t, s, d) { s, i::Token::t },
+#define IGNORE(t, s, d) /* */
+ TOKEN_LIST(IGNORE, KEYWORD, IGNORE)
+#undef KEYWORD
+ { NULL, i::Token::IDENTIFIER }
+ };
+
+ static const char* future_keywords[] = {
+#define FUTURE(t, s, d) s,
+ TOKEN_LIST(IGNORE, IGNORE, FUTURE)
+#undef FUTURE
+#undef IGNORE
+ NULL
+ };
+
+ KeywordToken key_token;
+ for (int i = 0; (key_token = keywords[i]).keyword != NULL; i++) {
+ i::KeywordMatcher matcher;
+ const char* keyword = key_token.keyword;
+ int length = strlen(keyword);
+ for (int j = 0; j < length; j++) {
+ if (key_token.token == i::Token::INSTANCEOF && j == 2) {
+ // "in" is a prefix of "instanceof". It's the only keyword
+ // that is a prefix of another.
+ CHECK_EQ(i::Token::IN, matcher.token());
+ } else {
+ CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
+ }
+ matcher.AddChar(keyword[j]);
+ }
+ CHECK_EQ(key_token.token, matcher.token());
+ // Adding more characters will make keyword matching fail.
+ matcher.AddChar('z');
+ CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
+ // Adding a keyword later will not make it match again.
+ matcher.AddChar('i');
+ matcher.AddChar('f');
+ CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
+ }
+
+ // Future keywords are not recognized.
+ const char* future_keyword;
+ for (int i = 0; (future_keyword = future_keywords[i]) != NULL; i++) {
+ i::KeywordMatcher matcher;
+ int length = strlen(future_keyword);
+ for (int j = 0; j < length; j++) {
+ matcher.AddChar(future_keyword[j]);
+ }
+ CHECK_EQ(i::Token::IDENTIFIER, matcher.token());
+ }
+
+ // Zero isn't ignored at first.
+ i::KeywordMatcher bad_start;
+ bad_start.AddChar(0);
+ CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
+ bad_start.AddChar('i');
+ bad_start.AddChar('f');
+ CHECK_EQ(i::Token::IDENTIFIER, bad_start.token());
+
+ // Zero isn't ignored at end.
+ i::KeywordMatcher bad_end;
+ bad_end.AddChar('i');
+ bad_end.AddChar('f');
+ CHECK_EQ(i::Token::IF, bad_end.token());
+ bad_end.AddChar(0);
+ CHECK_EQ(i::Token::IDENTIFIER, bad_end.token());
+
+ // Case isn't ignored.
+ i::KeywordMatcher bad_case;
+ bad_case.AddChar('i');
+ bad_case.AddChar('F');
+ CHECK_EQ(i::Token::IDENTIFIER, bad_case.token());
+
+ // If we mark it as failure, continuing won't help.
+ i::KeywordMatcher full_stop;
+ full_stop.AddChar('i');
+ CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
+ full_stop.Fail();
+ CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
+ full_stop.AddChar('f');
+ CHECK_EQ(i::Token::IDENTIFIER, full_stop.token());
+}
+