Move static scanner fields to scanner-base.h

author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 16 Nov 2010 08:01:45 +0000 (08:01 +0000)

committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Tue, 16 Nov 2010 08:01:45 +0000 (08:01 +0000)
author lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 16 Nov 2010 08:01:45 +0000 (08:01 +0000)
committer lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 16 Nov 2010 08:01:45 +0000 (08:01 +0000)
diff --git a/src/conversions.cc b/src/conversions.cc

index 4cc6744..a1ec089 100644 (file)
--- a/src/conversions.cc
+++ b/src/conversions.cc
@@ -33,7 +33,7 @@
  #include "conversions-inl.h"
  #include "dtoa.h"
  #include "factory.h"
-#include "scanner.h"
+#include "scanner-base.h"
  #include "strtod.h"
  
  namespace v8 {
@@ -121,7 +121,7 @@ static const double JUNK_STRING_VALUE = OS::nan_value();
  template <class Iterator, class EndMark>
  static inline bool AdvanceToNonspace(Iterator* current, EndMark end) {
    while (*current != end) {
-    if (!Scanner::kIsWhiteSpace.get(**current)) return true;
+    if (!ScannerConstants::kIsWhiteSpace.get(**current)) return true;
      ++*current;
    }
    return false;
diff --git a/src/dateparser.h b/src/dateparser.h

index cae9b08..28053f4 100644 (file)
--- a/src/dateparser.h
+++ b/src/dateparser.h
@@ -28,7 +28,8 @@
  #ifndef V8_DATEPARSER_H_
  #define V8_DATEPARSER_H_
  
-#include "scanner.h"
+#include "char-predicates-inl.h"
+#include "scanner-base.h"
  
  namespace v8 {
  namespace internal {
@@ -99,10 +100,20 @@ class DateParser : public AllStatic {
      }
  
      // The skip methods return whether they actually skipped something.
-    bool Skip(uint32_t c) { return ch_ == c ?  (Next(), true) : false; }
+    bool Skip(uint32_t c) {
+      if (ch_ == c) {
+        Next();
+        return true;
+      }
+      return false;
+    }
  
      bool SkipWhiteSpace() {
-      return Scanner::kIsWhiteSpace.get(ch_) ? (Next(), true) : false;
+      if (ScannerConstants::kIsWhiteSpace.get(ch_)) {
+        Next();
+        return true;
+      }
+      return false;
      }
  
      bool SkipParentheses() {
diff --git a/src/heap.cc b/src/heap.cc

index 75ccc44..26859d7 100644 (file)
--- a/src/heap.cc
+++ b/src/heap.cc
@@ -38,7 +38,7 @@
  #include "mark-compact.h"
  #include "natives.h"
  #include "objects-visiting.h"
-#include "scanner.h"
+#include "scanner-base.h"
  #include "scopeinfo.h"
  #include "snapshot.h"
  #include "v8threads.h"
@@ -3249,7 +3249,8 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string,
    const uc32 kMaxSupportedChar = 0xFFFF;
    // Count the number of characters in the UTF-8 string and check if
    // it is an ASCII string.
-  Access<Scanner::Utf8Decoder> decoder(Scanner::utf8_decoder());
+  Access<ScannerConstants::Utf8Decoder>
+      decoder(ScannerConstants::utf8_decoder());
    decoder->Reset(string.start(), string.length());
    int chars = 0;
    bool is_ascii = true;
diff --git a/src/objects.cc b/src/objects.cc

index c1cb922..8efb0da 100644 (file)
--- a/src/objects.cc
+++ b/src/objects.cc
@@ -35,7 +35,7 @@
  #include "objects-inl.h"
  #include "objects-visiting.h"
  #include "macro-assembler.h"
-#include "scanner.h"
+#include "scanner-base.h"
  #include "scopeinfo.h"
  #include "string-stream.h"
  #include "utils.h"
@@ -1208,7 +1208,8 @@ MaybeObject* JSObject::AddFastProperty(String* name,
    // Normalize the object if the name is an actual string (not the
    // hidden symbols) and is not a real identifier.
    StringInputBuffer buffer(name);
-  if (!Scanner::IsIdentifier(&buffer) && name != Heap::hidden_symbol()) {
+  if (!ScannerConstants::IsIdentifier(&buffer)
+      && name != Heap::hidden_symbol()) {
      Object* obj;
      { MaybeObject* maybe_obj =
            NormalizeProperties(CLEAR_INOBJECT_PROPERTIES, 0);
@@ -5088,7 +5089,8 @@ bool String::MarkAsUndetectable() {
  
  bool String::IsEqualTo(Vector<const char> str) {
    int slen = length();
-  Access<Scanner::Utf8Decoder> decoder(Scanner::utf8_decoder());
+  Access<ScannerConstants::Utf8Decoder>
+      decoder(ScannerConstants::utf8_decoder());
    decoder->Reset(str.start(), str.length());
    int i;
    for (i = 0; i < slen && decoder->has_more(); i++) {
diff --git a/src/parser.cc b/src/parser.cc

index a0f3b71..e6a57e3 100644 (file)
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -36,6 +36,7 @@
  #include "messages.h"
  #include "parser.h"
  #include "platform.h"
+#include "prescanner.h"
  #include "preparser.h"
  #include "runtime.h"
  #include "scopeinfo.h"
@@ -4667,9 +4668,21 @@ ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
                                      unibrow::CharacterStream* stream,
                                      v8::Extension* extension) {
    Handle<Script> no_script;
-  preparser::PreParser<Scanner, CompleteParserRecorder> parser;
-  Scanner scanner;
-  scanner.Initialize(source, stream, JAVASCRIPT);
+  int length = 0;
+  SafeStringInputBuffer safe_stream;
+  if (!source.is_null()) {
+    length = source->length();
+    safe_stream.Reset(source.location());
+    stream = &safe_stream;
+  } else {
+    length = stream->Length();
+  }
+  typedef preparser::Scanner<CharacterStreamUTF16Buffer, UTF8Buffer> PreScanner;
+  preparser::PreParser<PreScanner, CompleteParserRecorder> parser;
+  CharacterStreamUTF16Buffer buffer;
+  buffer.Initialize(source, stream, 0, length);
+  PreScanner scanner;
+  scanner.Initialize(&buffer);
    bool allow_lazy = FLAG_lazy && (extension == NULL);
    CompleteParserRecorder recorder;
    if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
diff --git a/src/scanner-base.cc b/src/scanner-base.cc

index 0567bb9..6cde517 100644 (file)
--- a/src/scanner-base.cc
+++ b/src/scanner-base.cc
@@ -34,6 +34,33 @@ namespace v8 {
  namespace internal {
  
  // ----------------------------------------------------------------------------
+// Character predicates
+
+unibrow::Predicate<IdentifierStart, 128> ScannerConstants::kIsIdentifierStart;
+unibrow::Predicate<IdentifierPart, 128> ScannerConstants::kIsIdentifierPart;
+unibrow::Predicate<unibrow::WhiteSpace, 128> ScannerConstants::kIsWhiteSpace;
+unibrow::Predicate<unibrow::LineTerminator, 128>
+  ScannerConstants::kIsLineTerminator;
+
+StaticResource<ScannerConstants::Utf8Decoder> ScannerConstants::utf8_decoder_;
+
+// Compound predicates.
+
+bool ScannerConstants::IsIdentifier(unibrow::CharacterStream* buffer) {
+  // Checks whether the buffer contains an identifier (no escape).
+  if (!buffer->has_more()) return false;
+  if (!kIsIdentifierStart.get(buffer->GetNext())) {
+    return false;
+  }
+  while (buffer->has_more()) {
+    if (!kIsIdentifierPart.get(buffer->GetNext())) {
+      return false;
+    }
+  }
+  return true;
+}
+
+// ----------------------------------------------------------------------------
  // Keyword Matcher
  
  KeywordMatcher::FirstState KeywordMatcher::first_states_[] = {
diff --git a/src/scanner-base.h b/src/scanner-base.h

index 500870b..72797a8 100644 (file)
--- a/src/scanner-base.h
+++ b/src/scanner-base.h
@@ -30,12 +30,37 @@
  #ifndef V8_SCANNER_BASE_H_
  #define V8_SCANNER_BASE_H_
  
+#include "globals.h"
+#include "checks.h"
+#include "allocation.h"
  #include "token.h"
-#include "unicode.h"
+#include "unicode-inl.h"
+#include "char-predicates.h"
+#include "utils.h"
  
  namespace v8 {
  namespace internal {
  
+class ScannerConstants : AllStatic {
+ public:
+  typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
+
+  static StaticResource<Utf8Decoder>* utf8_decoder() {
+    return &utf8_decoder_;
+  }
+
+  static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
+  static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
+  static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
+  static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
+
+  static bool IsIdentifier(unibrow::CharacterStream* buffer);
+
+ private:
+  static StaticResource<Utf8Decoder> utf8_decoder_;
+};
+
+
  class KeywordMatcher {
  //  Incrementally recognize keywords.
  //
@@ -45,7 +70,8 @@ class KeywordMatcher {
  //      return switch this throw true try typeof var void while with
  //
  //  *: Actually "future reserved keywords". These are the only ones we
-//     recognized, the remaining are allowed as identifiers.
+//     recognize, the remaining are allowed as identifiers.
+//     In ES5 strict mode, we should disallow all reserved keywords.
   public:
    KeywordMatcher()
        : state_(INITIAL),
@@ -156,10 +182,6 @@ class KeywordMatcher {
  };
  
  
-
-
-
-
  } }  // namespace v8::internal
  
  #endif  // V8_SCANNER_BASE_H_
diff --git a/src/scanner.cc b/src/scanner.cc

index a24952a..f7e7db3 100755 (executable)
--- a/src/scanner.cc
+++ b/src/scanner.cc
@@ -30,24 +30,12 @@
  #include "ast.h"
  #include "handles.h"
  #include "scanner.h"
+#include "unicode-inl.h"
  
  namespace v8 {
  namespace internal {
  
  // ----------------------------------------------------------------------------
-// Character predicates
-
-
-unibrow::Predicate<IdentifierStart, 128> Scanner::kIsIdentifierStart;
-unibrow::Predicate<IdentifierPart, 128> Scanner::kIsIdentifierPart;
-unibrow::Predicate<unibrow::LineTerminator, 128> Scanner::kIsLineTerminator;
-unibrow::Predicate<unibrow::WhiteSpace, 128> Scanner::kIsWhiteSpace;
-
-
-StaticResource<Scanner::Utf8Decoder> Scanner::utf8_decoder_;
-
-
-// ----------------------------------------------------------------------------
  // UTF8Buffer
  
  UTF8Buffer::UTF8Buffer() : buffer_(kInitialCapacity) { }
@@ -358,9 +346,9 @@ bool Scanner::SkipJavaScriptWhiteSpace() {
    while (true) {
      // We treat byte-order marks (BOMs) as whitespace for better
      // compatibility with Spidermonkey and other JavaScript engines.
-    while (kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
+    while (ScannerConstants::kIsWhiteSpace.get(c0_) || IsByteOrderMark(c0_)) {
        // IsWhiteSpace() includes line terminators!
-      if (kIsLineTerminator.get(c0_)) {
+      if (ScannerConstants::kIsLineTerminator.get(c0_)) {
          // Ignore line terminators, but remember them. This is necessary
          // for automatic semicolon insertion.
          has_line_terminator_before_next_ = true;
@@ -400,7 +388,7 @@ Token::Value Scanner::SkipSingleLineComment() {
    // separately by the lexical grammar and becomes part of the
    // stream of input elements for the syntactic grammar (see
    // ECMA-262, section 7.4, page 12).
-  while (c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
+  while (c0_ >= 0 && !ScannerConstants::kIsLineTerminator.get(c0_)) {
      Advance();
    }
  
@@ -631,7 +619,7 @@ Token::Value Scanner::ScanJsonIdentifier(const char* text,
      Advance();
      text++;
    }
-  if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
+  if (ScannerConstants::kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
    literal.Complete();
    return token;
  }
@@ -854,7 +842,7 @@ void Scanner::ScanJavaScript() {
          break;
  
        default:
-        if (kIsIdentifierStart.get(c0_)) {
+        if (ScannerConstants::kIsIdentifierStart.get(c0_)) {
            token = ScanIdentifier();
          } else if (IsDecimalDigit(c0_)) {
            token = ScanNumber(false);
@@ -937,7 +925,7 @@ void Scanner::ScanEscape() {
    Advance();
  
    // Skip escaped newlines.
-  if (kIsLineTerminator.get(c)) {
+  if (ScannerConstants::kIsLineTerminator.get(c)) {
      // Allow CR+LF newlines in multiline string literals.
      if (IsCarriageReturn(c) && IsLineFeed(c0_)) Advance();
      // Allow LF+CR newlines in multiline string literals.
@@ -979,7 +967,8 @@ Token::Value Scanner::ScanString() {
    Advance();  // consume quote
  
    LiteralScope literal(this);
-  while (c0_ != quote && c0_ >= 0 && !kIsLineTerminator.get(c0_)) {
+  while (c0_ != quote && c0_ >= 0
+         && !ScannerConstants::kIsLineTerminator.get(c0_)) {
      uc32 c = c0_;
      Advance();
      if (c == '\\') {
@@ -1092,7 +1081,7 @@ Token::Value Scanner::ScanNumber(bool seen_period) {
    // not be an identifier start or a decimal digit; see ECMA-262
    // section 7.8.3, page 17 (note that we read only one decimal digit
    // if the value is 0).
-  if (IsDecimalDigit(c0_) || kIsIdentifierStart.get(c0_))
+  if (IsDecimalDigit(c0_) || ScannerConstants::kIsIdentifierStart.get(c0_))
      return Token::ILLEGAL;
  
    literal.Complete();
@@ -1114,7 +1103,7 @@ uc32 Scanner::ScanIdentifierUnicodeEscape() {
  
  
  Token::Value Scanner::ScanIdentifier() {
-  ASSERT(kIsIdentifierStart.get(c0_));
+  ASSERT(ScannerConstants::kIsIdentifierStart.get(c0_));
  
    LiteralScope literal(this);
    KeywordMatcher keyword_match;
@@ -1123,7 +1112,7 @@ Token::Value Scanner::ScanIdentifier() {
    if (c0_ == '\\') {
      uc32 c = ScanIdentifierUnicodeEscape();
      // Only allow legal identifier start characters.
-    if (!kIsIdentifierStart.get(c)) return Token::ILLEGAL;
+    if (!ScannerConstants::kIsIdentifierStart.get(c)) return Token::ILLEGAL;
      AddChar(c);
      keyword_match.Fail();
    } else {
@@ -1133,11 +1122,11 @@ Token::Value Scanner::ScanIdentifier() {
    }
  
    // Scan the rest of the identifier characters.
-  while (kIsIdentifierPart.get(c0_)) {
+  while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
      if (c0_ == '\\') {
        uc32 c = ScanIdentifierUnicodeEscape();
        // Only allow legal identifier part characters.
-      if (!kIsIdentifierPart.get(c)) return Token::ILLEGAL;
+      if (!ScannerConstants::kIsIdentifierPart.get(c)) return Token::ILLEGAL;
        AddChar(c);
        keyword_match.Fail();
      } else {
@@ -1153,17 +1142,6 @@ Token::Value Scanner::ScanIdentifier() {
  
  
  
-bool Scanner::IsIdentifier(unibrow::CharacterStream* buffer) {
-  // Checks whether the buffer contains an identifier (no escape).
-  if (!buffer->has_more()) return false;
-  if (!kIsIdentifierStart.get(buffer->GetNext())) return false;
-  while (buffer->has_more()) {
-    if (!kIsIdentifierPart.get(buffer->GetNext())) return false;
-  }
-  return true;
-}
-
-
  bool Scanner::ScanRegExpPattern(bool seen_equal) {
    // Scan: ('/' | '/=') RegularExpressionBody '/' RegularExpressionFlags
    bool in_character_class = false;
@@ -1181,10 +1159,10 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
      AddChar('=');
  
    while (c0_ != '/' || in_character_class) {
-    if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
+    if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
      if (c0_ == '\\') {  // escaped character
        AddCharAdvance();
-      if (kIsLineTerminator.get(c0_) || c0_ < 0) return false;
+      if (ScannerConstants::kIsLineTerminator.get(c0_) || c0_ < 0) return false;
        AddCharAdvance();
      } else {  // unescaped character
        if (c0_ == '[') in_character_class = true;
@@ -1202,7 +1180,7 @@ bool Scanner::ScanRegExpPattern(bool seen_equal) {
  bool Scanner::ScanRegExpFlags() {
    // Scan regular expression flags.
    LiteralScope literal(this);
-  while (kIsIdentifierPart.get(c0_)) {
+  while (ScannerConstants::kIsIdentifierPart.get(c0_)) {
      if (c0_ == '\\') {
        uc32 c = ScanIdentifierUnicodeEscape();
        if (c != static_cast<uc32>(unibrow::Utf8::kBadChar)) {
diff --git a/src/scanner.h b/src/scanner.h

index 1f49fd0..eec38cb 100644 (file)
--- a/src/scanner.h
+++ b/src/scanner.h
@@ -251,17 +251,10 @@ class Scanner {
  
    bool stack_overflow() { return stack_overflow_; }
  
-  static StaticResource<Utf8Decoder>* utf8_decoder() { return &utf8_decoder_; }
-
    // Tells whether the buffer contains an identifier (no escapes).
    // Used for checking if a property name is an identifier.
    static bool IsIdentifier(unibrow::CharacterStream* buffer);
  
-  static unibrow::Predicate<IdentifierStart, 128> kIsIdentifierStart;
-  static unibrow::Predicate<IdentifierPart, 128> kIsIdentifierPart;
-  static unibrow::Predicate<unibrow::LineTerminator, 128> kIsLineTerminator;
-  static unibrow::Predicate<unibrow::WhiteSpace, 128> kIsWhiteSpace;
-
    static const int kCharacterLookaheadBufferSize = 1;
    static const int kNoEndPosition = 1;
  
@@ -391,7 +384,6 @@ class Scanner {
    UTF8Buffer literal_buffer_;
  
    bool stack_overflow_;
-  static StaticResource<Utf8Decoder> utf8_decoder_;
  
    // One Unicode character look-ahead; c0_ < 0 at the end of the input.
    uc32 c0_;
diff --git a/src/v8.cc b/src/v8.cc

index 0623400..43f92b3 100644 (file)
--- a/src/v8.cc
+++ b/src/v8.cc
@@ -44,6 +44,7 @@ bool V8::has_been_setup_ = false;
  bool V8::has_been_disposed_ = false;
  bool V8::has_fatal_error_ = false;
  
+
  bool V8::Initialize(Deserializer* des) {
    bool create_heap_objects = des == NULL;
    if (has_been_disposed_ || has_fatal_error_) return false;
diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc

index f7ebeae..b4f6914 100644 (file)
--- a/test/cctest/test-api.cc
+++ b/test/cctest/test-api.cc
@@ -38,6 +38,7 @@
  #include "utils.h"
  #include "cctest.h"
  #include "parser.h"
+#include "unicode-inl.h"
  
  static const bool kLogThreading = true;
author	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 16 Nov 2010 08:01:45 +0000 (08:01 +0000)
committer	lrn@chromium.org <lrn@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Tue, 16 Nov 2010 08:01:45 +0000 (08:01 +0000)
src/conversions.cc		patch | blob | history
src/dateparser.h		patch | blob | history
src/heap.cc		patch | blob | history
src/objects.cc		patch | blob | history
src/parser.cc		patch | blob | history
src/scanner-base.cc		patch | blob | history
src/scanner-base.h		patch | blob | history
src/scanner.cc		patch | blob | history
src/scanner.h		patch | blob | history
src/v8.cc		patch | blob | history
test/cctest/test-api.cc		patch | blob | history