// Pre-parses a UTF-8 source buffer of `length` bytes and returns the
// resulting preparse data. The bytes are decoded through a
// Utf8ToUC16CharacterStream that only lives for the duration of this call.
ScriptData* ScriptData::PreCompile(const char* input, int length) {
- unibrow::Utf8InputBuffer<> buf(input, length);
- return i::ParserApi::PreParse(i::Handle<i::String>(), &buf, NULL);
+ i::Utf8ToUC16CharacterStream stream(
+ reinterpret_cast<const unsigned char*>(input), length);
// No extension is supplied (NULL).
+ return i::ParserApi::PreParse(&stream, NULL);
}
// Pre-parses the source held in a string handle and returns the preparse
// data. The stream type is picked from the string's representation, and the
// stream is a local that is destroyed when the chosen branch returns.
ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {
i::Handle<i::String> str = Utils::OpenHandle(*source);
- return i::ParserApi::PreParse(str, NULL, NULL);
+ if (str->IsExternalTwoByteString()) {
// External two-byte strings use their dedicated stream class.
+ i::ExternalTwoByteStringUC16CharacterStream stream(
+ i::Handle<i::ExternalTwoByteString>::cast(str), 0, str->length());
+ return i::ParserApi::PreParse(&stream, NULL);
+ } else {
// Every other string representation goes through the generic stream.
+ i::GenericStringUC16CharacterStream stream(str, 0, str->length());
+ return i::ParserApi::PreParse(&stream, NULL);
+ }
}
// Ordering assertions expressed in terms of CHECK. Both arguments are
// parenthesized so that arbitrary expressions can be passed.
#define CHECK_GT(a, b) CHECK((a) > (b))
#define CHECK_GE(a, b) CHECK((a) >= (b))
+#define CHECK_LT(a, b) CHECK((a) < (b))
+#define CHECK_LE(a, b) CHECK((a) <= (b))
// This is inspired by the static assertion facility in boost. This
ScriptDataImpl* pre_data = input_pre_data;
if (pre_data == NULL
&& source_length >= FLAG_min_preparse_length) {
- pre_data = ParserApi::PartialPreParse(source, NULL, extension);
+ if (source->IsExternalTwoByteString()) {
+ ExternalTwoByteStringUC16CharacterStream stream(
+ Handle<ExternalTwoByteString>::cast(source), 0, source->length());
+ pre_data = ParserApi::PartialPreParse(&stream, extension);
+ } else {
+ GenericStringUC16CharacterStream stream(source, 0, source->length());
+ pre_data = ParserApi::PartialPreParse(&stream, extension);
+ }
}
// Create a script object describing the script to be compiled.
// Initialize parser state.
source->TryFlatten();
- scanner_.Initialize(source);
+ if (source->IsExternalTwoByteString()) {
+ // Notice that the stream is destroyed at the end of the branch block.
+ // The last line of the blocks can't be moved outside, even though they're
+ // identical calls.
+ ExternalTwoByteStringUC16CharacterStream stream(
+ Handle<ExternalTwoByteString>::cast(source), 0, source->length());
+ scanner_.Initialize(&stream, JavaScriptScanner::kAllLiterals);
+ return DoParseProgram(source, in_global_context, &zone_scope);
+ } else {
+ GenericStringUC16CharacterStream stream(source, 0, source->length());
+ scanner_.Initialize(&stream, JavaScriptScanner::kAllLiterals);
+ return DoParseProgram(source, in_global_context, &zone_scope);
+ }
+}
+
+
// Second half of ParseProgram: runs after the caller has pointed scanner_ at
// a character stream. zone_scope is passed by pointer so this helper can
// request deletion of the zone on exit when parsing produced no result.
+FunctionLiteral* Parser::DoParseProgram(Handle<String> source,
+ bool in_global_context,
+ ZoneScope* zone_scope) {
ASSERT(target_stack_ == NULL);
if (pre_data_ != NULL) pre_data_->Initialize();
// If there was a syntax error we have to get rid of the AST
// and it is not safe to do so before the scope has been deleted.
- if (result == NULL) zone_scope.DeleteOnExit();
+ if (result == NULL) zone_scope->DeleteOnExit();
return result;
}
-
// Entry point for lazily parsing a single function. It sets up the zone
// scope and counters, builds a character stream restricted to the function's
// [start_position, end_position) range of the script source, and delegates
// to the three-argument ParseLazy overload.
FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info) {
CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT);
HistogramTimerScope timer(&Counters::parse_lazy);
Handle<String> source(String::cast(script_->source()));
Counters::total_parse_size.Increment(source->length());
+ // Initialize parser state.
+ source->TryFlatten();
// The stream is a local of each branch, so the parse has to complete inside
// the branch, before the stream is destroyed.
+ if (source->IsExternalTwoByteString()) {
+ ExternalTwoByteStringUC16CharacterStream stream(
+ Handle<ExternalTwoByteString>::cast(source),
+ info->start_position(),
+ info->end_position());
+ FunctionLiteral* result = ParseLazy(info, &stream, &zone_scope);
+ return result;
+ } else {
+ GenericStringUC16CharacterStream stream(source,
+ info->start_position(),
+ info->end_position());
+ FunctionLiteral* result = ParseLazy(info, &stream, &zone_scope);
+ return result;
+ }
+}
+
+
+FunctionLiteral* Parser::ParseLazy(Handle<SharedFunctionInfo> info,
+ UC16CharacterStream* source,
+ ZoneScope* zone_scope) {
+ scanner_.Initialize(source, JavaScriptScanner::kAllLiterals);
+ ASSERT(target_stack_ == NULL);
+
Handle<String> name(String::cast(info->name()));
fni_ = new FuncNameInferrer();
fni_->PushEnclosingName(name);
- // Initialize parser state.
- source->TryFlatten();
- scanner_.Initialize(source, info->start_position(), info->end_position());
- ASSERT(target_stack_ == NULL);
mode_ = PARSE_EAGERLY;
// Place holder for the result.
// not safe to do before scope has been deleted.
if (result == NULL) {
Top::StackOverflow();
- zone_scope.DeleteOnExit();
+ zone_scope->DeleteOnExit();
} else {
Handle<String> inferred_name(info->inferred_name());
result->set_inferred_name(inferred_name);
if (pre_data() != NULL) {
symbol_id = pre_data()->GetSymbolIdentifier();
}
- return LookupSymbol(symbol_id, scanner_.literal());
+ return LookupSymbol(symbol_id, scanner().literal());
}
// Reports a message of the given type at the scanner's current location by
// forwarding to ReportMessageAt.
void Parser::ReportMessage(const char* type, Vector<const char*> args) {
- Scanner::Location source_location = scanner_.location();
+ Scanner::Location source_location = scanner().location();
ReportMessageAt(source_location, type, args);
}
Expect(Token::CONTINUE, CHECK_OK);
Handle<String> label = Handle<String>::null();
Token::Value tok = peek();
- if (!scanner_.has_line_terminator_before_next() &&
+ if (!scanner().has_line_terminator_before_next() &&
tok != Token::SEMICOLON && tok != Token::RBRACE && tok != Token::EOS) {
label = ParseIdentifier(CHECK_OK);
}
Expect(Token::BREAK, CHECK_OK);
Handle<String> label;
Token::Value tok = peek();
- if (!scanner_.has_line_terminator_before_next() &&
+ if (!scanner().has_line_terminator_before_next() &&
tok != Token::SEMICOLON && tok != Token::RBRACE && tok != Token::EOS) {
label = ParseIdentifier(CHECK_OK);
}
}
Token::Value tok = peek();
- if (scanner_.has_line_terminator_before_next() ||
+ if (scanner().has_line_terminator_before_next() ||
tok == Token::SEMICOLON ||
tok == Token::RBRACE ||
tok == Token::EOS) {
Expect(Token::THROW, CHECK_OK);
int pos = scanner().location().beg_pos;
- if (scanner_.has_line_terminator_before_next()) {
+ if (scanner().has_line_terminator_before_next()) {
ReportMessage("newline_after_throw", Vector<const char*>::empty());
*ok = false;
return NULL;
// LeftHandSideExpression ('++' | '--')?
Expression* expression = ParseLeftHandSideExpression(CHECK_OK);
- if (!scanner_.has_line_terminator_before_next() && Token::IsCountOp(peek())) {
+ if (!scanner().has_line_terminator_before_next() &&
+ Token::IsCountOp(peek())) {
// Signal a reference error if the expression is an invalid
// left-hand side expression. We could report this as a syntax
// error here but for compatibility with JSC we choose to report the
case Token::NUMBER: {
Consume(Token::NUMBER);
double value =
- StringToDouble(scanner_.literal(), ALLOW_HEX | ALLOW_OCTALS);
+ StringToDouble(scanner().literal(), ALLOW_HEX | ALLOW_OCTALS);
result = NewNumberLiteral(value);
break;
}
case Token::NUMBER: {
Consume(Token::NUMBER);
double value =
- StringToDouble(scanner_.literal(), ALLOW_HEX | ALLOW_OCTALS);
+ StringToDouble(scanner().literal(), ALLOW_HEX | ALLOW_OCTALS);
key = NewNumberLiteral(value);
break;
}
Expression* Parser::ParseRegExpLiteral(bool seen_equal, bool* ok) {
- if (!scanner_.ScanRegExpPattern(seen_equal)) {
+ if (!scanner().ScanRegExpPattern(seen_equal)) {
Next();
ReportMessage("unterminated_regexp", Vector<const char*>::empty());
*ok = false;
int literal_index = temp_scope_->NextMaterializedLiteralIndex();
Handle<String> js_pattern =
- Factory::NewStringFromUtf8(scanner_.next_literal(), TENURED);
- scanner_.ScanRegExpFlags();
+ Factory::NewStringFromUtf8(scanner().next_literal(), TENURED);
+ scanner().ScanRegExpFlags();
Handle<String> js_flags =
- Factory::NewStringFromUtf8(scanner_.next_literal(), TENURED);
+ Factory::NewStringFromUtf8(scanner().next_literal(), TENURED);
Next();
return new RegExpLiteral(js_pattern, js_flags, literal_index);
// FormalParameterList ::
// '(' (Identifier)*[','] ')'
Expect(Token::LPAREN, CHECK_OK);
- int start_pos = scanner_.location().beg_pos;
+ int start_pos = scanner().location().beg_pos;
bool done = (peek() == Token::RPAREN);
while (!done) {
Handle<String> param_name = ParseIdentifier(CHECK_OK);
bool is_lazily_compiled =
mode() == PARSE_LAZILY && top_scope_->HasTrivialOuterContext();
- int function_block_pos = scanner_.location().beg_pos;
+ int function_block_pos = scanner().location().beg_pos;
int materialized_literal_count;
int expected_property_count;
int end_pos;
ReportInvalidPreparseData(name, CHECK_OK);
}
Counters::total_preparse_skipped.Increment(end_pos - function_block_pos);
- scanner_.SeekForward(end_pos);
+ // Seek to position just before terminal '}'.
+ scanner().SeekForward(end_pos - 1);
materialized_literal_count = entry.literal_count();
expected_property_count = entry.property_count();
only_simple_this_property_assignments = false;
this_property_assignments = temp_scope.this_property_assignments();
Expect(Token::RBRACE, CHECK_OK);
- end_pos = scanner_.location().end_pos;
+ end_pos = scanner().location().end_pos;
}
FunctionLiteral* function_literal =
Next();
return;
}
- if (scanner_.has_line_terminator_before_next() ||
+ if (scanner().has_line_terminator_before_next() ||
tok == Token::RBRACE ||
tok == Token::EOS) {
return;
bool* ok) {
Expect(Token::IDENTIFIER, ok);
if (!*ok) return Handle<String>();
- if (scanner_.literal_length() == 3) {
- const char* token = scanner_.literal_string();
+ if (scanner().literal_length() == 3) {
+ const char* token = scanner().literal_string();
*is_get = strcmp(token, "get") == 0;
*is_set = !*is_get && strcmp(token, "set") == 0;
}
// ----------------------------------------------------------------------------
// JSON
-Handle<Object> JsonParser::ParseJson(Handle<String> source) {
- source->TryFlatten();
+Handle<Object> JsonParser::ParseJson(Handle<String> script,
+ UC16CharacterStream* source) {
scanner_.Initialize(source);
stack_overflow_ = false;
Handle<Object> result = ParseJsonValue();
}
Scanner::Location source_location = scanner_.location();
- MessageLocation location(Factory::NewScript(source),
+ MessageLocation location(Factory::NewScript(script),
source_location.beg_pos,
source_location.end_pos);
int argc = (name_opt == NULL) ? 0 : 1;
// Create a Scanner for the preparser to use as input, and preparse the source.
-static ScriptDataImpl* DoPreParse(Handle<String> source,
- unibrow::CharacterStream* stream,
+static ScriptDataImpl* DoPreParse(UC16CharacterStream* source,
bool allow_lazy,
ParserRecorder* recorder,
int literal_flags) {
V8JavaScriptScanner scanner;
- scanner.Initialize(source, stream, literal_flags);
+ scanner.Initialize(source, literal_flags);
intptr_t stack_limit = StackGuard::real_climit();
if (!preparser::PreParser::PreParseProgram(&scanner,
recorder,
// Preparse, but only collect data that is immediately useful,
// even if the preparser data is only used once.
-ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
- unibrow::CharacterStream* stream,
// Pre-parses `source` with a PartialParserRecorder and no literal collection
// (kNoLiterals). Returns NULL without scanning anything when lazy
// compilation is disabled by flag or an extension is supplied.
+ScriptDataImpl* ParserApi::PartialPreParse(UC16CharacterStream* source,
v8::Extension* extension) {
bool allow_lazy = FLAG_lazy && (extension == NULL);
if (!allow_lazy) {
return NULL;
}
PartialParserRecorder recorder;
-
- return DoPreParse(source, stream, allow_lazy, &recorder,
+ return DoPreParse(source, allow_lazy, &recorder,
JavaScriptScanner::kNoLiterals);
}
-ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
- unibrow::CharacterStream* stream,
// Pre-parses `source` with a CompleteParserRecorder, collecting string and
// identifier literals. Unlike PartialPreParse this always runs the
// preparser; allow_lazy is only passed along to DoPreParse.
+ScriptDataImpl* ParserApi::PreParse(UC16CharacterStream* source,
v8::Extension* extension) {
Handle<Script> no_script;
bool allow_lazy = FLAG_lazy && (extension == NULL);
CompleteParserRecorder recorder;
int kPreParseLiteralsFlags =
JavaScriptScanner::kLiteralString | JavaScriptScanner::kLiteralIdentifier;
- return DoPreParse(source, stream, allow_lazy,
- &recorder, kPreParseLiteralsFlags);
+ return DoPreParse(source, allow_lazy, &recorder, kPreParseLiteralsFlags);
}
static bool Parse(CompilationInfo* info);
// Generic preparser generating full preparse data.
- static ScriptDataImpl* PreParse(Handle<String> source,
- unibrow::CharacterStream* stream,
+ static ScriptDataImpl* PreParse(UC16CharacterStream* source,
v8::Extension* extension);
// Preparser that only does preprocessing that makes sense if only used
// immediately after.
- static ScriptDataImpl* PartialPreParse(Handle<String> source,
- unibrow::CharacterStream* stream,
+ static ScriptDataImpl* PartialPreParse(UC16CharacterStream* source,
v8::Extension* extension);
};
Vector<const char*> args);
protected:
+ FunctionLiteral* ParseLazy(Handle<SharedFunctionInfo> info,
+ UC16CharacterStream* source,
+ ZoneScope* zone_scope);
enum Mode {
PARSE_LAZILY,
PARSE_EAGERLY
};
+ // Called by ParseProgram after setting up the scanner.
+ FunctionLiteral* DoParseProgram(Handle<String> source,
+ bool in_global_context,
+ ZoneScope* zone_scope);
+
// Report syntax error
void ReportUnexpectedToken(Token::Value token);
void ReportInvalidPreparseData(Handle<String> name, bool* ok);
void ReportMessage(const char* message, Vector<const char*> args);
bool inside_with() const { return with_nesting_level_ > 0; }
- Scanner& scanner() { return scanner_; }
+ V8JavaScriptScanner& scanner() { return scanner_; }
Mode mode() const { return mode_; }
ScriptDataImpl* pre_data() const { return pre_data_; }
INLINE(Token::Value peek()) {
if (stack_overflow_) return Token::ILLEGAL;
- return scanner_.peek();
+ return scanner().peek();
}
INLINE(Token::Value Next()) {
}
if (StackLimitCheck().HasOverflowed()) {
// Any further calls to Next or peek will return the illegal token.
+ // The current call must return the next token, which might already
+ // have been peek'ed.
stack_overflow_ = true;
}
- return scanner_.Next();
+ return scanner().Next();
}
INLINE(void Consume(Token::Value token));
// Parse JSON input as a single JSON value.
// Returns null handle and sets exception if parsing failed.
// Wraps `source` in the character stream matching its representation and
// runs a temporary JsonParser over it. The string itself is passed along as
// well because ParseJson uses it to build the script for error locations.
static Handle<Object> Parse(Handle<String> source) {
- return JsonParser().ParseJson(source);
+ if (source->IsExternalTwoByteString()) {
+ ExternalTwoByteStringUC16CharacterStream stream(
+ Handle<ExternalTwoByteString>::cast(source), 0, source->length());
+ return JsonParser().ParseJson(source, &stream);
+ } else {
+ GenericStringUC16CharacterStream stream(source, 0, source->length());
+ return JsonParser().ParseJson(source, &stream);
+ }
}
private:
~JsonParser() { }
// Parse a string containing a single JSON value.
- Handle<Object> ParseJson(Handle<String>);
+ Handle<Object> ParseJson(Handle<String> script, UC16CharacterStream* source);
// Parse a single JSON value from input (grammar production JSONValue).
// A JSON value is either a (double-quoted) string literal, a number literal,
// one of "true", "false", or "null", or an object or array literal.
namespace internal {
// UC16CharacterStream based on a v8::UnicodeInputStream.
// Characters are pulled from the wrapped stream one block at a time into
// buffer_. The first kPushBackSize slots of buffer_ are kept free in front of
// the read-ahead data so that PushBack can normally be served from the buffer
// itself; only when that area is exhausted is pushback delegated to the
// wrapped stream.
-class InputStreamUTF16Buffer : public UTF16Buffer {
+class InputStreamUTF16Buffer : public UC16CharacterStream {
public:
- explicit InputStreamUTF16Buffer(UnicodeInputStream* stream)
- : UTF16Buffer(),
- stream_(stream) { }
// Starts with an empty buffer positioned just after the pushback area; the
// first Advance() triggers ReadBlock().
+ explicit InputStreamUTF16Buffer(v8::UnicodeInputStream* stream)
+ : UC16CharacterStream(),
+ stream_(stream),
+ pushback_active_(false) {
+ buffer_cursor_ = buffer_end_ = buffer_ + kPushBackSize;
+ }
virtual ~InputStreamUTF16Buffer() { }
- virtual void PushBack(uc32 ch) {
// Un-reads one character and moves pos_ back by one.
+ virtual void PushBack(uc16 ch) {
+ ASSERT(pos_ > 0);
+ if (buffer_cursor_ > buffer_) {
+ // While we can stay within the buffer, just do so.
// buffer_cursor_ points to const data, so the write goes through the
// writable buffer_ array instead.
+ buffer_[--buffer_cursor_ - buffer_] = ch;
+ pos_--;
+ return;
+ }
+ if (!pushback_active_) {
+ // Push back the entire buffer to the stream and let the
+ // stream handle pushbacks from now.
+ // We leave buffer_cursor_ == buffer_end_, so the next read
+ // will fill the buffer from the current position.
+ // This should happen exceedingly rarely.
+ while (buffer_end_ > buffer_) {
+ stream_->PushBack(*--buffer_end_);
+ }
+ buffer_cursor_ = buffer_end_;
+ pushback_active_ = true;
+ }
stream_->PushBack(ch);
pos_--;
}
- virtual uc32 Advance() {
- uc32 result = stream_->Next();
- if (result >= 0) pos_++;
- return result;
+ protected:
// Refills the read-ahead area from the wrapped stream. Returns true if at
// least one character was read.
+ virtual bool ReadBlock() {
+ // Reset to an empty buffer that starts right after the pushback area.
+ pushback_active_ = false;
+ int32_t value;
+ uc16* buffer_start = buffer_ + kPushBackSize;
+ buffer_cursor_ = buffer_end_ = buffer_start;
+ while ((value = stream_->Next()) >= 0) {
// Values above kMaxThreeByteChar are replaced by kBadChar before being
// narrowed to uc16.
+ if (value > static_cast<int32_t>(unibrow::Utf8::kMaxThreeByteChar)) {
+ value = unibrow::Utf8::kBadChar;
+ }
+ // buffer_end_ is a const pointer, but buffer_ is writable.
+ buffer_start[buffer_end_++ - buffer_start] = static_cast<uc16>(value);
+ if (buffer_end_ == buffer_ + kPushBackSize + kBufferSize) break;
+ }
+ return buffer_end_ > buffer_start;
}
- virtual void SeekForward(int pos) {
+ virtual unsigned SlowSeekForward(unsigned pos) {
// Seeking in the input is not used by preparsing.
// It's only used by the real parser based on preparser data.
UNIMPLEMENTED();
+ return 0;
}
private:
+ static const unsigned kBufferSize = 512;
+ static const unsigned kPushBackSize = 16;
v8::UnicodeInputStream* const stream_;
+ // Buffer holding first kPushBackSize characters of pushback buffer,
+ // then kBufferSize chars of read-ahead.
+ // The pushback buffer is only used if pushing back characters past
+ // the start of a block.
+ uc16 buffer_[kBufferSize + kPushBackSize];
// True while pushback has been handed over to stream_; cleared again by
// the next ReadBlock().
+ bool pushback_active_;
};
class StandAloneJavaScriptScanner : public JavaScriptScanner {
public:
- void Initialize(UTF16Buffer* source) {
+ void Initialize(UC16CharacterStream* source) {
source_ = source;
literal_flags_ = kLiteralString | kLiteralIdentifier;
Init();
Expect(i::Token::RBRACE, CHECK_OK);
+ // Position right after terminal '}'.
int end_pos = scanner_->location().end_pos;
log_->LogFunction(function_block_pos, end_pos,
function_scope.materialized_literal_count(),
namespace internal {
// ----------------------------------------------------------------------------
-// UTF16Buffer
-
-UTF16Buffer::UTF16Buffer()
- : pos_(0), end_(kNoEndPosition) { }
-
-// ----------------------------------------------------------------------------
// LiteralCollector
LiteralCollector::LiteralCollector()
// ----------------------------------------------------------------------------
// Scanner
// Default constructor. source_ is not set here; the Initialize methods of
// the concrete scanners assign it.
-Scanner::Scanner() : source_(NULL) {}
+Scanner::Scanner() { }
uc32 Scanner::ScanHexEscape(uc32 c, int length) {
// ----------------------------------------------------------------------------
// JavaScriptScanner
// Default constructor; only invokes the Scanner base constructor.
-JavaScriptScanner::JavaScriptScanner()
- : has_line_terminator_before_next_(false) {}
+JavaScriptScanner::JavaScriptScanner() : Scanner() {}
Token::Value JavaScriptScanner::Next() {
// Skips the scanner ahead so that the token starting at absolute source
// position `pos` becomes the lookahead ("next") token. `pos` must not lie
// inside the current lookahead token, and the token found there is asserted
// to be a '}'.
void JavaScriptScanner::SeekForward(int pos) {
- source_->SeekForward(pos - 1);
- Advance();
- // This function is only called to seek to the location
- // of the end of a function (at the "}" token). It doesn't matter
- // whether there was a line terminator in the part we skip.
- has_line_terminator_before_next_ = false;
+ // After this call, we will have the token at the given position as
+ // the "next" token. The "current" token will be invalid.
+ if (pos == next_.location.beg_pos) return;
+ int current_pos = source_pos();
+ ASSERT_EQ(next_.location.end_pos, current_pos);
+ // Positions inside the lookahead token aren't supported.
+ ASSERT(pos >= current_pos);
+ if (pos != current_pos) {
// The stream seeks by a relative character count, so the absolute target
// is converted using the stream's own position.
+ source_->SeekForward(pos - source_->pos());
+ Advance();
+ // This function is only called to seek to the location
+ // of the end of a function (at the "}" token). It doesn't matter
+ // whether there was a line terminator in the part we skip.
+ has_line_terminator_before_next_ = false;
+ }
Scan();
+ ASSERT_EQ(Token::RBRACE, next_.token);
}
return -1;
}
-// ----------------------------------------------------------------------------
-// UTF16Buffer - scanner input source with pushback.
-class UTF16Buffer {
+// ---------------------------------------------------------------------
+// Buffered stream of characters, using an internal UC16 buffer.
+
// Abstract base class. The fast paths of Advance() and SeekForward() only
// touch buffer_cursor_/buffer_end_; subclasses provide the storage those
// pointers refer to and implement ReadBlock, SlowSeekForward and PushBack.
+class UC16CharacterStream {
public:
- UTF16Buffer();
- virtual ~UTF16Buffer() {}
// Only pos_ is initialized here; buffer_cursor_ and buffer_end_ must be set
// by the subclass constructor.
+ UC16CharacterStream() : pos_(0) { }
+ virtual ~UC16CharacterStream() { }
+
+ // Returns and advances past the next UC16 character in the input
+ // stream. If there are no more characters, it returns a negative
+ // value.
+ inline int32_t Advance() {
+ if (buffer_cursor_ < buffer_end_ || ReadBlock()) {
+ pos_++;
+ return *(buffer_cursor_++);
+ }
+ // Note: currently the following increment is necessary to avoid a
+ // parser problem! The scanner treats the final kEndOfInput as
+ // a character with a position, and does math relative to that
+ // position.
+ pos_++;
- virtual void PushBack(uc32 ch) = 0;
- // Returns a value < 0 when the buffer end is reached.
- virtual uc32 Advance() = 0;
- virtual void SeekForward(int pos) = 0;
+ return kEndOfInput;
+ }
- int pos() const { return pos_; }
+ // Return the current position in the character stream.
+ // Starts at zero.
+ inline unsigned pos() const { return pos_; }
+
+ // Skips forward past the next character_count UC16 characters
+ // in the input, or until the end of input if that comes sooner.
+ // Returns the number of characters actually skipped. If that is less
+ // than character_count, the end of the input was reached first.
+ inline unsigned SeekForward(unsigned character_count) {
+ unsigned buffered_chars =
+ static_cast<unsigned>(buffer_end_ - buffer_cursor_);
// Fast path: the target lies within the characters already buffered.
+ if (character_count <= buffered_chars) {
+ buffer_cursor_ += character_count;
+ pos_ += character_count;
+ return character_count;
+ }
+ return SlowSeekForward(character_count);
+ }
- static const int kNoEndPosition = 1;
+ // Pushes back the most recently read UC16 character, i.e.,
+ // the value returned by the most recent call to Advance.
+ // Must not be used right after calling SeekForward.
+ virtual void PushBack(uc16 character) = 0;
protected:
- // Initial value of end_ before the input stream is initialized.
-
- int pos_; // Current position in the buffer.
- int end_; // Position where scanning should stop (EOF).
+ static const int32_t kEndOfInput = -1;
+
+ // Ensures that the buffer_cursor_ points to the character at
+ // position pos_ of the input, if possible. If the position
+ // is at or after the end of the input, return false. If there
+ // are more characters available, return true.
+ virtual bool ReadBlock() = 0;
// Called by SeekForward() when the target lies beyond the buffered
// characters.
+ virtual unsigned SlowSeekForward(unsigned character_count) = 0;
+
// Next character to be returned by Advance().
+ const uc16* buffer_cursor_;
// One past the last buffered character.
+ const uc16* buffer_end_;
// Position reported by pos(); advanced by Advance() and SeekForward().
+ unsigned pos_;
};
+// ---------------------------------------------------------------------
+// Constants used by scanners.
+
class ScannerConstants : AllStatic {
public:
typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
// Low-level scanning support.
void Advance() { c0_ = source_->Advance(); }
// Returns the current character c0_ to the source stream and makes `ch` the
// current character again.
void PushBack(uc32 ch) {
- source_->PushBack(ch);
+ source_->PushBack(c0_);
c0_ = ch;
}
TokenDesc current_; // desc for current token (as returned by Next())
TokenDesc next_; // desc for next token (one token look-ahead)
- // Input stream. Must be initialized to an UTF16Buffer.
- UTF16Buffer* source_;
+ // Input stream. Must be initialized to an UC16CharacterStream.
+ UC16CharacterStream* source_;
// Buffer to hold literal values (identifiers, strings, numbers)
// using '\x00'-terminated UTF-8 encoding. Handles allocation internally.
namespace internal {
// ----------------------------------------------------------------------------
-// UTF16Buffer
-
-// CharacterStreamUTF16Buffer
-CharacterStreamUTF16Buffer::CharacterStreamUTF16Buffer()
- : pushback_buffer_(0), last_(0), stream_(NULL) { }
+// BufferedUC16CharacterStreams
+
// Creates a stream with an empty buffer that is not in pushback mode
// (pushback_limit_ == NULL).
+BufferedUC16CharacterStream::BufferedUC16CharacterStream()
+ : UC16CharacterStream(),
+ pushback_limit_(NULL) {
+ // Initialize buffer as being empty. First read will fill the buffer.
+ buffer_cursor_ = buffer_;
+ buffer_end_ = buffer_;
+}
+BufferedUC16CharacterStream::~BufferedUC16CharacterStream() { }
-void CharacterStreamUTF16Buffer::Initialize(Handle<String> data,
- unibrow::CharacterStream* input,
- int start_position,
- int end_position) {
- stream_ = input;
- if (start_position > 0) {
- SeekForward(start_position);
// Un-reads one character. The common case (not in pushback mode and room
// left before the cursor) is handled inline by stepping the cursor back;
// everything else is delegated to SlowPushBack.
+void BufferedUC16CharacterStream::PushBack(uc16 character) {
+ if (pushback_limit_ == NULL && buffer_cursor_ > buffer_) {
+ // buffer_ is writable, buffer_cursor_ is const pointer.
+ buffer_[--buffer_cursor_ - buffer_] = character;
+ pos_--;
+ return;
}
- end_ = end_position != kNoEndPosition ? end_position : kMaxInt;
+ SlowPushBack(character);
}
-void CharacterStreamUTF16Buffer::PushBack(uc32 ch) {
- pushback_buffer()->Add(last_);
- last_ = ch;
// Handles pushback that cannot be served by simply stepping the cursor back:
// either the cursor is already at the start of the buffer, or the stream is
// already in pushback mode.
+void BufferedUC16CharacterStream::SlowPushBack(uc16 character) {
+ // In pushback mode, the end of the buffer contains pushback,
+ // and the start of the buffer (from buffer start to pushback_limit_)
+ // contains valid data that comes just after the pushback.
+ // We NULL the pushback_limit_ if pushing all the way back to the
+ // start of the buffer.
+
+ if (pushback_limit_ == NULL) {
+ // Enter pushback mode.
+ pushback_limit_ = buffer_end_;
+ buffer_end_ = buffer_ + kBufferSize;
+ buffer_cursor_ = buffer_end_;
+ }
+ ASSERT(pushback_limit_ > buffer_);
+ ASSERT(pos_ > 0);
// Pushed-back characters are stored from the end of the buffer downwards.
+ buffer_[--buffer_cursor_ - buffer_] = character;
+ if (buffer_cursor_ == buffer_) {
+ pushback_limit_ = NULL;
+ } else if (buffer_cursor_ < pushback_limit_) {
// The pushback has overwritten part of the retained data; shrink the
// retained region to what is still intact.
+ pushback_limit_ = buffer_cursor_;
+ }
pos_--;
}
-uc32 CharacterStreamUTF16Buffer::Advance() {
- ASSERT(end_ != kNoEndPosition);
- ASSERT(end_ >= 0);
- // NOTE: It is of importance to Persian / Farsi resources that we do
- // *not* strip format control characters in the scanner; see
- //
- // https://bugzilla.mozilla.org/show_bug.cgi?id=274152
- //
- // So, even though ECMA-262, section 7.1, page 11, dictates that we
- // must remove Unicode format-control characters, we do not. This is
- // in line with how IE and SpiderMonkey handles it.
- if (!pushback_buffer()->is_empty()) {
- pos_++;
- return last_ = pushback_buffer()->RemoveLast();
- } else if (stream_->has_more() && pos_ < end_) {
- pos_++;
- uc32 next = stream_->GetNext();
- return last_ = next;
- } else {
- // Note: currently the following increment is necessary to avoid a
- // test-parser problem!
- pos_++;
- return last_ = static_cast<uc32>(-1);
// Makes more characters available at the cursor. Returns false only when
// FillBuffer produced no characters.
+bool BufferedUC16CharacterStream::ReadBlock() {
+ if (pushback_limit_ != NULL) {
// Leaving pushback mode: the data retained at the start of the buffer
// (up to pushback_limit_) is the continuation, so no refill is needed.
+ buffer_cursor_ = buffer_;
+ buffer_end_ = pushback_limit_;
+ pushback_limit_ = NULL;
+ ASSERT(buffer_cursor_ != buffer_end_);
+ return true;
}
// Normal case: ask the subclass for up to kBufferSize characters starting at
// the current position.
+ unsigned length = FillBuffer(pos_, kBufferSize);
+ buffer_cursor_ = buffer_;
+ buffer_end_ = buffer_ + length;
+ return length > 0;
}
-void CharacterStreamUTF16Buffer::SeekForward(int pos) {
- pos_ = pos;
- ASSERT(pushback_buffer()->is_empty());
- stream_->Seek(pos);
// Seeks past the currently buffered characters by delegating to the
// subclass's BufferSeekForward; returns whatever that call returns.
+unsigned BufferedUC16CharacterStream::SlowSeekForward(unsigned delta) {
+ // Leave pushback mode (i.e., ignore that there might be valid data
+ // in the buffer before the pushback_limit_ point).
+ pushback_limit_ = NULL;
+ return BufferSeekForward(delta);
+}
+
+// ----------------------------------------------------------------------------
+// GenericStringUC16CharacterStream
+
+
// Creates a stream over the characters of `data` from start_position up to
// (not including) end_position. Note that length_ stores the end position,
// not a character count. The buffer starts empty and is filled on first read.
+GenericStringUC16CharacterStream::GenericStringUC16CharacterStream(
+ Handle<String> data,
+ unsigned start_position,
+ unsigned end_position)
+ : string_(data),
+ length_(end_position) {
+ ASSERT(end_position >= start_position);
+ buffer_cursor_ = buffer_;
+ buffer_end_ = buffer_;
+ pos_ = start_position;
+}
+
+
+GenericStringUC16CharacterStream::~GenericStringUC16CharacterStream() { }
+
+
// Moves pos_ forward by delta, clamped to the end position, refills the
// buffer from the new position and returns the distance actually moved.
+unsigned GenericStringUC16CharacterStream::BufferSeekForward(unsigned delta) {
+ unsigned old_pos = pos_;
+ pos_ = Min(pos_ + delta, length_);
+ ReadBlock();
+ return pos_ - old_pos;
+}
+
+
// Writes up to `length` characters of the string, starting at from_pos, into
// buffer_. The request is truncated at the stream's end position. Returns
// the number of characters written (0 at or past the end).
+unsigned GenericStringUC16CharacterStream::FillBuffer(unsigned from_pos,
+ unsigned length) {
+ if (from_pos >= length_) return 0;
+ if (from_pos + length > length_) {
+ length = length_ - from_pos;
+ }
+ String::WriteToFlat<uc16>(*string_, buffer_, from_pos, from_pos + length);
+ return length;
+}
+
+
+// ----------------------------------------------------------------------------
+// Utf8ToUC16CharacterStream
// Creates a stream over `length` bytes of UTF-8 data. The stream keeps the
// pointer to `data`. Both the byte offset and the character position start
// at zero, and the first block is decoded immediately.
+Utf8ToUC16CharacterStream::Utf8ToUC16CharacterStream(const byte* data,
+ unsigned length)
+ : BufferedUC16CharacterStream(),
+ raw_data_(data),
+ raw_data_length_(length),
+ raw_data_pos_(0),
+ raw_character_position_(0) {
+ ReadBlock();
+}
+
+
+Utf8ToUC16CharacterStream::~Utf8ToUC16CharacterStream() { }
+
+
// Seeks forward by delta characters. SetRawPosition may stop early at the
// end of the data, so pos_ is taken from the position actually reached
// before the buffer is refilled. Returns the distance actually moved.
+unsigned Utf8ToUC16CharacterStream::BufferSeekForward(unsigned delta) {
+ unsigned old_pos = pos_;
+ unsigned target_pos = pos_ + delta;
+ SetRawPosition(target_pos);
+ pos_ = raw_character_position_;
+ ReadBlock();
+ return pos_ - old_pos;
+}
+
+
// Decodes up to `length` characters, starting at character position
// char_position, from the UTF-8 data into buffer_. Returns the number of
// characters written; 0 if char_position cannot be reached or the data is
// exhausted.
+unsigned Utf8ToUC16CharacterStream::FillBuffer(unsigned char_position,
+ unsigned length) {
+ static const unibrow::uchar kMaxUC16Character = 0xffff;
+ SetRawPosition(char_position);
+ if (raw_character_position_ != char_position) {
+ // char_position was not a valid position in the stream (hit the end
+ // while spooling to it).
+ return 0u;
+ }
+ unsigned i = 0;
+ while (i < length) {
+ if (raw_data_pos_ == raw_data_length_) break;
+ unibrow::uchar c = raw_data_[raw_data_pos_];
+ if (c <= unibrow::Utf8::kMaxOneByteChar) {
// Single-byte character: the byte value is the character.
+ raw_data_pos_++;
+ } else {
// Multi-byte sequence: CalculateValue decodes it and advances
// raw_data_pos_ through the pointer argument.
+ c = unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
+ raw_data_length_ - raw_data_pos_,
+ &raw_data_pos_);
+ // Don't allow characters outside of the BMP.
+ if (c > kMaxUC16Character) {
+ c = unibrow::Utf8::kBadChar;
+ }
+ }
+ buffer_[i++] = static_cast<uc16>(c);
+ }
// Remember the character position that now corresponds to raw_data_pos_.
+ raw_character_position_ = char_position + i;
+ return i;
+}
+
+
// Byte classification by the top two bits (selected by the mask):
// 11...... marks the first byte of a multi-byte sequence,
// 10...... marks a follower byte.
+static const byte kUtf8MultiByteMask = 0xC0;
+static const byte kUtf8MultiByteCharStart = 0xC0;
+static const byte kUtf8MultiByteCharFollower = 0x80;
+
+
// True if the top two bits of first_byte are both set. Only compiled in
// DEBUG builds; in the code shown here it is used solely inside ASSERTs.
+#ifdef DEBUG
+static bool IsUtf8MultiCharacterStart(byte first_byte) {
+ return (first_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharStart;
+}
+#endif
+
+
// True if the top two bits of later_byte are 10, i.e. it is a follower byte
// of a multi-byte sequence.
+static bool IsUtf8MultiCharacterFollower(byte later_byte) {
+ return (later_byte & kUtf8MultiByteMask) == kUtf8MultiByteCharFollower;
+}
+
+
+// Move the cursor back to point at the preceding UTF-8 character start
+// in the buffer.
// No bounds are checked here: the caller must ensure *cursor > 0 and that
// the bytes before the cursor form a well-formed sequence, otherwise the
// backwards scan can run past the start of the buffer.
+static inline void Utf8CharacterBack(const byte* buffer, unsigned* cursor) {
+ byte character = buffer[--*cursor];
+ if (character > unibrow::Utf8::kMaxOneByteChar) {
+ ASSERT(IsUtf8MultiCharacterFollower(character));
+ // Last byte of a multi-byte character encoding. Step backwards until
+ // pointing to the first byte of the encoding, recognized by having the
+ // top two bits set.
+ while (IsUtf8MultiCharacterFollower(buffer[--*cursor])) { }
+ ASSERT(IsUtf8MultiCharacterStart(buffer[*cursor]));
+ }
+}
+
+
+// Move the cursor forward to point at the next following UTF-8 character start
+// in the buffer.
// The sequence length is derived from the lead byte alone; follower bytes
// are skipped without being inspected and no buffer length is checked.
+static inline void Utf8CharacterForward(const byte* buffer, unsigned* cursor) {
+ byte character = buffer[(*cursor)++];
+ if (character > unibrow::Utf8::kMaxOneByteChar) {
+ // First character of a multi-byte character encoding.
+ // The number of most-significant one-bits determines the length of the
+ // encoding:
+ // 110..... - (0xCx, 0xDx) one additional byte (minimum).
+ // 1110.... - (0xEx) two additional bytes.
+ // 11110... - (0xFx) three additional bytes (maximum).
+ ASSERT(IsUtf8MultiCharacterStart(character));
+ // Additional bytes is:
+ // 1 if value in range 0xC0 .. 0xDF.
+ // 2 if value in range 0xE0 .. 0xEF.
+ // 3 if value in range 0xF0 .. 0xF7.
+ // Encode that in a single value.
// 0x3211 is a table of 4-bit entries (1, 1, 2, 3); bits 4-5 of
// (character - 0xC0) select the entry via a shift of 0, 4, 8 or 12.
+ unsigned additional_bytes =
+ ((0x3211u) >> (((character - 0xC0) >> 2) & 0xC)) & 0x03;
+ *cursor += additional_bytes;
// NOTE(review): this indexes from the start of `buffer`, not from *cursor,
// so it checks a fixed byte near the buffer start rather than the byte at
// the new cursor position. Presumably buffer[*cursor] was intended, but that
// could read one past the end when the character is the last one. Confirm.
+ ASSERT(!IsUtf8MultiCharacterFollower(buffer[1 + additional_bytes]));
+ }
+}
+
+
// Moves raw_data_pos_ (byte offset) and raw_character_position_ (character
// index) together until the character index equals target_position. When
// moving forwards it stops early if the end of the data is reached, leaving
// raw_character_position_ below target_position; callers detect that by
// comparing the two afterwards.
+void Utf8ToUC16CharacterStream::SetRawPosition(unsigned target_position) {
+ if (raw_character_position_ > target_position) {
+ // Spool backwards in utf8 buffer.
+ do {
+ Utf8CharacterBack(raw_data_, &raw_data_pos_);
+ raw_character_position_--;
+ } while (raw_character_position_ > target_position);
+ return;
+ }
+ // Spool forwards in the utf8 buffer.
+ while (raw_character_position_ < target_position) {
+ if (raw_data_pos_ == raw_data_length_) return;
+ Utf8CharacterForward(raw_data_, &raw_data_pos_);
+ raw_character_position_++;
+ }
+}
+
+
+// ----------------------------------------------------------------------------
+// ExternalTwoByteStringUC16CharacterStream
+
+ExternalTwoByteStringUC16CharacterStream::
+ ~ExternalTwoByteStringUC16CharacterStream() { }
+
+
// Creates a stream over the characters of `data` from start_position up to
// (not including) end_position. The buffer pointers are aimed directly at
// the pointer returned by GetTwoByteData(start_position), so this constructor
// copies no characters. pos_ starts at start_position.
+ExternalTwoByteStringUC16CharacterStream
+ ::ExternalTwoByteStringUC16CharacterStream(
+ Handle<ExternalTwoByteString> data,
+ int start_position,
+ int end_position)
+ : UC16CharacterStream(),
+ source_(data),
+ raw_data_(data->GetTwoByteData(start_position)) {
+ buffer_cursor_ = raw_data_;
+ buffer_end_ = raw_data_ + (end_position - start_position);
+ pos_ = start_position;
}
complete_ = true;
}
+
// ----------------------------------------------------------------------------
// V8JavaScriptScanner
-void V8JavaScriptScanner::Initialize(Handle<String> source,
- int literal_flags) {
- source_ = stream_initializer_.Init(source, NULL, 0, source->length());
- // Need to capture identifiers in order to recognize "get" and "set"
- // in object literals.
- literal_flags_ = literal_flags | kLiteralIdentifier;
- Init();
- // Skip initial whitespace allowing HTML comment ends just like
- // after a newline and scan first token.
- has_line_terminator_before_next_ = true;
- SkipWhiteSpace();
- Scan();
-}
-
-
-void V8JavaScriptScanner::Initialize(Handle<String> source,
- unibrow::CharacterStream* stream,
- int literal_flags) {
- source_ = stream_initializer_.Init(source, stream,
- 0, UTF16Buffer::kNoEndPosition);
- literal_flags_ = literal_flags | kLiteralIdentifier;
- Init();
- // Skip initial whitespace allowing HTML comment ends just like
- // after a newline and scan first token.
- has_line_terminator_before_next_ = true;
- SkipWhiteSpace();
- Scan();
-}
+V8JavaScriptScanner::V8JavaScriptScanner() : JavaScriptScanner() { }
-void V8JavaScriptScanner::Initialize(Handle<String> source,
- int start_position,
- int end_position,
+void V8JavaScriptScanner::Initialize(UC16CharacterStream* source,
int literal_flags) {
- source_ = stream_initializer_.Init(source, NULL,
- start_position, end_position);
+ source_ = source;
literal_flags_ = literal_flags | kLiteralIdentifier;
+ // kLiteralIdentifier is always added to the flags above: identifiers need
+ // to be captured in order to recognize "get" and "set" in object literals.
Init();
// Skip initial whitespace allowing HTML comment ends just like
// after a newline and scan first token.
}
-UTF16Buffer* StreamInitializer::Init(Handle<String> source,
- unibrow::CharacterStream* stream,
- int start_position,
- int end_position) {
- // Either initialize the scanner from a character stream or from a
- // string.
- ASSERT(source.is_null() || stream == NULL);
-
- // Initialize the source buffer.
- if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
- two_byte_string_buffer_.Initialize(
- Handle<ExternalTwoByteString>::cast(source),
- start_position,
- end_position);
- return &two_byte_string_buffer_;
- } else if (!source.is_null() && StringShape(*source).IsExternalAscii()) {
- ascii_string_buffer_.Initialize(
- Handle<ExternalAsciiString>::cast(source),
- start_position,
- end_position);
- return &ascii_string_buffer_;
- } else {
- if (!source.is_null()) {
- safe_string_input_buffer_.Reset(source.location());
- stream = &safe_string_input_buffer_;
- }
- char_stream_buffer_.Initialize(source,
- stream,
- start_position,
- end_position);
- return &char_stream_buffer_;
- }
-}
-
// ----------------------------------------------------------------------------
// JsonScanner
-JsonScanner::JsonScanner() {}
+JsonScanner::JsonScanner() : Scanner() { }
-void JsonScanner::Initialize(Handle<String> source) {
- source_ = stream_initializer_.Init(source, NULL, 0, source->length());
+void JsonScanner::Initialize(UC16CharacterStream* source) {
+ source_ = source;
Init();
// Skip initial whitespace.
SkipJsonWhiteSpace();
namespace v8 {
namespace internal {
-// UTF16 buffer to read characters from a character stream.
-class CharacterStreamUTF16Buffer: public UTF16Buffer {
+// A buffered character stream based on a random access character
+// source (ReadBlock can be called with pos_ pointing to any position,
+// even positions before the current). Concrete streams implement BufferSeekForward and FillBuffer.
+class BufferedUC16CharacterStream: public UC16CharacterStream {
public:
- CharacterStreamUTF16Buffer();
- virtual ~CharacterStreamUTF16Buffer() {}
- void Initialize(Handle<String> data,
- unibrow::CharacterStream* stream,
- int start_position,
- int end_position);
- virtual void PushBack(uc32 ch);
- virtual uc32 Advance();
- virtual void SeekForward(int pos);
-
- private:
- List<uc32> pushback_buffer_;
- uc32 last_;
- unibrow::CharacterStream* stream_;
-
- List<uc32>* pushback_buffer() { return &pushback_buffer_; }
+ BufferedUC16CharacterStream();
+ virtual ~BufferedUC16CharacterStream();
+
+ virtual void PushBack(uc16 character);
+
+ protected:
+ static const unsigned kBufferSize = 512;  // Number of uc16 slots in buffer_.
+ static const unsigned kPushBackStepSize = 16;
+
+ virtual unsigned SlowSeekForward(unsigned delta);
+ virtual bool ReadBlock();
+ virtual void SlowPushBack(uc16 character);
+
+ virtual unsigned BufferSeekForward(unsigned delta) = 0;  // Pure virtual: supplied by each concrete stream.
+ virtual unsigned FillBuffer(unsigned position, unsigned length) = 0;  // Pure virtual: supplied by each concrete stream.
+
+ const uc16* pushback_limit_;
+ uc16 buffer_[kBufferSize];
};
-// UTF16 buffer to read characters from an external string.
-template <typename StringType, typename CharType>
-class ExternalStringUTF16Buffer: public UTF16Buffer {
+// Generic string stream: a buffered UC16 stream built from a Handle<String> plus a start and end position.
+class GenericStringUC16CharacterStream: public BufferedUC16CharacterStream {
public:
- ExternalStringUTF16Buffer();
- virtual ~ExternalStringUTF16Buffer() {}
- void Initialize(Handle<StringType> data,
- int start_position,
- int end_position);
- virtual void PushBack(uc32 ch);
- virtual uc32 Advance();
- virtual void SeekForward(int pos);
-
- private:
- const CharType* raw_data_; // Pointer to the actual array of characters.
+ GenericStringUC16CharacterStream(Handle<String> data,
+ unsigned start_position,
+ unsigned end_position);
+ virtual ~GenericStringUC16CharacterStream();
+
+ protected:
+ virtual unsigned BufferSeekForward(unsigned delta);  // Implements the base class's pure virtual hook.
+ virtual unsigned FillBuffer(unsigned position, unsigned length);  // Implements the base class's pure virtual hook.
+
+ Handle<String> string_;
+ unsigned start_position_;
+ unsigned length_;
};
-// Initializes a UTF16Buffer as input stream, using one of a number
-// of strategies depending on the available character sources.
-class StreamInitializer {
+// UC16 stream based on a literal UTF-8 string, given as a byte pointer and a length in bytes.
+class Utf8ToUC16CharacterStream: public BufferedUC16CharacterStream {
public:
- UTF16Buffer* Init(Handle<String> source,
- unibrow::CharacterStream* stream,
- int start_position,
- int end_position);
- private:
- // Different UTF16 buffers used to pull characters from. Based on input one of
- // these will be initialized as the actual data source.
- CharacterStreamUTF16Buffer char_stream_buffer_;
- ExternalStringUTF16Buffer<ExternalTwoByteString, uint16_t>
- two_byte_string_buffer_;
- ExternalStringUTF16Buffer<ExternalAsciiString, char> ascii_string_buffer_;
-
- // Used to convert the source string into a character stream when a stream
- // is not passed to the scanner.
- SafeStringInputBuffer safe_string_input_buffer_;
+ Utf8ToUC16CharacterStream(const byte* data, unsigned length);
+ virtual ~Utf8ToUC16CharacterStream();
+
+ protected:
+ virtual unsigned BufferSeekForward(unsigned delta);
+ virtual unsigned FillBuffer(unsigned char_position, unsigned length);
+ void SetRawPosition(unsigned char_position);  // Moves raw_data_pos_ towards the given character position.
+
+ const byte* raw_data_;
+ unsigned raw_data_length_; // Measured in bytes, not characters.
+ unsigned raw_data_pos_;  // Byte index into raw_data_.
+ // The character position of the character at raw_data[raw_data_pos_].
+ // Not necessarily the same as pos_.
+ unsigned raw_character_position_;
+};
+
+
+// UC16 character stream that reads straight from the character data of an external two-byte string.
+class ExternalTwoByteStringUC16CharacterStream: public UC16CharacterStream {
+ public:
+ ExternalTwoByteStringUC16CharacterStream(Handle<ExternalTwoByteString> data,
+ int start_position,
+ int end_position);
+ virtual ~ExternalTwoByteStringUC16CharacterStream();
+
+ virtual void PushBack(uc16 character) {  // The argument is not used: the cursor simply steps back one slot.
+ ASSERT(buffer_cursor_ > raw_data_);  // Must not back up past the first character of the stream.
+ buffer_cursor_--;
+ pos_--;
+ }
+ protected:
+ virtual unsigned SlowSeekForward(unsigned delta) {
+ // Fast case always handles seeking, so there is never anything left to skip here.
+ return 0;
+ }
+ virtual bool ReadBlock() {
+ // Entire string is read at start, so there is never another block to read.
+ return false;
+ }
+ Handle<ExternalTwoByteString> source_;
+ const uc16* raw_data_; // Pointer to the actual array of characters.
+};
+
// ----------------------------------------------------------------------------
// V8JavaScriptScanner
// JavaScript scanner getting its input from either a V8 String or a unicode
class V8JavaScriptScanner : public JavaScriptScanner {
public:
- V8JavaScriptScanner() {}
-
- // Initialize the Scanner to scan source.
- void Initialize(Handle<String> source, int literal_flags = kAllLiterals);
- void Initialize(Handle<String> source,
- unibrow::CharacterStream* stream,
- int literal_flags = kAllLiterals);
- void Initialize(Handle<String> source,
- int start_position, int end_position,
+ V8JavaScriptScanner();
+ void Initialize(UC16CharacterStream* source,
int literal_flags = kAllLiterals);
-
- protected:
- StreamInitializer stream_initializer_;
};
public:
JsonScanner();
- // Initialize the Scanner to scan source.
- void Initialize(Handle<String> source);
+ void Initialize(UC16CharacterStream* source);
// Returns the next token.
Token::Value Next();
// Recognizes all of the single-character tokens directly, or calls a function
// to scan a number, string or identifier literal.
// The only allowed whitespace characters between tokens are tab,
- // carrige-return, newline and space.
+ // carriage-return, newline and space.
void ScanJson();
// A JSON number (production JSONNumber) is a subset of the valid JavaScript
// are the only valid JSON identifiers (productions JSONBooleanLiteral,
// JSONNullLiteral).
Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
-
- StreamInitializer stream_initializer_;
};
-
-// ExternalStringUTF16Buffer
-template <typename StringType, typename CharType>
-ExternalStringUTF16Buffer<StringType, CharType>::ExternalStringUTF16Buffer()
- : raw_data_(NULL) { }
-
-
-template <typename StringType, typename CharType>
-void ExternalStringUTF16Buffer<StringType, CharType>::Initialize(
- Handle<StringType> data,
- int start_position,
- int end_position) {
- ASSERT(!data.is_null());
- raw_data_ = data->resource()->data();
-
- ASSERT(end_position <= data->length());
- if (start_position > 0) {
- SeekForward(start_position);
- }
- end_ =
- end_position != kNoEndPosition ? end_position : data->length();
-}
-
-
-template <typename StringType, typename CharType>
-uc32 ExternalStringUTF16Buffer<StringType, CharType>::Advance() {
- if (pos_ < end_) {
- return raw_data_[pos_++];
- } else {
- // note: currently the following increment is necessary to avoid a
- // test-parser problem!
- pos_++;
- return static_cast<uc32>(-1);
- }
-}
-
-
-template <typename StringType, typename CharType>
-void ExternalStringUTF16Buffer<StringType, CharType>::PushBack(uc32 ch) {
- pos_--;
- ASSERT(pos_ >= Scanner::kCharacterLookaheadBufferSize);
- ASSERT(raw_data_[pos_ - Scanner::kCharacterLookaheadBufferSize] == ch);
-}
-
-
-template <typename StringType, typename CharType>
-void ExternalStringUTF16Buffer<StringType, CharType>::SeekForward(int pos) {
- pos_ = pos;
-}
-
} } // namespace v8::internal
#endif // V8_SCANNER_H_
uintptr_t stack_limit = i::StackGuard::real_climit();
for (int i = 0; programs[i]; i++) {
const char* program = programs[i];
- unibrow::Utf8InputBuffer<256> stream(program, strlen(program));
+ i::Utf8ToUC16CharacterStream stream(
+ reinterpret_cast<const i::byte*>(program),
+ static_cast<unsigned>(strlen(program)));
i::CompleteParserRecorder log;
i::V8JavaScriptScanner scanner;
- scanner.Initialize(i::Handle<i::String>::null(), &stream);
+ scanner.Initialize(&stream);
v8::preparser::PreParser::PreParseResult result =
v8::preparser::PreParser::PreParseProgram(&scanner,
// and then used the invalid currently scanned literal. This always
// failed in debug mode, and sometimes crashed in release mode.
- unibrow::Utf8InputBuffer<256> stream(program, strlen(program));
+ i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
+ static_cast<unsigned>(strlen(program)));
i::ScriptDataImpl* data =
- i::ParserApi::PreParse(i::Handle<i::String>::null(), &stream, NULL);
+ i::ParserApi::PreParse(&stream, NULL);
CHECK(data->HasError());
delete data;
}
"try { } catch (e) { var foo = function () { /* first */ } }"
"var bar = function () { /* second */ }";
- unibrow::Utf8InputBuffer<256> stream(program, strlen(program));
+ i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(program),
+ static_cast<unsigned>(strlen(program)));
i::ScriptDataImpl* data =
- i::ParserApi::PartialPreParse(i::Handle<i::String>::null(),
- &stream, NULL);
+ i::ParserApi::PartialPreParse(&stream, NULL);
CHECK(!data->HasError());
data->Initialize();
uintptr_t stack_limit = i::StackGuard::real_climit();
- unibrow::Utf8InputBuffer<256> stream(*program, strlen(*program));
+ i::Utf8ToUC16CharacterStream stream(
+ reinterpret_cast<const i::byte*>(*program),
+ static_cast<unsigned>(kProgramSize));
i::CompleteParserRecorder log;
i::V8JavaScriptScanner scanner;
- scanner.Initialize(i::Handle<i::String>::null(), &stream);
+ scanner.Initialize(&stream);
v8::preparser::PreParser::PreParseResult result =
stack_limit);
CHECK_EQ(v8::preparser::PreParser::kPreParseStackOverflow, result);
}
+
+
+class TestExternalResource: public v8::String::ExternalStringResource {  // Test resource exposing a caller-supplied uint16_t array.
+ public:
+ explicit TestExternalResource(uint16_t* data, int length)
+ : data_(data), length_(static_cast<size_t>(length)) { }
+
+ ~TestExternalResource() { }  // data_ is not freed here.
+
+ const uint16_t* data() const {
+ return data_;
+ }
+
+ size_t length() const {
+ return length_;
+ }
+ private:
+ uint16_t* data_;
+ size_t length_;
+};
+
+
+#define CHECK_EQU(v1, v2) CHECK_EQ(static_cast<int>(v1), static_cast<int>(v2))  // Casts both sides to int so signed and unsigned operands can be compared.
+
+void TestCharacterStream(const char* ascii_source,  // Cross-checks the three stream classes against the same ASCII input.
+ unsigned length,
+ unsigned start = 0,
+ unsigned end = 0) {
+ if (end == 0) end = length;  // end == 0 means "read up to length".
+ unsigned sub_length = end - start;
+ i::HandleScope test_scope;
+ i::SmartPointer<i::uc16> uc16_buffer(new i::uc16[length]);
+ for (unsigned i = 0; i < length; i++) {
+ uc16_buffer[i] = static_cast<i::uc16>(ascii_source[i]);
+ }
+ i::Vector<const char> ascii_vector(ascii_source, static_cast<int>(length));
+ i::Handle<i::String> ascii_string(
+ i::Factory::NewStringFromAscii(ascii_vector));
+ TestExternalResource resource(*uc16_buffer, length);
+ i::Handle<i::String> uc16_string(
+ i::Factory::NewExternalStringFromTwoByte(&resource));
+
+ i::ExternalTwoByteStringUC16CharacterStream uc16_stream(
+ i::Handle<i::ExternalTwoByteString>::cast(uc16_string), start, end);
+ i::GenericStringUC16CharacterStream string_stream(ascii_string, start, end);
+ i::Utf8ToUC16CharacterStream utf8_stream(
+ reinterpret_cast<const i::byte*>(ascii_source), end);
+ utf8_stream.SeekForward(start);  // This constructor takes no start position, so skip ahead by hand.
+
+ unsigned i = start;
+ while (i < end) {
+ // Read streams one char at a time; every stream must report the same position and character.
+ CHECK_EQU(i, uc16_stream.pos());
+ CHECK_EQU(i, string_stream.pos());
+ CHECK_EQU(i, utf8_stream.pos());
+ int32_t c0 = ascii_source[i];
+ int32_t c1 = uc16_stream.Advance();
+ int32_t c2 = string_stream.Advance();
+ int32_t c3 = utf8_stream.Advance();
+ i++;
+ CHECK_EQ(c0, c1);
+ CHECK_EQ(c0, c2);
+ CHECK_EQ(c0, c3);
+ CHECK_EQU(i, uc16_stream.pos());
+ CHECK_EQU(i, string_stream.pos());
+ CHECK_EQU(i, utf8_stream.pos());
+ }
+ while (i > start + sub_length / 4) {
+ // Pushback, re-read, pushback again: a net step of one character back per iteration, down to the quarter mark.
+ int32_t c0 = ascii_source[i - 1];
+ CHECK_EQU(i, uc16_stream.pos());
+ CHECK_EQU(i, string_stream.pos());
+ CHECK_EQU(i, utf8_stream.pos());
+ uc16_stream.PushBack(c0);
+ string_stream.PushBack(c0);
+ utf8_stream.PushBack(c0);
+ i--;
+ CHECK_EQU(i, uc16_stream.pos());
+ CHECK_EQU(i, string_stream.pos());
+ CHECK_EQU(i, utf8_stream.pos());
+ int32_t c1 = uc16_stream.Advance();
+ int32_t c2 = string_stream.Advance();
+ int32_t c3 = utf8_stream.Advance();
+ i++;
+ CHECK_EQU(i, uc16_stream.pos());
+ CHECK_EQU(i, string_stream.pos());
+ CHECK_EQU(i, utf8_stream.pos());
+ CHECK_EQ(c0, c1);
+ CHECK_EQ(c0, c2);
+ CHECK_EQ(c0, c3);
+ uc16_stream.PushBack(c0);
+ string_stream.PushBack(c0);
+ utf8_stream.PushBack(c0);
+ i--;
+ CHECK_EQU(i, uc16_stream.pos());
+ CHECK_EQU(i, string_stream.pos());
+ CHECK_EQU(i, utf8_stream.pos());
+ }
+ unsigned halfway = start + sub_length / 2;  // Seek target: the midpoint of the tested range.
+ uc16_stream.SeekForward(halfway - i);
+ string_stream.SeekForward(halfway - i);
+ utf8_stream.SeekForward(halfway - i);
+ i = halfway;
+ CHECK_EQU(i, uc16_stream.pos());
+ CHECK_EQU(i, string_stream.pos());
+ CHECK_EQU(i, utf8_stream.pos());
+
+ while (i < end) {
+ // Read streams one char at a time, from the midpoint to the end of the range.
+ CHECK_EQU(i, uc16_stream.pos());
+ CHECK_EQU(i, string_stream.pos());
+ CHECK_EQU(i, utf8_stream.pos());
+ int32_t c0 = ascii_source[i];
+ int32_t c1 = uc16_stream.Advance();
+ int32_t c2 = string_stream.Advance();
+ int32_t c3 = utf8_stream.Advance();
+ i++;
+ CHECK_EQ(c0, c1);
+ CHECK_EQ(c0, c2);
+ CHECK_EQ(c0, c3);
+ CHECK_EQU(i, uc16_stream.pos());
+ CHECK_EQU(i, string_stream.pos());
+ CHECK_EQU(i, utf8_stream.pos());
+ }
+
+ int32_t c1 = uc16_stream.Advance();  // One read past the end: each stream must return a negative value.
+ int32_t c2 = string_stream.Advance();
+ int32_t c3 = utf8_stream.Advance();
+ CHECK_LT(c1, 0);
+ CHECK_LT(c2, 0);
+ CHECK_LT(c3, 0);
+}
+
+
+TEST(CharacterStreams) {  // Runs TestCharacterStream over small, large, sub-range and empty inputs.
+ v8::HandleScope handles;
+ v8::Persistent<v8::Context> context = v8::Context::New();
+ v8::Context::Scope context_scope(context);
+
+ TestCharacterStream("abc\0\n\r\x7f", 7);  // Contains an embedded NUL, hence the explicit length.
+ static const unsigned kBigStringSize = 4096;
+ char buffer[kBigStringSize + 1];
+ for (unsigned i = 0; i < kBigStringSize; i++) {
+ buffer[i] = static_cast<char>(i & 0x7f);  // Cycle through the 7-bit values 0..127.
+ }
+ TestCharacterStream(buffer, kBigStringSize);
+
+ TestCharacterStream(buffer, kBigStringSize, 576, 3298);  // Same data, restricted to the range 576..3298.
+
+ TestCharacterStream("\0", 1);  // A single NUL character.
+ TestCharacterStream("", 0);  // Empty input.
+}
+
+
+TEST(Utf8CharacterStream) {  // Round-trips every value 0..kMaxThreeByteChar through the UTF-8 stream.
+ static const unsigned kMaxUC16CharU = unibrow::Utf8::kMaxThreeByteChar;
+ static const int kMaxUC16Char = static_cast<int>(kMaxUC16CharU);
+
+ static const int kAllUtf8CharsSize =
+ (unibrow::Utf8::kMaxOneByteChar + 1) +  // Values encoded in one byte.
+ (unibrow::Utf8::kMaxTwoByteChar - unibrow::Utf8::kMaxOneByteChar) * 2 +  // Values encoded in two bytes.
+ (unibrow::Utf8::kMaxThreeByteChar - unibrow::Utf8::kMaxTwoByteChar) * 3;  // Values encoded in three bytes.
+ static const unsigned kAllUtf8CharsSizeU =
+ static_cast<unsigned>(kAllUtf8CharsSize);
+
+ char buffer[kAllUtf8CharsSizeU];
+ unsigned cursor = 0;
+ for (int i = 0; i <= kMaxUC16Char; i++) {
+ cursor += unibrow::Utf8::Encode(buffer + cursor, i);  // Append the encoding of value i.
+ }
+ ASSERT(cursor == kAllUtf8CharsSizeU);
+
+ i::Utf8ToUC16CharacterStream stream(reinterpret_cast<const i::byte*>(buffer),
+ kAllUtf8CharsSizeU);
+ for (int i = 0; i <= kMaxUC16Char; i++) {  // Forward pass: position i must yield value i.
+ CHECK_EQU(i, stream.pos());
+ int32_t c = stream.Advance();
+ CHECK_EQ(i, c);
+ CHECK_EQU(i + 1, stream.pos());
+ }
+ for (int i = kMaxUC16Char; i >= 0; i--) {  // Push every character back, last one first.
+ CHECK_EQU(i + 1, stream.pos());
+ stream.PushBack(i);
+ CHECK_EQU(i, stream.pos());
+ }
+ int i = 0;
+ while (stream.pos() < kMaxUC16CharU) {  // Alternate a SeekForward(12) with a single Advance.
+ CHECK_EQU(i, stream.pos());
+ unsigned progress = stream.SeekForward(12);
+ i += progress;
+ int32_t c = stream.Advance();
+ if (i <= kMaxUC16Char) {
+ CHECK_EQ(i, c);
+ } else {
+ CHECK_EQ(-1, c);
+ }
+ i += 1;
+ CHECK_EQU(i, stream.pos());
+ }
+}
+
+#undef CHECK_EQU
+
+void TestStreamScanner(i::UC16CharacterStream* stream,  // Scans stream and checks each token against expected_tokens.
+ i::Token::Value* expected_tokens,
+ int skip_pos = 0, // Zero means not skipping.
+ int skip_to = 0) {
+ i::V8JavaScriptScanner scanner;
+ scanner.Initialize(stream, i::JavaScriptScanner::kAllLiterals);
+
+ int i = 0;
+ do {
+ i::Token::Value expected = expected_tokens[i];
+ i::Token::Value actual = scanner.Next();
+ CHECK_EQ(i::Token::String(expected), i::Token::String(actual));
+ if (scanner.location().end_pos == skip_pos) {  // The token just read ends at skip_pos: jump ahead.
+ scanner.SeekForward(skip_to);
+ }
+ i++;
+ } while (expected_tokens[i] != i::Token::ILLEGAL);  // ILLEGAL ends the expectation list and is never compared.
+}
+
+TEST(StreamScanner) {  // Checks token sequences scanned from UTF-8 streams, with and without seeking.
+ const char* str1 = "{ foo get for : */ <- \n\n /*foo*/ bib";
+ i::Utf8ToUC16CharacterStream stream1(reinterpret_cast<const i::byte*>(str1),
+ static_cast<unsigned>(strlen(str1)));
+ i::Token::Value expectations1[] = {
+ i::Token::LBRACE,
+ i::Token::IDENTIFIER,
+ i::Token::IDENTIFIER,
+ i::Token::FOR,
+ i::Token::COLON,
+ i::Token::MUL,
+ i::Token::DIV,
+ i::Token::LT,
+ i::Token::SUB,
+ i::Token::IDENTIFIER,
+ i::Token::EOS,
+ i::Token::ILLEGAL
+ };
+ TestStreamScanner(&stream1, expectations1, 0, 0);  // Zero skip position: no seeking.
+
+ const char* str2 = "case default const {THIS\nPART\nSKIPPED} do";
+ i::Utf8ToUC16CharacterStream stream2(reinterpret_cast<const i::byte*>(str2),
+ static_cast<unsigned>(strlen(str2)));
+ i::Token::Value expectations2[] = {
+ i::Token::CASE,
+ i::Token::DEFAULT,
+ i::Token::CONST,
+ i::Token::LBRACE,
+ // Skipped part here: the text between the braces produces no expected tokens.
+ i::Token::RBRACE,
+ i::Token::DO,
+ i::Token::EOS,
+ i::Token::ILLEGAL
+ };
+ ASSERT_EQ('{', str2[19]);
+ ASSERT_EQ('}', str2[37]);
+ TestStreamScanner(&stream2, expectations2, 20, 37);  // Seek from just after '{' (20) to the '}' at 37.
+
+ const char* str3 = "{}}}}";
+ i::Token::Value expectations3[] = {
+ i::Token::LBRACE,
+ i::Token::RBRACE,
+ i::Token::RBRACE,
+ i::Token::RBRACE,
+ i::Token::RBRACE,
+ i::Token::EOS,
+ i::Token::ILLEGAL
+ };
+ // Skip zero-four RBRACEs; each round moves the EOS/ILLEGAL pair one slot earlier in the list.
+ for (int i = 0; i <= 4; i++) {
+ expectations3[6 - i] = i::Token::ILLEGAL;
+ expectations3[5 - i] = i::Token::EOS;
+ i::Utf8ToUC16CharacterStream stream3(
+ reinterpret_cast<const i::byte*>(str3),
+ static_cast<unsigned>(strlen(str3)));
+ TestStreamScanner(&stream3, expectations3, 1, 1 + i);
+ }
+}