Add script streaming API.

author marja@chromium.org <marja@chromium.org>

Thu, 11 Sep 2014 11:06:26 +0000 (11:06 +0000)

committer marja@chromium.org <marja@chromium.org>

Thu, 11 Sep 2014 11:06:26 +0000 (11:06 +0000)
author marja@chromium.org <marja@chromium.org>
Thu, 11 Sep 2014 11:06:26 +0000 (11:06 +0000)
committer marja@chromium.org <marja@chromium.org>
Thu, 11 Sep 2014 11:06:26 +0000 (11:06 +0000)
diff --git a/BUILD.gn b/BUILD.gn

index 76d48d1cdba5b4cab820b6a696de4d68fd9adf74..b5e56e5e5b819107fab4c99e4bc9cf7a1d6446db 100644 (file)
--- a/BUILD.gn
+++ b/BUILD.gn
@@ -435,6 +435,8 @@ source_set("v8_base") {
      "src/ast-value-factory.h",
      "src/ast.cc",
      "src/ast.h",
+    "src/background-parsing-task.cc",
+    "src/background-parsing-task.h",
      "src/bignum-dtoa.cc",
      "src/bignum-dtoa.h",
      "src/bignum.cc",
diff --git a/include/v8.h b/include/v8.h

index d6fbf67c08790b61ffd0bd1e4ecd91697211b2ab..19b2778e404f9eed3c9743516a0973ca30876eab 100644 (file)
--- a/include/v8.h
+++ b/include/v8.h
@@ -130,6 +130,7 @@ class Heap;
  class HeapObject;
  class Isolate;
  class Object;
+class StreamedSource;
  template<typename T> class CustomArguments;
  class PropertyCallbackArguments;
  class FunctionCallbackArguments;
@@ -1088,6 +1089,73 @@ class V8_EXPORT ScriptCompiler {
      CachedData* cached_data;
    };
  
+  /**
+   * For streaming incomplete script data to V8. The embedder should implement a
+   * subclass of this class.
+   */
+  class ExternalSourceStream {
+   public:
+    virtual ~ExternalSourceStream() {}
+
+    /**
+     * V8 calls this to request the next chunk of data from the embedder. This
+     * function will be called on a background thread, so it's OK to block and
+     * wait for the data, if the embedder doesn't have data yet. Returns the
+     * length of the data returned. When the data ends, GetMoreData should
+     * return 0. V8 (the caller) takes ownership of the data.
+     *
+     * When streaming UTF-8 data, V8 handles multi-byte characters split between
+     * two data chunks, but doesn't handle multi-byte characters split between
+     * more than two data chunks. The embedder can avoid this problem by always
+     * returning at least 2 bytes of data.
+     *
+     * If the embedder wants to cancel the streaming, they should make the next
+     * GetMoreData call return 0. V8 will interpret it as end of data (and most
+     * probably, parsing will fail). The streaming task will return as soon as
+     * V8 has parsed the data it received so far.
+     */
+    virtual size_t GetMoreData(const uint8_t** src) = 0;
+  };
+
+
+  /**
+   * Source code which can be streamed into V8 in pieces. It will be parsed
+   * while streaming. It can be compiled after the streaming is complete.
+   * StreamedSource must be kept alive while the streaming task is running (see
+   * ScriptStreamingTask below).
+   */
+  class V8_EXPORT StreamedSource {
+   public:
+    enum Encoding { ONE_BYTE, TWO_BYTE, UTF8 };
+
+    StreamedSource(ExternalSourceStream* source_stream, Encoding encoding);
+    ~StreamedSource();
+
+    // Ownership of the CachedData or its buffers is *not* transferred to the
+    // caller. The CachedData object is alive as long as the StreamedSource
+    // object is alive.
+    const CachedData* GetCachedData() const;
+
+    internal::StreamedSource* impl() const { return impl_; }
+
+   private:
+    // Prevent copying. Not implemented.
+    StreamedSource(const StreamedSource&);
+    StreamedSource& operator=(const StreamedSource&);
+
+    internal::StreamedSource* impl_;  // Owned; deleted in ~StreamedSource.
+  };
+
+  /**
+   * A streaming task which the embedder must run on a background thread to
+   * stream scripts into V8. Returned by ScriptCompiler::StartStreamingScript.
+   */
+  class ScriptStreamingTask {
+   public:
+    virtual ~ScriptStreamingTask() {}
+    virtual void Run() = 0;
+  };
+
    enum CompileOptions {
      kNoCompileOptions = 0,
      kProduceParserCache,
@@ -1130,6 +1198,32 @@ class V8_EXPORT ScriptCompiler {
    static Local<Script> Compile(
        Isolate* isolate, Source* source,
        CompileOptions options = kNoCompileOptions);
+
+  /**
+   * Returns a task which streams script data into V8, or NULL if the script
+   * cannot be streamed. The user is responsible for running the task on a
+   * background thread and deleting it. When run, the task starts parsing the
+   * script, and it will request data from the StreamedSource as needed. When
+   * ScriptStreamingTask::Run exits, all data has been streamed and the script
+   * can be compiled (see Compile below).
+   *
+   * This API allows starting the streaming with as little data as possible, and
+   * the remaining data (for example, the ScriptOrigin) is passed to Compile.
+   */
+  static ScriptStreamingTask* StartStreamingScript(
+      Isolate* isolate, StreamedSource* source,
+      CompileOptions options = kNoCompileOptions);
+
+  /**
+   * Compiles a streamed script (bound to current context).
+   *
+   * This can only be called after the streaming has finished
+   * (ScriptStreamingTask has been run). V8 doesn't construct the source string
+   * during streaming, so the embedder needs to pass the full source here.
+   */
+  static Local<Script> Compile(Isolate* isolate, StreamedSource* source,
+                               Handle<String> full_source_string,
+                               const ScriptOrigin& origin);
  };
  
  
diff --git a/src/api.cc b/src/api.cc

index 4e10def26bb51b32f5cb9a276cbb88f91c39fd78..e4a00658c5cf3843b572924afe9d426b6a3039f7 100644 (file)
--- a/src/api.cc
+++ b/src/api.cc
@@ -13,6 +13,7 @@
  #include "include/v8-profiler.h"
  #include "include/v8-testing.h"
  #include "src/assert-scope.h"
+#include "src/background-parsing-task.h"
  #include "src/base/platform/platform.h"
  #include "src/base/platform/time.h"
  #include "src/base/utils/random-number-generator.h"
@@ -1600,6 +1601,20 @@ ScriptCompiler::CachedData::~CachedData() {
  }
  
  
+ScriptCompiler::StreamedSource::StreamedSource(ExternalSourceStream* stream,
+                                               Encoding encoding)
+    : impl_(new i::StreamedSource(stream, encoding)) {}
+
+
+ScriptCompiler::StreamedSource::~StreamedSource() { delete impl_; }
+
+
+const ScriptCompiler::CachedData*
+ScriptCompiler::StreamedSource::GetCachedData() const {
+  return impl_->cached_data.get();
+}
+
+
  Local<Script> UnboundScript::BindToCurrentContext() {
    i::Handle<i::HeapObject> obj =
        i::Handle<i::HeapObject>::cast(Utils::OpenHandle(this));
@@ -1814,6 +1829,89 @@ Local<Script> ScriptCompiler::Compile(
  }
  
  
+ScriptCompiler::ScriptStreamingTask* ScriptCompiler::StartStreamingScript(
+    Isolate* v8_isolate, StreamedSource* source, CompileOptions options) {
+  i::Isolate* isolate = reinterpret_cast<i::Isolate*>(v8_isolate);
+  if (!isolate->global_context().is_null() &&
+      !isolate->global_context()->IsNativeContext()) {
+    // The context chain is non-trivial, and constructing the corresponding
+    // non-trivial Scope chain outside the V8 heap is not implemented. Don't
+    // stream the script. This will only occur if Harmony scoping is enabled and
+    // a previous script has introduced "let" or "const" variables. TODO(marja):
+    // Implement externalizing ScopeInfos and constructing non-trivial Scope
+    // chains independent of the V8 heap so that we can stream also in this
+    // case.
+    return NULL;
+  }
+  return new i::BackgroundParsingTask(source->impl(), options,
+                                      i::FLAG_stack_size, isolate);
+}
+
+
+Local<Script> ScriptCompiler::Compile(Isolate* v8_isolate,
+                                      StreamedSource* v8_source,
+                                      Handle<String> full_source_string,
+                                      const ScriptOrigin& origin) {
+  i::Isolate* isolate = reinterpret_cast<i::Isolate*>(v8_isolate);
+  i::StreamedSource* source = v8_source->impl();
+  ON_BAILOUT(isolate, "v8::ScriptCompiler::Compile()", return Local<Script>());
+  LOG_API(isolate, "ScriptCompiler::Compile()");
+  ENTER_V8(isolate);
+  i::SharedFunctionInfo* raw_result = NULL;
+
+  {
+    i::HandleScope scope(isolate);
+    i::Handle<i::String> str = Utils::OpenHandle(*(full_source_string));
+    i::Handle<i::Script> script = isolate->factory()->NewScript(str);
+    if (!origin.ResourceName().IsEmpty()) {
+      script->set_name(*Utils::OpenHandle(*(origin.ResourceName())));
+    }
+    if (!origin.ResourceLineOffset().IsEmpty()) {
+      script->set_line_offset(i::Smi::FromInt(
+          static_cast<int>(origin.ResourceLineOffset()->Value())));
+    }
+    if (!origin.ResourceColumnOffset().IsEmpty()) {
+      script->set_column_offset(i::Smi::FromInt(
+          static_cast<int>(origin.ResourceColumnOffset()->Value())));
+    }
+    if (!origin.ResourceIsSharedCrossOrigin().IsEmpty()) {
+      script->set_is_shared_cross_origin(origin.ResourceIsSharedCrossOrigin() ==
+                                         v8::True(v8_isolate));
+    }
+    source->info->set_script(script);
+    source->info->SetContext(isolate->global_context());
+
+    EXCEPTION_PREAMBLE(isolate);
+
+    // Do the parsing tasks which need to be done on the main thread. This will
+    // also handle parse errors.
+    source->parser->Internalize();
+
+    i::Handle<i::SharedFunctionInfo> result =
+        i::Handle<i::SharedFunctionInfo>::null();
+    if (source->info->function() != NULL) {
+      // Parsing has succeeded.
+      result =
+          i::Compiler::CompileStreamedScript(source->info.get(), str->length());
+    }
+    has_pending_exception = result.is_null();
+    if (has_pending_exception) isolate->ReportPendingMessages();
+    EXCEPTION_BAILOUT_CHECK(isolate, Local<Script>());
+
+    raw_result = *result;
+    // The Handle<Script> will go out of scope soon; make sure CompilationInfo
+    // doesn't point to it.
+    source->info->set_script(i::Handle<i::Script>());
+  }  // HandleScope goes out of scope.
+  i::Handle<i::SharedFunctionInfo> result(raw_result, isolate);
+  Local<UnboundScript> generic = ToApiHandle<UnboundScript>(result);
+  if (generic.IsEmpty()) {
+    return Local<Script>();
+  }
+  return generic->BindToCurrentContext();
+}
+
+
  Local<Script> Script::Compile(v8::Handle<String> source,
                                v8::ScriptOrigin* origin) {
    i::Handle<i::String> str = Utils::OpenHandle(*source);
diff --git a/src/background-parsing-task.cc b/src/background-parsing-task.cc

new file mode 100644 (file)

index 0000000..c7602a7
--- /dev/null
+++ b/src/background-parsing-task.cc
@@ -0,0 +1,62 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "src/background-parsing-task.h"
+
+namespace v8 {
+namespace internal {
+
+BackgroundParsingTask::BackgroundParsingTask(
+    StreamedSource* source, ScriptCompiler::CompileOptions options,
+    int stack_size, Isolate* isolate)
+    : source_(source), options_(options), stack_size_(stack_size) {
+  // Prepare the data for the internalization phase and compilation phase, which
+  // will happen in the main thread after parsing.
+  source->info.Reset(new i::CompilationInfoWithZone(source->source_stream.get(),
+                                                    source->encoding, isolate));
+  source->info->MarkAsGlobal();
+
+  // We don't set the context to the CompilationInfo yet, because the background
+  // thread cannot do anything with it anyway. We set it just before compilation
+  // on the foreground thread.
+  DCHECK(options == ScriptCompiler::kProduceParserCache ||
+         options == ScriptCompiler::kProduceCodeCache ||
+         options == ScriptCompiler::kNoCompileOptions);
+  source->allow_lazy =
+      !i::Compiler::DebuggerWantsEagerCompilation(source->info.get());
+  source->hash_seed = isolate->heap()->HashSeed();
+}
+
+
+void BackgroundParsingTask::Run() {
+  DisallowHeapAllocation no_allocation;
+  DisallowHandleAllocation no_handles;
+  DisallowHandleDereference no_deref;
+
+  ScriptData* script_data = NULL;
+  if (options_ == ScriptCompiler::kProduceParserCache ||
+      options_ == ScriptCompiler::kProduceCodeCache) {
+    source_->info->SetCachedData(&script_data, options_);
+  }
+
+  uintptr_t limit = reinterpret_cast<uintptr_t>(&limit) - stack_size_ * KB;
+  Parser::ParseInfo parse_info = {limit, source_->hash_seed,
+                                  &source_->unicode_cache};
+
+  // Parser needs to stay alive for finalizing the parsing on the main
+  // thread. Passing &parse_info is OK because Parser doesn't store it.
+  source_->parser.Reset(new Parser(source_->info.get(), &parse_info));
+  source_->parser->set_allow_lazy(source_->allow_lazy);
+  source_->parser->ParseOnBackground();
+
+  if (script_data != NULL) {
+    source_->cached_data.Reset(new ScriptCompiler::CachedData(
+        script_data->data(), script_data->length(),
+        ScriptCompiler::CachedData::BufferOwned));
+    script_data->ReleaseDataOwnership();
+    delete script_data;
+  }
+}
+}
+}  // namespace v8::internal
diff --git a/src/background-parsing-task.h b/src/background-parsing-task.h

new file mode 100644 (file)

index 0000000..19c93a8
--- /dev/null
+++ b/src/background-parsing-task.h
@@ -0,0 +1,67 @@
+// Copyright 2014 the V8 project authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef V8_BACKGROUND_PARSING_TASK_H_
+#define V8_BACKGROUND_PARSING_TASK_H_
+
+#include "src/base/platform/platform.h"
+#include "src/base/platform/semaphore.h"
+#include "src/compiler.h"
+#include "src/parser.h"
+#include "src/smart-pointers.h"
+
+namespace v8 {
+namespace internal {
+
+class Parser;
+
+// Internal representation of v8::ScriptCompiler::StreamedSource. Contains all
+// data which needs to be transmitted between threads for background parsing,
+// finalizing it on the main thread, and compiling on the main thread.
+struct StreamedSource {
+  StreamedSource(ScriptCompiler::ExternalSourceStream* source_stream,
+                 ScriptCompiler::StreamedSource::Encoding encoding)
+      : source_stream(source_stream),
+        encoding(encoding),
+        hash_seed(0),
+        allow_lazy(false) {}
+
+  // Internal implementation of v8::ScriptCompiler::StreamedSource.
+  SmartPointer<ScriptCompiler::ExternalSourceStream> source_stream;
+  ScriptCompiler::StreamedSource::Encoding encoding;
+  SmartPointer<ScriptCompiler::CachedData> cached_data;
+
+  // Data needed for parsing, and data needed to be passed between threads
+  // between parsing and compilation. These need to be initialized before the
+  // compilation starts.
+  UnicodeCache unicode_cache;
+  SmartPointer<CompilationInfo> info;
+  uint32_t hash_seed;
+  bool allow_lazy;
+  SmartPointer<Parser> parser;
+
+ private:
+  // Prevent copying. Not implemented.
+  StreamedSource(const StreamedSource&);
+  StreamedSource& operator=(const StreamedSource&);
+};
+
+
+class BackgroundParsingTask : public ScriptCompiler::ScriptStreamingTask {
+ public:
+  BackgroundParsingTask(StreamedSource* source,
+                        ScriptCompiler::CompileOptions options, int stack_size,
+                        Isolate* isolate);
+
+  virtual void Run();
+
+ private:
+  StreamedSource* source_;  // Not owned.
+  ScriptCompiler::CompileOptions options_;  // Run() checks for kProduce* values.
+  int stack_size_;  // In KB; Run() derives the parser stack limit from it.
+};
+}
+}  // namespace v8::internal
+
+#endif  // V8_BACKGROUND_PARSING_TASK_H_
diff --git a/src/compiler.cc b/src/compiler.cc

index d43177e07d060a66144e8d50daa745422c581c55..70a2c95b1dae24cba551852051511c915f8c01f1 100644 (file)
--- a/src/compiler.cc
+++ b/src/compiler.cc
@@ -47,6 +47,7 @@ ScriptData::ScriptData(const byte* data, int length)
  CompilationInfo::CompilationInfo(Handle<Script> script, Zone* zone)
      : flags_(kThisHasUses),
        script_(script),
+      source_stream_(NULL),
        osr_ast_id_(BailoutId::None()),
        parameter_count_(0),
        optimization_id_(-1),
@@ -59,6 +60,7 @@ CompilationInfo::CompilationInfo(Handle<Script> script, Zone* zone)
  CompilationInfo::CompilationInfo(Isolate* isolate, Zone* zone)
      : flags_(kThisHasUses),
        script_(Handle<Script>::null()),
+      source_stream_(NULL),
        osr_ast_id_(BailoutId::None()),
        parameter_count_(0),
        optimization_id_(-1),
@@ -73,6 +75,7 @@ CompilationInfo::CompilationInfo(Handle<SharedFunctionInfo> shared_info,
      : flags_(kLazy | kThisHasUses),
        shared_info_(shared_info),
        script_(Handle<Script>(Script::cast(shared_info->script()))),
+      source_stream_(NULL),
        osr_ast_id_(BailoutId::None()),
        parameter_count_(0),
        optimization_id_(-1),
@@ -87,6 +90,7 @@ CompilationInfo::CompilationInfo(Handle<JSFunction> closure, Zone* zone)
        closure_(closure),
        shared_info_(Handle<SharedFunctionInfo>(closure->shared())),
        script_(Handle<Script>(Script::cast(shared_info_->script()))),
+      source_stream_(NULL),
        context_(closure->context()),
        osr_ast_id_(BailoutId::None()),
        parameter_count_(0),
@@ -100,6 +104,7 @@ CompilationInfo::CompilationInfo(Handle<JSFunction> closure, Zone* zone)
  CompilationInfo::CompilationInfo(HydrogenCodeStub* stub, Isolate* isolate,
                                   Zone* zone)
      : flags_(kLazy | kThisHasUses),
+      source_stream_(NULL),
        osr_ast_id_(BailoutId::None()),
        parameter_count_(0),
        optimization_id_(-1),
@@ -110,6 +115,22 @@ CompilationInfo::CompilationInfo(HydrogenCodeStub* stub, Isolate* isolate,
  }
  
  
+CompilationInfo::CompilationInfo(
+    ScriptCompiler::ExternalSourceStream* stream,
+    ScriptCompiler::StreamedSource::Encoding encoding, Isolate* isolate,
+    Zone* zone)
+    : flags_(kThisHasUses),
+      source_stream_(stream),
+      source_stream_encoding_(encoding),
+      osr_ast_id_(BailoutId::None()),
+      parameter_count_(0),
+      optimization_id_(-1),
+      ast_value_factory_(NULL),
+      ast_value_factory_owned_(false) {
+  Initialize(isolate, BASE, zone);
+}
+
+
  void CompilationInfo::Initialize(Isolate* isolate,
                                   Mode mode,
                                   Zone* zone) {
@@ -136,7 +157,9 @@ void CompilationInfo::Initialize(Isolate* isolate,
    }
    mode_ = mode;
    abort_due_to_dependency_ = false;
-  if (script_->type()->value() == Script::TYPE_NATIVE) MarkAsNative();
+  if (!script_.is_null() && script_->type()->value() == Script::TYPE_NATIVE) {
+    MarkAsNative();
+  }
    if (isolate_->debug()->is_active()) MarkAsDebug();
    if (FLAG_context_specialization) MarkAsContextSpecializing();
    if (FLAG_turbo_inlining) MarkAsInliningEnabled();
@@ -810,13 +833,6 @@ void Compiler::CompileForLiveEdit(Handle<Script> script) {
  }
  
  
-static bool DebuggerWantsEagerCompilation(CompilationInfo* info,
-                                          bool allow_lazy_without_ctx = false) {
-  return LiveEditFunctionTracker::IsActive(info->isolate()) ||
-         (info->isolate()->DebuggerHasBreakPoints() && !allow_lazy_without_ctx);
-}
-
-
  static Handle<SharedFunctionInfo> CompileToplevel(CompilationInfo* info) {
    Isolate* isolate = info->isolate();
    PostponeInterruptsScope postpone(isolate);
@@ -831,28 +847,30 @@ static Handle<SharedFunctionInfo> CompileToplevel(CompilationInfo* info) {
  
    DCHECK(info->is_eval() || info->is_global());
  
-  bool parse_allow_lazy =
-      (info->compile_options() == ScriptCompiler::kConsumeParserCache ||
-       String::cast(script->source())->length() > FLAG_min_preparse_length) &&
-      !DebuggerWantsEagerCompilation(info);
-
-  if (!parse_allow_lazy &&
-      (info->compile_options() == ScriptCompiler::kProduceParserCache ||
-       info->compile_options() == ScriptCompiler::kConsumeParserCache)) {
-    // We are going to parse eagerly, but we either 1) have cached data produced
-    // by lazy parsing or 2) are asked to generate cached data. We cannot use
-    // the existing data, since it won't contain all the symbols we need for
-    // eager parsing. In addition, it doesn't make sense to produce the data
-    // when parsing eagerly. That data would contain all symbols, but no
-    // functions, so it cannot be used to aid lazy parsing later.
-    info->SetCachedData(NULL, ScriptCompiler::kNoCompileOptions);
-  }
-
    Handle<SharedFunctionInfo> result;
  
    { VMState<COMPILER> state(info->isolate());
-    if (!Parser::Parse(info, parse_allow_lazy)) {
-      return Handle<SharedFunctionInfo>::null();
+    if (info->function() == NULL) {
+      // Parse the script if needed (if it's already parsed, function() is
+      // non-NULL).
+      bool parse_allow_lazy =
+          (info->compile_options() == ScriptCompiler::kConsumeParserCache ||
+           String::cast(script->source())->length() >
+               FLAG_min_preparse_length) &&
+          !Compiler::DebuggerWantsEagerCompilation(info);
+
+      if (!parse_allow_lazy &&
+          (info->compile_options() == ScriptCompiler::kProduceParserCache ||
+           info->compile_options() == ScriptCompiler::kConsumeParserCache)) {
+        // We are going to parse eagerly, but we either 1) have cached data
+        // produced by lazy parsing or 2) are asked to generate cached data.
+        // Eager parsing cannot benefit from cached data, and producing cached
+        // data while parsing eagerly is not implemented.
+        info->SetCachedData(NULL, ScriptCompiler::kNoCompileOptions);
+      }
+      if (!Parser::Parse(info, parse_allow_lazy)) {
+        return Handle<SharedFunctionInfo>::null();
+      }
      }
  
      FunctionLiteral* lit = info->function();
@@ -898,7 +916,8 @@ static Handle<SharedFunctionInfo> CompileToplevel(CompilationInfo* info) {
      SetExpectedNofPropertiesFromEstimate(result,
                                           lit->expected_property_count());
  
-    script->set_compilation_state(Script::COMPILATION_STATE_COMPILED);
+    if (!script.is_null())
+      script->set_compilation_state(Script::COMPILATION_STATE_COMPILED);
  
      live_edit_tracker.RecordFunctionInfo(result, lit, info->zone());
    }
@@ -1055,6 +1074,19 @@ Handle<SharedFunctionInfo> Compiler::CompileScript(
  }
  
  
+Handle<SharedFunctionInfo> Compiler::CompileStreamedScript(
+    CompilationInfo* info, int source_length) {
+  Isolate* isolate = info->isolate();
+  isolate->counters()->total_load_size()->Increment(source_length);
+  isolate->counters()->total_compile_size()->Increment(source_length);
+
+  if (FLAG_use_strict) info->SetStrictMode(STRICT);
+  // TODO(marja): FLAG_serialize_toplevel is not honoured and won't be; when the
+  // real code caching lands, streaming needs to be adapted to use it.
+  return CompileToplevel(info);
+}
+
+
  Handle<SharedFunctionInfo> Compiler::BuildFunctionInfo(
      FunctionLiteral* literal, Handle<Script> script,
      CompilationInfo* outer_info) {
@@ -1362,6 +1394,13 @@ void Compiler::RecordFunctionCompilation(Logger::LogEventsAndTags tag,
  }
  
  
+bool Compiler::DebuggerWantsEagerCompilation(CompilationInfo* info,
+                                             bool allow_lazy_without_ctx) {
+  return LiveEditFunctionTracker::IsActive(info->isolate()) ||
+         (info->isolate()->DebuggerHasBreakPoints() && !allow_lazy_without_ctx);
+}
+
+
  CompilationPhase::CompilationPhase(const char* name, CompilationInfo* info)
      : name_(name), info_(info), zone_(info->isolate()) {
    if (FLAG_hydrogen_stats) {
diff --git a/src/compiler.h b/src/compiler.h

index 00cd0f40b3ae8bc8d09f4ee417848423ce684466..ca64e40387232cd1724e6fe40ae04e1670e4ea10 100644 (file)
--- a/src/compiler.h
+++ b/src/compiler.h
@@ -107,12 +107,19 @@ class CompilationInfo {
    Handle<JSFunction> closure() const { return closure_; }
    Handle<SharedFunctionInfo> shared_info() const { return shared_info_; }
    Handle<Script> script() const { return script_; }
+  void set_script(Handle<Script> script) { script_ = script; }
    HydrogenCodeStub* code_stub() const {return code_stub_; }
    v8::Extension* extension() const { return extension_; }
    ScriptData** cached_data() const { return cached_data_; }
    ScriptCompiler::CompileOptions compile_options() const {
      return compile_options_;
    }
+  ScriptCompiler::ExternalSourceStream* source_stream() const {
+    return source_stream_;
+  }
+  ScriptCompiler::StreamedSource::Encoding source_stream_encoding() const {
+    return source_stream_encoding_;
+  }
    Handle<Context> context() const { return context_; }
    BailoutId osr_ast_id() const { return osr_ast_id_; }
    Handle<Code> unoptimized_code() const { return unoptimized_code_; }
@@ -378,6 +385,10 @@ class CompilationInfo {
    CompilationInfo(HydrogenCodeStub* stub,
                    Isolate* isolate,
                    Zone* zone);
+  CompilationInfo(ScriptCompiler::ExternalSourceStream* source_stream,
+                  ScriptCompiler::StreamedSource::Encoding encoding,
+                  Isolate* isolate, Zone* zone);
+
  
   private:
    Isolate* isolate_;
@@ -427,6 +438,8 @@ class CompilationInfo {
    Handle<JSFunction> closure_;
    Handle<SharedFunctionInfo> shared_info_;
    Handle<Script> script_;
+  ScriptCompiler::ExternalSourceStream* source_stream_;  // Not owned.
+  ScriptCompiler::StreamedSource::Encoding source_stream_encoding_;
  
    // Fields possibly needed for eager compilation, NULL by default.
    v8::Extension* extension_;
@@ -508,6 +521,10 @@ class CompilationInfoWithZone: public CompilationInfo {
    CompilationInfoWithZone(HydrogenCodeStub* stub, Isolate* isolate)
        : CompilationInfo(stub, isolate, &zone_),
          zone_(isolate) {}
+  CompilationInfoWithZone(ScriptCompiler::ExternalSourceStream* stream,
+                          ScriptCompiler::StreamedSource::Encoding encoding,
+                          Isolate* isolate)
+      : CompilationInfo(stream, encoding, isolate, &zone_), zone_(isolate) {}
  
    // Virtual destructor because a CompilationInfoWithZone has to exit the
    // zone scope and get rid of dependent maps even when the destructor is
@@ -667,6 +684,9 @@ class Compiler : public AllStatic {
        ScriptCompiler::CompileOptions compile_options,
        NativesFlag is_natives_code);
  
+  static Handle<SharedFunctionInfo> CompileStreamedScript(CompilationInfo* info,
+                                                          int source_length);
+
    // Create a shared function info object (the code may be lazily compiled).
    static Handle<SharedFunctionInfo> BuildFunctionInfo(FunctionLiteral* node,
                                                        Handle<Script> script,
@@ -690,6 +710,9 @@ class Compiler : public AllStatic {
    static void RecordFunctionCompilation(Logger::LogEventsAndTags tag,
                                          CompilationInfo* info,
                                          Handle<SharedFunctionInfo> shared);
+
+  static bool DebuggerWantsEagerCompilation(
+      CompilationInfo* info, bool allow_lazy_without_ctx = false);
  };
  
  
diff --git a/src/parser.cc b/src/parser.cc

index adb797d8d0c27bbb6b0f473efa83e5fa198861cc..0664e2a6a9211062bfbaccc325e2e77e562bf64e 100644 (file)
--- a/src/parser.cc
+++ b/src/parser.cc
@@ -750,7 +750,7 @@ Parser::Parser(CompilationInfo* info, ParseInfo* parse_info)
        pending_error_char_arg_(NULL),
        total_preparse_skipped_(0),
        pre_parse_timer_(NULL) {
-  DCHECK(!script().is_null());
+  DCHECK(!script().is_null() || info->source_stream() != NULL);
    set_allow_harmony_scoping(!info->is_native() && FLAG_harmony_scoping);
    set_allow_modules(!info->is_native() && FLAG_harmony_modules);
    set_allow_natives_syntax(FLAG_allow_natives_syntax || info->is_native());
@@ -798,6 +798,9 @@ FunctionLiteral* Parser::ParseProgram() {
  
    source = String::Flatten(source);
    FunctionLiteral* result;
+
+  Scope* top_scope = NULL;
+  Scope* eval_scope = NULL;
    if (source->IsExternalTwoByteString()) {
      // Notice that the stream is destroyed at the end of the branch block.
      // The last line of the blocks can't be moved outside, even though they're
@@ -805,11 +808,15 @@ FunctionLiteral* Parser::ParseProgram() {
      ExternalTwoByteStringUtf16CharacterStream stream(
          Handle<ExternalTwoByteString>::cast(source), 0, source->length());
      scanner_.Initialize(&stream);
-    result = DoParseProgram(info(), source);
+    result = DoParseProgram(info(), &top_scope, &eval_scope);
    } else {
      GenericStringUtf16CharacterStream stream(source, 0, source->length());
      scanner_.Initialize(&stream);
-    result = DoParseProgram(info(), source);
+    result = DoParseProgram(info(), &top_scope, &eval_scope);
+  }
+  top_scope->set_end_position(source->length());
+  if (eval_scope != NULL) {
+    eval_scope->set_end_position(source->length());
    }
    HandleSourceURLComments();
  
@@ -834,51 +841,52 @@ FunctionLiteral* Parser::ParseProgram() {
  }
  
  
-FunctionLiteral* Parser::DoParseProgram(CompilationInfo* info,
-                                        Handle<String> source) {
+FunctionLiteral* Parser::DoParseProgram(CompilationInfo* info, Scope** scope,
+                                        Scope** eval_scope) {
    DCHECK(scope_ == NULL);
    DCHECK(target_stack_ == NULL);
  
    FunctionLiteral* result = NULL;
-  { Scope* scope = NewScope(scope_, GLOBAL_SCOPE);
-    info->SetGlobalScope(scope);
+  {
+    *scope = NewScope(scope_, GLOBAL_SCOPE);
+    info->SetGlobalScope(*scope);
      if (!info->context().is_null() && !info->context()->IsNativeContext()) {
-      scope = Scope::DeserializeScopeChain(*info->context(), scope, zone());
+      *scope = Scope::DeserializeScopeChain(*info->context(), *scope, zone());
        // The Scope is backed up by ScopeInfo (which is in the V8 heap); this
        // means the Parser cannot operate independent of the V8 heap. Tell the
        // string table to internalize strings and values right after they're
        // created.
        ast_value_factory()->Internalize(isolate());
      }
-    original_scope_ = scope;
+    original_scope_ = *scope;
      if (info->is_eval()) {
-      if (!scope->is_global_scope() || info->strict_mode() == STRICT) {
-        scope = NewScope(scope, EVAL_SCOPE);
+      if (!(*scope)->is_global_scope() || info->strict_mode() == STRICT) {
+        *scope = NewScope(*scope, EVAL_SCOPE);
        }
      } else if (info->is_global()) {
-      scope = NewScope(scope, GLOBAL_SCOPE);
+      *scope = NewScope(*scope, GLOBAL_SCOPE);
      }
-    scope->set_start_position(0);
-    scope->set_end_position(source->length());
+    (*scope)->set_start_position(0);
+    // End position will be set by the caller.
  
      // Compute the parsing mode.
      Mode mode = (FLAG_lazy && allow_lazy()) ? PARSE_LAZILY : PARSE_EAGERLY;
-    if (allow_natives_syntax() ||
-        extension_ != NULL ||
-        scope->is_eval_scope()) {
+    if (allow_natives_syntax() || extension_ != NULL ||
+        (*scope)->is_eval_scope()) {
        mode = PARSE_EAGERLY;
      }
      ParsingModeScope parsing_mode(this, mode);
  
      // Enters 'scope'.
-    FunctionState function_state(&function_state_, &scope_, scope, zone(),
+    FunctionState function_state(&function_state_, &scope_, *scope, zone(),
                                   ast_value_factory(), info->ast_node_id_gen());
  
      scope_->SetStrictMode(info->strict_mode());
      ZoneList<Statement*>* body = new(zone()) ZoneList<Statement*>(16, zone());
      bool ok = true;
      int beg_pos = scanner()->location().beg_pos;
-    ParseSourceElements(body, Token::EOS, info->is_eval(), true, &ok);
+    ParseSourceElements(body, Token::EOS, info->is_eval(), true, eval_scope,
+                        &ok);
  
      if (ok && strict_mode() == STRICT) {
        CheckOctalLiteral(beg_pos, scanner()->location().end_pos, &ok);
@@ -1023,10 +1031,8 @@ FunctionLiteral* Parser::ParseLazy(Utf16CharacterStream* source) {
  
  
  void* Parser::ParseSourceElements(ZoneList<Statement*>* processor,
-                                  int end_token,
-                                  bool is_eval,
-                                  bool is_global,
-                                  bool* ok) {
+                                  int end_token, bool is_eval, bool is_global,
+                                  Scope** eval_scope, bool* ok) {
    // SourceElements ::
    //   (ModuleElement)* <end_token>
  
@@ -1082,6 +1088,10 @@ void* Parser::ParseSourceElements(ZoneList<Statement*>* processor,
              scope->set_start_position(scope_->start_position());
              scope->set_end_position(scope_->end_position());
              scope_ = scope;
+            if (eval_scope != NULL) {
+              // Caller will correct the positions of the ad hoc eval scope.
+              *eval_scope = scope;
+            }
              mode_ = PARSE_EAGERLY;
            }
            scope_->SetStrictMode(STRICT);
@@ -3722,7 +3732,7 @@ ZoneList<Statement*>* Parser::ParseEagerFunctionBody(
          yield, RelocInfo::kNoPosition), zone());
    }
  
-  ParseSourceElements(body, Token::RBRACE, false, false, CHECK_OK);
+  ParseSourceElements(body, Token::RBRACE, false, false, NULL, CHECK_OK);
  
    if (is_generator) {
      VariableProxy* get_proxy = factory()->NewVariableProxy(
@@ -4839,4 +4849,46 @@ bool Parser::Parse() {
    return (result != NULL);
  }
  
+
+// Parses the script delivered through info()->source_stream() and stores the
+// resulting FunctionLiteral (possibly NULL) in info() via SetFunction().
+// Strings are not internalized here; see the comment near the end of the
+// function.
+void Parser::ParseOnBackground() {
+  DCHECK(info()->function() == NULL);
+  FunctionLiteral* result = NULL;
+  fni_ = new (zone()) FuncNameInferrer(ast_value_factory(), zone());
+
+  // log_ is pointed at |recorder| only when a parser cache was requested; the
+  // recorded data is read back through GetScriptData() below.
+  CompleteParserRecorder recorder;
+  if (compile_options() == ScriptCompiler::kProduceParserCache) {
+    log_ = &recorder;
+  }
+
+  DCHECK(info()->source_stream() != NULL);
+  ExternalStreamingStream stream(info()->source_stream(),
+                                 info()->source_stream_encoding());
+  scanner_.Initialize(&stream);
+  DCHECK(info()->context().is_null() || info()->context()->IsNativeContext());
+
+  // When streaming, we don't know the length of the source until we have parsed
+  // it. The raw data can be UTF-8, so we wouldn't know the source length until
+  // we have decoded it anyway even if we knew the raw data length (which we
+  // don't). We work around this by storing all the scopes which need their end
+  // position set at the end of the script (the top scope and possible eval
+  // scopes) and set their end position after we know the script length.
+  Scope* top_scope = NULL;
+  Scope* eval_scope = NULL;
+  result = DoParseProgram(info(), &top_scope, &eval_scope);
+
+  // NOTE(review): top_scope is dereferenced unconditionally; this assumes
+  // DoParseProgram always stores a non-NULL scope, even when parsing fails --
+  // confirm.
+  top_scope->set_end_position(scanner()->location().end_pos);
+  if (eval_scope != NULL) {
+    eval_scope->set_end_position(scanner()->location().end_pos);
+  }
+
+  info()->SetFunction(result);
+
+  // We cannot internalize on a background thread; a foreground task will take
+  // care of calling Parser::Internalize just before compilation.
+
+  if (compile_options() == ScriptCompiler::kProduceParserCache) {
+    if (result != NULL) *info_->cached_data() = recorder.GetScriptData();
+    // |recorder| is a local; stop pointing at it before it goes out of scope.
+    log_ = NULL;
+  }
+}
  } }  // namespace v8::internal
diff --git a/src/parser.h b/src/parser.h

index bba36480bc95d80478cfa6b72e4dd433722f4e18..b5bf2391284cb475944190a4a97defc6d2ad1258 100644 (file)
--- a/src/parser.h
+++ b/src/parser.h
@@ -623,6 +623,11 @@ class Parser : public ParserBase<ParserTraits> {
      return parser.Parse();
    }
    bool Parse();
+  void ParseOnBackground();
+
+  // Handle errors detected during parsing, move statistics to Isolate,
+  // internalize strings (move them to the heap).
+  void Internalize();
  
   private:
    friend class ParserTraits;
@@ -663,8 +668,8 @@ class Parser : public ParserBase<ParserTraits> {
    }
  
    // Called by ParseProgram after setting up the scanner.
-  FunctionLiteral* DoParseProgram(CompilationInfo* info,
-                                  Handle<String> source);
+  FunctionLiteral* DoParseProgram(CompilationInfo* info, Scope** scope,
+                                  Scope** ad_hoc_eval_scope);
  
    void SetCachedData();
  
@@ -682,7 +687,8 @@ class Parser : public ParserBase<ParserTraits> {
    // By making the 'exception handling' explicit, we are forced to check
    // for failure at the call sites.
    void* ParseSourceElements(ZoneList<Statement*>* processor, int end_token,
-                            bool is_eval, bool is_global, bool* ok);
+                            bool is_eval, bool is_global,
+                            Scope** ad_hoc_eval_scope, bool* ok);
    Statement* ParseModuleElement(ZoneList<const AstRawString*>* labels,
                                  bool* ok);
    Statement* ParseModuleDeclaration(ZoneList<const AstRawString*>* names,
@@ -805,10 +811,6 @@ class Parser : public ParserBase<ParserTraits> {
  
    void ThrowPendingError();
  
-  // Handle errors detected during parsing, move statistics to Isolate,
-  // internalize strings (move them to the heap).
-  void Internalize();
-
    Scanner scanner_;
    PreParser* reusable_preparser_;
    Scope* original_scope_;  // for ES5 function declarations in sloppy eval
diff --git a/src/scanner-character-streams.cc b/src/scanner-character-streams.cc

index 9ec0ad1008316dfca01264c619d9b5c88655a231..74adda0ade7d992a353f3858c74f296e72ff2cb8 100644 (file)
--- a/src/scanner-character-streams.cc
+++ b/src/scanner-character-streams.cc
@@ -6,12 +6,40 @@
  
  #include "src/scanner-character-streams.h"
  
+#include "include/v8.h"
  #include "src/handles.h"
  #include "src/unicode-inl.h"
  
  namespace v8 {
  namespace internal {
  
+namespace {
+
+// Copies up to |length| UTF-16 code units into |dest|, reading raw data from
+// |src| starting at offset |*src_pos| and never going past |src_length|.
+// |encoding| selects how the raw data is interpreted. |*src_pos| is advanced by
+// the amount of source consumed. Returns the number of code units written to
+// |dest|.
+unsigned CopyCharsHelper(uint16_t* dest, unsigned length, const uint8_t* src,
+                         unsigned* src_pos, unsigned src_length,
+                         ScriptCompiler::StreamedSource::Encoding encoding) {
+  // Callers can run out of destination space (e.g. right after a split UTF-8
+  // character was flushed into the last free slots of the buffer). Don't rely
+  // on the callees handling an empty destination: the UTF-8 decoder evaluates
+  // |length - 1| on an unsigned value, which wraps around for 0 and would let
+  // it write past the end of |dest|.
+  if (length == 0) return 0;
+
+  if (encoding == ScriptCompiler::StreamedSource::UTF8) {
+    return v8::internal::Utf8ToUtf16CharacterStream::CopyChars(
+        dest, length, src, src_pos, src_length);
+  }
+
+  // Clamp to what is left in the source.
+  unsigned to_fill = length;
+  if (to_fill > src_length - *src_pos) to_fill = src_length - *src_pos;
+
+  if (encoding == ScriptCompiler::StreamedSource::ONE_BYTE) {
+    v8::internal::CopyChars<uint8_t, uint16_t>(dest, src + *src_pos, to_fill);
+  } else {
+    DCHECK(encoding == ScriptCompiler::StreamedSource::TWO_BYTE);
+    // NOTE(review): |to_fill| is derived from |src_length - *src_pos| (offsets
+    // into a uint8_t array) but is used here as a count of uint16_t elements,
+    // and |*src_pos| is advanced by the same number below -- confirm the
+    // intended units for TWO_BYTE input.
+    v8::internal::CopyChars<uint16_t, uint16_t>(
+        dest, reinterpret_cast<const uint16_t*>(src + *src_pos), to_fill);
+  }
+  *src_pos += to_fill;
+  return to_fill;
+}
+
+}  // namespace
+
+
  // ----------------------------------------------------------------------------
  // BufferedUtf16CharacterStreams
  
@@ -145,6 +173,35 @@ Utf8ToUtf16CharacterStream::Utf8ToUtf16CharacterStream(const byte* data,
  Utf8ToUtf16CharacterStream::~Utf8ToUtf16CharacterStream() { }
  
  
+// Decodes UTF-8 bytes from |src| (starting at |*src_pos|, ending at
+// |src_length|) into UTF-16 code units in |dest|, which has room for |length|
+// code units. |*src_pos| is advanced past the bytes that were consumed.
+// Returns the number of code units written.
+unsigned Utf8ToUtf16CharacterStream::CopyChars(uint16_t* dest, unsigned length,
+                                               const byte* src,
+                                               unsigned* src_pos,
+                                               unsigned src_length) {
+  static const unibrow::uchar kMaxUtf16Character = 0xffff;
+  unsigned i = 0;
+  // Because of the UTF-16 lead and trail surrogates, we stop filling the buffer
+  // one character early (in the normal case), because we need to have at least
+  // two free spaces in the buffer to be sure that the next character will fit.
+  // The condition is written as |i + 1 < length| rather than |i < length - 1|
+  // so that |length == 0| does not wrap around to a huge unsigned bound.
+  while (i + 1 < length) {
+    if (*src_pos == src_length) break;
+    unibrow::uchar c = src[*src_pos];
+    if (c <= unibrow::Utf8::kMaxOneByteChar) {
+      *src_pos = *src_pos + 1;
+    } else {
+      c = unibrow::Utf8::CalculateValue(src + *src_pos, src_length - *src_pos,
+                                        src_pos);
+    }
+    if (c > kMaxUtf16Character) {
+      dest[i++] = unibrow::Utf16::LeadSurrogate(c);
+      dest[i++] = unibrow::Utf16::TrailSurrogate(c);
+    } else {
+      dest[i++] = static_cast<uc16>(c);
+    }
+  }
+  return i;
+}
+
+
  unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
    unsigned old_pos = pos_;
    unsigned target_pos = pos_ + delta;
@@ -156,31 +213,14 @@ unsigned Utf8ToUtf16CharacterStream::BufferSeekForward(unsigned delta) {
  
  
  unsigned Utf8ToUtf16CharacterStream::FillBuffer(unsigned char_position) {
-  static const unibrow::uchar kMaxUtf16Character = 0xffff;
    SetRawPosition(char_position);
    if (raw_character_position_ != char_position) {
      // char_position was not a valid position in the stream (hit the end
      // while spooling to it).
      return 0u;
    }
-  unsigned i = 0;
-  while (i < kBufferSize - 1) {
-    if (raw_data_pos_ == raw_data_length_) break;
-    unibrow::uchar c = raw_data_[raw_data_pos_];
-    if (c <= unibrow::Utf8::kMaxOneByteChar) {
-      raw_data_pos_++;
-    } else {
-      c =  unibrow::Utf8::CalculateValue(raw_data_ + raw_data_pos_,
-                                         raw_data_length_ - raw_data_pos_,
-                                         &raw_data_pos_);
-    }
-    if (c > kMaxUtf16Character) {
-      buffer_[i++] = unibrow::Utf16::LeadSurrogate(c);
-      buffer_[i++] = unibrow::Utf16::TrailSurrogate(c);
-    } else {
-      buffer_[i++] = static_cast<uc16>(c);
-    }
-  }
+  // The decoding loop now lives in the static CopyChars() so that it can also
+  // be called from outside this class; |i| is the number of UTF-16 code units
+  // it wrote into buffer_.
+  unsigned i = CopyChars(buffer_, kBufferSize, raw_data_, &raw_data_pos_,
+                         raw_data_length_);
    raw_character_position_ = char_position + i;
    return i;
  }
@@ -276,6 +316,110 @@ void Utf8ToUtf16CharacterStream::SetRawPosition(unsigned target_position) {
  }
  
  
+// Refills buffer_ with decoded UTF-16 code units, requesting new raw chunks
+// from source_stream_ as needed. Returns the number of code units placed in
+// buffer_; fewer than a full buffer are returned once the stream has ended.
+unsigned ExternalStreamingStream::FillBuffer(unsigned position) {
+  // Ignore "position" which is the position in the decoded data. Instead,
+  // ExternalStreamingStream keeps track of the position in the raw data.
+  unsigned data_in_buffer = 0;
+  // Note that the UTF-8 decoder might not be able to fill the buffer
+  // completely; it will typically leave the last character empty (see
+  // Utf8ToUtf16CharacterStream::CopyChars).
+  while (data_in_buffer < kBufferSize - 1) {
+    if (current_data_ == NULL) {
+      // GetMoreData will wait until the embedder has enough data.
+      current_data_length_ = source_stream_->GetMoreData(&current_data_);
+      current_data_offset_ = 0;
+      // A returned length of 0 signals the end of the stream.
+      bool data_ends = current_data_length_ == 0;
+
+      // A caveat: a data chunk might end with bytes from an incomplete UTF-8
+      // character (the rest of the bytes will be in the next chunk).
+      if (encoding_ == ScriptCompiler::StreamedSource::UTF8) {
+        HandleUtf8SplitCharacters(&data_in_buffer);
+        if (!data_ends && current_data_offset_ == current_data_length_) {
+          // The data stream didn't end, but we used all the data in the
+          // chunk. This will only happen when the chunk was really small. We
+          // don't handle the case where a UTF-8 character is split over several
+          // chunks; in that case V8 won't crash, but it will be a parse error.
+          delete[] current_data_;
+          current_data_ = NULL;
+          current_data_length_ = 0;
+          current_data_offset_ = 0;
+          continue;  // Request a new chunk.
+        }
+      }
+
+      // Did the data stream end?
+      if (data_ends) {
+        DCHECK(utf8_split_char_buffer_length_ == 0);
+        return data_in_buffer;
+      }
+    }
+
+    // Fill the buffer from current_data_.
+    // NOTE(review): HandleUtf8SplitCharacters() above may already have raised
+    // data_in_buffer to kBufferSize, in which case the destination length
+    // passed here is 0 -- confirm that CopyCharsHelper tolerates that.
+    unsigned new_offset = 0;
+    unsigned new_chars_in_buffer =
+        CopyCharsHelper(buffer_ + data_in_buffer, kBufferSize - data_in_buffer,
+                        current_data_ + current_data_offset_, &new_offset,
+                        current_data_length_ - current_data_offset_, encoding_);
+    data_in_buffer += new_chars_in_buffer;
+    current_data_offset_ += new_offset;
+    DCHECK(data_in_buffer <= kBufferSize);
+
+    // Did we use all the data in the data chunk?
+    if (current_data_offset_ == current_data_length_) {
+      // This stream owns the chunk; release it so the next iteration (or the
+      // next FillBuffer call) requests a fresh one.
+      delete[] current_data_;
+      current_data_ = NULL;
+      current_data_length_ = 0;
+      current_data_offset_ = 0;
+    }
+  }
+  return data_in_buffer;
+}
+
+// Deals with a UTF-8 character whose bytes straddle a chunk boundary. Called
+// right after a new chunk has been fetched into current_data_:
+//  1. If bytes were held back from the previous chunk, completes them with the
+//     continuation bytes at the start of the new chunk, decodes the result
+//     into buffer_ and bumps |*data_in_buffer| accordingly.
+//  2. Holds back the bytes of the last multi-byte character of the new chunk
+//     (it might be incomplete) in utf8_split_char_buffer_ and shrinks
+//     current_data_length_ so that the normal decoding path doesn't see them.
+void ExternalStreamingStream::HandleUtf8SplitCharacters(
+    unsigned* data_in_buffer) {
+  // First check if we have leftover data from the last chunk.
+  unibrow::uchar c;
+  if (utf8_split_char_buffer_length_ > 0) {
+    // Move the bytes which are part of the split character (which started in
+    // the previous chunk) into utf8_split_char_buffer_. Only continuation
+    // bytes (0b10xxxxxx) can belong to it; a lead byte starts the next
+    // character and must be left in the chunk, otherwise both characters would
+    // be corrupted.
+    while (current_data_offset_ < current_data_length_ &&
+           utf8_split_char_buffer_length_ < 4 &&
+           ((c = current_data_[current_data_offset_]) >> 6) == 2) {
+      utf8_split_char_buffer_[utf8_split_char_buffer_length_] = c;
+      ++utf8_split_char_buffer_length_;
+      ++current_data_offset_;
+    }
+
+    // Convert the data in utf8_split_char_buffer_.
+    unsigned new_offset = 0;
+    unsigned new_chars_in_buffer =
+        CopyCharsHelper(buffer_ + *data_in_buffer,
+                        kBufferSize - *data_in_buffer, utf8_split_char_buffer_,
+                        &new_offset, utf8_split_char_buffer_length_, encoding_);
+    *data_in_buffer += new_chars_in_buffer;
+    // Make sure we used all the data.
+    DCHECK(new_offset == utf8_split_char_buffer_length_);
+    DCHECK(*data_in_buffer <= kBufferSize);
+
+    utf8_split_char_buffer_length_ = 0;
+  }
+
+  // Move bytes which are part of an incomplete character from the end of the
+  // current chunk to utf8_split_char_buffer_. They will be converted when the
+  // next data chunk arrives. All valid UTF-8 characters are at most 4 bytes
+  // long, but invalid data (or simply several multi-byte characters in a row)
+  // can put more than 4 consecutive non-ASCII bytes at the end of the chunk,
+  // so never take more than utf8_split_char_buffer_ can hold.
+  while (current_data_length_ > current_data_offset_ &&
+         utf8_split_char_buffer_length_ < 4 &&
+         (c = current_data_[current_data_length_ - 1]) >
+             unibrow::Utf8::kMaxOneByteChar) {
+    --current_data_length_;
+    ++utf8_split_char_buffer_length_;
+    // 0xC0 == 0b11000000: this is the first byte of the multi-byte character.
+    // The characters before it are complete (even if they are multi-byte) and
+    // don't need to go through the conversion buffer.
+    if (c >= 0xC0) break;
+  }
+  // The held-back bytes now sit just past the (shrunk) end of the chunk.
+  for (unsigned i = 0; i < utf8_split_char_buffer_length_; ++i) {
+    utf8_split_char_buffer_[i] = current_data_[current_data_length_ + i];
+  }
+}
+
+
  // ----------------------------------------------------------------------------
  // ExternalTwoByteStringUtf16CharacterStream
  
diff --git a/src/scanner-character-streams.h b/src/scanner-character-streams.h

index eeb40e260f62727610c52c9591cb62ca231885c9..afca13f1807b0ce167340773ab8f70c852d49aa7 100644 (file)
--- a/src/scanner-character-streams.h
+++ b/src/scanner-character-streams.h
@@ -59,6 +59,9 @@ class Utf8ToUtf16CharacterStream: public BufferedUtf16CharacterStream {
    Utf8ToUtf16CharacterStream(const byte* data, unsigned length);
    virtual ~Utf8ToUtf16CharacterStream();
  
+  static unsigned CopyChars(uint16_t* dest, unsigned length, const byte* src,
+                            unsigned* src_pos, unsigned src_length);
+
   protected:
    virtual unsigned BufferSeekForward(unsigned delta);
    virtual unsigned FillBuffer(unsigned char_position);
@@ -73,6 +76,46 @@ class Utf8ToUtf16CharacterStream: public BufferedUtf16CharacterStream {
  };
  
  
+// ExternalStreamingStream is a wrapper around an ExternalSourceStream (see
+// include/v8.h) subclass implemented by the embedder. It pulls raw chunks from
+// the embedder on demand and exposes them as a buffered UTF-16 stream.
+class ExternalStreamingStream : public BufferedUtf16CharacterStream {
+ public:
+  // |source_stream| supplies the raw chunks; |encoding| tells how to decode
+  // them.
+  ExternalStreamingStream(ScriptCompiler::ExternalSourceStream* source_stream,
+                          v8::ScriptCompiler::StreamedSource::Encoding encoding)
+      : source_stream_(source_stream),
+        encoding_(encoding),
+        current_data_(NULL),
+        current_data_offset_(0),
+        current_data_length_(0),
+        utf8_split_char_buffer_length_(0) {}
+
+  // Releases the chunk that is currently being consumed, if any.
+  // NOTE(review): the class owns current_data_ but copying is not disabled;
+  // confirm instances are never copied.
+  virtual ~ExternalStreamingStream() { delete[] current_data_; }
+
+  virtual unsigned BufferSeekForward(unsigned delta) OVERRIDE {
+    // We never need to seek forward when streaming scripts. We only seek
+    // forward when we want to parse a function whose location we already know,
+    // and when streaming, we don't know the locations of anything we haven't
+    // seen yet.
+    UNREACHABLE();
+    return 0;
+  }
+
+  virtual unsigned FillBuffer(unsigned position) OVERRIDE;
+
+ private:
+  void HandleUtf8SplitCharacters(unsigned* data_in_buffer);
+
+  ScriptCompiler::ExternalSourceStream* source_stream_;
+  v8::ScriptCompiler::StreamedSource::Encoding encoding_;
+  // The chunk currently being decoded (NULL when a new one must be requested),
+  // the read offset into it, and its length.
+  const uint8_t* current_data_;
+  unsigned current_data_offset_;
+  unsigned current_data_length_;
+  // For converting UTF-8 characters which are split across two data chunks.
+  uint8_t utf8_split_char_buffer_[4];
+  unsigned utf8_split_char_buffer_length_;
+};
+
+
  // UTF16 buffer to read characters from an external string.
  class ExternalTwoByteStringUtf16CharacterStream: public Utf16CharacterStream {
   public:
diff --git a/test/cctest/test-api.cc b/test/cctest/test-api.cc

index 75c5b8e2a330f8306bd220e561d70c5b2ed4d534..e32089398b97dae02079d53ec7eb0fd52711044f 100644 (file)
--- a/test/cctest/test-api.cc
+++ b/test/cctest/test-api.cc
@@ -23035,3 +23035,307 @@ TEST(GetHiddenPropertyTableAfterAccessCheck) {
  
    obj->SetHiddenValue(v8_str("hidden key 2"), v8_str("hidden value 2"));
  }
+
+
+// ExternalSourceStream implementation for tests: hands out the strings of a
+// NULL-terminated array one at a time, each as a freshly allocated copy.
+class TestSourceStream : public v8::ScriptCompiler::ExternalSourceStream {
+ public:
+  // |chunks| must be terminated by a NULL entry; the array is not copied.
+  explicit TestSourceStream(const char** chunks) : chunks_(chunks), index_(0) {}
+
+  // Stores a copy of the next chunk in |*src| and returns its length, or
+  // returns 0 (leaving |*src| untouched) once the NULL terminator is reached.
+  virtual size_t GetMoreData(const uint8_t** src) {
+    // Unlike in real use cases, this function will never block.
+    if (chunks_[index_] == NULL) {
+      return 0;
+    }
+    // Copy the data, since the caller takes ownership of it.
+    size_t len = strlen(chunks_[index_]);
+    // We don't need to zero-terminate since we return the length.
+    uint8_t* copy = new uint8_t[len];
+    memcpy(copy, chunks_[index_], len);
+    *src = copy;
+    ++index_;
+    return len;
+  }
+
+  // Helper for constructing a string from chunks (the compilation needs it
+  // too). The result is allocated with new[]; the caller must delete[] it.
+  static char* FullSourceString(const char** chunks) {
+    // First pass: compute the total length.
+    size_t total_len = 0;
+    for (size_t i = 0; chunks[i] != NULL; ++i) {
+      total_len += strlen(chunks[i]);
+    }
+    // Second pass: concatenate the chunks and zero-terminate.
+    char* full_string = new char[total_len + 1];
+    size_t offset = 0;
+    for (size_t i = 0; chunks[i] != NULL; ++i) {
+      size_t len = strlen(chunks[i]);
+      memcpy(full_string + offset, chunks[i], len);
+      offset += len;
+    }
+    full_string[total_len] = 0;
+    return full_string;
+  }
+
+ private:
+  const char** chunks_;  // NULL-terminated array of chunks (not owned).
+  unsigned index_;       // Index of the next chunk to hand out.
+};
+
+
+// Helper function for running streaming tests. Streams |chunks| (a
+// NULL-terminated array) with the given |encoding|, compiles the result and,
+// when |expected_success| is true, runs it and checks that it evaluates to 13.
+// When |expected_success| is false, checks that compilation fails with an
+// exception instead.
+void RunStreamingTest(const char** chunks,
+                      v8::ScriptCompiler::StreamedSource::Encoding encoding =
+                          v8::ScriptCompiler::StreamedSource::ONE_BYTE,
+                      bool expected_success = true) {
+  LocalContext env;
+  v8::Isolate* isolate = env->GetIsolate();
+  v8::HandleScope scope(isolate);
+  v8::TryCatch try_catch;
+
+  v8::ScriptCompiler::StreamedSource source(new TestSourceStream(chunks),
+                                            encoding);
+  v8::ScriptCompiler::ScriptStreamingTask* task =
+      v8::ScriptCompiler::StartStreamingScript(isolate, &source);
+
+  // TestSourceStream::GetMoreData won't block, so it's OK to just run the
+  // task here in the main thread.
+  task->Run();
+  delete task;
+
+  v8::ScriptOrigin origin(v8_str("http://foo.com"));
+  char* full_source = TestSourceStream::FullSourceString(chunks);
+
+  // The possible errors are only produced while compiling.
+  CHECK_EQ(false, try_catch.HasCaught());
+
+  v8::Handle<Script> script = v8::ScriptCompiler::Compile(
+      isolate, &source, v8_str(full_source), origin);
+  if (expected_success) {
+    CHECK(!script.IsEmpty());
+    v8::Handle<Value> result(script->Run());
+    // All scripts are supposed to return the fixed value 13 when run.
+    CHECK_EQ(13, result->Int32Value());
+  } else {
+    CHECK(script.IsEmpty());
+    CHECK(try_catch.HasCaught());
+  }
+  delete[] full_source;
+}
+
+
+// Streams a tiny script whose chunk boundaries fall in the middle of tokens.
+TEST(StreamingSimpleScript) {
+  // This script is unrealistically small, since no one chunk is enough to fill
+  // the backing buffer of Scanner, let alone overflow it.
+  const char* chunks[] = {"function foo() { ret", "urn 13; } f", "oo(); ",
+                          NULL};
+  RunStreamingTest(chunks);
+}
+
+
+// Streams a script whose first chunk is long; the JS comment inside |chunk1|
+// states the intent (overflowing the Scanner's backing buffer).
+TEST(StreamingBiggerScript) {
+  const char* chunk1 =
+      "function foo() {\n"
+      "  // Make this chunk sufficiently long so that it will overflow the\n"
+      "  // backing buffer of the Scanner.\n"
+      "  var i = 0;\n"
+      "  var result = 0;\n"
+      "  for (i = 0; i < 13; ++i) { result = result + 1; }\n"
+      "  result = 0;\n"
+      "  for (i = 0; i < 13; ++i) { result = result + 1; }\n"
+      "  result = 0;\n"
+      "  for (i = 0; i < 13; ++i) { result = result + 1; }\n"
+      "  result = 0;\n"
+      "  for (i = 0; i < 13; ++i) { result = result + 1; }\n"
+      "  return result;\n"
+      "}\n";
+  const char* chunks[] = {chunk1, "foo(); ", NULL};
+  RunStreamingTest(chunks);
+}
+
+
+// Streams an invalid script followed by a valid one.
+TEST(StreamingScriptWithParseError) {
+  // Test that parse errors from streamed scripts are propagated correctly.
+  {
+    char chunk1[] =
+        "  // This will result in a parse error.\n"
+        "  var if else then foo";
+    char chunk2[] = "  13\n";
+    const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
+
+    // expected_success == false: compilation must fail with an exception.
+    RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::ONE_BYTE,
+                     false);
+  }
+  // Test that the next script succeeds normally.
+  {
+    char chunk1[] =
+        "  // This will be parsed successfully.\n"
+        "  function foo() { return ";
+    char chunk2[] = "  13; }\n";
+    const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
+
+    RunStreamingTest(chunks);
+  }
+}
+
+
+// Streams a UTF-8 script containing a non-ASCII character that lies entirely
+// within one chunk.
+TEST(StreamingUtf8Script) {
+  const char* chunk1 =
+      "function foo() {\n"
+      "  // This function will contain an UTF-8 character which is not in\n"
+      "  // ASCII.\n"
+      "  var foob\uc481r = 13;\n"
+      "  return foob\uc481r;\n"
+      "}\n";
+  const char* chunks[] = {chunk1, "foo(); ", NULL};
+  RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
+}
+
+
+TEST(StreamingUtf8ScriptWithSplitCharactersSanityCheck) {
+  // A sanity check to prove that the approach of splitting UTF-8
+  // characters is correct. Here is a UTF-8 character which will take three
+  // bytes.
+  const char* reference = "\uc481";
+  CHECK_EQ(3, strlen(reference));
+  char chunk1[] =
+      "function foo() {\n"
+      "  // This function will contain an UTF-8 character which is not in\n"
+      "  // ASCII.\n"
+      "  var foob";
+  char chunk2[] =
+      "XXXr = 13;\n"
+      "  return foob\uc481r;\n"
+      "}\n";
+  // Overwrite the three placeholder X's with the bytes of the reference
+  // character; here the character is not split, it sits at the start of
+  // chunk2.
+  for (int i = 0; i < 3; ++i) {
+    chunk2[i] = reference[i];
+  }
+  const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
+  RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
+}
+
+
+TEST(StreamingUtf8ScriptWithSplitCharacters) {
+  // Stream data where a multi-byte UTF-8 character is split between two data
+  // chunks.
+  const char* reference = "\uc481";
+  char chunk1[] =
+      "function foo() {\n"
+      "  // This function will contain an UTF-8 character which is not in\n"
+      "  // ASCII.\n"
+      "  var foobX";
+  char chunk2[] =
+      "XXr = 13;\n"
+      "  return foob\uc481r;\n"
+      "}\n";
+  // Replace the placeholder X's: the first byte of the character ends chunk1,
+  // the remaining two bytes start chunk2.
+  chunk1[strlen(chunk1) - 1] = reference[0];
+  chunk2[0] = reference[1];
+  chunk2[1] = reference[2];
+  const char* chunks[] = {chunk1, chunk2, "foo();", NULL};
+  RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
+}
+
+
+TEST(StreamingUtf8ScriptWithSplitCharactersValidEdgeCases) {
+  // Tests edge cases which should still be decoded correctly.
+
+  // Case 1: a chunk contains only bytes for a split character (and no other
+  // data). This kind of a chunk would be exceptionally small, but we should
+  // still decode it correctly.
+  const char* reference = "\uc481";
+  // The small chunk is at the beginning of the split character
+  {
+    char chunk1[] =
+        "function foo() {\n"
+        "  // This function will contain an UTF-8 character which is not in\n"
+        "  // ASCII.\n"
+        "  var foob";
+    char chunk2[] = "XX";
+    char chunk3[] =
+        "Xr = 13;\n"
+        "  return foob\uc481r;\n"
+        "}\n";
+    // chunk2 holds the first two bytes of the character, chunk3 starts with
+    // the last one.
+    chunk2[0] = reference[0];
+    chunk2[1] = reference[1];
+    chunk3[0] = reference[2];
+    const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};
+    RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
+  }
+  // The small chunk is at the end of a character
+  {
+    char chunk1[] =
+        "function foo() {\n"
+        "  // This function will contain an UTF-8 character which is not in\n"
+        "  // ASCII.\n"
+        "  var foobX";
+    char chunk2[] = "XX";
+    char chunk3[] =
+        "r = 13;\n"
+        "  return foob\uc481r;\n"
+        "}\n";
+    // chunk1 ends with the first byte of the character, chunk2 holds the
+    // remaining two.
+    chunk1[strlen(chunk1) - 1] = reference[0];
+    chunk2[0] = reference[1];
+    chunk2[1] = reference[2];
+    const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};
+    RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
+  }
+  // Case 2: the script ends with a multi-byte character. Make sure that it's
+  // decoded correctly and not just ignored.
+  {
+    char chunk1[] =
+        "var foob\uc481 = 13;\n"
+        "foob\uc481";
+    const char* chunks[] = {chunk1, NULL};
+    RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8);
+  }
+}
+
+
+TEST(StreamingUtf8ScriptWithSplitCharactersInvalidEdgeCases) {
+  // Test cases where a UTF-8 character is split over several chunks. Those
+  // cases are not supported (the embedder should give the data in big enough
+  // chunks), but we shouldn't crash, just produce a parse error.
+  const char* reference = "\uc481";
+  char chunk1[] =
+      "function foo() {\n"
+      "  // This function will contain an UTF-8 character which is not in\n"
+      "  // ASCII.\n"
+      "  var foobX";
+  char chunk2[] = "X";
+  char chunk3[] =
+      "Xr = 13;\n"
+      "  return foob\uc481r;\n"
+      "}\n";
+  // Spread the three bytes of the character over three consecutive chunks.
+  chunk1[strlen(chunk1) - 1] = reference[0];
+  chunk2[0] = reference[1];
+  chunk3[0] = reference[2];
+  const char* chunks[] = {chunk1, chunk2, chunk3, "foo();", NULL};
+
+  // expected_success == false: compilation must fail with an exception.
+  RunStreamingTest(chunks, v8::ScriptCompiler::StreamedSource::UTF8, false);
+}
+
+
+// Checks that streaming with kProduceParserCache leaves non-empty cached data
+// on the StreamedSource once the streaming task has run.
+TEST(StreamingProducesParserCache) {
+  // NOTE(review): presumably lowered so that even this tiny function is
+  // preparsed and contributes cache data; the flag is not restored afterwards
+  // -- confirm that is intended.
+  i::FLAG_min_preparse_length = 0;
+  const char* chunks[] = {"function foo() { ret", "urn 13; } f", "oo(); ",
+                          NULL};
+
+  LocalContext env;
+  v8::Isolate* isolate = env->GetIsolate();
+  v8::HandleScope scope(isolate);
+
+  v8::ScriptCompiler::StreamedSource source(
+      new TestSourceStream(chunks),
+      v8::ScriptCompiler::StreamedSource::ONE_BYTE);
+  v8::ScriptCompiler::ScriptStreamingTask* task =
+      v8::ScriptCompiler::StartStreamingScript(
+          isolate, &source, v8::ScriptCompiler::kProduceParserCache);
+
+  // TestSourceStream::GetMoreData won't block, so it's OK to just run the
+  // task here in the main thread.
+  task->Run();
+  delete task;
+
+  const v8::ScriptCompiler::CachedData* cached_data = source.GetCachedData();
+  CHECK(cached_data != NULL);
+  CHECK(cached_data->data != NULL);
+  CHECK_GT(cached_data->length, 0);
+}
diff --git a/tools/gyp/v8.gyp b/tools/gyp/v8.gyp

index 1641138991fefc55f01f77151d61fa8b7976d772..79a1adf39d929359b8c67d748e5660ca18b4e6b0 100644 (file)
--- a/tools/gyp/v8.gyp
+++ b/tools/gyp/v8.gyp
@@ -347,6 +347,8 @@
          '../../src/ast-value-factory.h',
          '../../src/ast.cc',
          '../../src/ast.h',
+        '../../src/background-parsing-task.cc',
+        '../../src/background-parsing-task.h',
          '../../src/bignum-dtoa.cc',
          '../../src/bignum-dtoa.h',
          '../../src/bignum.cc',
author	marja@chromium.org <marja@chromium.org>
	Thu, 11 Sep 2014 11:06:26 +0000 (11:06 +0000)
committer	marja@chromium.org <marja@chromium.org>
	Thu, 11 Sep 2014 11:06:26 +0000 (11:06 +0000)
BUILD.gn		patch \| blob \| history
include/v8.h		patch \| blob \| history
src/api.cc		patch \| blob \| history
src/background-parsing-task.cc	[new file with mode: 0644]	patch \| blob
src/background-parsing-task.h	[new file with mode: 0644]	patch \| blob
src/compiler.cc		patch \| blob \| history
src/compiler.h		patch \| blob \| history
src/parser.cc		patch \| blob \| history
src/parser.h		patch \| blob \| history
src/scanner-character-streams.cc		patch \| blob \| history
src/scanner-character-streams.h		patch \| blob \| history
test/cctest/test-api.cc		patch \| blob \| history
tools/gyp/v8.gyp		patch \| blob \| history