Add experimental support for external two-byte symbols.
author ager@chromium.org <ager@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 17 Nov 2008 15:13:40 +0000 (15:13 +0000)
committer ager@chromium.org <ager@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 17 Nov 2008 15:13:40 +0000 (15:13 +0000)
This might be a big win in a browser setting, where a lot of string
conversions can be avoided.  On the other hand, it adds extra pressure
on the global handle system.
Review URL: http://codereview.chromium.org/11404

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@772 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

include/v8.h
src/api.cc
src/heap-inl.h
src/heap.cc
src/heap.h
src/objects.cc
test/cctest/test-api.cc

index c99dc5a..c7b641e 100644 (file)
@@ -1811,7 +1811,7 @@ class EXPORT Exception {
 };
 
 
-// --- C o u n t e r s  C a l l b a c k s
+// --- C o u n t e r s  C a l l b a c k s ---
 
 typedef int* (*CounterLookupCallback)(const wchar_t* name);
 
@@ -1832,7 +1832,24 @@ typedef void (*FailedAccessCheckCallback)(Local<Object> target,
 typedef void (*GCCallback)();
 
 
-//  --- C o n t e x t  G e n e r a t o r
+// --- E x t e r n a l  S y m b o l  C a l l b a c k ---
+
+/**
+ * Callback used to allocate certain V8 symbols as external strings.
+ *
+ * The data passed to the callback is UTF-8 encoded.
+ *
+ * Allocations are not allowed in the callback function; you therefore
+ * cannot manipulate objects (set or delete properties, for example),
+ * since it is possible that such operations will result in the
+ * allocation of objects.
+ */
+typedef String::ExternalStringResource* (*ExternalSymbolCallback)(
+    const char* utf8,
+    size_t length);
+
+
+// --- C o n t e x t  G e n e r a t o r ---
 
 /**
  * Applications must provide a callback function which is called to generate
@@ -1932,6 +1949,20 @@ class EXPORT V8 {
   static void SetGlobalGCEpilogueCallback(GCCallback);
 
   /**
+   * Applications can register a callback that will be used when
+   * allocating most of the V8 symbols.  The callback must return an
+   * external string resource that represents the symbol.
+   *
+   * Most often when performing a property lookup the key will be a
+   * symbol.  Allocating symbols as external strings can reduce the
+   * number of string conversions needed when using interceptors and
+   * accessors.
+   *
+   * \note This is an experimental feature and it might be removed.
+   */
+  static void SetExternalSymbolCallback(ExternalSymbolCallback);
+
+  /**
    * Allows the host application to group objects together. If one
    * object in the group is alive, all objects in the group are alive.
    * After each garbage collection, object groups are removed. It is
index 3c3ebc6..017753b 100644 (file)
@@ -2652,6 +2652,12 @@ void V8::SetGlobalGCEpilogueCallback(GCCallback callback) {
 }
 
 
+void V8::SetExternalSymbolCallback(ExternalSymbolCallback callback) {
+  if (IsDeadCheck("v8::V8::SetExternalSymbolCallback()")) return;
+  i::Heap::SetExternalSymbolCallback(callback);
+}
+
+
 String::Utf8Value::Utf8Value(v8::Handle<v8::Value> obj) {
   EnsureInitialized("v8::String::Utf8Value::Utf8Value()");
   if (obj.IsEmpty()) {
index 4fd9a4e..07d92c1 100644 (file)
@@ -38,6 +38,18 @@ int Heap::MaxHeapObjectSize() {
 }
 
 
+Object* Heap::AllocateSymbol(Vector<const char> str,
+                             int chars,
+                             uint32_t length_field) {
+  if (global_external_symbol_callback_) {
+    return AllocateExternalSymbol(str, chars);
+  }
+  unibrow::Utf8InputBuffer<> buffer(str.start(),
+                                    static_cast<unsigned>(str.length()));
+  return AllocateInternalSymbol(&buffer, chars, length_field);
+}
+
+
 Object* Heap::AllocateRaw(int size_in_bytes,
                           AllocationSpace space,
                           AllocationSpace retry_space) {
index e62234f..27b3945 100644 (file)
@@ -84,6 +84,8 @@ int Heap::initial_semispace_size_ = 256*KB;
 GCCallback Heap::global_gc_prologue_callback_ = NULL;
 GCCallback Heap::global_gc_epilogue_callback_ = NULL;
 
+ExternalSymbolCallback Heap::global_external_symbol_callback_ = NULL;
+
 // Variables set based on semispace_size_ and old_generation_size_ in
 // ConfigureHeap.
 int Heap::young_generation_size_ = 0;  // Will be 2 * semispace_size_.
@@ -1535,6 +1537,29 @@ Object* Heap::AllocateExternalStringFromTwoByte(
 }
 
 
+Object* Heap::AllocateExternalSymbolFromTwoByte(
+    ExternalTwoByteString::Resource* resource) {
+  Map* map;
+  int length = resource->length();
+  if (length <= String::kMaxShortStringSize) {
+    map = short_external_symbol_map();
+  } else if (length <= String::kMaxMediumStringSize) {
+    map = medium_external_symbol_map();
+  } else {
+    map = long_external_symbol_map();
+  }
+
+  Object* result = Allocate(map, OLD_DATA_SPACE);
+  if (result->IsFailure()) return result;
+
+  ExternalTwoByteString* external_string = ExternalTwoByteString::cast(result);
+  external_string->set_length(length);
+  external_string->set_resource(resource);
+
+  return result;
+}
+
+
 Object* Heap::LookupSingleCharacterStringFromCode(uint16_t code) {
   if (code <= String::kMaxAsciiCharCode) {
     Object* value = Heap::single_character_string_cache()->get(code);
@@ -2028,9 +2053,9 @@ Map* Heap::SymbolMapForString(String* string) {
 }
 
 
-Object* Heap::AllocateSymbol(unibrow::CharacterStream* buffer,
-                             int chars,
-                             uint32_t length_field) {
+Object* Heap::AllocateInternalSymbol(unibrow::CharacterStream* buffer,
+                                     int chars,
+                                     uint32_t length_field) {
   // Ensure the chars matches the number of characters in the buffer.
   ASSERT(static_cast<unsigned>(chars) == buffer->Length());
   // Determine whether the string is ascii.
@@ -2086,6 +2111,42 @@ Object* Heap::AllocateSymbol(unibrow::CharacterStream* buffer,
 }
 
 
+// External string resource that only contains a length field.  These
+// are used temporarily when allocating external symbols.
+class DummyExternalStringResource
+    : public v8::String::ExternalStringResource {
+ public:
+  explicit DummyExternalStringResource(size_t length) : length_(length) { }
+
+  virtual const uint16_t* data() const {
+    UNREACHABLE();
+    return NULL;
+  }
+
+  virtual size_t length() const { return length_; }
+ private:
+  size_t length_;
+};
+
+
+Object* Heap::AllocateExternalSymbol(Vector<const char> string, int chars) {
+  // Attempt to allocate the resulting external string first.  Use a
+  // dummy string resource that has the correct length so that we only
+  // have to patch the external string resource after the callback.
+  DummyExternalStringResource dummy_resource(chars);
+  Object* obj = AllocateExternalSymbolFromTwoByte(&dummy_resource);
+  if (obj->IsFailure()) return obj;
+  // Perform callback.
+  v8::String::ExternalStringResource* resource =
+      global_external_symbol_callback_(string.start(), string.length());
+  // Patch the resource pointer of the result.
+  ExternalTwoByteString* result = ExternalTwoByteString::cast(obj);
+  result->set_resource(resource);
+  ASSERT(result->IsEqualTo(string));
+  return result;
+}
+
+
 Object* Heap::AllocateRawAsciiString(int length, PretenureFlag pretenure) {
   AllocationSpace space = (pretenure == TENURED) ? OLD_DATA_SPACE : NEW_SPACE;
   int size = SeqAsciiString::SizeFor(length);
index d89caf9..694ee56 100644 (file)
@@ -356,9 +356,17 @@ class Heap : public AllStatic {
   // Returns Failure::RetryAfterGC(requested_bytes, space) if the allocation
   // failed.
   // Please note this function does not perform a garbage collection.
-  static Object* AllocateSymbol(unibrow::CharacterStream* buffer,
-                                int chars,
-                                uint32_t length_field);
+  static inline Object* AllocateSymbol(Vector<const char> str,
+                                       int chars,
+                                       uint32_t length_field);
+
+  static Object* AllocateInternalSymbol(unibrow::CharacterStream* buffer,
+                                        int chars,
+                                        uint32_t length_field);
+
+  static Object* AllocateExternalSymbol(Vector<const char> str,
+                                        int chars);
+
 
   // Allocates and partially initializes a String.  There are two String
   // encodings: ASCII and two byte.  These functions allocate a string of the
@@ -527,6 +535,8 @@ class Heap : public AllStatic {
       ExternalAsciiString::Resource* resource);
   static Object* AllocateExternalStringFromTwoByte(
       ExternalTwoByteString::Resource* resource);
+  static Object* AllocateExternalSymbolFromTwoByte(
+      ExternalTwoByteString::Resource* resource);
 
   // Allocates an uninitialized object.  The memory is non-executable if the
   // hardware and OS allow.
@@ -595,6 +605,10 @@ class Heap : public AllStatic {
     global_gc_epilogue_callback_ = callback;
   }
 
+  static void SetExternalSymbolCallback(ExternalSymbolCallback callback) {
+    global_external_symbol_callback_ = callback;
+  }
+
   // Heap roots
 #define ROOT_ACCESSOR(type, name) static type* name() { return name##_; }
   ROOT_LIST(ROOT_ACCESSOR)
@@ -854,6 +868,9 @@ class Heap : public AllStatic {
   static GCCallback global_gc_prologue_callback_;
   static GCCallback global_gc_epilogue_callback_;
 
+  // Callback function used for allocating external symbols.
+  static ExternalSymbolCallback global_external_symbol_callback_;
+
   // Checks whether a global GC is necessary
   static GarbageCollector SelectGarbageCollector(AllocationSpace space);
 
index 511f9f7..b4fe552 100644 (file)
@@ -5806,9 +5806,7 @@ class Utf8SymbolKey : public HashTableKey {
 
   Object* GetObject() {
     if (length_field_ == 0) Hash();
-    unibrow::Utf8InputBuffer<> buffer(string_.start(),
-                                      static_cast<unsigned>(string_.length()));
-    return Heap::AllocateSymbol(&buffer, chars_, length_field_);
+    return Heap::AllocateSymbol(string_, chars_, length_field_);
   }
 
   static uint32_t StringHash(Object* obj) {
@@ -5857,9 +5855,9 @@ class SymbolKey : public HashTableKey {
     }
     // Otherwise allocate a new symbol.
     StringInputBuffer buffer(string_);
-    return Heap::AllocateSymbol(&buffer,
-                                string_->length(),
-                                string_->length_field());
+    return Heap::AllocateInternalSymbol(&buffer,
+                                        string_->length(),
+                                        string_->length_field());
   }
 
   static uint32_t StringHash(Object* obj) {
index f3b018d..38394fb 100644 (file)
@@ -5222,3 +5222,45 @@ THREADED_TEST(DisableAccessChecksWhileConfiguring) {
   Local<Value> value = CompileRun("obj.x");
   CHECK(value->BooleanValue());
 }
+
+
+static String::ExternalStringResource* SymbolCallback(const char* chars,
+                                                      size_t length) {
+  uint16_t* buffer = i::NewArray<uint16_t>(length + 1);
+  for (size_t i = 0; i < length; i++) {
+    buffer[i] = chars[i];
+  }
+  buffer[length] = '\0';
+  return new TestResource(buffer);
+}
+
+
+static v8::Handle<Value> ExternalSymbolGetter(Local<String> name,
+                                              const AccessorInfo& info) {
+  CHECK(name->IsExternal());
+  return v8::True();
+}
+
+
+static void ExternalSymbolSetter(Local<String> name,
+                                 Local<Value> value,
+                                 const AccessorInfo&) {
+  CHECK(name->IsExternal());
+}
+
+
+THREADED_TEST(ExternalSymbols) {
+  TestResource::dispose_count = 0;
+  v8::V8::SetExternalSymbolCallback(SymbolCallback);
+  v8::HandleScope scope;
+  LocalContext context;
+  Local<ObjectTemplate> templ = ObjectTemplate::New();
+  templ->SetAccessor(v8_str("x"), ExternalSymbolGetter, ExternalSymbolSetter);
+  context->Global()->Set(v8_str("obj"), templ->NewInstance());
+  Local<Value> value = CompileRun("var o = { x: 42 }; o.x");
+  CHECK_EQ(42, value->Int32Value());
+  value = CompileRun("obj.x");
+  CHECK_EQ(true, value->BooleanValue());
+  value = CompileRun("obj.x = 42");
+  v8::V8::SetExternalSymbolCallback(NULL);
+}