string_bytes: use external for large strings
author Trevor Norris <trev.norris@gmail.com>
Sun, 9 Jun 2013 22:54:30 +0000 (15:54 -0700)
committer Trevor Norris <trev.norris@gmail.com>
Wed, 12 Jun 2013 21:43:24 +0000 (14:43 -0700)
When large strings are used they cause v8's GC to spend a lot more time
cleaning up. In these cases it's much faster to use external string
resources.

UTF8 strings do not use external string resources because only one and
two byte external strings are supported.

EXTERN_APEX is the string length at which v8's GC starts to dominate execution time.

The following table lists, for each encoding type and buffer size used to
encode the strings, a rough estimate of the percentage performance gain from
this patch (UTF8 is missing because UTF8 strings cannot be externalized).

encoding  128KB    1MB    5MB
-----------------------------
ASCII       58%   208%   250%
HEX         15%    74%    86%
BASE64      11%    74%    71%
UCS2         2%   225%   398%
BINARY    2234%  1728%  2305%

BINARY is so much faster across the board because of using the new v8
WriteOneByte API.

src/string_bytes.cc

index 3ae3b5b..e162fd7 100644 (file)
 #include "node_buffer.h"
 #include "v8.h"
 
+// When creating strings >= this length v8's gc spins up and consumes
+// most of the execution time. For these cases it's more performant to
+// use external string resources.
+#define EXTERN_APEX 0xFBEE9  // 1,031,913 in decimal
+
 namespace node {
 
 using v8::Local;
@@ -39,6 +44,64 @@ using v8::String;
 using v8::Value;
 
 
+template <typename ResourceType, typename TypeName>
+class ExternString: public ResourceType {
+  public:
+    ~ExternString() {
+      delete[] data_;
+      node_isolate->AdjustAmountOfExternalAllocatedMemory(-length_);
+    }
+
+    const TypeName* data() const {
+      return data_;
+    }
+
+    size_t length() const {
+      return length_;
+    }
+
+    static Local<String> NewFromCopy(const TypeName* data, size_t length) {
+      HandleScope scope(node_isolate);
+
+      if (length == 0)
+        return scope.Close(String::Empty(node_isolate));
+
+      TypeName* new_data = new TypeName[length];
+      memcpy(new_data, data, length * sizeof(*new_data));
+
+      return scope.Close(ExternString<ResourceType, TypeName>::New(new_data,
+                                                                   length));
+    }
+
+    // uses "data" for external resource, and will be free'd on gc
+    static Local<String> New(const TypeName* data, size_t length) {
+      HandleScope scope(node_isolate);
+
+      if (length == 0)
+        return scope.Close(String::Empty(node_isolate));
+
+      ExternString* h_str = new ExternString<ResourceType, TypeName>(data,
+                                                                     length);
+      Local<String> str = String::NewExternal(h_str);
+      node_isolate->AdjustAmountOfExternalAllocatedMemory(length);
+
+      return scope.Close(str);
+    }
+
+  private:
+    ExternString(const TypeName* data, size_t length)
+      : data_(data), length_(length) { }
+    const TypeName* data_;
+    size_t length_;
+};
+
+
+typedef ExternString<String::ExternalAsciiStringResource,
+                     char> ExternOneByteString;  // elements are char
+typedef ExternString<String::ExternalStringResource,
+                     uint16_t> ExternTwoByteString;  // elements are uint16_t
+
+
 //// Base 64 ////
 
 #define base64_encoded_size(size) ((size + 2 - ((size + 2) % 3)) / 3 * 4)
@@ -556,16 +619,23 @@ Local<Value> StringBytes::Encode(const char* buf,
       if (contains_non_ascii(buf, buflen)) {
         char* out = new char[buflen];
         force_ascii(buf, out, buflen);
-        val = String::NewFromOneByte(node_isolate,
-                                     reinterpret_cast<const uint8_t*>(out),
-                                     String::kNormalString,
-                                     buflen);
-        delete[] out;
+        if (buflen < EXTERN_APEX) {
+          val = String::NewFromOneByte(node_isolate,
+                                       reinterpret_cast<const uint8_t*>(out),
+                                       String::kNormalString,
+                                       buflen);
+          delete[] out;
+        } else {
+          val = ExternOneByteString::New(out, buflen);
+        }
       } else {
-        val = String::NewFromOneByte(node_isolate,
-                                     reinterpret_cast<const uint8_t*>(buf),
-                                     String::kNormalString,
-                                     buflen);
+        if (buflen < EXTERN_APEX)
+          val = String::NewFromOneByte(node_isolate,
+                                       reinterpret_cast<const uint8_t*>(buf),
+                                       String::kNormalString,
+                                       buflen);
+        else
+          val = ExternOneByteString::NewFromCopy(buf, buflen);
       }
       break;
 
@@ -576,13 +646,15 @@ Local<Value> StringBytes::Encode(const char* buf,
                                 buflen);
       break;
 
-    case BINARY: {
-      val = String::NewFromOneByte(node_isolate,
-                                   reinterpret_cast<const uint8_t*>(buf),
-                                   String::kNormalString,
-                                   buflen);
+    case BINARY:
+      if (buflen < EXTERN_APEX)
+        val = String::NewFromOneByte(node_isolate,
+                                     reinterpret_cast<const uint8_t*>(buf),
+                                     String::kNormalString,
+                                     buflen);
+      else
+        val = ExternOneByteString::NewFromCopy(buf, buflen);
       break;
-    }
 
     case BASE64: {
       size_t dlen = base64_encoded_size(buflen);
@@ -591,19 +663,27 @@ Local<Value> StringBytes::Encode(const char* buf,
       size_t written = base64_encode(buf, buflen, dst, dlen);
       assert(written == dlen);
 
-      val = String::NewFromOneByte(node_isolate,
-                                   reinterpret_cast<const uint8_t*>(dst),
-                                   String::kNormalString,
-                                   dlen);
-      delete[] dst;
+      if (dlen < EXTERN_APEX) {
+        val = String::NewFromOneByte(node_isolate,
+                                     reinterpret_cast<const uint8_t*>(dst),
+                                     String::kNormalString,
+                                     dlen);
+        delete[] dst;
+      } else {
+        val = ExternOneByteString::New(dst, dlen);
+      }
       break;
     }
 
     case UCS2: {
-      val = String::NewFromTwoByte(node_isolate,
-                                   reinterpret_cast<const uint16_t*>(buf),
-                                   String::kNormalString,
-                                   buflen / 2);
+      const uint16_t* out = reinterpret_cast<const uint16_t*>(buf);
+      if (buflen < EXTERN_APEX)
+        val = String::NewFromTwoByte(node_isolate,
+                                     out,
+                                     String::kNormalString,
+                                     buflen / 2);
+      else
+        val = ExternTwoByteString::NewFromCopy(out, buflen / 2);
       break;
     }
 
@@ -613,11 +693,15 @@ Local<Value> StringBytes::Encode(const char* buf,
       size_t written = hex_encode(buf, buflen, dst, dlen);
       assert(written == dlen);
 
-      val = String::NewFromOneByte(node_isolate,
-                                   reinterpret_cast<uint8_t*>(dst),
-                                   String::kNormalString,
-                                   dlen);
-      delete[] dst;
+      if (dlen < EXTERN_APEX) {
+        val = String::NewFromOneByte(node_isolate,
+                                     reinterpret_cast<const uint8_t*>(dst),
+                                     String::kNormalString,
+                                     dlen);
+        delete[] dst;
+      } else {
+        val = ExternOneByteString::New(dst, dlen);
+      }
       break;
     }