From 642ea2b668c12773c371b333926ccc10070ef2cb Mon Sep 17 00:00:00 2001 From: Trevor Norris Date: Sun, 9 Jun 2013 15:54:30 -0700 Subject: [PATCH] string_bytes: use external for large strings When large strings are used they cause v8's GC to spend a lot more time cleaning up. In these cases it's much faster to use external string resources. UTF8 strings do not use external string resources because only one-byte and two-byte external strings are supported. EXTERN_APEX is the string length at which v8's GC starts to consume most of the execution time. The following table lists, for each encoding and buffer size used to encode the strings, a rough estimate of the percentage performance gain from this patch (UTF8 is missing because UTF8 strings cannot be externalized). encoding 128KB 1MB 5MB ----------------------------- ASCII 58% 208% 250% HEX 15% 74% 86% BASE64 11% 74% 71% UCS2 2% 225% 398% BINARY 2234% 1728% 2305% BINARY is so much faster across the board because it uses the new v8 WriteOneByte API. --- src/string_bytes.cc | 142 +++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 113 insertions(+), 29 deletions(-) diff --git a/src/string_bytes.cc b/src/string_bytes.cc index 3ae3b5b..e162fd7 100644 --- a/src/string_bytes.cc +++ b/src/string_bytes.cc @@ -29,6 +29,11 @@ #include "node_buffer.h" #include "v8.h" +// When creating strings >= this length v8's gc spins up and consumes +// most of the execution time. For these cases it's more performant to +// use external string resources. 
+#define EXTERN_APEX 0xFBEE9 + namespace node { using v8::Local; @@ -39,6 +44,64 @@ using v8::String; using v8::Value; +template +class ExternString: public ResourceType { + public: + ~ExternString() { + delete[] data_; + node_isolate->AdjustAmountOfExternalAllocatedMemory(-length_); + } + + const TypeName* data() const { + return data_; + } + + size_t length() const { + return length_; + } + + static Local NewFromCopy(const TypeName* data, size_t length) { + HandleScope scope(node_isolate); + + if (length == 0) + return scope.Close(String::Empty(node_isolate)); + + TypeName* new_data = new TypeName[length]; + memcpy(new_data, data, length * sizeof(*new_data)); + + return scope.Close(ExternString::New(new_data, + length)); + } + + // uses "data" for external resource, and will be free'd on gc + static Local New(const TypeName* data, size_t length) { + HandleScope scope(node_isolate); + + if (length == 0) + return scope.Close(String::Empty(node_isolate)); + + ExternString* h_str = new ExternString(data, + length); + Local str = String::NewExternal(h_str); + node_isolate->AdjustAmountOfExternalAllocatedMemory(length); + + return scope.Close(str); + } + + private: + ExternString(const TypeName* data, size_t length) + : data_(data), length_(length) { } + const TypeName* data_; + size_t length_; +}; + + +typedef ExternString ExternOneByteString; +typedef ExternString ExternTwoByteString; + + //// Base 64 //// #define base64_encoded_size(size) ((size + 2 - ((size + 2) % 3)) / 3 * 4) @@ -556,16 +619,23 @@ Local StringBytes::Encode(const char* buf, if (contains_non_ascii(buf, buflen)) { char* out = new char[buflen]; force_ascii(buf, out, buflen); - val = String::NewFromOneByte(node_isolate, - reinterpret_cast(out), - String::kNormalString, - buflen); - delete[] out; + if (buflen < EXTERN_APEX) { + val = String::NewFromOneByte(node_isolate, + reinterpret_cast(out), + String::kNormalString, + buflen); + delete[] out; + } else { + val = ExternOneByteString::New(out, buflen); 
+ } } else { - val = String::NewFromOneByte(node_isolate, - reinterpret_cast(buf), - String::kNormalString, - buflen); + if (buflen < EXTERN_APEX) + val = String::NewFromOneByte(node_isolate, + reinterpret_cast(buf), + String::kNormalString, + buflen); + else + val = ExternOneByteString::NewFromCopy(buf, buflen); } break; @@ -576,13 +646,15 @@ Local StringBytes::Encode(const char* buf, buflen); break; - case BINARY: { - val = String::NewFromOneByte(node_isolate, - reinterpret_cast(buf), - String::kNormalString, - buflen); + case BINARY: + if (buflen < EXTERN_APEX) + val = String::NewFromOneByte(node_isolate, + reinterpret_cast(buf), + String::kNormalString, + buflen); + else + val = ExternOneByteString::NewFromCopy(buf, buflen); break; - } case BASE64: { size_t dlen = base64_encoded_size(buflen); @@ -591,19 +663,27 @@ Local StringBytes::Encode(const char* buf, size_t written = base64_encode(buf, buflen, dst, dlen); assert(written == dlen); - val = String::NewFromOneByte(node_isolate, - reinterpret_cast(dst), - String::kNormalString, - dlen); - delete[] dst; + if (dlen < EXTERN_APEX) { + val = String::NewFromOneByte(node_isolate, + reinterpret_cast(dst), + String::kNormalString, + dlen); + delete[] dst; + } else { + val = ExternOneByteString::New(dst, dlen); + } break; } case UCS2: { - val = String::NewFromTwoByte(node_isolate, - reinterpret_cast(buf), - String::kNormalString, - buflen / 2); + const uint16_t* out = reinterpret_cast(buf); + if (buflen < EXTERN_APEX) + val = String::NewFromTwoByte(node_isolate, + out, + String::kNormalString, + buflen / 2); + else + val = ExternTwoByteString::NewFromCopy(out, buflen / 2); break; } @@ -613,11 +693,15 @@ Local StringBytes::Encode(const char* buf, size_t written = hex_encode(buf, buflen, dst, dlen); assert(written == dlen); - val = String::NewFromOneByte(node_isolate, - reinterpret_cast(dst), - String::kNormalString, - dlen); - delete[] dst; + if (dlen < EXTERN_APEX) { + val = String::NewFromOneByte(node_isolate, + 
reinterpret_cast(dst), + String::kNormalString, + dlen); + delete[] dst; + } else { + val = ExternOneByteString::New(dst, dlen); + } break; } -- 2.7.4