1 #include "string_bytes.h"
4 #include "node_buffer.h"
8 #include <string.h> // memcpy
10 // When creating strings >= this length v8's gc spins up and consumes
11 // most of the execution time. For these cases it's more performant to
12 // use external string resources.
13 #define EXTERN_APEX 0xFBEE9
17 using v8::EscapableHandleScope;
19 using v8::HandleScope;
26 template <typename ResourceType, typename TypeName>
27 class ExternString: public ResourceType {
29 ~ExternString() override {
31 int64_t change_in_bytes = -static_cast<int64_t>(length_);
32 isolate()->AdjustAmountOfExternalAllocatedMemory(change_in_bytes);
35 const TypeName* data() const {
39 size_t length() const {
43 static Local<String> NewFromCopy(Isolate* isolate,
46 EscapableHandleScope scope(isolate);
49 return scope.Escape(String::Empty(isolate));
51 TypeName* new_data = new TypeName[length];
52 memcpy(new_data, data, length * sizeof(*new_data));
54 return scope.Escape(ExternString<ResourceType, TypeName>::New(isolate,
59 // uses "data" for external resource, and will be free'd on gc
60 static Local<String> New(Isolate* isolate,
63 EscapableHandleScope scope(isolate);
66 return scope.Escape(String::Empty(isolate));
68 ExternString* h_str = new ExternString<ResourceType, TypeName>(isolate,
71 Local<String> str = String::NewExternal(isolate, h_str);
72 isolate->AdjustAmountOfExternalAllocatedMemory(length);
74 return scope.Escape(str);
77 inline Isolate* isolate() const { return isolate_; }
80 ExternString(Isolate* isolate, const TypeName* data, size_t length)
81 : isolate_(isolate), data_(data), length_(length) { }
83 const TypeName* data_;
88 typedef ExternString<String::ExternalOneByteStringResource,
89 char> ExternOneByteString;
90 typedef ExternString<String::ExternalStringResource,
91 uint16_t> ExternTwoByteString;
// Number of base64 characters (including '=' padding) needed to encode
// `size` input bytes: four output characters per started group of three
// input bytes. The argument is parenthesized at every use so that compound
// expressions such as `n & 7` or `cond ? a : b` expand correctly.
// `size` is still evaluated twice, so it must not have side effects.
#define base64_encoded_size(size) \
  ((((size) + 2 - (((size) + 2) % 3)) / 3) * 4)
// Upper bound on the number of bytes produced by decoding `size` base64
// characters. Doesn't check for padding at the end, so the result can be
// 1-2 bytes over.
static inline size_t base64_decoded_size_fast(size_t size) {
  const size_t remainder = size % 4;
  const size_t whole_groups = (size / 4) * 3;

  if (remainder == 0)
    return whole_groups;

  // special case: 1-byte input cannot be decoded
  if (whole_groups == 0 && remainder == 1)
    return 0;

  // non-padded input, add 1 or 2 extra bytes
  return whole_groups + 1 + (remainder == 3);
}


// Decoded size of the `size` base64 characters at `src`, computed after
// discounting up to two trailing '=' padding characters.
template <typename TypeName>
size_t base64_decoded_size(const TypeName* src, size_t size) {
  // Guard first: src[size - 1] must never be read for an empty input.
  if (size == 0)
    return 0;

  if (src[size - 1] == '=')
    size--;
  if (size > 0 && src[size - 1] == '=')
    size--;

  return base64_decoded_size_fast(size);
}
// Maps a byte to its 6-bit base64 value.
// supports regular and URL-safe base64: '+' and '-' both map to 62,
// '/' and '_' both map to 63. Negative entries are not base64 digits:
// -2 marks '\n', '\r' and ' ', -1 marks every other byte (including the
// '=' padding character).
static const int unbase64_table[] =
  { -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, -1, -1, -2, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 62, -1, 62, -1, 63,
    52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -1, -1, -1, -1, -1, -1,
    -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
    15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, 63,
    -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
    41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
  };
// Table lookup; the argument is narrowed to a byte so the index always
// stays within the 256-entry table.
#define unbase64(x) unbase64_table[static_cast<uint8_t>(x)]


// Decodes at most `srcLen` base64 characters from `src` into `buf`,
// writing at most `len` bytes. Every character whose table entry is
// negative (whitespace, '=' padding, anything outside the alphabet) is
// skipped. Returns the number of bytes written to `buf`.
template <typename TypeName>
size_t base64_decode(char* buf,
                     size_t len,
                     const TypeName* src,
                     const size_t srcLen) {
  char* dst = buf;
  char* const dstEnd = buf + len;
  const TypeName* const srcEnd = src + srcLen;

  // Skips non-digits, then stores the next digit's value in *out.
  // Returns false once the input is exhausted. The bounds check is made
  // before the dereference so the element at srcEnd is never read.
  auto next_digit = [&src, srcEnd](int* out) -> bool {
    while (src < srcEnd && unbase64(*src) < 0)
      src++;
    if (src == srcEnd)
      return false;
    *out = unbase64(*src++);
    return true;
  };

  while (src < srcEnd && dst < dstEnd) {
    int a, b, c, d;

    // Two digits are needed before the first output byte is complete.
    if (!next_digit(&a) || !next_digit(&b))
      break;
    *dst++ = static_cast<char>((a << 2) | ((b & 0x30) >> 4));
    if (dst == dstEnd)
      break;

    if (!next_digit(&c))
      break;
    *dst++ = static_cast<char>(((b & 0x0F) << 4) | ((c & 0x3C) >> 2));
    if (dst == dstEnd)
      break;

    if (!next_digit(&d))
      break;
    *dst++ = static_cast<char>(((c & 0x03) << 6) | (d & 0x3F));
  }

  return static_cast<size_t>(dst - buf);
}
// Converts one hexadecimal digit character (either case) to its value
// in the range 0-15. Returns static_cast<unsigned>(-1) when `c` is not a
// hexadecimal digit.
template <typename TypeName>
unsigned hex2bin(TypeName c) {
  if (c >= '0' && c <= '9')
    return c - '0';
  if (c >= 'A' && c <= 'F')
    return 10 + (c - 'A');
  if (c >= 'a' && c <= 'f')
    return 10 + (c - 'a');
  return static_cast<unsigned>(-1);
}
219 template <typename TypeName>
220 size_t hex_decode(char* buf,
223 const size_t srcLen) {
225 for (i = 0; i < len && i * 2 + 1 < srcLen; ++i) {
226 unsigned a = hex2bin(src[i * 2 + 0]);
227 unsigned b = hex2bin(src[i * 2 + 1]);
237 bool StringBytes::GetExternalParts(Isolate* isolate,
241 if (Buffer::HasInstance(val)) {
242 *data = Buffer::Data(val);
243 *len = Buffer::Length(val);
247 if (!val->IsString())
250 Local<String> str = val.As<String>();
252 if (str->IsExternalOneByte()) {
253 const String::ExternalOneByteStringResource* ext;
254 ext = str->GetExternalOneByteStringResource();
256 *len = ext->length();
259 } else if (str->IsExternal()) {
260 const String::ExternalStringResource* ext;
261 ext = str->GetExternalStringResource();
262 *data = reinterpret_cast<const char*>(ext->data());
263 *len = ext->length();
271 size_t StringBytes::Write(Isolate* isolate,
275 enum encoding encoding,
276 int* chars_written) {
277 HandleScope scope(isolate);
278 const char* data = nullptr;
280 bool is_extern = GetExternalParts(isolate, val, &data, &len);
283 CHECK(val->IsString() == true);
284 Local<String> str = val.As<String>();
285 len = len < buflen ? len : buflen;
287 int flags = String::NO_NULL_TERMINATION |
288 String::HINT_MANY_WRITES_EXPECTED;
295 memcpy(buf, data, len);
297 len = str->WriteOneByte(reinterpret_cast<uint8_t*>(buf),
301 if (chars_written != nullptr)
302 *chars_written = len;
307 // TODO(tjfontaine) should this validate invalid surrogate pairs as
309 memcpy(buf, data, len);
311 len = str->WriteUtf8(buf, buflen, chars_written, WRITE_UTF8_FLAGS);
316 memcpy(buf, data, len * 2);
318 len = str->Write(reinterpret_cast<uint16_t*>(buf), 0, buflen, flags);
320 // Node's "ucs2" encoding wants LE character data stored in
321 // the Buffer, so we need to reorder on BE platforms. See
322 // http://nodejs.org/api/buffer.html regarding Node's "ucs2"
323 // encoding specification
324 uint16_t* buf16 = reinterpret_cast<uint16_t*>(buf);
325 for (size_t i = 0; i < len; i++) {
326 buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8);
329 if (chars_written != nullptr)
330 *chars_written = len;
331 len = len * sizeof(uint16_t);
336 len = base64_decode(buf, buflen, data, extlen);
338 String::Value value(str);
339 len = base64_decode(buf, buflen, *value, value.length());
341 if (chars_written != nullptr) {
342 *chars_written = len;
348 len = hex_decode(buf, buflen, data, extlen);
350 String::Value value(str);
351 len = hex_decode(buf, buflen, *value, value.length());
353 if (chars_written != nullptr) {
354 *chars_written = len * 2;
359 CHECK(0 && "unknown encoding");
367 bool StringBytes::IsValidString(Isolate* isolate,
368 Handle<String> string,
370 if (enc == HEX && string->Length() % 2 != 0)
372 // TODO(bnoordhuis) Add BASE64 check?
377 // Quick and dirty size calculation
378 // Will always be at least big enough, but may have some extra
379 // UTF8 can be as much as 3x the size, Base64 can have 1-2 extra bytes
380 size_t StringBytes::StorageSize(Isolate* isolate,
382 enum encoding encoding) {
383 HandleScope scope(isolate);
384 size_t data_size = 0;
385 bool is_buffer = Buffer::HasInstance(val);
387 if (is_buffer && (encoding == BUFFER || encoding == BINARY)) {
388 return Buffer::Length(val);
391 Local<String> str = val->ToString(isolate);
397 data_size = str->Length();
401 // A single UCS2 codepoint never takes up more than 3 utf8 bytes.
402 // It is an exercise for the caller to decide when a string is
403 // long enough to justify calling Size() instead of StorageSize()
404 data_size = 3 * str->Length();
408 data_size = str->Length() * sizeof(uint16_t);
412 data_size = base64_decoded_size_fast(str->Length());
416 CHECK(str->Length() % 2 == 0 && "invalid hex string length");
417 data_size = str->Length() / 2;
421 CHECK(0 && "unknown encoding");
429 size_t StringBytes::Size(Isolate* isolate,
431 enum encoding encoding) {
432 HandleScope scope(isolate);
433 size_t data_size = 0;
434 bool is_buffer = Buffer::HasInstance(val);
436 if (is_buffer && (encoding == BUFFER || encoding == BINARY))
437 return Buffer::Length(val);
440 if (GetExternalParts(isolate, val, &data, &data_size))
443 Local<String> str = val->ToString(isolate);
449 data_size = str->Length();
453 data_size = str->Utf8Length();
457 data_size = str->Length() * sizeof(uint16_t);
461 String::Value value(str);
462 data_size = base64_decoded_size(*value, value.length());
467 data_size = str->Length() / 2;
471 CHECK(0 && "unknown encoding");
481 static bool contains_non_ascii_slow(const char* buf, size_t len) {
482 for (size_t i = 0; i < len; ++i) {
490 static bool contains_non_ascii(const char* src, size_t len) {
492 return contains_non_ascii_slow(src, len);
495 const unsigned bytes_per_word = sizeof(uintptr_t);
496 const unsigned align_mask = bytes_per_word - 1;
497 const unsigned unaligned = reinterpret_cast<uintptr_t>(src) & align_mask;
500 const unsigned n = bytes_per_word - unaligned;
501 if (contains_non_ascii_slow(src, n))
508 #if defined(_WIN64) || defined(_LP64)
509 const uintptr_t mask = 0x8080808080808080ll;
511 const uintptr_t mask = 0x80808080l;
514 const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
516 for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
521 const unsigned remainder = len & align_mask;
523 const size_t offset = len - remainder;
524 if (contains_non_ascii_slow(src + offset, remainder))
532 static void force_ascii_slow(const char* src, char* dst, size_t len) {
533 for (size_t i = 0; i < len; ++i) {
534 dst[i] = src[i] & 0x7f;
539 static void force_ascii(const char* src, char* dst, size_t len) {
541 force_ascii_slow(src, dst, len);
545 const unsigned bytes_per_word = sizeof(uintptr_t);
546 const unsigned align_mask = bytes_per_word - 1;
547 const unsigned src_unalign = reinterpret_cast<uintptr_t>(src) & align_mask;
548 const unsigned dst_unalign = reinterpret_cast<uintptr_t>(dst) & align_mask;
550 if (src_unalign > 0) {
551 if (src_unalign == dst_unalign) {
552 const unsigned unalign = bytes_per_word - src_unalign;
553 force_ascii_slow(src, dst, unalign);
558 force_ascii_slow(src, dst, len);
563 #if defined(_WIN64) || defined(_LP64)
564 const uintptr_t mask = ~0x8080808080808080ll;
566 const uintptr_t mask = ~0x80808080l;
569 const uintptr_t* srcw = reinterpret_cast<const uintptr_t*>(src);
570 uintptr_t* dstw = reinterpret_cast<uintptr_t*>(dst);
572 for (size_t i = 0, n = len / bytes_per_word; i < n; ++i) {
573 dstw[i] = srcw[i] & mask;
576 const unsigned remainder = len & align_mask;
578 const size_t offset = len - remainder;
579 force_ascii_slow(src + offset, dst + offset, remainder);
584 static size_t base64_encode(const char* src,
588 // We know how much we'll write, just make sure that there's space.
589 CHECK(dlen >= base64_encoded_size(slen) &&
590 "not enough space provided for base64 encode");
592 dlen = base64_encoded_size(slen);
601 static const char table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
602 "abcdefghijklmnopqrstuvwxyz"
610 a = src[i + 0] & 0xff;
611 b = src[i + 1] & 0xff;
612 c = src[i + 2] & 0xff;
614 dst[k + 0] = table[a >> 2];
615 dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
616 dst[k + 2] = table[((b & 0x0f) << 2) | (c >> 6)];
617 dst[k + 3] = table[c & 0x3f];
626 a = src[i + 0] & 0xff;
627 dst[k + 0] = table[a >> 2];
628 dst[k + 1] = table[(a & 3) << 4];
634 a = src[i + 0] & 0xff;
635 b = src[i + 1] & 0xff;
636 dst[k + 0] = table[a >> 2];
637 dst[k + 1] = table[((a & 3) << 4) | (b >> 4)];
638 dst[k + 2] = table[(b & 0x0f) << 2];
648 static size_t hex_encode(const char* src, size_t slen, char* dst, size_t dlen) {
649 // We know how much we'll write, just make sure that there's space.
650 CHECK(dlen >= slen * 2 &&
651 "not enough space provided for hex encode");
654 for (uint32_t i = 0, k = 0; k < dlen; i += 1, k += 2) {
655 static const char hex[] = "0123456789abcdef";
656 uint8_t val = static_cast<uint8_t>(src[i]);
657 dst[k + 0] = hex[val >> 4];
658 dst[k + 1] = hex[val & 15];
666 Local<Value> StringBytes::Encode(Isolate* isolate,
669 enum encoding encoding) {
670 EscapableHandleScope scope(isolate);
672 CHECK_NE(encoding, UCS2);
673 CHECK_LE(buflen, Buffer::kMaxLength);
674 if (!buflen && encoding != BUFFER)
675 return scope.Escape(String::Empty(isolate));
680 return scope.Escape(Buffer::New(buf, buflen));
683 if (contains_non_ascii(buf, buflen)) {
684 char* out = new char[buflen];
685 force_ascii(buf, out, buflen);
686 if (buflen < EXTERN_APEX) {
687 val = OneByteString(isolate, out, buflen);
690 val = ExternOneByteString::New(isolate, out, buflen);
693 if (buflen < EXTERN_APEX)
694 val = OneByteString(isolate, buf, buflen);
696 val = ExternOneByteString::NewFromCopy(isolate, buf, buflen);
701 val = String::NewFromUtf8(isolate,
703 String::kNormalString,
708 if (buflen < EXTERN_APEX)
709 val = OneByteString(isolate, buf, buflen);
711 val = ExternOneByteString::NewFromCopy(isolate, buf, buflen);
715 size_t dlen = base64_encoded_size(buflen);
716 char* dst = new char[dlen];
718 size_t written = base64_encode(buf, buflen, dst, dlen);
719 CHECK_EQ(written, dlen);
721 if (dlen < EXTERN_APEX) {
722 val = OneByteString(isolate, dst, dlen);
725 val = ExternOneByteString::New(isolate, dst, dlen);
731 size_t dlen = buflen * 2;
732 char* dst = new char[dlen];
733 size_t written = hex_encode(buf, buflen, dst, dlen);
734 CHECK_EQ(written, dlen);
736 if (dlen < EXTERN_APEX) {
737 val = OneByteString(isolate, dst, dlen);
740 val = ExternOneByteString::New(isolate, dst, dlen);
746 CHECK(0 && "unknown encoding");
750 return scope.Escape(val);
754 Local<Value> StringBytes::Encode(Isolate* isolate,
757 const uint16_t* src = buf;
760 if (buflen < EXTERN_APEX) {
761 val = String::NewFromTwoByte(isolate,
763 String::kNormalString,
766 val = ExternTwoByteString::NewFromCopy(isolate, src, buflen);