src: fix ucs-2 buffer encoding regression

author Ben Noordhuis <info@bnoordhuis.nl>

Tue, 3 Mar 2015 14:44:54 +0000 (15:44 +0100)

committer Ben Noordhuis <info@bnoordhuis.nl>

Thu, 5 Mar 2015 19:44:19 +0000 (20:44 +0100)
author Ben Noordhuis <info@bnoordhuis.nl>
Tue, 3 Mar 2015 14:44:54 +0000 (15:44 +0100)
committer Ben Noordhuis <info@bnoordhuis.nl>
Thu, 5 Mar 2015 19:44:19 +0000 (20:44 +0100)
diff --git a/src/node_buffer.cc b/src/node_buffer.cc

index 2d00ca9..fa77c07 100644 (file)
--- a/src/node_buffer.cc
+++ b/src/node_buffer.cc
@@ -415,9 +415,6 @@ void StringWrite(const FunctionCallbackInfo<Value>& args) {
    if (max_length == 0)
      return args.GetReturnValue().Set(0);
  
-  if (encoding == UCS2)
-    max_length = max_length / 2;
-
    if (offset >= obj_length)
      return env->ThrowRangeError("Offset is out of bounds");
  
diff --git a/src/string_bytes.cc b/src/string_bytes.cc

index 1f5e592..4f896ac 100644 (file)
--- a/src/string_bytes.cc
+++ b/src/string_bytes.cc
@@ -279,13 +279,15 @@ size_t StringBytes::Write(Isolate* isolate,
                            int* chars_written) {
    HandleScope scope(isolate);
    const char* data = nullptr;
-  size_t len = 0;
-  bool is_extern = GetExternalParts(isolate, val, &data, &len);
-  size_t extlen = len;
+  size_t nbytes = 0;
+  const bool is_extern = GetExternalParts(isolate, val, &data, &nbytes);
+  const size_t external_nbytes = nbytes;
  
    CHECK(val->IsString() == true);
    Local<String> str = val.As<String>();
-  len = len < buflen ? len : buflen;
+
+  if (nbytes > buflen)
+    nbytes = buflen;
  
    int flags = String::HINT_MANY_WRITES_EXPECTED |
                String::NO_NULL_TERMINATION |
@@ -295,67 +297,65 @@ size_t StringBytes::Write(Isolate* isolate,
      case ASCII:
      case BINARY:
      case BUFFER:
-      if (is_extern)
-        memcpy(buf, data, len);
-      else
-        len = str->WriteOneByte(reinterpret_cast<uint8_t*>(buf),
-                                0,
-                                buflen,
-                                flags);
+      if (is_extern && str->IsOneByte()) {
+        memcpy(buf, data, nbytes);
+      } else {
+        uint8_t* const dst = reinterpret_cast<uint8_t*>(buf);
+        nbytes = str->WriteOneByte(dst, 0, buflen, flags);
+      }
        if (chars_written != nullptr)
-        *chars_written = len;
+        *chars_written = nbytes;
        break;
  
      case UTF8:
-      if (is_extern)
-        // TODO(tjfontaine) should this validate invalid surrogate pairs as
-        // well?
-        memcpy(buf, data, len);
-      else
-        len = str->WriteUtf8(buf, buflen, chars_written, flags);
+      nbytes = str->WriteUtf8(buf, buflen, chars_written, flags);
        break;
  
-    case UCS2:
-      if (is_extern)
-        memcpy(buf, data, len);
-      else
-        len = str->Write(reinterpret_cast<uint16_t*>(buf), 0, buflen, flags);
+    case UCS2: {
+      uint16_t* const dst = reinterpret_cast<uint16_t*>(buf);
+      size_t nchars;
+      if (is_extern && !str->IsOneByte()) {
+        memcpy(buf, data, nbytes);
+        nchars = nbytes / sizeof(*dst);
+      } else {
+        nchars = buflen / sizeof(*dst);
+        nchars = str->Write(dst, 0, nchars, flags);
+        nbytes = nchars * sizeof(*dst);
+      }
        if (IsBigEndian()) {
          // Node's "ucs2" encoding wants LE character data stored in
          // the Buffer, so we need to reorder on BE platforms.  See
          // http://nodejs.org/api/buffer.html regarding Node's "ucs2"
          // encoding specification
-        uint16_t* buf16 = reinterpret_cast<uint16_t*>(buf);
-        for (size_t i = 0; i < len; i++) {
-          buf16[i] = (buf16[i] << 8) | (buf16[i] >> 8);
-        }
+        for (size_t i = 0; i < nchars; i++)
+          dst[i] = dst[i] << 8 | dst[i] >> 8;
        }
        if (chars_written != nullptr)
-        *chars_written = len;
-      len = len * sizeof(uint16_t);
+        *chars_written = nchars;
        break;
+    }
  
      case BASE64:
        if (is_extern) {
-        len = base64_decode(buf, buflen, data, extlen);
+        nbytes = base64_decode(buf, buflen, data, external_nbytes);
        } else {
          String::Value value(str);
-        len = base64_decode(buf, buflen, *value, value.length());
+        nbytes = base64_decode(buf, buflen, *value, value.length());
        }
        if (chars_written != nullptr) {
-        *chars_written = len;
+        *chars_written = nbytes;
        }
        break;
  
      case HEX:
        if (is_extern) {
-        len = hex_decode(buf, buflen, data, extlen);
+        nbytes = hex_decode(buf, buflen, data, external_nbytes);
        } else {
          String::Value value(str);
-        len = hex_decode(buf, buflen, *value, value.length());
+        nbytes = hex_decode(buf, buflen, *value, value.length());
        }
        if (chars_written != nullptr) {
-        *chars_written = len * 2;
+        *chars_written = nbytes;
        }
        break;
  
@@ -364,7 +364,7 @@ size_t StringBytes::Write(Isolate* isolate,
        break;
    }
  
-  return len;
+  return nbytes;
  }
  
  
diff --git a/test/parallel/test-stringbytes-external.js b/test/parallel/test-stringbytes-external.js

index e4e3f36..5bc4c94 100644 (file)
--- a/test/parallel/test-stringbytes-external.js
+++ b/test/parallel/test-stringbytes-external.js
@@ -106,3 +106,16 @@ var PRE_3OF4_APEX = Math.ceil((EXTERN_APEX / 4) * 3) - RADIOS;
      }
    }
  })();
+
+// https://github.com/iojs/io.js/issues/1024
+(function() {
+  var a = Array(1 << 20).join('x');
+  var b = Buffer(a, 'ucs2').toString('ucs2');
+  var c = Buffer(b, 'utf8').toString('utf8');
+
+  assert.equal(a.length, b.length);
+  assert.equal(b.length, c.length);
+
+  assert.equal(a, b);
+  assert.equal(b, c);
+})();
author	Ben Noordhuis <info@bnoordhuis.nl>
	Tue, 3 Mar 2015 14:44:54 +0000 (15:44 +0100)
committer	Ben Noordhuis <info@bnoordhuis.nl>
	Thu, 5 Mar 2015 19:44:19 +0000 (20:44 +0100)
src/node_buffer.cc		patch \| blob \| history
src/string_bytes.cc		patch \| blob \| history
test/parallel/test-stringbytes-external.js		patch \| blob \| history