Add basic support for Latin1 to the API.
author yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 3 Sep 2012 15:06:36 +0000 (15:06 +0000)
committer yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 3 Sep 2012 15:06:36 +0000 (15:06 +0000)
BUG=

Review URL: https://chromiumcodereview.appspot.com/10857030

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12430 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

include/v8.h
src/api.cc
src/factory.cc
src/factory.h
src/heap-inl.h
src/heap.cc
src/heap.h
src/objects.h
src/v8-counters.h
test/cctest/test-api.cc

index 3e1b42700bb5843804c0bcc61459efc2ac44b901..42aaabf6664c8a93664b4468db5c87c16966cca2 100644 (file)
@@ -1069,19 +1069,48 @@ class String : public Primitive {
     PRESERVE_ASCII_NULL = 4
   };
 
-  // 16-bit character codes.
+
+  enum StringEncoding {
+    INVALID_ENCODING  = 0,
+    UTF_8_ENCODING    = 1,
+    LATIN1_ENCODING   = 2,
+    UTF_16_ENCODING   = 3,
+
+    ASCII_HINT        = 1 << 16,
+    NOT_ASCII_HINT    = 1 << 17
+  };
+
+  static const int kStringEncodingMask = 3;
+  static const int kAsciiHintMask = String::ASCII_HINT | String::NOT_ASCII_HINT;
+
+  static const int kUndefinedLength = -1;
+
+
+  // 16-bit UTF16 code units.  PRESERVE_ASCII_NULL is not supported as option,
+  // null-characters are never converted to spaces.
   V8EXPORT int Write(uint16_t* buffer,
                      int start = 0,
-                     int length = -1,
+                     int length = kUndefinedLength,
                      int options = NO_OPTIONS) const;
-  // ASCII characters.
+
+  // ASCII characters.  Null-characters are converted to spaces unless
+  // PRESERVE_ASCII_NULL is set as option.
   V8EXPORT int WriteAscii(char* buffer,
                           int start = 0,
-                          int length = -1,
+                          int length = kUndefinedLength,
                           int options = NO_OPTIONS) const;
-  // UTF-8 encoded characters.
+
+  // Latin1 characters.  PRESERVE_ASCII_NULL is not supported as option,
+  // null-characters are never converted to spaces.
+  V8EXPORT int WriteLatin1(char* buffer,
+                           int start = 0,
+                           int length = kUndefinedLength,
+                           int options = NO_OPTIONS) const;
+
+  // UTF-8 encoded characters.  PRESERVE_ASCII_NULL is not supported as option,
+  // null-characters are never converted to spaces.
   V8EXPORT int WriteUtf8(char* buffer,
-                         int length = -1,
+                         int length = kUndefinedLength,
                          int* nchars_ref = NULL,
                          int options = NO_OPTIONS) const;
 
@@ -1122,6 +1151,7 @@ class String : public Primitive {
     void operator=(const ExternalStringResourceBase&);
 
     friend class v8::internal::Heap;
+    friend class v8::String;
   };
 
   /**
@@ -1180,6 +1210,16 @@ class String : public Primitive {
     ExternalAsciiStringResource() {}
   };
 
+  /**
+   * An ExternalLatin1StringResource is a wrapper around a Latin1-encoded
+   * string buffer that resides outside V8's heap.  For usage in V8, a Latin1
+   * string is converted to ASCII or two-byte string depending on whether
+   * it contains non-ASCII characters.
+   */
+  class V8EXPORT ExternalLatin1StringResource
+      : public ExternalAsciiStringResource {
+  };
+
   /**
    * Get the ExternalStringResource for an external string.  Returns
    * NULL if IsExternal() doesn't return true.
@@ -1193,24 +1233,44 @@ class String : public Primitive {
   V8EXPORT const ExternalAsciiStringResource* GetExternalAsciiStringResource()
       const;
 
+  /**
+   * If the string is external, return its encoding (Latin1 or UTF16)
+   * and possibly a hint on whether the content is ASCII.
+   * Return String::INVALID_ENCODING otherwise.
+   */
+  inline int GetExternalStringEncoding() const;
+
+
+  /**
+   * Return the resource of the external string regardless of encoding.
+   * Call this only after having made sure that the string is indeed external!
+   */
+  inline ExternalStringResourceBase* GetExternalStringResourceBase() const;
+
   static inline String* Cast(v8::Value* obj);
 
   /**
-   * Allocates a new string from either UTF-8 encoded or ASCII data.
-   * The second parameter 'length' gives the buffer length.
-   * If the data is UTF-8 encoded, the caller must
-   * be careful to supply the length parameter.
-   * If it is not given, the function calls
-   * 'strlen' to determine the buffer length, it might be
-   * wrong if 'data' contains a null character.
+   * Allocates a new string from either UTF-8 or Latin1-encoded data.
+   * The second parameter 'length' gives the buffer length.  If the data may
+   * contain zero bytes, the caller must be careful to supply the length
+   * parameter.  If it is not given, the function calls 'strlen' to determine
+   * the buffer length, which is wrong if 'data' contains a null character.
+   * The third parameter specifies the encoding, which may include a hint on
+   * whether the string contains only ASCII characters.  In the case of Latin1,
+   * the appropriate internal representation (UTF16 or ASCII) is chosen.
    */
-  V8EXPORT static Local<String> New(const char* data, int length = -1);
+  V8EXPORT static Local<String> New(const char* data,
+                                    int length = kUndefinedLength,
+                                    int encoding = UTF_8_ENCODING);
 
-  /** Allocates a new string from 16-bit character codes.*/
-  V8EXPORT static Local<String> New(const uint16_t* data, int length = -1);
+  /** Allocates a new string from 16-bit UTF-16 code units.*/
+  V8EXPORT static Local<String> New(const uint16_t* data,
+                                    int length = kUndefinedLength);
 
   /** Creates a symbol. Returns one if it exists already.*/
-  V8EXPORT static Local<String> NewSymbol(const char* data, int length = -1);
+  V8EXPORT static Local<String> NewSymbol(const char* data,
+                                          int length = kUndefinedLength,
+                                          int encoding = UTF_8_ENCODING);
 
   /**
    * Creates a new string by concatenating the left and the right strings
@@ -1247,7 +1307,8 @@ class String : public Primitive {
    * this function should not otherwise delete or modify the resource. Neither
    * should the underlying buffer be deallocated or modified except through the
    * destructor of the external string resource.
-   */ V8EXPORT static Local<String> NewExternal(
+   */
+  V8EXPORT static Local<String> NewExternal(
       ExternalAsciiStringResource* resource);
 
   /**
@@ -1261,6 +1322,24 @@ class String : public Primitive {
    */
   V8EXPORT bool MakeExternal(ExternalAsciiStringResource* resource);
 
+
+  /**
+   * Creates a new external string using the Latin1-encoded data defined in the
+   * given resource.  When the external string is no longer live on V8's heap
+   * the resource will be disposed by calling its Dispose method. The caller of
+   * this function should not otherwise delete or modify the resource. Neither
+   * should the underlying buffer be deallocated or modified except through the
+   * destructor of the external string resource.
+   * If the data contains a non-ASCII character, the string is created as a new
+   * string object on the V8 heap and the Dispose method is called on the
+   * resource immediately.  This is because V8 is unable to handle non-ASCII
+   * Latin1-encoded strings internally.
+   */
+  V8EXPORT static Local<String> NewExternal(
+      ExternalLatin1StringResource* resource,
+      int encoding = String::LATIN1_ENCODING);
+
+
   /**
    * Returns true if this string can be made external.
    */
@@ -1268,11 +1347,13 @@ class String : public Primitive {
 
   /** Creates an undetectable string from the supplied ASCII or UTF-8 data.*/
   V8EXPORT static Local<String> NewUndetectable(const char* data,
-                                                int length = -1);
+                                                int length = kUndefinedLength,
+                                                int encoding = UTF_8_ENCODING);
 
-  /** Creates an undetectable string from the supplied 16-bit character codes.*/
+  /** Creates an undetectable string from the supplied 16-bit UTF16 code units.
+   */
   V8EXPORT static Local<String> NewUndetectable(const uint16_t* data,
-                                                int length = -1);
+                                                int length = kUndefinedLength);
 
   /**
    * Converts an object to a UTF-8-encoded character array.  Useful if
@@ -1343,7 +1424,9 @@ class String : public Primitive {
   };
 
  private:
-  V8EXPORT void VerifyExternalStringResource(ExternalStringResource* val) const;
+  V8EXPORT void VerifyExternalStringEncoding(int encoding) const;
+  V8EXPORT void VerifyExternalStringResourceBase(
+      ExternalStringResourceBase* val) const;
   V8EXPORT static void CheckCast(v8::Value* obj);
 };
 
@@ -4034,6 +4117,9 @@ class Internals {
   static const int kJSObjectHeaderSize = 3 * kApiPointerSize;
   static const int kFullStringRepresentationMask = 0x07;
   static const int kExternalTwoByteRepresentationTag = 0x02;
+  static const int kExternalAsciiRepresentationTag = 0x06;
+  static const int kExternalAsciiDataHintMask = 0x08;
+  static const int kExternalAsciiDataHintTag = 0x08;
 
   static const int kIsolateStateOffset = 0;
   static const int kIsolateEmbedderDataOffset = 1 * kApiPointerSize;
@@ -4091,11 +4177,6 @@ class Internals {
     }
   }
 
-  static inline bool IsExternalTwoByteString(int instance_type) {
-    int representation = (instance_type & kFullStringRepresentationMask);
-    return representation == kExternalTwoByteRepresentationTag;
-  }
-
   static inline bool IsInitialized(v8::Isolate* isolate) {
     uint8_t* addr = reinterpret_cast<uint8_t*>(isolate) + kIsolateStateOffset;
     return *reinterpret_cast<int*>(addr) == 1;
@@ -4373,16 +4454,56 @@ Local<String> String::Empty(Isolate* isolate) {
 String::ExternalStringResource* String::GetExternalStringResource() const {
   typedef internal::Object O;
   typedef internal::Internals I;
+  String::ExternalStringResource* result = NULL;
   O* obj = *reinterpret_cast<O**>(const_cast<String*>(this));
-  String::ExternalStringResource* result;
-  if (I::IsExternalTwoByteString(I::GetInstanceType(obj))) {
-    void* value = I::ReadField<void*>(obj, I::kStringResourceOffset);
-    result = reinterpret_cast<String::ExternalStringResource*>(value);
-  } else {
-    result = NULL;
+  if ((I::GetInstanceType(obj) & I::kFullStringRepresentationMask) ==
+      I::kExternalTwoByteRepresentationTag) {
+    result = reinterpret_cast<String::ExternalStringResource*>(
+                 GetExternalStringResourceBase());
   }
+  return result;
+}
+
+
+int String::GetExternalStringEncoding() const {
+  typedef internal::Object O;
+  typedef internal::Internals I;
+  O* obj = *reinterpret_cast<O**>(const_cast<String*>(this));
+  static const int kRepresentationAndHintMask =
+      I::kFullStringRepresentationMask | I::kExternalAsciiDataHintMask;
+
+  int encoding;
+  switch (I::GetInstanceType(obj) & kRepresentationAndHintMask) {
+    case I::kExternalTwoByteRepresentationTag | I::kExternalAsciiDataHintTag:
+      encoding = UTF_16_ENCODING | ASCII_HINT;
+      break;
+    case I::kExternalTwoByteRepresentationTag:
+      encoding = UTF_16_ENCODING | NOT_ASCII_HINT;
+      break;
+    case I::kExternalAsciiRepresentationTag:
+      encoding = LATIN1_ENCODING | ASCII_HINT;
+      break;
+    default:
+      encoding = INVALID_ENCODING;
+      break;
+  }
+#ifdef V8_ENABLE_CHECKS
+  VerifyExternalStringEncoding(encoding);
+#endif
+  return encoding;
+}
+
+
+String::ExternalStringResourceBase* String::GetExternalStringResourceBase()
+    const {
+  typedef internal::Object O;
+  typedef internal::Internals I;
+  O* obj = *reinterpret_cast<O**>(const_cast<String*>(this));
+  void* value = I::ReadField<void*>(obj, I::kStringResourceOffset);
+  ExternalStringResourceBase* result =
+      reinterpret_cast<String::ExternalStringResourceBase*>(value);
 #ifdef V8_ENABLE_CHECKS
-  VerifyExternalStringResource(result);
+  VerifyExternalStringResourceBase(result);
 #endif
   return result;
 }
index 65b544c7919eae9cb63119d7c0131b9be357ac73..c8d5a9c19054aa02c5a73e24a10b58101c0f420f 100644 (file)
@@ -3857,7 +3857,7 @@ int String::WriteUtf8(char* buffer,
   int string_length = str->length();
   if (str->IsAsciiRepresentation()) {
     int len;
-    if (capacity == -1) {
+    if (capacity == kUndefinedLength) {
       capacity = str->length() + 1;
       len = string_length;
     } else {
@@ -3872,7 +3872,7 @@ int String::WriteUtf8(char* buffer,
     return len;
   }
 
-  if (capacity == -1 || capacity / 3 >= string_length) {
+  if (capacity == kUndefinedLength || capacity / 3 >= string_length) {
     int32_t previous = unibrow::Utf16::kNoPreviousCharacter;
     const int kMaxRecursion = 100;
     int utf8_bytes =
@@ -3903,7 +3903,7 @@ int String::WriteUtf8(char* buffer,
     int utf8_bytes = i::Utf8Length(str);
     if ((options & NO_NULL_TERMINATION) == 0) utf8_bytes++;
     if (utf8_bytes <= capacity) {
-      return WriteUtf8(buffer, -1, nchars_ref, options);
+      return WriteUtf8(buffer, kUndefinedLength, nchars_ref, options);
     }
   }
 
@@ -3921,7 +3921,9 @@ int String::WriteUtf8(char* buffer,
   int pos = 0;
   int nchars = 0;
   int previous = unibrow::Utf16::kNoPreviousCharacter;
-  for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
+  for (i = 0;
+       i < len && (capacity == kUndefinedLength || pos < fast_end);
+       i++) {
     i::uc32 c = write_input_buffer.GetNext();
     int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
     pos += written;
@@ -3967,7 +3969,7 @@ int String::WriteUtf8(char* buffer,
   }
   if (nchars_ref != NULL) *nchars_ref = nchars;
   if (!(options & NO_NULL_TERMINATION) &&
-      (i == len && (capacity == -1 || pos < capacity))) {
+      (i == len && (capacity == kUndefinedLength || pos < capacity))) {
     buffer[pos++] = '\0';
   }
   return pos;
@@ -3982,7 +3984,7 @@ int String::WriteAscii(char* buffer,
   if (IsDeadCheck(isolate, "v8::String::WriteAscii()")) return 0;
   LOG_API(isolate, "String::WriteAscii");
   ENTER_V8(isolate);
-  ASSERT(start >= 0 && length >= -1);
+  ASSERT(start >= 0 && length >= kUndefinedLength);
   i::Handle<i::String> str = Utils::OpenHandle(this);
   isolate->string_tracker()->RecordWrite(str);
   if (options & HINT_MANY_WRITES_EXPECTED) {
@@ -3991,7 +3993,7 @@ int String::WriteAscii(char* buffer,
 
   if (str->IsAsciiRepresentation()) {
     // WriteToFlat is faster than using the StringInputBuffer.
-    if (length == -1) length = str->length() + 1;
+    if (length == kUndefinedLength) length = str->length() + 1;
     int len = i::Min(length, str->length() - start);
     i::String::WriteToFlat(*str, buffer, start, start + len);
     if (!(options & PRESERVE_ASCII_NULL)) {
@@ -4007,7 +4009,7 @@ int String::WriteAscii(char* buffer,
 
   i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
   int end = length;
-  if ((length == -1) || (length > str->length() - start)) {
+  if ((length == kUndefinedLength) || (length > str->length() - start)) {
     end = str->length() - start;
   }
   if (end < 0) return 0;
@@ -4025,6 +4027,31 @@ int String::WriteAscii(char* buffer,
 }
 
 
+int String::WriteLatin1(char* buffer,
+                        int start,
+                        int length,
+                        int options) const {
+  i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate();
+  if (IsDeadCheck(isolate, "v8::String::WriteLatin1()")) return 0;
+  LOG_API(isolate, "String::WriteLatin1");
+  ENTER_V8(isolate);
+  ASSERT(start >= 0 && length >= kUndefinedLength);
+  i::Handle<i::String> str = Utils::OpenHandle(this);
+  isolate->string_tracker()->RecordWrite(str);
+  if (options & HINT_MANY_WRITES_EXPECTED) FlattenString(str);  // Efficiency.
+
+  if (length == kUndefinedLength) length = str->length() + 1;
+  int len = i::Min(length, str->length() - start);
+  // Nothing to write if start is past the end; avoids a negative index below.
+  if (len < 0) return 0;
+  i::String::WriteToFlat(*str, buffer, start, start + len);
+  if (!(options & NO_NULL_TERMINATION) && length > len) {
+    buffer[len] = '\0';
+  }
+  return len;
+}
+
+
 int String::Write(uint16_t* buffer,
                   int start,
                   int length,
@@ -4033,7 +4060,7 @@ int String::Write(uint16_t* buffer,
   if (IsDeadCheck(isolate, "v8::String::Write()")) return 0;
   LOG_API(isolate, "String::Write");
   ENTER_V8(isolate);
-  ASSERT(start >= 0 && length >= -1);
+  ASSERT(start >= 0 && length >= kUndefinedLength);
   i::Handle<i::String> str = Utils::OpenHandle(this);
   isolate->string_tracker()->RecordWrite(str);
   if (options & HINT_MANY_WRITES_EXPECTED) {
@@ -4042,7 +4069,7 @@ int String::Write(uint16_t* buffer,
     FlattenString(str);
   }
   int end = start + length;
-  if ((length == -1) || (length > str->length() - start) )
+  if ((length == kUndefinedLength) || (length > str->length() - start) )
     end = str->length();
   if (end < 0) return 0;
   i::String::WriteToFlat(*str, buffer, start, end);
@@ -4073,18 +4100,43 @@ bool v8::String::IsExternalAscii() const {
 }
 
 
-void v8::String::VerifyExternalStringResource(
-    v8::String::ExternalStringResource* value) const {
+void v8::String::VerifyExternalStringEncoding(int encoding) const {
+  typedef internal::Internals I;
   i::Handle<i::String> str = Utils::OpenHandle(this);
-  const v8::String::ExternalStringResource* expected;
+  switch (encoding) {
+    case UTF_16_ENCODING | ASCII_HINT:
+      CHECK(str->HasOnlyAsciiChars());
+      // Fall through
+    case UTF_16_ENCODING | NOT_ASCII_HINT :
+      CHECK(str->IsExternalTwoByteString());
+      break;
+    case LATIN1_ENCODING | ASCII_HINT:
+    CHECK(str->IsExternalAsciiString());
+      break;
+    default:
+      CHECK_EQ(INVALID_ENCODING, encoding);
+      CHECK(!str->IsExternalString());
+      break;
+  }
+}
+
+
+void v8::String::VerifyExternalStringResourceBase(
+    v8::String::ExternalStringResourceBase* value) const {
+  i::Handle<i::String> str = Utils::OpenHandle(this);
+  i::StringShape shape(*str);
+  const void* expected;
+  // We expect an external string at this point since GetExternalStringEncoding
+  // should have already been called to rule out non-external strings.
   if (i::StringShape(*str).IsExternalTwoByte()) {
-    const void* resource =
-        i::Handle<i::ExternalTwoByteString>::cast(str)->resource();
-    expected = reinterpret_cast<const ExternalStringResource*>(resource);
+    expected = i::ExternalTwoByteString::cast(*str)->resource();
   } else {
-    expected = NULL;
+    ASSERT(i::StringShape(*str).IsExternalAscii());
+    expected = i::ExternalAsciiString::cast(*str)->resource();
   }
-  CHECK_EQ(expected, value);
+
+  CHECK_EQ(expected,
+           reinterpret_cast<const ExternalStringResourceBase*>(value));
 }
 
 
@@ -4701,17 +4753,44 @@ Local<String> v8::String::Empty() {
 }
 
 
-Local<String> v8::String::New(const char* data, int length) {
+static i::Handle<i::String> NewOneByteEncodedString(
+    i::Factory* factory, const char* data, int length, int encoding) {
+  if (length == String::kUndefinedLength) length = i::StrLength(data);
+  typedef v8::String S;
+
+  static const int kAsciiHintShift = 16;
+  ASSERT(IS_POWER_OF_TWO(encoding & S::kAsciiHintMask));
+  i::String::AsciiHint ascii_hint =
+      static_cast<i::String::AsciiHint>(encoding >> kAsciiHintShift);
+  STATIC_ASSERT(i::String::MAYBE_ASCII == 0);
+  STATIC_ASSERT(i::String::NOT_ASCII ==
+                (v8::String::NOT_ASCII_HINT >> kAsciiHintShift));
+  STATIC_ASSERT(i::String::ASCII ==
+                (v8::String::ASCII_HINT >> kAsciiHintShift));
+
+  int masked_encoding = encoding & S::kStringEncodingMask;
+
+  if (masked_encoding == S::UTF_8_ENCODING) {
+    return factory->NewStringFromUtf8(
+               i::Vector<const char>(data, length), i::NOT_TENURED, ascii_hint);
+  } else if (masked_encoding == S::LATIN1_ENCODING) {
+    return factory->NewStringFromLatin1(
+               i::Vector<const char>(data, length), i::NOT_TENURED, ascii_hint);
+  } else {  // Wrong encoding.
+    return i::Handle<i::String>();
+  }
+}
+
+
+Local<String> v8::String::New(
+    const char* data, int length, int encoding) {
   i::Isolate* isolate = i::Isolate::Current();
   EnsureInitializedForIsolate(isolate, "v8::String::New()");
   LOG_API(isolate, "String::New(char)");
   if (length == 0) return Empty();
   ENTER_V8(isolate);
-  if (length == -1) length = i::StrLength(data);
-  i::Handle<i::String> result =
-      isolate->factory()->NewStringFromUtf8(
-          i::Vector<const char>(data, length));
-  return Utils::ToLocal(result);
+  return Utils::ToLocal(
+      NewOneByteEncodedString(isolate->factory(), data, length, encoding));
 }
 
 
@@ -4728,15 +4807,14 @@ Local<String> v8::String::Concat(Handle<String> left, Handle<String> right) {
 }
 
 
-Local<String> v8::String::NewUndetectable(const char* data, int length) {
+Local<String> v8::String::NewUndetectable(
+    const char* data, int length, int encoding) {
   i::Isolate* isolate = i::Isolate::Current();
   EnsureInitializedForIsolate(isolate, "v8::String::NewUndetectable()");
   LOG_API(isolate, "String::NewUndetectable(char)");
   ENTER_V8(isolate);
-  if (length == -1) length = i::StrLength(data);
   i::Handle<i::String> result =
-      isolate->factory()->NewStringFromUtf8(
-          i::Vector<const char>(data, length));
+      NewOneByteEncodedString(isolate->factory(), data, length, encoding);
   result->MarkAsUndetectable();
   return Utils::ToLocal(result);
 }
@@ -4755,7 +4833,7 @@ Local<String> v8::String::New(const uint16_t* data, int length) {
   LOG_API(isolate, "String::New(uint16_)");
   if (length == 0) return Empty();
   ENTER_V8(isolate);
-  if (length == -1) length = TwoByteStringLength(data);
+  if (length == kUndefinedLength) length = TwoByteStringLength(data);
   i::Handle<i::String> result =
       isolate->factory()->NewStringFromTwoByte(
           i::Vector<const uint16_t>(data, length));
@@ -4768,7 +4846,7 @@ Local<String> v8::String::NewUndetectable(const uint16_t* data, int length) {
   EnsureInitializedForIsolate(isolate, "v8::String::NewUndetectable()");
   LOG_API(isolate, "String::NewUndetectable(uint16_)");
   ENTER_V8(isolate);
-  if (length == -1) length = TwoByteStringLength(data);
+  if (length == kUndefinedLength) length = TwoByteStringLength(data);
   i::Handle<i::String> result =
       isolate->factory()->NewStringFromTwoByte(
           i::Vector<const uint16_t>(data, length));
@@ -4806,29 +4884,43 @@ Local<String> v8::String::NewExternal(
 }
 
 
-bool v8::String::MakeExternal(v8::String::ExternalStringResource* resource) {
-  i::Handle<i::String> obj = Utils::OpenHandle(this);
-  i::Isolate* isolate = obj->GetIsolate();
+template<class StringResourceType>
+static bool MakeStringExternal(
+    i::Handle<i::String> string, StringResourceType* resource) {
+  i::Isolate* isolate = string->GetIsolate();
   if (IsDeadCheck(isolate, "v8::String::MakeExternal()")) return false;
-  if (i::StringShape(*obj).IsExternalTwoByte()) {
+  if (i::StringShape(*string).IsExternal()) {
     return false;  // Already an external string.
   }
   ENTER_V8(isolate);
-  if (isolate->string_tracker()->IsFreshUnusedString(obj)) {
+  if (isolate->string_tracker()->IsFreshUnusedString(string)) {
     return false;
   }
   if (isolate->heap()->IsInGCPostProcessing()) {
     return false;
   }
   CHECK(resource && resource->data());
-  bool result = obj->MakeExternal(resource);
-  if (result && !obj->IsSymbol()) {
-    isolate->heap()->external_string_table()->AddString(*obj);
+  bool result = string->MakeExternal(resource);
+  if (result && !string->IsSymbol()) {
+    isolate->heap()->external_string_table()->AddString(*string);
   }
   return result;
 }
 
 
+bool v8::String::MakeExternal(ExternalStringResource* resource) {
+  i::Handle<i::String> obj = Utils::OpenHandle(this);
+  return MakeStringExternal(obj, resource);
+}
+
+
+bool v8::String::MakeExternal(ExternalAsciiStringResource* resource) {
+  i::Handle<i::String> obj = Utils::OpenHandle(this);
+  ASSERT(obj->HasOnlyAsciiChars());
+  return MakeStringExternal(obj, resource);
+}
+
+
 Local<String> v8::String::NewExternal(
       v8::String::ExternalAsciiStringResource* resource) {
   i::Isolate* isolate = i::Isolate::Current();
@@ -4842,27 +4934,37 @@ Local<String> v8::String::NewExternal(
 }
 
 
-bool v8::String::MakeExternal(
-    v8::String::ExternalAsciiStringResource* resource) {
-  i::Handle<i::String> obj = Utils::OpenHandle(this);
-  i::Isolate* isolate = obj->GetIsolate();
-  if (IsDeadCheck(isolate, "v8::String::MakeExternal()")) return false;
-  if (i::StringShape(*obj).IsExternalTwoByte()) {
-    return false;  // Already an external string.
-  }
+Local<String> v8::String::NewExternal(ExternalLatin1StringResource* resource,
+                                      int encoding) {
+  typedef v8::internal::Internals I;
+  i::Isolate* isolate = i::Isolate::Current();
+  EnsureInitializedForIsolate(isolate, "v8::String::NewExternal()");
+  LOG_API(isolate, "String::NewExternal");
   ENTER_V8(isolate);
-  if (isolate->string_tracker()->IsFreshUnusedString(obj)) {
-    return false;
-  }
-  if (isolate->heap()->IsInGCPostProcessing()) {
-    return false;
-  }
+  ASSERT((encoding & kStringEncodingMask) == LATIN1_ENCODING);
   CHECK(resource && resource->data());
-  bool result = obj->MakeExternal(resource);
-  if (result && !obj->IsSymbol()) {
-    isolate->heap()->external_string_table()->AddString(*obj);
+  // Kept as an int: a bool would never compare equal to the hint bit values.
+  int ascii_hint = (encoding & kAsciiHintMask);
+  i::Handle<i::String> result;
+  if (ascii_hint == ASCII_HINT ||
+      (ascii_hint != NOT_ASCII_HINT &&
+       i::String::IsAscii(resource->data(), resource->length()))) {
+    // Assert that the ascii hint is correct.
+    ASSERT(ascii_hint != ASCII_HINT ||
+           i::String::IsAscii(resource->data(), resource->length()));
+    result = NewExternalAsciiStringHandle(isolate, resource);
+    isolate->heap()->external_string_table()->AddString(*result);
+  } else {
+    // We cannot simply take the backing store and use it as an ASCII string,
+    // since it's not.  Instead, we convert it to an internal string and dispose
+    // the external resource.
+    result = isolate->factory()->NewStringFromLatin1(
+        i::Vector<const char>(resource->data(), resource->length()),
+        i::NOT_TENURED,
+        i::String::NOT_ASCII);
+    resource->Dispose();
   }
-  return result;
+  return Utils::ToLocal(result);
 }
 
 
@@ -5125,14 +5227,28 @@ Local<Object> Array::CloneElementAt(uint32_t index) {
 }
 
 
-Local<String> v8::String::NewSymbol(const char* data, int length) {
+Local<String> v8::String::NewSymbol(
+    const char* data, int length, int encoding) {
   i::Isolate* isolate = i::Isolate::Current();
   EnsureInitializedForIsolate(isolate, "v8::String::NewSymbol()");
   LOG_API(isolate, "String::NewSymbol(char)");
   ENTER_V8(isolate);
-  if (length == -1) length = i::StrLength(data);
-  i::Handle<i::String> result =
-      isolate->factory()->LookupSymbol(i::Vector<const char>(data, length));
+  if (length == kUndefinedLength) length = i::StrLength(data);
+  i::Handle<i::String> result;
+
+  ASSERT(IS_POWER_OF_TWO(encoding & kAsciiHintMask));
+  if (((encoding & kStringEncodingMask) == LATIN1_ENCODING) &&
+      ((encoding & kAsciiHintMask) == NOT_ASCII_HINT ||
+       !i::String::IsAscii(data, length))) {
+    result = isolate->factory()->NewStringFromLatin1(
+                 i::Vector<const char>(data, length),
+                 i::NOT_TENURED,
+                 i::String::NOT_ASCII);
+    result = isolate->factory()->LookupSymbol(result);
+  } else {  // We can handle UTF8 and ASCII strings here.
+    result =
+        isolate->factory()->LookupSymbol(i::Vector<const char>(data, length));
+  }
   return Utils::ToLocal(result);
 }
 
index 462af590d2f02ab592062ffe5f943e2cf31fc812..69b9d29333f23b69537b0dbf93f18e76ed57de17 100644 (file)
@@ -204,10 +204,23 @@ Handle<String> Factory::NewStringFromAscii(Vector<const char> string,
 }
 
 Handle<String> Factory::NewStringFromUtf8(Vector<const char> string,
-                                          PretenureFlag pretenure) {
+                                          PretenureFlag pretenure,
+                                          String::AsciiHint ascii_hint) {
   CALL_HEAP_FUNCTION(
       isolate(),
-      isolate()->heap()->AllocateStringFromUtf8(string, pretenure),
+      isolate()->heap()->AllocateStringFromUtf8(
+          string, pretenure, ascii_hint),
+      String);
+}
+
+
+Handle<String> Factory::NewStringFromLatin1(Vector<const char> string,
+                                            PretenureFlag pretenure,
+                                            String::AsciiHint ascii_hint) {
+  CALL_HEAP_FUNCTION(
+      isolate(),
+      isolate()->heap()->AllocateStringFromLatin1(
+          string, pretenure, ascii_hint),
       String);
 }
 
index e617abb6d12e8cdde155dc569fd87beb7c2aab1d..6411d8e47bae5dcd04b2ccdb93da503cdef62354 100644 (file)
@@ -120,7 +120,13 @@ class Factory {
   // flags in the parser.
   Handle<String> NewStringFromUtf8(
       Vector<const char> str,
-      PretenureFlag pretenure = NOT_TENURED);
+      PretenureFlag pretenure = NOT_TENURED,
+      String::AsciiHint ascii_hint = String::MAYBE_ASCII);
+
+  Handle<String> NewStringFromLatin1(
+      Vector<const char> str,
+      PretenureFlag pretenure = NOT_TENURED,
+      String::AsciiHint ascii_hint = String::MAYBE_ASCII);
 
   Handle<String> NewStringFromTwoByte(
       Vector<const uc16> str,
index 4a827fef176fd57cafb2c929bb834cd229737aaa..1bc6e8db5aa141f9fa11b9510c2c3403a102a347 100644 (file)
@@ -83,9 +83,14 @@ void PromotionQueue::ActivateGuardIfOnTheSamePage() {
 
 
 MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
-                                          PretenureFlag pretenure) {
-  // Check for ASCII first since this is the common case.
-  if (String::IsAscii(str.start(), str.length())) {
+                                          PretenureFlag pretenure,
+                                          String::AsciiHint ascii_hint) {
+  if ((ascii_hint == String::MAYBE_ASCII &&
+       String::IsAscii(str.start(), str.length())) ||
+      ascii_hint == String::ASCII) {
+    // Assert that the ASCII-hint is correct.
+    ASSERT(ascii_hint != String::ASCII ||
+           String::IsAscii(str.start(), str.length()));
     // If the string is ASCII, we do not need to convert the characters
     // since UTF8 is backwards compatible with ASCII.
     return AllocateStringFromAscii(str, pretenure);
@@ -95,6 +100,24 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
 }
 
 
+MaybeObject* Heap::AllocateStringFromLatin1(Vector<const char> str,
+                                            PretenureFlag pretenure,
+                                            String::AsciiHint ascii_hint) {
+  if ((ascii_hint == String::MAYBE_ASCII &&
+        String::IsAscii(str.start(), str.length())) ||
+       ascii_hint == String::ASCII) {
+     // Assert that the strict ASCII-hint is correct.
+     ASSERT(ascii_hint != String::ASCII ||
+            String::IsAscii(str.start(), str.length()));
+     // If the string is ASCII, we do not need to convert the characters
+     // since Latin1 is backwards compatible with ASCII.
+     return AllocateStringFromAscii(str, pretenure);
+  }
+  // Non-ASCII and we need to decode.
+  return AllocateStringFromLatin1Slow(str, pretenure);
+}
+
+
 MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
                                   int chars,
                                   uint32_t hash_field) {
index 9ba769212d757505a8bf2ab835f882eacacdafba..20921d0219ca85cae77670961c9bd3838873c1cb 100644 (file)
@@ -48,6 +48,7 @@
 #include "snapshot.h"
 #include "store-buffer.h"
 #include "v8threads.h"
+#include "v8utils.h"
 #include "vm-state-inl.h"
 #if V8_TARGET_ARCH_ARM && !V8_INTERPRETED_REGEXP
 #include "regexp-macro-assembler.h"
@@ -4390,7 +4391,8 @@ MaybeObject* Heap::ReinitializeJSGlobalProxy(JSFunction* constructor,
 
 MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
                                            PretenureFlag pretenure) {
-  if (string.length() == 1) {
+  int length = string.length();
+  if (length == 1) {
     return Heap::LookupSingleCharacterStringFromCode(string[0]);
   }
   Object* result;
@@ -4399,11 +4401,10 @@ MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
     if (!maybe_result->ToObject(&result)) return maybe_result;
   }
 
+  isolate_->counters()->string_length_ascii()->Increment(length);
+
   // Copy the characters into the new object.
-  SeqAsciiString* string_result = SeqAsciiString::cast(result);
-  for (int i = 0; i < string.length(); i++) {
-    string_result->SeqAsciiStringSet(i, string[i]);
-  }
+  CopyChars(SeqAsciiString::cast(result)->GetChars(), string.start(), length);
   return result;
 }
 
@@ -4430,41 +4431,63 @@ MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
     if (!maybe_result->ToObject(&result)) return maybe_result;
   }
 
+  isolate_->counters()->string_length_utf8()->Increment(chars);
+
   // Convert and copy the characters into the new object.
-  String* string_result = String::cast(result);
+  SeqTwoByteString* twobyte = SeqTwoByteString::cast(result);
   decoder->Reset(string.start(), string.length());
   int i = 0;
   while (i < chars) {
     uint32_t r = decoder->GetNext();
     if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
-      string_result->Set(i++, unibrow::Utf16::LeadSurrogate(r));
-      string_result->Set(i++, unibrow::Utf16::TrailSurrogate(r));
+      twobyte->SeqTwoByteStringSet(i++, unibrow::Utf16::LeadSurrogate(r));
+      twobyte->SeqTwoByteStringSet(i++, unibrow::Utf16::TrailSurrogate(r));
     } else {
-      string_result->Set(i++, r);
+      twobyte->SeqTwoByteStringSet(i++, r);
     }
   }
   return result;
 }
 
 
+MaybeObject* Heap::AllocateStringFromLatin1Slow(Vector<const char> string,
+                                                PretenureFlag pretenure) {
+  const int length = string.length();
+  Object* raw_string;
+  MaybeObject* allocation = AllocateRawTwoByteString(length, pretenure);
+  // Hand an allocation failure straight back to the caller.
+  if (!allocation->ToObject(&raw_string)) return allocation;
+
+  isolate_->counters()->string_length_latin1()->Increment(length);
+
+  // View the source as unsigned bytes before copying them into the
+  // two-byte string, so that values above 0x7F are not treated as negative.
+  const unsigned char* source =
+      reinterpret_cast<const unsigned char*>(string.start());
+  CopyChars(SeqTwoByteString::cast(raw_string)->GetChars(), source, length);
+  return raw_string;
+}
+
+
 MaybeObject* Heap::AllocateStringFromTwoByte(Vector<const uc16> string,
                                              PretenureFlag pretenure) {
   // Check if the string is an ASCII string.
-  MaybeObject* maybe_result;
-  if (String::IsAscii(string.start(), string.length())) {
-    maybe_result = AllocateRawAsciiString(string.length(), pretenure);
-  } else {  // It's not an ASCII string.
-    maybe_result = AllocateRawTwoByteString(string.length(), pretenure);
-  }
   Object* result;
-  if (!maybe_result->ToObject(&result)) return maybe_result;
+  int length = string.length();
+  const uc16* start = string.start();
 
-  // Copy the characters into the new object, which may be either ASCII or
-  // UTF-16.
-  String* string_result = String::cast(result);
-  for (int i = 0; i < string.length(); i++) {
-    string_result->Set(i, string[i]);
+  if (String::IsAscii(start, length)) {
+    MaybeObject* maybe_result = AllocateRawAsciiString(length, pretenure);
+    if (!maybe_result->ToObject(&result)) return maybe_result;
+    isolate_->counters()->string_length_ascii()->Increment(length);
+    CopyChars(SeqAsciiString::cast(result)->GetChars(), start, length);
+  } else {  // It's not an ASCII string.
+    MaybeObject* maybe_result = AllocateRawTwoByteString(length, pretenure);
+    if (!maybe_result->ToObject(&result)) return maybe_result;
+    isolate_->counters()->string_length_utf16()->Increment(length);
+    CopyChars(SeqTwoByteString::cast(result)->GetChars(), start, length);
   }
+
   return result;
 }
 
index ba340a24e6a2dc11d044d4546bfca6e515aded15..a27fb0c0efdb3dd09edd8f3aab5166ec8afd11ef 100644 (file)
@@ -679,10 +679,18 @@ class Heap {
       PretenureFlag pretenure = NOT_TENURED);
   MUST_USE_RESULT inline MaybeObject* AllocateStringFromUtf8(
       Vector<const char> str,
-      PretenureFlag pretenure = NOT_TENURED);
+      PretenureFlag pretenure = NOT_TENURED,
+      String::AsciiHint ascii_hint = String::MAYBE_ASCII);
   MUST_USE_RESULT MaybeObject* AllocateStringFromUtf8Slow(
       Vector<const char> str,
       PretenureFlag pretenure = NOT_TENURED);
+  MUST_USE_RESULT inline MaybeObject* AllocateStringFromLatin1(
+      Vector<const char> str,
+      PretenureFlag pretenure = NOT_TENURED,
+      String::AsciiHint ascii_hint = String::MAYBE_ASCII);
+  MUST_USE_RESULT MaybeObject* AllocateStringFromLatin1Slow(
+      Vector<const char> str,
+      PretenureFlag pretenure = NOT_TENURED);
   MUST_USE_RESULT MaybeObject* AllocateStringFromTwoByte(
       Vector<const uc16> str,
       PretenureFlag pretenure = NOT_TENURED);
index 720d96e0ea22d891ee22f4668b2cd4a866dc8d91..1f74a1be76209f837dca33bf08cca38d1a83490d 100644 (file)
@@ -7128,6 +7128,10 @@ class String: public HeapObject {
     friend class String;
   };
 
+  // Hint from the caller about whether the characters are all ASCII.
+  // MAYBE_ASCII makes no promise either way.
+  enum AsciiHint { MAYBE_ASCII = 0, ASCII = 1, NOT_ASCII = 2 };
+
   // Get and set the length of the string.
   inline int length();
   inline void set_length(int value);
index fad3454812377a159733766f87f49a930e362e5d..76be0a224301268f6532a690e91d6e2f6a7dfad7 100644 (file)
@@ -252,6 +252,10 @@ namespace internal {
   SC(string_add_make_two_char, V8.StringAddMakeTwoChar)               \
   SC(string_compare_native, V8.StringCompareNative)                   \
   SC(string_compare_runtime, V8.StringCompareRuntime)                 \
+  SC(string_length_utf8, V8.StringLengthUtf8)                         \
+  SC(string_length_ascii, V8.StringLengthAscii)                       \
+  SC(string_length_latin1, V8.StringLengthLatin1)                     \
+  SC(string_length_utf16, V8.StringLengthUtf16)                       \
   SC(regexp_entry_runtime, V8.RegExpEntryRuntime)                     \
   SC(regexp_entry_native, V8.RegExpEntryNative)                       \
   SC(number_to_string_native, V8.NumberToStringNative)                \
index 689286f904c7a5a9c8f7d6e2d938d7e337d7b921..4e1948630e0568655a1ff0fc9090416400373a0a 100644 (file)
@@ -714,9 +714,294 @@ TEST(ExternalStringWithDisposeHandling) {
 }
 
 
+static void TestNewLatin1String(int encoding1, int encoding2) {
+  const char* ascii_bytes = "ASCII 123";
+  const char* ascii_js = "'ASCII 123'";
+  const char* latin1_bytes = "Non-ASCII \xAB\xCD\xEF";
+  const char* latin1_js = "'Non-ASCII \\u00ab\\u00cd\\u00ef'";
+  int ascii_len = strlen(ascii_bytes);
+  int latin1_len = strlen(latin1_bytes);
+
+  Local<String> ascii_str = String::New(ascii_bytes, ascii_len, encoding1);
+  Local<String> latin1_str = String::New(latin1_bytes, latin1_len, encoding2);
+  Local<String> ascii_expected = CompileRun(ascii_js)->ToString();
+  Local<String> latin1_expected = CompileRun(latin1_js)->ToString();
+
+  // With NOT_ASCII_HINT even pure ASCII input must become a two-byte string.
+  if ((encoding1 & String::NOT_ASCII_HINT) == 0) {
+    CHECK(v8::Utils::OpenHandle(*ascii_str)->IsSeqAsciiString());
+  } else {
+    CHECK(v8::Utils::OpenHandle(*ascii_str)->IsSeqTwoByteString());
+  }
+  CHECK(v8::Utils::OpenHandle(*ascii_expected)->IsSeqAsciiString());
+  CHECK(v8::Utils::OpenHandle(*latin1_str)->IsSeqTwoByteString());
+  CHECK(v8::Utils::OpenHandle(*latin1_expected)->IsSeqTwoByteString());
+  CHECK(ascii_expected->Equals(ascii_str));
+  CHECK(latin1_expected->Equals(latin1_str));
+}
+
+
+TEST(CreateLatin1String) {
+  v8::HandleScope scope;
+  LocalContext env;
+
+  // Latin1 with no hint, with the ASCII hint, and with the not-ASCII hint.
+  const int no_hint = String::LATIN1_ENCODING;
+  const int ascii_hint = String::LATIN1_ENCODING | String::ASCII_HINT;
+  const int not_ascii_hint = String::LATIN1_ENCODING | String::NOT_ASCII_HINT;
+  TestNewLatin1String(no_hint, no_hint);
+  TestNewLatin1String(ascii_hint, no_hint);
+  TestNewLatin1String(not_ascii_hint, not_ascii_hint);
+}
+
+
+TEST(ExternalStringEncoding) {
+  v8::HandleScope scope;
+  LocalContext env;
+  int counter = 0;
+
+  { HandleScope scope;
+    uint16_t* wide_ascii = AsciiToTwoByteString("two byte ascii");
+    uint16_t* wide = AsciiToTwoByteString("two byte non-ascii \x99");
+    char* narrow = i::StrDup("ascii");
+
+    TestResource* wide_res = new TestResource(wide, &counter);
+    TestResource* wide_ascii_res = new TestResource(wide_ascii, &counter);
+    TestAsciiResource* narrow_res = new TestAsciiResource(narrow, &counter);
+
+    Local<String> wide_str = String::NewExternal(wide_res);
+    Local<String> wide_ascii_str = String::NewExternal(wide_ascii_res);
+    Local<String> narrow_str = String::NewExternal(narrow_res);
+    Local<String> plain_str = v8_str("not external");
+
+    // The reported encoding combines the storage encoding with an ASCII
+    // hint; a string that is not external reports INVALID_ENCODING.
+    CHECK_EQ(String::UTF_16_ENCODING | String::NOT_ASCII_HINT,
+             wide_str->GetExternalStringEncoding());
+    CHECK_EQ(String::UTF_16_ENCODING | String::ASCII_HINT,
+             wide_ascii_str->GetExternalStringEncoding());
+    CHECK_EQ(String::LATIN1_ENCODING | String::ASCII_HINT,
+             narrow_str->GetExternalStringEncoding());
+    CHECK_EQ(String::INVALID_ENCODING, plain_str->GetExternalStringEncoding());
+
+    CHECK_EQ(wide_res, wide_str->GetExternalStringResource());
+    CHECK_EQ(wide_ascii_res, wide_ascii_str->GetExternalStringResourceBase());
+    CHECK_EQ(narrow_res, narrow_str->GetExternalStringResourceBase());
+
+    // No resource should have been disposed yet.
+    CHECK_EQ(0, counter);
+  }
+
+  // A full GC is expected to dispose all three external resources, each of
+  // which should then have incremented counter.
+  HEAP->CollectAllGarbage(i::Heap::kNoGCFlags);
+
+  CHECK_EQ(3, counter);
+}
+
+
+TEST(WriteLatin1String) {
+  HandleScope scope;
+  LocalContext env;
+  const char* ascii_chars = "latin1 ascii";
+  const char* latin1_chars = "\x99 latin1 non-ascii \xF8";
+  const char* concat_chars = "latin1 ascii\x99 latin1 non-ascii \xF8";
+  const char* sub_chars = "latin1 non-ascii \xF8";
+
+  // Build a sequential ASCII, a sequential two-byte, a cons and a sliced
+  // string, so that WriteLatin1 is exercised on each representation.
+  Local<String> ascii_str = String::New(
+      ascii_chars, String::kUndefinedLength, String::LATIN1_ENCODING);
+  Local<String> latin1_str = String::New(
+      latin1_chars, String::kUndefinedLength, String::LATIN1_ENCODING);
+  Local<String> concat_str = String::Concat(ascii_str, latin1_str);
+  Local<String> sub_str = v8::Utils::ToLocal(FACTORY->NewSubString(
+      v8::Utils::OpenHandle(*latin1_str), 2, latin1_str->Length()));
+
+  CHECK(v8::Utils::OpenHandle(*ascii_str)->IsSeqAsciiString());
+  CHECK(v8::Utils::OpenHandle(*latin1_str)->IsSeqTwoByteString());
+  CHECK(v8::Utils::OpenHandle(*concat_str)->IsConsString());
+  CHECK(v8::Utils::OpenHandle(*sub_str)->IsSlicedString());
+
+  // By default WriteLatin1 returns the number of characters written and
+  // null-terminates the buffer, which is what strcmp relies on here.
+  char buffer[64];
+  CHECK_EQ(strlen(ascii_chars), ascii_str->WriteLatin1(buffer));
+  CHECK_EQ(0, strcmp(ascii_chars, buffer));
+  CHECK_EQ(strlen(latin1_chars), latin1_str->WriteLatin1(buffer));
+  CHECK_EQ(0, strcmp(latin1_chars, buffer));
+  CHECK_EQ(strlen(concat_chars), concat_str->WriteLatin1(buffer));
+  CHECK_EQ(0, strcmp(concat_chars, buffer));
+  CHECK_EQ(strlen(sub_chars), sub_str->WriteLatin1(buffer));
+  CHECK_EQ(0, strcmp(sub_chars, buffer));
+
+  // With NO_NULL_TERMINATION no terminator is written, so against the
+  // 0x1-filled buffer strcmp only matches once one is stored by hand.
+  memset(buffer, 0x1, sizeof(buffer));
+  CHECK_EQ(strlen(latin1_chars),
+           latin1_str->WriteLatin1(buffer, 0, String::kUndefinedLength,
+                                   String::NO_NULL_TERMINATION));
+  CHECK_EQ(0, strncmp(latin1_chars, buffer, strlen(latin1_chars)));
+  CHECK_NE(0, strcmp(latin1_chars, buffer));
+  buffer[strlen(latin1_chars)] = '\0';
+  CHECK_EQ(0, strcmp(latin1_chars, buffer));
+
+  // Writing from offset 2 skips the first two characters of the source.
+  CHECK_EQ(strlen(latin1_chars) - 2, latin1_str->WriteLatin1(buffer, 2));
+  CHECK_EQ(0, strncmp(latin1_chars + 2, buffer, strlen(latin1_chars)));
+}
+
+
+// External Latin1 string resource for tests.  Its destructor releases the
+// character array and increments *counter when a counter was supplied.
+class TestLatin1Resource: public String::ExternalLatin1StringResource {
+ public:
+  explicit TestLatin1Resource(const char* data, int* counter = NULL)
+    : data_(data), length_(strlen(data)), counter_(counter) { }
+
+  // Releases the array via i::DeleteArray, then bumps the optional counter.
+  ~TestLatin1Resource() {
+    i::DeleteArray(data_);
+    if (counter_ == NULL) return;
+    ++*counter_;
+  }
+
+  const char* data() const { return data_; }
+  size_t length() const { return length_; }
+
+ private:
+  const char* data_;   // Released in the destructor.
+  size_t length_;      // strlen(data) taken at construction time.
+  int* counter_;       // May be NULL.
+};
+
+
+TEST(ExternalLatin1String) {
+  HandleScope scope;
+  LocalContext env;
+  int counter = 0;
+
+  { HandleScope scope;
+    char* latin1_ascii_a = i::StrDup("latin1 ascii a");
+    char* latin1_ascii_b = i::StrDup("latin1 ascii b");
+    char* latin1_a = i::StrDup("latin non-ascii \xAA");
+    char* latin1_b = i::StrDup("latin non-ascii \xBB");
+
+    TestLatin1Resource* latin1_ascii_a_resource =
+        new TestLatin1Resource(latin1_ascii_a, &counter);
+    TestLatin1Resource* latin1_ascii_b_resource =
+        new TestLatin1Resource(latin1_ascii_b, &counter);
+    TestLatin1Resource* latin1_a_resource =
+        new TestLatin1Resource(latin1_a, &counter);
+    TestLatin1Resource* latin1_b_resource =
+        new TestLatin1Resource(latin1_b, &counter);
+
+    Local<String> latin1_ascii_a_external =
+        String::NewExternal(latin1_ascii_a_resource);
+    Local<String> latin1_ascii_b_external = String::NewExternal(
+        latin1_ascii_b_resource,
+        String::LATIN1_ENCODING | String::ASCII_HINT);
+    CHECK_EQ(0, counter);
+
+    // A non-ASCII Latin1 resource is converted into a sequential two-byte
+    // string right away and the resource is disposed (counter becomes 2).
+    Local<String> latin1_a_external = String::NewExternal(latin1_a_resource);
+    Local<String> latin1_b_external = String::NewExternal(
+        latin1_b_resource, String::LATIN1_ENCODING | String::NOT_ASCII_HINT);
+    CHECK(v8::Utils::OpenHandle(*latin1_a_external)->IsSeqTwoByteString());
+    CHECK(v8::Utils::OpenHandle(*latin1_b_external)->IsSeqTwoByteString());
+    CHECK_EQ(2, counter);
+
+    CHECK_EQ(latin1_ascii_a_external->GetExternalStringEncoding(),
+             (v8::String::LATIN1_ENCODING | v8::String::ASCII_HINT));
+    CHECK_EQ(latin1_ascii_b_external->GetExternalStringEncoding(),
+             (v8::String::LATIN1_ENCODING | v8::String::ASCII_HINT));
+    CHECK_EQ(latin1_a_external->GetExternalStringEncoding(),
+             v8::String::INVALID_ENCODING);
+    CHECK_EQ(latin1_b_external->GetExternalStringEncoding(),
+             v8::String::INVALID_ENCODING);
+
+    CHECK_EQ(latin1_ascii_a_resource,
+             latin1_ascii_a_external->GetExternalStringResourceBase());
+    CHECK_EQ(latin1_ascii_b_resource,
+             latin1_ascii_b_external->GetExternalStringResourceBase());
+  }
+
+  HEAP->CollectAllGarbage(i::Heap::kNoGCFlags);
+  CHECK_EQ(4, counter);
+}
+
+
+TEST(ExternalizeLatin1String) {
+  HandleScope scope;
+  LocalContext env;
+  int counter = 0;
+
+  { HandleScope scope;
+    Local<String> latin1_a_ascii = String::New("latin1 a ascii");
+    Local<String> latin1_b_ascii = String::New("latin1 b ascii");
+    Local<String> latin1 = String::New("latin1 non-ascii \xAA",
+                                        String::kUndefinedLength,
+                                        String::LATIN1_ENCODING);
+
+    CHECK(v8::Utils::OpenHandle(*latin1_a_ascii)->IsSeqAsciiString());
+    CHECK(v8::Utils::OpenHandle(*latin1_b_ascii)->IsSeqAsciiString());
+    CHECK(v8::Utils::OpenHandle(*latin1)->IsSeqTwoByteString());
+
+    // Two new-space GCs promote the strings to old space before externalizing.
+    HEAP->CollectGarbage(i::NEW_SPACE);
+    HEAP->CollectGarbage(i::NEW_SPACE);
+
+    char* latin1_a_ascii_chars = i::NewArray<char>(64);
+    uint16_t* latin1_b_ascii_chars = i::NewArray<uint16_t>(64);
+    uint16_t* latin1_chars = i::NewArray<uint16_t>(64);
+
+    latin1_a_ascii->WriteLatin1(latin1_a_ascii_chars);
+    latin1_b_ascii->Write(latin1_b_ascii_chars);
+    latin1->Write(latin1_chars);
+
+    TestLatin1Resource* latin1_a_ascii_resource =
+        new TestLatin1Resource(latin1_a_ascii_chars, &counter);
+    TestResource* latin1_b_ascii_resource =
+        new TestResource(latin1_b_ascii_chars, &counter);
+    TestResource* latin1_resource =
+        new TestResource(latin1_chars, &counter);
+
+    CHECK(latin1_a_ascii->MakeExternal(latin1_a_ascii_resource));
+    CHECK(latin1_a_ascii->IsExternalAscii());
+    CHECK_EQ(latin1_a_ascii->GetExternalStringEncoding(),
+             (v8::String::LATIN1_ENCODING | v8::String::ASCII_HINT));
+    CHECK_EQ(latin1_a_ascii_resource,
+             latin1_a_ascii->GetExternalStringResourceBase());
+    CHECK(latin1_a_ascii->Equals(String::New("latin1 a ascii")));
+
+    CHECK(latin1_b_ascii->MakeExternal(latin1_b_ascii_resource));
+    CHECK(latin1_b_ascii->IsExternal());
+    CHECK_EQ(latin1_b_ascii->GetExternalStringEncoding(),
+             (v8::String::UTF_16_ENCODING | v8::String::ASCII_HINT));
+    CHECK_EQ(latin1_b_ascii_resource,
+             latin1_b_ascii->GetExternalStringResourceBase());
+    CHECK(latin1_b_ascii->Equals(String::New("latin1 b ascii")));
+
+    CHECK(latin1->MakeExternal(latin1_resource));
+    CHECK(latin1->IsExternal());
+    CHECK_EQ(latin1->GetExternalStringEncoding(),
+             (v8::String::UTF_16_ENCODING | v8::String::NOT_ASCII_HINT));
+    CHECK_EQ(latin1_resource,
+             latin1->GetExternalStringResourceBase());
+    CHECK(latin1->Equals(String::New("latin1 non-ascii \xAA",
+                                     String::kUndefinedLength,
+                                     String::LATIN1_ENCODING)));
+  }
+
+  HEAP->CollectAllGarbage(i::Heap::kNoGCFlags);
+  CHECK_EQ(3, counter);
+}
+
+
 THREADED_TEST(StringConcat) {
   {
-    v8::HandleScope scope;
+    HandleScope scope;
     LocalContext env;
     const char* one_byte_string_1 = "function a_times_t";
     const char* two_byte_string_1 = "wo_plus_b(a, b) {return ";