PRESERVE_ASCII_NULL = 4
};
- // 16-bit character codes.
+
+ // Encoding identifiers used by the String API. The small values name the
+ // encoding itself; ASCII_HINT and NOT_ASCII_HINT are separate high bits that
+ // are OR-ed onto an encoding value.
+ enum StringEncoding {
+ INVALID_ENCODING = 0,
+ UTF_8_ENCODING = 1,
+ LATIN1_ENCODING = 2,
+ UTF_16_ENCODING = 3,
+
+ // Hint bits (bit 16 and bit 17), combined with an encoding via '|'.
+ ASCII_HINT = 1 << 16,
+ NOT_ASCII_HINT = 1 << 17
+ };
+
+ // Selects the encoding identifier (values 0..3) from a combined
+ // encoding-plus-hint value.
+ static const int kStringEncodingMask = 3;
+ // Selects the ASCII_HINT / NOT_ASCII_HINT bits from a combined value.
+ static const int kAsciiHintMask = String::ASCII_HINT | String::NOT_ASCII_HINT;
+
+ // Default for 'length' parameters: the callee determines the length itself.
+ static const int kUndefinedLength = -1;
+
+
+ // 16-bit UTF16 code units. PRESERVE_ASCII_NULL is not supported as option,
+ // null-characters are never converted to spaces.
V8EXPORT int Write(uint16_t* buffer,
int start = 0,
- int length = -1,
+ int length = kUndefinedLength,
int options = NO_OPTIONS) const;
- // ASCII characters.
+
+ // ASCII characters. Null-characters are converted to spaces unless
+ // PRESERVE_ASCII_NULL is set as option.
V8EXPORT int WriteAscii(char* buffer,
int start = 0,
- int length = -1,
+ int length = kUndefinedLength,
int options = NO_OPTIONS) const;
- // UTF-8 encoded characters.
+
+ // Latin1 characters. PRESERVE_ASCII_NULL is not supported as option,
+ // null-characters are never converted to spaces.
+ V8EXPORT int WriteLatin1(char* buffer,
+ int start = 0,
+ int length = kUndefinedLength,
+ int options = NO_OPTIONS) const;
+
+ // UTF-8 encoded characters. PRESERVE_ASCII_NULL is not supported as option,
+ // null-characters are never converted to spaces.
V8EXPORT int WriteUtf8(char* buffer,
- int length = -1,
+ int length = kUndefinedLength,
int* nchars_ref = NULL,
int options = NO_OPTIONS) const;
void operator=(const ExternalStringResourceBase&);
friend class v8::internal::Heap;
+ friend class v8::String;
};
/**
ExternalAsciiStringResource() {}
};
+ /**
+ * An ExternalLatin1StringResource is a wrapper around a Latin1-encoded
+ * string buffer that resides outside V8's heap. For usage in V8, a Latin1
+ * string is converted to ASCII or two-byte string depending on whether
+ * it contains non-ASCII characters.
+ *
+ * The class declares no members of its own; it only gives Latin1 resources
+ * a type distinct from ExternalAsciiStringResource.
+ */
+ class V8EXPORT ExternalLatin1StringResource
+ : public ExternalAsciiStringResource {
+ };
+
/**
* Get the ExternalStringResource for an external string. Returns
* NULL if IsExternal() doesn't return true.
V8EXPORT const ExternalAsciiStringResource* GetExternalAsciiStringResource()
const;
+ /**
+ * If the string is external, return its encoding (Latin1 or UTF16)
+ * and possibly a hint on whether the content is ASCII.
+ * Return String::INVALID_ENCODING otherwise.
+ */
+ inline int GetExternalStringEncoding() const;
+
+
+ /**
+ * Return the resource of the external string regardless of encoding.
+ * Call this only after having made sure that the string is indeed external!
+ */
+ inline ExternalStringResourceBase* GetExternalStringResourceBase() const;
+
static inline String* Cast(v8::Value* obj);
/**
- * Allocates a new string from either UTF-8 encoded or ASCII data.
- * The second parameter 'length' gives the buffer length.
- * If the data is UTF-8 encoded, the caller must
- * be careful to supply the length parameter.
- * If it is not given, the function calls
- * 'strlen' to determine the buffer length, it might be
- * wrong if 'data' contains a null character.
+ * Allocates a new string from either UTF-8 or Latin1-encoded data.
+ * The second parameter 'length' gives the buffer length. If the data may
+ * contain zero bytes, the caller must be careful to supply the length
+ * parameter. If it is not given, the function calls 'strlen' to determine
+ * the buffer length, it might be wrong if 'data' contains a null character.
+ * The third parameter specifies the encoding, which may include a hint
+ * whether the string contains ASCII characters. In the case of Latin1, the
+ * appropriate internal representation (UTF16 or ASCII) is chosen.
*/
- V8EXPORT static Local<String> New(const char* data, int length = -1);
+ V8EXPORT static Local<String> New(const char* data,
+ int length = kUndefinedLength,
+ int encoding = UTF_8_ENCODING);
- /** Allocates a new string from 16-bit character codes.*/
- V8EXPORT static Local<String> New(const uint16_t* data, int length = -1);
+ /** Allocates a new string from 16-bit UTF-16 code units.*/
+ V8EXPORT static Local<String> New(const uint16_t* data,
+ int length = kUndefinedLength);
/** Creates a symbol. Returns one if it exists already.*/
- V8EXPORT static Local<String> NewSymbol(const char* data, int length = -1);
+ V8EXPORT static Local<String> NewSymbol(const char* data,
+ int length = kUndefinedLength,
+ int encoding = UTF_8_ENCODING);
/**
* Creates a new string by concatenating the left and the right strings
* this function should not otherwise delete or modify the resource. Neither
* should the underlying buffer be deallocated or modified except through the
* destructor of the external string resource.
- */ V8EXPORT static Local<String> NewExternal(
+ */
+ V8EXPORT static Local<String> NewExternal(
ExternalAsciiStringResource* resource);
/**
*/
V8EXPORT bool MakeExternal(ExternalAsciiStringResource* resource);
+
+ /**
+ * Creates a new external string using the Latin1-encoded data defined in the
+ * given resource. When the external string is no longer live on V8's heap
+ * the resource will be disposed by calling its Dispose method. The caller of
+ * this function should not otherwise delete or modify the resource. Neither
+ * should the underlying buffer be deallocated or modified except through the
+ * destructor of the external string resource.
+ * If the data contains a non-ASCII character, the string is created as a new
+ * string object on the V8 heap and the Dispose method is called on the
+ * resource immediately. This is because V8 is unable to handle non-ASCII
+ * Latin1-encoded strings internally.
+ */
+ V8EXPORT static Local<String> NewExternal(
+ ExternalLatin1StringResource* resource,
+ int encoding = String::LATIN1_ENCODING);
+
+
/**
* Returns true if this string can be made external.
*/
/** Creates an undetectable string from the supplied ASCII or UTF-8 data.*/
V8EXPORT static Local<String> NewUndetectable(const char* data,
- int length = -1);
+ int length = kUndefinedLength,
+ int encoding = UTF_8_ENCODING);
- /** Creates an undetectable string from the supplied 16-bit character codes.*/
+ /** Creates an undetectable string from the supplied 16-bit UTF16 code units.
+ */
V8EXPORT static Local<String> NewUndetectable(const uint16_t* data,
- int length = -1);
+ int length = kUndefinedLength);
/**
* Converts an object to a UTF-8-encoded character array. Useful if
};
private:
- V8EXPORT void VerifyExternalStringResource(ExternalStringResource* val) const;
+ V8EXPORT void VerifyExternalStringEncoding(int encoding) const;
+ V8EXPORT void VerifyExternalStringResourceBase(
+ ExternalStringResourceBase* val) const;
V8EXPORT static void CheckCast(v8::Value* obj);
};
static const int kJSObjectHeaderSize = 3 * kApiPointerSize;
static const int kFullStringRepresentationMask = 0x07;
static const int kExternalTwoByteRepresentationTag = 0x02;
+ static const int kExternalAsciiRepresentationTag = 0x06;
+ static const int kExternalAsciiDataHintMask = 0x08;
+ static const int kExternalAsciiDataHintTag = 0x08;
static const int kIsolateStateOffset = 0;
static const int kIsolateEmbedderDataOffset = 1 * kApiPointerSize;
}
}
- static inline bool IsExternalTwoByteString(int instance_type) {
- int representation = (instance_type & kFullStringRepresentationMask);
- return representation == kExternalTwoByteRepresentationTag;
- }
-
static inline bool IsInitialized(v8::Isolate* isolate) {
uint8_t* addr = reinterpret_cast<uint8_t*>(isolate) + kIsolateStateOffset;
return *reinterpret_cast<int*>(addr) == 1;
+// Returns the resource of this string when its instance type carries the
+// external two-byte representation tag, and NULL otherwise.
String::ExternalStringResource* String::GetExternalStringResource() const {
typedef internal::Object O;
typedef internal::Internals I;
+ String::ExternalStringResource* result = NULL;
O* obj = *reinterpret_cast<O**>(const_cast<String*>(this));
- String::ExternalStringResource* result;
- if (I::IsExternalTwoByteString(I::GetInstanceType(obj))) {
- void* value = I::ReadField<void*>(obj, I::kStringResourceOffset);
- result = reinterpret_cast<String::ExternalStringResource*>(value);
- } else {
- result = NULL;
+ // Only the representation bits are compared; the resource itself is fetched
+ // through the encoding-independent accessor.
+ if ((I::GetInstanceType(obj) & I::kFullStringRepresentationMask) ==
+ I::kExternalTwoByteRepresentationTag) {
+ result = reinterpret_cast<String::ExternalStringResource*>(
+ GetExternalStringResourceBase());
}
+ return result;
+}
+
+
+// Classifies this string by the representation and ASCII-data-hint bits of
+// its instance type. The result is an encoding (LATIN1_ENCODING or
+// UTF_16_ENCODING) OR-ed with a hint bit, or INVALID_ENCODING when the bits
+// match none of the external-string tags handled below.
+int String::GetExternalStringEncoding() const {
+  typedef internal::Object O;
+  typedef internal::Internals I;
+  static const int kRepresentationAndHintMask =
+      I::kFullStringRepresentationMask | I::kExternalAsciiDataHintMask;
+
+  O* heap_object = *reinterpret_cast<O**>(const_cast<String*>(this));
+  const int type_bits =
+      I::GetInstanceType(heap_object) & kRepresentationAndHintMask;
+
+  // Anything that matches no tag below is reported as invalid.
+  int encoding = INVALID_ENCODING;
+  if (type_bits == (I::kExternalTwoByteRepresentationTag |
+                    I::kExternalAsciiDataHintTag)) {
+    encoding = UTF_16_ENCODING | ASCII_HINT;
+  } else if (type_bits == I::kExternalTwoByteRepresentationTag) {
+    encoding = UTF_16_ENCODING | NOT_ASCII_HINT;
+  } else if (type_bits == I::kExternalAsciiRepresentationTag) {
+    encoding = LATIN1_ENCODING | ASCII_HINT;
+  }
+#ifdef V8_ENABLE_CHECKS
+  VerifyExternalStringEncoding(encoding);
+#endif
+  return encoding;
+}
+
+
+// Reads the pointer stored at kStringResourceOffset and returns it as an
+// ExternalStringResourceBase. No check that the string is external is made
+// here (outside of V8_ENABLE_CHECKS builds), so the caller has to establish
+// that first.
+String::ExternalStringResourceBase* String::GetExternalStringResourceBase()
+ const {
+ typedef internal::Object O;
+ typedef internal::Internals I;
+ O* obj = *reinterpret_cast<O**>(const_cast<String*>(this));
+ void* value = I::ReadField<void*>(obj, I::kStringResourceOffset);
+ ExternalStringResourceBase* result =
+ reinterpret_cast<String::ExternalStringResourceBase*>(value);
#ifdef V8_ENABLE_CHECKS
- VerifyExternalStringResource(result);
+ VerifyExternalStringResourceBase(result);
#endif
return result;
}
int string_length = str->length();
if (str->IsAsciiRepresentation()) {
int len;
- if (capacity == -1) {
+ if (capacity == kUndefinedLength) {
capacity = str->length() + 1;
len = string_length;
} else {
return len;
}
- if (capacity == -1 || capacity / 3 >= string_length) {
+ if (capacity == kUndefinedLength || capacity / 3 >= string_length) {
int32_t previous = unibrow::Utf16::kNoPreviousCharacter;
const int kMaxRecursion = 100;
int utf8_bytes =
int utf8_bytes = i::Utf8Length(str);
if ((options & NO_NULL_TERMINATION) == 0) utf8_bytes++;
if (utf8_bytes <= capacity) {
- return WriteUtf8(buffer, -1, nchars_ref, options);
+ return WriteUtf8(buffer, kUndefinedLength, nchars_ref, options);
}
}
int pos = 0;
int nchars = 0;
int previous = unibrow::Utf16::kNoPreviousCharacter;
- for (i = 0; i < len && (capacity == -1 || pos < fast_end); i++) {
+ for (i = 0;
+ i < len && (capacity == kUndefinedLength || pos < fast_end);
+ i++) {
i::uc32 c = write_input_buffer.GetNext();
int written = unibrow::Utf8::Encode(buffer + pos, c, previous);
pos += written;
}
if (nchars_ref != NULL) *nchars_ref = nchars;
if (!(options & NO_NULL_TERMINATION) &&
- (i == len && (capacity == -1 || pos < capacity))) {
+ (i == len && (capacity == kUndefinedLength || pos < capacity))) {
buffer[pos++] = '\0';
}
return pos;
if (IsDeadCheck(isolate, "v8::String::WriteAscii()")) return 0;
LOG_API(isolate, "String::WriteAscii");
ENTER_V8(isolate);
- ASSERT(start >= 0 && length >= -1);
+ ASSERT(start >= 0 && length >= kUndefinedLength);
i::Handle<i::String> str = Utils::OpenHandle(this);
isolate->string_tracker()->RecordWrite(str);
if (options & HINT_MANY_WRITES_EXPECTED) {
if (str->IsAsciiRepresentation()) {
// WriteToFlat is faster than using the StringInputBuffer.
- if (length == -1) length = str->length() + 1;
+ if (length == kUndefinedLength) length = str->length() + 1;
int len = i::Min(length, str->length() - start);
i::String::WriteToFlat(*str, buffer, start, start + len);
if (!(options & PRESERVE_ASCII_NULL)) {
i::StringInputBuffer& write_input_buffer = *isolate->write_input_buffer();
int end = length;
- if ((length == -1) || (length > str->length() - start)) {
+ if ((length == kUndefinedLength) || (length > str->length() - start)) {
end = str->length() - start;
}
if (end < 0) return 0;
}
+// Copies characters of this string, beginning at index |start|, into |buffer|
+// using String::WriteToFlat and returns the number of characters copied.
+//
+//   length   maximum number of characters to write; kUndefinedLength means
+//            "the whole string plus room for a terminator".
+//   options  HINT_MANY_WRITES_EXPECTED flattens the string first;
+//            NO_NULL_TERMINATION suppresses the trailing '\0'.
+//
+// A terminating '\0' is appended only when |length| leaves room for it.
+// Returns 0 without touching |buffer| when |start| lies beyond the end of the
+// string.
+int String::WriteLatin1(char* buffer,
+                        int start,
+                        int length,
+                        int options) const {
+  i::Isolate* isolate = Utils::OpenHandle(this)->GetIsolate();
+  if (IsDeadCheck(isolate, "v8::String::WriteLatin1()")) return 0;
+  LOG_API(isolate, "String::WriteLatin1");
+  ENTER_V8(isolate);
+  ASSERT(start >= 0 && length >= kUndefinedLength);
+  i::Handle<i::String> str = Utils::OpenHandle(this);
+  isolate->string_tracker()->RecordWrite(str);
+  if (options & HINT_MANY_WRITES_EXPECTED) {
+    FlattenString(str);  // Flatten the string for efficiency.
+  }
+
+  if (length == kUndefinedLength) length = str->length() + 1;
+  int len = i::Min(length, str->length() - start);
+  // A start index past the end makes |len| negative. Bail out, as Write() and
+  // WriteAscii() do, instead of copying a reversed range and storing the
+  // terminator in front of |buffer|.
+  if (len < 0) return 0;
+  i::String::WriteToFlat(*str, buffer, start, start + len);
+  if (!(options & NO_NULL_TERMINATION) && length > len) {
+    buffer[len] = '\0';
+  }
+  return len;
+}
+
+
int String::Write(uint16_t* buffer,
int start,
int length,
if (IsDeadCheck(isolate, "v8::String::Write()")) return 0;
LOG_API(isolate, "String::Write");
ENTER_V8(isolate);
- ASSERT(start >= 0 && length >= -1);
+ ASSERT(start >= 0 && length >= kUndefinedLength);
i::Handle<i::String> str = Utils::OpenHandle(this);
isolate->string_tracker()->RecordWrite(str);
if (options & HINT_MANY_WRITES_EXPECTED) {
FlattenString(str);
}
int end = start + length;
- if ((length == -1) || (length > str->length() - start) )
+ if ((length == kUndefinedLength) || (length > str->length() - start) )
end = str->length();
if (end < 0) return 0;
i::String::WriteToFlat(*str, buffer, start, end);
}
-void v8::String::VerifyExternalStringResource(
- v8::String::ExternalStringResource* value) const {
+// Consistency check for a value produced by GetExternalStringEncoding():
+// CHECKs that |encoding| agrees with what the internal string object reports
+// about itself.
+void v8::String::VerifyExternalStringEncoding(int encoding) const {
+ typedef internal::Internals I;
i::Handle<i::String> str = Utils::OpenHandle(this);
- const v8::String::ExternalStringResource* expected;
+ switch (encoding) {
+ case UTF_16_ENCODING | ASCII_HINT:
+ // The ASCII hint must be truthful; the external two-byte check below
+ // applies to this case as well.
+ CHECK(str->HasOnlyAsciiChars());
+ // Fall through
+ case UTF_16_ENCODING | NOT_ASCII_HINT :
+ CHECK(str->IsExternalTwoByteString());
+ break;
+ case LATIN1_ENCODING | ASCII_HINT:
+ CHECK(str->IsExternalAsciiString());
+ break;
+ default:
+ // Any other value must be INVALID_ENCODING on a non-external string.
+ CHECK_EQ(INVALID_ENCODING, encoding);
+ CHECK(!str->IsExternalString());
+ break;
+ }
+}
+
+
+// Consistency check for GetExternalStringResourceBase(): CHECKs that |value|
+// is the resource pointer stored in the internal external string object.
+void v8::String::VerifyExternalStringResourceBase(
+ v8::String::ExternalStringResourceBase* value) const {
+ i::Handle<i::String> str = Utils::OpenHandle(this);
+ const void* expected;
+ // We expect an external string at this point since GetExternalStringEncoding
+ // should have already been called to rule out non-external strings.
if (i::StringShape(*str).IsExternalTwoByte()) {
- const void* resource =
- i::Handle<i::ExternalTwoByteString>::cast(str)->resource();
- expected = reinterpret_cast<const ExternalStringResource*>(resource);
+ expected = i::ExternalTwoByteString::cast(*str)->resource();
} else {
- expected = NULL;
+ ASSERT(i::StringShape(*str).IsExternalAscii());
+ expected = i::ExternalAsciiString::cast(*str)->resource();
}
- CHECK_EQ(expected, value);
+
+ CHECK_EQ(expected,
+ reinterpret_cast<const ExternalStringResourceBase*>(value));
}
}
-Local<String> v8::String::New(const char* data, int length) {
+// Creates a heap string from char data that is either UTF-8 or Latin1, as
+// selected by the low bits of |encoding|; the hint bits of |encoding| are
+// passed on to the factory. A |length| of String::kUndefinedLength means the
+// data is measured with i::StrLength. Returns an empty handle for any other
+// encoding.
+static i::Handle<i::String> NewOneByteEncodedString(
+    i::Factory* factory, const char* data, int length, int encoding) {
+  typedef v8::String S;
+  static const int kAsciiHintShift = 16;
+
+  // The public hint bits, shifted down, have to coincide with the internal
+  // AsciiHint values for the cast below to be meaningful.
+  STATIC_ASSERT(i::String::MAYBE_ASCII == 0);
+  STATIC_ASSERT(i::String::ASCII ==
+                (v8::String::ASCII_HINT >> kAsciiHintShift));
+  STATIC_ASSERT(i::String::NOT_ASCII ==
+                (v8::String::NOT_ASCII_HINT >> kAsciiHintShift));
+
+  if (length == String::kUndefinedLength) length = i::StrLength(data);
+
+  ASSERT(IS_POWER_OF_TWO(encoding & S::kAsciiHintMask));
+  const i::String::AsciiHint ascii_hint =
+      static_cast<i::String::AsciiHint>(encoding >> kAsciiHintShift);
+
+  switch (encoding & S::kStringEncodingMask) {
+    case S::UTF_8_ENCODING:
+      return factory->NewStringFromUtf8(
+          i::Vector<const char>(data, length), i::NOT_TENURED, ascii_hint);
+    case S::LATIN1_ENCODING:
+      return factory->NewStringFromLatin1(
+          i::Vector<const char>(data, length), i::NOT_TENURED, ascii_hint);
+    default:  // Wrong encoding.
+      return i::Handle<i::String>();
+  }
+}
+
+
+// Creates a string from char data in the given encoding. A zero |length|
+// yields the empty string without inspecting |data|; everything else is
+// delegated to NewOneByteEncodedString.
+Local<String> v8::String::New(
+ const char* data, int length, int encoding) {
i::Isolate* isolate = i::Isolate::Current();
EnsureInitializedForIsolate(isolate, "v8::String::New()");
LOG_API(isolate, "String::New(char)");
if (length == 0) return Empty();
ENTER_V8(isolate);
- if (length == -1) length = i::StrLength(data);
- i::Handle<i::String> result =
- isolate->factory()->NewStringFromUtf8(
- i::Vector<const char>(data, length));
- return Utils::ToLocal(result);
+ return Utils::ToLocal(
+ NewOneByteEncodedString(isolate->factory(), data, length, encoding));
}
}
-Local<String> v8::String::NewUndetectable(const char* data, int length) {
+// Creates a string from char data in the given encoding and marks it as
+// undetectable. Returns an empty handle when no string could be created for
+// |encoding|.
+Local<String> v8::String::NewUndetectable(
+ const char* data, int length, int encoding) {
i::Isolate* isolate = i::Isolate::Current();
EnsureInitializedForIsolate(isolate, "v8::String::NewUndetectable()");
LOG_API(isolate, "String::NewUndetectable(char)");
ENTER_V8(isolate);
- if (length == -1) length = i::StrLength(data);
i::Handle<i::String> result =
- isolate->factory()->NewStringFromUtf8(
- i::Vector<const char>(data, length));
+ NewOneByteEncodedString(isolate->factory(), data, length, encoding);
+ // The helper hands back an empty handle for an unsupported encoding; do not
+ // dereference it.
+ if (result.is_null()) return Local<String>();
result->MarkAsUndetectable();
return Utils::ToLocal(result);
}
LOG_API(isolate, "String::New(uint16_)");
if (length == 0) return Empty();
ENTER_V8(isolate);
- if (length == -1) length = TwoByteStringLength(data);
+ if (length == kUndefinedLength) length = TwoByteStringLength(data);
i::Handle<i::String> result =
isolate->factory()->NewStringFromTwoByte(
i::Vector<const uint16_t>(data, length));
EnsureInitializedForIsolate(isolate, "v8::String::NewUndetectable()");
LOG_API(isolate, "String::NewUndetectable(uint16_)");
ENTER_V8(isolate);
- if (length == -1) length = TwoByteStringLength(data);
+ if (length == kUndefinedLength) length = TwoByteStringLength(data);
i::Handle<i::String> result =
isolate->factory()->NewStringFromTwoByte(
i::Vector<const uint16_t>(data, length));
}
-bool v8::String::MakeExternal(v8::String::ExternalStringResource* resource) {
- i::Handle<i::String> obj = Utils::OpenHandle(this);
- i::Isolate* isolate = obj->GetIsolate();
+// Shared implementation of the String::MakeExternal overloads. Returns false
+// without changing |string| when the isolate is dead, the string is already
+// external, the string tracker reports it as fresh and unused, or the heap is
+// in GC post-processing. Otherwise returns the result of
+// string->MakeExternal(resource); on success a non-symbol string is also
+// added to the heap's external string table.
+template<class StringResourceType>
+static bool MakeStringExternal(
+ i::Handle<i::String> string, StringResourceType* resource) {
+ i::Isolate* isolate = string->GetIsolate();
if (IsDeadCheck(isolate, "v8::String::MakeExternal()")) return false;
- if (i::StringShape(*obj).IsExternalTwoByte()) {
+ // Rejects every external representation, not only two-byte ones.
+ if (i::StringShape(*string).IsExternal()) {
return false; // Already an external string.
}
ENTER_V8(isolate);
- if (isolate->string_tracker()->IsFreshUnusedString(obj)) {
+ if (isolate->string_tracker()->IsFreshUnusedString(string)) {
return false;
}
if (isolate->heap()->IsInGCPostProcessing()) {
return false;
}
CHECK(resource && resource->data());
- bool result = obj->MakeExternal(resource);
- if (result && !obj->IsSymbol()) {
- isolate->heap()->external_string_table()->AddString(*obj);
+ bool result = string->MakeExternal(resource);
+ if (result && !string->IsSymbol()) {
+ isolate->heap()->external_string_table()->AddString(*string);
}
return result;
}
+// Two-byte overload: forwards this string and |resource| to the shared
+// MakeStringExternal helper.
+bool v8::String::MakeExternal(ExternalStringResource* resource) {
+  return MakeStringExternal(Utils::OpenHandle(this), resource);
+}
+
+
+// ASCII overload: asserts that the string holds only ASCII characters and
+// then forwards to the shared MakeStringExternal helper.
+bool v8::String::MakeExternal(ExternalAsciiStringResource* resource) {
+  i::Handle<i::String> obj(Utils::OpenHandle(this));
+  ASSERT(obj->HasOnlyAsciiChars());
+  return MakeStringExternal(obj, resource);
+}
+
+
Local<String> v8::String::NewExternal(
v8::String::ExternalAsciiStringResource* resource) {
i::Isolate* isolate = i::Isolate::Current();
}
-bool v8::String::MakeExternal(
- v8::String::ExternalAsciiStringResource* resource) {
- i::Handle<i::String> obj = Utils::OpenHandle(this);
- i::Isolate* isolate = obj->GetIsolate();
- if (IsDeadCheck(isolate, "v8::String::MakeExternal()")) return false;
- if (i::StringShape(*obj).IsExternalTwoByte()) {
- return false; // Already an external string.
- }
+// Creates a string from an external Latin1 resource. |encoding| must be
+// LATIN1_ENCODING, optionally OR-ed with ASCII_HINT or NOT_ASCII_HINT.
+//  - ASCII_HINT: the data is used in place as an external ASCII string (the
+//    hint is only verified by an ASSERT).
+//  - no hint: the data is scanned; pure ASCII data is used in place.
+//  - NOT_ASCII_HINT, or non-ASCII data found by the scan: the data is copied
+//    into a new heap string and the resource is disposed right away.
+Local<String> v8::String::NewExternal(ExternalLatin1StringResource* resource,
+ int encoding) {
+ typedef v8::internal::Internals I;
+ i::Isolate* isolate = i::Isolate::Current();
+ EnsureInitializedForIsolate(isolate, "v8::String::NewExternal()");
+ LOG_API(isolate, "String::NewExternal");
ENTER_V8(isolate);
- if (isolate->string_tracker()->IsFreshUnusedString(obj)) {
- return false;
- }
- if (isolate->heap()->IsInGCPostProcessing()) {
- return false;
- }
+ ASSERT((encoding & kStringEncodingMask) == LATIN1_ENCODING);
CHECK(resource && resource->data());
- bool result = obj->MakeExternal(resource);
- if (result && !obj->IsSymbol()) {
- isolate->heap()->external_string_table()->AddString(*obj);
+ // Keep the hint as an int: the masked value is 1 << 16 or 1 << 17, and
+ // storing it in a bool would reduce it to 0/1 so that neither comparison
+ // below could ever match.
+ int ascii_hint = (encoding & kAsciiHintMask);
+ i::Handle<i::String> result;
+
+ if (ascii_hint == ASCII_HINT ||
+ (ascii_hint != NOT_ASCII_HINT &&
+ i::String::IsAscii(resource->data(), resource->length()))) {
+ // Assert that the ascii hint is correct.
+ ASSERT(ascii_hint != ASCII_HINT ||
+ i::String::IsAscii(resource->data(), resource->length()));
+ result = NewExternalAsciiStringHandle(isolate, resource);
+ isolate->heap()->external_string_table()->AddString(*result);
+ } else {
+ // We cannot simply take the backing store and use it as an ASCII string,
+ // since it's not. Instead, we convert it to an internal string and dispose
+ // the external resource.
+ result = isolate->factory()->NewStringFromLatin1(
+ i::Vector<const char>(resource->data(), resource->length()),
+ i::NOT_TENURED,
+ i::String::NOT_ASCII);
+ resource->Dispose();
}
- return result;
+ return Utils::ToLocal(result);
}
}
-Local<String> v8::String::NewSymbol(const char* data, int length) {
+// Creates (or looks up) a symbol from char data in the given encoding.
+// Latin1 input that is hinted as, or found to be, non-ASCII is first
+// converted to a heap string and then looked up as a symbol; all other input
+// is looked up directly from the char vector.
+Local<String> v8::String::NewSymbol(
+ const char* data, int length, int encoding) {
i::Isolate* isolate = i::Isolate::Current();
EnsureInitializedForIsolate(isolate, "v8::String::NewSymbol()");
LOG_API(isolate, "String::NewSymbol(char)");
ENTER_V8(isolate);
- if (length == -1) length = i::StrLength(data);
- i::Handle<i::String> result =
- isolate->factory()->LookupSymbol(i::Vector<const char>(data, length));
+ if (length == kUndefinedLength) length = i::StrLength(data);
+ i::Handle<i::String> result;
+
+ ASSERT(IS_POWER_OF_TWO(encoding & kAsciiHintMask));
+ // NOTE(review): with LATIN1_ENCODING | ASCII_HINT the data is still scanned
+ // by IsAscii here, i.e. the ASCII hint does not skip the scan.
+ if (((encoding & kStringEncodingMask) == LATIN1_ENCODING) &&
+ ((encoding & kAsciiHintMask) == NOT_ASCII_HINT ||
+ !i::String::IsAscii(data, length))) {
+ result = isolate->factory()->NewStringFromLatin1(
+ i::Vector<const char>(data, length),
+ i::NOT_TENURED,
+ i::String::NOT_ASCII);
+ result = isolate->factory()->LookupSymbol(result);
+ } else { // We can handle UTF8 and ASCII strings here.
+ result =
+ isolate->factory()->LookupSymbol(i::Vector<const char>(data, length));
+ }
return Utils::ToLocal(result);
}
}
+// Handle-returning wrapper around Heap::AllocateStringFromUtf8; |ascii_hint|
+// is forwarded unchanged.
Handle<String> Factory::NewStringFromUtf8(Vector<const char> string,
- PretenureFlag pretenure) {
+ PretenureFlag pretenure,
+ String::AsciiHint ascii_hint) {
CALL_HEAP_FUNCTION(
isolate(),
- isolate()->heap()->AllocateStringFromUtf8(string, pretenure),
+ isolate()->heap()->AllocateStringFromUtf8(
+ string, pretenure, ascii_hint),
+ String);
+}
+
+
+// Handle-returning wrapper around Heap::AllocateStringFromLatin1;
+// |ascii_hint| is forwarded unchanged.
+Handle<String> Factory::NewStringFromLatin1(Vector<const char> string,
+ PretenureFlag pretenure,
+ String::AsciiHint ascii_hint) {
+ CALL_HEAP_FUNCTION(
+ isolate(),
+ isolate()->heap()->AllocateStringFromLatin1(
+ string, pretenure, ascii_hint),
String);
}
// flags in the parser.
Handle<String> NewStringFromUtf8(
Vector<const char> str,
- PretenureFlag pretenure = NOT_TENURED);
+ PretenureFlag pretenure = NOT_TENURED,
+ String::AsciiHint ascii_hint = String::MAYBE_ASCII);
+
+ Handle<String> NewStringFromLatin1(
+ Vector<const char> str,
+ PretenureFlag pretenure = NOT_TENURED,
+ String::AsciiHint ascii_hint = String::MAYBE_ASCII);
Handle<String> NewStringFromTwoByte(
Vector<const uc16> str,
MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
- PretenureFlag pretenure) {
- // Check for ASCII first since this is the common case.
- if (String::IsAscii(str.start(), str.length())) {
+ PretenureFlag pretenure,
+ String::AsciiHint ascii_hint) {
+ if ((ascii_hint == String::MAYBE_ASCII &&
+ String::IsAscii(str.start(), str.length())) ||
+ ascii_hint == String::ASCII) {
+ // Assert that the ASCII-hint is correct.
+ ASSERT(ascii_hint != String::ASCII ||
+ String::IsAscii(str.start(), str.length()));
// If the string is ASCII, we do not need to convert the characters
// since UTF8 is backwards compatible with ASCII.
return AllocateStringFromAscii(str, pretenure);
}
+// Allocates a string from Latin1 data. Input that is hinted as ASCII, or
+// that is unhinted and scans as ASCII, is allocated as an ASCII string;
+// everything else is decoded into a two-byte string by the slow path.
+MaybeObject* Heap::AllocateStringFromLatin1(Vector<const char> str,
+                                            PretenureFlag pretenure,
+                                            String::AsciiHint ascii_hint) {
+  const bool scanned_as_ascii =
+      ascii_hint == String::MAYBE_ASCII &&
+      String::IsAscii(str.start(), str.length());
+  if (!scanned_as_ascii && ascii_hint != String::ASCII) {
+    // Non-ASCII and we need to decode.
+    return AllocateStringFromLatin1Slow(str, pretenure);
+  }
+
+  // Assert that the strict ASCII-hint is correct.
+  ASSERT(ascii_hint != String::ASCII ||
+         String::IsAscii(str.start(), str.length()));
+  // Latin1 is backwards compatible with ASCII, so the characters can be
+  // taken over without conversion.
+  return AllocateStringFromAscii(str, pretenure);
+}
+
+
MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
int chars,
uint32_t hash_field) {
#include "snapshot.h"
#include "store-buffer.h"
#include "v8threads.h"
+#include "v8utils.h"
#include "vm-state-inl.h"
#if V8_TARGET_ARCH_ARM && !V8_INTERPRETED_REGEXP
#include "regexp-macro-assembler.h"
MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
PretenureFlag pretenure) {
- if (string.length() == 1) {
+ int length = string.length();
+ if (length == 1) {
return Heap::LookupSingleCharacterStringFromCode(string[0]);
}
Object* result;
if (!maybe_result->ToObject(&result)) return maybe_result;
}
+ isolate_->counters()->string_length_ascii()->Increment(length);
+
// Copy the characters into the new object.
- SeqAsciiString* string_result = SeqAsciiString::cast(result);
- for (int i = 0; i < string.length(); i++) {
- string_result->SeqAsciiStringSet(i, string[i]);
- }
+ CopyChars(SeqAsciiString::cast(result)->GetChars(), string.start(), length);
return result;
}
if (!maybe_result->ToObject(&result)) return maybe_result;
}
+ isolate_->counters()->string_length_utf8()->Increment(chars);
+
// Convert and copy the characters into the new object.
- String* string_result = String::cast(result);
+ SeqTwoByteString* twobyte = SeqTwoByteString::cast(result);
decoder->Reset(string.start(), string.length());
int i = 0;
while (i < chars) {
uint32_t r = decoder->GetNext();
if (r > unibrow::Utf16::kMaxNonSurrogateCharCode) {
- string_result->Set(i++, unibrow::Utf16::LeadSurrogate(r));
- string_result->Set(i++, unibrow::Utf16::TrailSurrogate(r));
+ twobyte->SeqTwoByteStringSet(i++, unibrow::Utf16::LeadSurrogate(r));
+ twobyte->SeqTwoByteStringSet(i++, unibrow::Utf16::TrailSurrogate(r));
} else {
- string_result->Set(i++, r);
+ twobyte->SeqTwoByteStringSet(i++, r);
}
}
return result;
}
+// Allocates a two-byte string with one character per input byte and copies
+// the Latin1 bytes into it. Returns the failed allocation result unchanged
+// if the raw string cannot be allocated.
+MaybeObject* Heap::AllocateStringFromLatin1Slow(Vector<const char> string,
+                                                PretenureFlag pretenure) {
+  const int chars = string.length();
+
+  Object* result;
+  MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure);
+  if (!maybe_result->ToObject(&result)) return maybe_result;
+
+  isolate_->counters()->string_length_latin1()->Increment(chars);
+
+  // The source is read as unsigned bytes (presumably so that values above
+  // 0x7F widen without sign extension).
+  const unsigned char* latin1_bytes =
+      reinterpret_cast<const unsigned char*>(string.start());
+  CopyChars(SeqTwoByteString::cast(result)->GetChars(), latin1_bytes, chars);
+  return result;
+}
+
+
+// Allocates a string from 16-bit code units. Input that scans as ASCII is
+// stored in an ASCII string, anything else in a two-byte string; the matching
+// string-length counter is incremented in each case. Returns the failed
+// allocation result unchanged if the raw string cannot be allocated.
MaybeObject* Heap::AllocateStringFromTwoByte(Vector<const uc16> string,
PretenureFlag pretenure) {
// Check if the string is an ASCII string.
- MaybeObject* maybe_result;
- if (String::IsAscii(string.start(), string.length())) {
- maybe_result = AllocateRawAsciiString(string.length(), pretenure);
- } else { // It's not an ASCII string.
- maybe_result = AllocateRawTwoByteString(string.length(), pretenure);
- }
Object* result;
- if (!maybe_result->ToObject(&result)) return maybe_result;
+ int length = string.length();
+ const uc16* start = string.start();
- // Copy the characters into the new object, which may be either ASCII or
- // UTF-16.
- String* string_result = String::cast(result);
- for (int i = 0; i < string.length(); i++) {
- string_result->Set(i, string[i]);
+ if (String::IsAscii(start, length)) {
+ MaybeObject* maybe_result = AllocateRawAsciiString(length, pretenure);
+ if (!maybe_result->ToObject(&result)) return maybe_result;
+ isolate_->counters()->string_length_ascii()->Increment(length);
+ CopyChars(SeqAsciiString::cast(result)->GetChars(), start, length);
+ } else { // It's not an ASCII string.
+ MaybeObject* maybe_result = AllocateRawTwoByteString(length, pretenure);
+ if (!maybe_result->ToObject(&result)) return maybe_result;
+ isolate_->counters()->string_length_utf16()->Increment(length);
+ CopyChars(SeqTwoByteString::cast(result)->GetChars(), start, length);
}
+
return result;
}
PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT inline MaybeObject* AllocateStringFromUtf8(
Vector<const char> str,
- PretenureFlag pretenure = NOT_TENURED);
+ PretenureFlag pretenure = NOT_TENURED,
+ String::AsciiHint ascii_hint = String::MAYBE_ASCII);
MUST_USE_RESULT MaybeObject* AllocateStringFromUtf8Slow(
Vector<const char> str,
PretenureFlag pretenure = NOT_TENURED);
+ MUST_USE_RESULT inline MaybeObject* AllocateStringFromLatin1(
+ Vector<const char> str,
+ PretenureFlag pretenure = NOT_TENURED,
+ String::AsciiHint ascii_hint = String::MAYBE_ASCII);
+ MUST_USE_RESULT MaybeObject* AllocateStringFromLatin1Slow(
+ Vector<const char> str,
+ PretenureFlag pretenure = NOT_TENURED);
MUST_USE_RESULT MaybeObject* AllocateStringFromTwoByte(
Vector<const uc16> str,
PretenureFlag pretenure = NOT_TENURED);
friend class String;
};
+ // Caller-supplied knowledge about whether character data is pure ASCII.
+ // With MAYBE_ASCII the allocation routines scan the data themselves; with
+ // ASCII or NOT_ASCII they rely on the hint (ASCII is re-checked only by an
+ // ASSERT).
+ enum AsciiHint { MAYBE_ASCII = 0,
+ ASCII = 1,
+ NOT_ASCII = 2 };
+
// Get and set the length of the string.
inline int length();
inline void set_length(int value);
SC(string_add_make_two_char, V8.StringAddMakeTwoChar) \
SC(string_compare_native, V8.StringCompareNative) \
SC(string_compare_runtime, V8.StringCompareRuntime) \
+  /* Per-encoding string length totals; the ascii and utf16 counters are */ \
+  /* incremented by the string length when such a string is allocated.   */ \
+  SC(string_length_utf8, V8.StringLengthUtf8) \
+  SC(string_length_ascii, V8.StringLengthAscii) \
+  SC(string_length_latin1, V8.StringLengthLatin1) \
+  SC(string_length_utf16, V8.StringLengthUtf16) \
SC(regexp_entry_runtime, V8.RegExpEntryRuntime) \
SC(regexp_entry_native, V8.RegExpEntryNative) \
SC(number_to_string_native, V8.NumberToStringNative) \
}
+// Builds one ASCII-only and one non-ASCII string from Latin1 bytes through
+// String::New, using |encoding1| for the ASCII input and |encoding2| for the
+// non-ASCII input.  Each result is checked for its internal representation
+// and compared against the same text produced by compiling a JS literal.
+static void TestNewLatin1String(int encoding1, int encoding2) {
+  const char* ascii_bytes = "ASCII 123";
+  const char* ascii_literal = "'ASCII 123'";
+  int ascii_length = strlen(ascii_bytes);
+  const char* latin1_bytes = "Non-ASCII \xAB\xCD\xEF";
+  const char* latin1_literal = "'Non-ASCII \\u00ab\\u00cd\\u00ef'";
+  int latin1_length = strlen(latin1_bytes);
+
+  Local<String> ascii_str =
+      String::New(ascii_bytes, ascii_length, encoding1);
+  Local<String> latin1_str =
+      String::New(latin1_bytes, latin1_length, encoding2);
+  Local<String> ascii_expected = CompileRun(ascii_literal)->ToString();
+  Local<String> latin1_expected = CompileRun(latin1_literal)->ToString();
+
+  // Without NOT_ASCII_HINT the ASCII input must end up as a sequential ASCII
+  // string; with the hint the test expects a two-byte string instead.
+  if ((encoding1 & String::NOT_ASCII_HINT) == 0) {
+    CHECK(v8::Utils::OpenHandle(*ascii_str)->IsSeqAsciiString());
+  } else {
+    CHECK(v8::Utils::OpenHandle(*ascii_str)->IsSeqTwoByteString());
+  }
+  CHECK(v8::Utils::OpenHandle(*ascii_expected)->IsSeqAsciiString());
+  CHECK(v8::Utils::OpenHandle(*latin1_str)->IsSeqTwoByteString());
+  CHECK(v8::Utils::OpenHandle(*latin1_expected)->IsSeqTwoByteString());
+
+  // Content must match the JS-compiled reference in both cases.
+  CHECK(ascii_expected->Equals(ascii_str));
+  CHECK(latin1_expected->Equals(latin1_str));
+}
+
+
+// Runs TestNewLatin1String with plain Latin1 encoding and with each of the
+// two ASCII hints applied.
+TEST(CreateLatin1String) {
+  v8::HandleScope scope;
+  LocalContext env;
+
+  const int plain = String::LATIN1_ENCODING;
+  const int hinted_not_ascii =
+      String::LATIN1_ENCODING | String::NOT_ASCII_HINT;
+  const int hinted_ascii = String::LATIN1_ENCODING | String::ASCII_HINT;
+
+  TestNewLatin1String(plain, plain);
+  TestNewLatin1String(hinted_ascii, plain);
+  TestNewLatin1String(hinted_not_ascii, hinted_not_ascii);
+}
+
+
+// Checks GetExternalStringEncoding() and the resource accessors for external
+// two-byte and ASCII strings as well as for a non-external string.  The
+// counter passed to the three resources must still be 0 while the strings
+// are alive and must read 3 after a full GC.
+TEST(ExternalStringEncoding) {
+  v8::HandleScope scope;
+  LocalContext env;
+  int disposed = 0;
+
+  { HandleScope inner_scope;
+    uint16_t* wide_ascii_chars = AsciiToTwoByteString("two byte ascii");
+    uint16_t* wide_chars = AsciiToTwoByteString("two byte non-ascii \x99");
+    char* narrow_chars = i::StrDup("ascii");
+
+    TestResource* wide_res = new TestResource(wide_chars, &disposed);
+    TestResource* wide_ascii_res =
+        new TestResource(wide_ascii_chars, &disposed);
+    TestAsciiResource* narrow_res =
+        new TestAsciiResource(narrow_chars, &disposed);
+
+    Local<String> wide_str = String::NewExternal(wide_res);
+    Local<String> wide_ascii_str = String::NewExternal(wide_ascii_res);
+    Local<String> narrow_str = String::NewExternal(narrow_res);
+    Local<String> heap_str = v8_str("not external");
+
+    // Reported encoding: base encoding combined with the ASCII hint bit.
+    CHECK_EQ(String::UTF_16_ENCODING | String::NOT_ASCII_HINT,
+             wide_str->GetExternalStringEncoding());
+    CHECK_EQ(String::UTF_16_ENCODING | String::ASCII_HINT,
+             wide_ascii_str->GetExternalStringEncoding());
+    CHECK_EQ(String::LATIN1_ENCODING | String::ASCII_HINT,
+             narrow_str->GetExternalStringEncoding());
+    CHECK_EQ(String::INVALID_ENCODING,
+             heap_str->GetExternalStringEncoding());
+
+    // The strings must hand back exactly the resources they were built on.
+    CHECK_EQ(wide_res, wide_str->GetExternalStringResource());
+    CHECK_EQ(wide_ascii_res,
+             wide_ascii_str->GetExternalStringResourceBase());
+    CHECK_EQ(narrow_res, narrow_str->GetExternalStringResourceBase());
+
+    CHECK_EQ(0, disposed);
+  }
+
+  HEAP->CollectAllGarbage(i::Heap::kNoGCFlags);
+
+  CHECK_EQ(3, disposed);
+}
+
+
+// Exercises String::WriteLatin1 on a sequential ASCII string, a sequential
+// two-byte string, a cons string and a sliced string, including the
+// NO_NULL_TERMINATION option and a non-zero start offset.
+TEST(WriteLatin1String) {
+  HandleScope scope;
+  LocalContext env;
+  const char* ascii_text = "latin1 ascii";
+  const char* latin1_text = "\x99 latin1 non-ascii \xF8";
+  const char* joined_text = "latin1 ascii\x99 latin1 non-ascii \xF8";
+  const char* tail_text = "latin1 non-ascii \xF8";
+
+  Local<String> ascii_str = String::New(
+      ascii_text, String::kUndefinedLength, String::LATIN1_ENCODING);
+  Local<String> latin1_str = String::New(
+      latin1_text, String::kUndefinedLength, String::LATIN1_ENCODING);
+  Local<String> cons_str = String::Concat(ascii_str, latin1_str);
+  Local<String> sliced_str = v8::Utils::ToLocal(FACTORY->NewSubString(
+      v8::Utils::OpenHandle(*latin1_str), 2, latin1_str->Length()));
+
+  // Make sure each string has the representation this test is meant to cover.
+  CHECK(v8::Utils::OpenHandle(*ascii_str)->IsSeqAsciiString());
+  CHECK(v8::Utils::OpenHandle(*latin1_str)->IsSeqTwoByteString());
+  CHECK(v8::Utils::OpenHandle(*cons_str)->IsConsString());
+  CHECK(v8::Utils::OpenHandle(*sliced_str)->IsSlicedString());
+
+  char out[64];
+  CHECK_EQ(strlen(ascii_text), ascii_str->WriteLatin1(out));
+  CHECK_EQ(0, strcmp(ascii_text, out));
+  CHECK_EQ(strlen(latin1_text), latin1_str->WriteLatin1(out));
+  CHECK_EQ(0, strcmp(latin1_text, out));
+  CHECK_EQ(strlen(joined_text), cons_str->WriteLatin1(out));
+  CHECK_EQ(0, strcmp(joined_text, out));
+  CHECK_EQ(strlen(tail_text), sliced_str->WriteLatin1(out));
+  CHECK_EQ(0, strcmp(tail_text, out));
+
+  // Pre-fill the buffer with a non-zero byte so that a terminator written
+  // despite NO_NULL_TERMINATION would make the strcmp below succeed.
+  const size_t latin1_len = strlen(latin1_text);
+  memset(out, 0x1, sizeof(out));
+  CHECK_EQ(latin1_len,
+           latin1_str->WriteLatin1(out,
+                                   0,
+                                   String::kUndefinedLength,
+                                   String::NO_NULL_TERMINATION));
+  CHECK_EQ(0, strncmp(latin1_text, out, latin1_len));
+  CHECK_NE(0, strcmp(latin1_text, out));
+  out[latin1_len] = '\0';
+  CHECK_EQ(0, strcmp(latin1_text, out));
+
+  // Writing from offset 2 skips the first two characters.
+  CHECK_EQ(latin1_len - 2, latin1_str->WriteLatin1(out, 2));
+  CHECK_EQ(0, strncmp(latin1_text + 2, out, latin1_len));
+}
+
+
+// External Latin1 string resource for tests.  It releases the character
+// array it was given with i::DeleteArray when destroyed and, if a counter
+// was supplied, increments that counter so a test can observe the disposal.
+class TestLatin1Resource: public String::ExternalLatin1StringResource {
+ public:
+  explicit TestLatin1Resource(const char* data, int* counter = NULL)
+      : data_(data),
+        length_(strlen(data)),
+        counter_(counter) {}
+
+  ~TestLatin1Resource() {
+    i::DeleteArray(data_);
+    if (counter_ == NULL) return;
+    ++*counter_;
+  }
+
+  const char* data() const { return data_; }
+
+  size_t length() const { return length_; }
+
+ private:
+  const char* data_;
+  size_t length_;  // strlen(data_), computed once in the constructor.
+  int* counter_;   // Optional disposal counter; may be NULL.
+};
+
+
+// Checks String::NewExternal with Latin1 resources, with and without an
+// explicit encoding argument.  ASCII-only content is expected to stay
+// external; non-ASCII content is expected to come back as a sequential
+// two-byte string whose resource has already been disposed.
+TEST(ExternalLatin1String) {
+  HandleScope scope;
+  LocalContext env;
+  int disposed = 0;
+
+  { HandleScope inner_scope;
+    char* ascii_chars_a = i::StrDup("latin1 ascii a");
+    char* ascii_chars_b = i::StrDup("latin1 ascii b");
+    char* latin1_chars_a = i::StrDup("latin non-ascii \xAA");
+    char* latin1_chars_b = i::StrDup("latin non-ascii \xBB");
+
+    TestLatin1Resource* ascii_res_a =
+        new TestLatin1Resource(ascii_chars_a, &disposed);
+    TestLatin1Resource* ascii_res_b =
+        new TestLatin1Resource(ascii_chars_b, &disposed);
+    TestLatin1Resource* latin1_res_a =
+        new TestLatin1Resource(latin1_chars_a, &disposed);
+    TestLatin1Resource* latin1_res_b =
+        new TestLatin1Resource(latin1_chars_b, &disposed);
+
+    Local<String> ascii_str_a = String::NewExternal(ascii_res_a);
+    Local<String> ascii_str_b = String::NewExternal(
+        ascii_res_b, String::LATIN1_ENCODING | String::ASCII_HINT);
+    CHECK_EQ(0, disposed);
+
+    // Non-ASCII Latin1 strings are internalized immediately as two-byte
+    // strings, and their external resources are disposed on the spot.
+    Local<String> latin1_str_a = String::NewExternal(latin1_res_a);
+    Local<String> latin1_str_b = String::NewExternal(
+        latin1_res_b, String::LATIN1_ENCODING | String::NOT_ASCII_HINT);
+    CHECK(v8::Utils::OpenHandle(*latin1_str_a)->IsSeqTwoByteString());
+    CHECK(v8::Utils::OpenHandle(*latin1_str_b)->IsSeqTwoByteString());
+    CHECK_EQ(2, disposed);
+
+    // Only the ASCII strings are still external.
+    CHECK_EQ(ascii_str_a->GetExternalStringEncoding(),
+             (v8::String::LATIN1_ENCODING | v8::String::ASCII_HINT));
+    CHECK_EQ(ascii_str_b->GetExternalStringEncoding(),
+             (v8::String::LATIN1_ENCODING | v8::String::ASCII_HINT));
+    CHECK_EQ(latin1_str_a->GetExternalStringEncoding(),
+             v8::String::INVALID_ENCODING);
+    CHECK_EQ(latin1_str_b->GetExternalStringEncoding(),
+             v8::String::INVALID_ENCODING);
+
+    CHECK_EQ(ascii_res_a, ascii_str_a->GetExternalStringResourceBase());
+    CHECK_EQ(ascii_res_b, ascii_str_b->GetExternalStringResourceBase());
+  }
+
+  // After a full GC the two remaining resources must be gone as well.
+  HEAP->CollectAllGarbage(i::Heap::kNoGCFlags);
+  CHECK_EQ(4, disposed);
+}
+
+
+// Externalizes existing heap strings with MakeExternal: an ASCII string onto
+// a Latin1 resource, an ASCII string onto a two-byte resource and a
+// non-ASCII string onto a two-byte resource.  Verifies the reported
+// encoding, the attached resource and the content, then checks that all
+// three resources are disposed after a full GC.
+TEST(ExternalizeLatin1String) {
+  HandleScope scope;
+  LocalContext env;
+  int disposed = 0;
+
+  { HandleScope inner_scope;
+    Local<String> narrow_target = String::New("latin1 a ascii");
+    Local<String> wide_target = String::New("latin1 b ascii");
+    Local<String> latin1_target = String::New("latin1 non-ascii \xAA",
+                                              String::kUndefinedLength,
+                                              String::LATIN1_ENCODING);
+
+    CHECK(v8::Utils::OpenHandle(*narrow_target)->IsSeqAsciiString());
+    CHECK(v8::Utils::OpenHandle(*wide_target)->IsSeqAsciiString());
+    CHECK(v8::Utils::OpenHandle(*latin1_target)->IsSeqTwoByteString());
+
+    // Run GC twice to put those strings into old space for externalizing.
+    HEAP->CollectGarbage(i::NEW_SPACE);
+    HEAP->CollectGarbage(i::NEW_SPACE);
+
+    // Copy each string's characters into a buffer that backs its resource.
+    char* narrow_chars = i::NewArray<char>(64);
+    uint16_t* wide_chars = i::NewArray<uint16_t>(64);
+    uint16_t* latin1_chars = i::NewArray<uint16_t>(64);
+
+    narrow_target->WriteLatin1(narrow_chars);
+    wide_target->Write(wide_chars);
+    latin1_target->Write(latin1_chars);
+
+    TestLatin1Resource* narrow_res =
+        new TestLatin1Resource(narrow_chars, &disposed);
+    TestResource* wide_res = new TestResource(wide_chars, &disposed);
+    TestResource* latin1_res = new TestResource(latin1_chars, &disposed);
+
+    // ASCII string backed by a Latin1 resource.
+    CHECK(narrow_target->MakeExternal(narrow_res));
+    CHECK(narrow_target->IsExternalAscii());
+    CHECK_EQ(narrow_target->GetExternalStringEncoding(),
+             (v8::String::LATIN1_ENCODING | v8::String::ASCII_HINT));
+    CHECK_EQ(narrow_res, narrow_target->GetExternalStringResourceBase());
+    CHECK(narrow_target->Equals(String::New("latin1 a ascii")));
+
+    // ASCII string backed by a two-byte resource.
+    CHECK(wide_target->MakeExternal(wide_res));
+    CHECK(wide_target->IsExternal());
+    CHECK_EQ(wide_target->GetExternalStringEncoding(),
+             (v8::String::UTF_16_ENCODING | v8::String::ASCII_HINT));
+    CHECK_EQ(wide_res, wide_target->GetExternalStringResourceBase());
+    CHECK(wide_target->Equals(String::New("latin1 b ascii")));
+
+    // Non-ASCII string backed by a two-byte resource.
+    CHECK(latin1_target->MakeExternal(latin1_res));
+    CHECK(latin1_target->IsExternal());
+    CHECK_EQ(latin1_target->GetExternalStringEncoding(),
+             (v8::String::UTF_16_ENCODING | v8::String::NOT_ASCII_HINT));
+    CHECK_EQ(latin1_res, latin1_target->GetExternalStringResourceBase());
+    CHECK(latin1_target->Equals(String::New("latin1 non-ascii \xAA",
+                                            String::kUndefinedLength,
+                                            String::LATIN1_ENCODING)));
+  }
+
+  HEAP->CollectAllGarbage(i::Heap::kNoGCFlags);
+  CHECK_EQ(3, disposed);
+}
+
+
THREADED_TEST(StringConcat) {
{
- v8::HandleScope scope;
+ HandleScope scope;
LocalContext env;
const char* one_byte_string_1 = "function a_times_t";
const char* two_byte_string_1 = "wo_plus_b(a, b) {return ";