// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
/** \mainpage V8 API Reference Guide
- *
- * V8 is Google's open source JavaScript engine.
+ *
+ * V8 is Google's open source JavaScript engine.
*
* This set of documents provides reference material generated from the
* V8 header file, include/v8.h.
/**
* Pre-compilation data that can be associated with a script. This
* data can be calculated for a script in advance of actually
- * compiling it, and can bestored between compilations. When script
+ * compiling it, and can be stored between compilations. When script
* data is given to the compile method compilation will be faster.
*/
class EXPORT ScriptData { // NOLINT
* Returns true if this value is boolean.
*/
bool IsBoolean();
-
+
/**
* Returns true if this value is a number.
*/
*/
class EXPORT String : public Primitive {
public:
+
+ /**
+ * Returns the number of characters in this string.
+ */
int Length();
/**
+ * Returns the number of bytes in the UTF-8 encoded
+ * representation of this string.
+ */
+ int Utf8Length();
+
+ /**
* Write the contents of the string to an external buffer.
* If no arguments are given, expects the buffer to be large
* enough to hold the entire string and NULL terminator. Copies
* excluding the NULL terminator.
*/
int Write(uint16_t* buffer, int start = 0, int length = -1); // UTF-16
- int WriteAscii(char* buffer,
- int start = 0,
- int length = -1); // literally ascii
+ int WriteAscii(char* buffer, int start = 0, int length = -1); // ASCII
+ int WriteUtf8(char* buffer, int length = -1); // UTF-8
/**
* Returns true if the string is external
static void IgnoreOutOfMemoryException();
/**
- * Check if V8 is dead and therefore unusable. This is the case after
+ * Check if V8 is dead and therefore unusable. This is the case after
* fatal errors such as out-of-memory situations.
*/
static bool IsDead();
}
+int String::Utf8Length() {
+  // Public API entry point: report 0 when the dead-check trips, otherwise
+  // forward to the Utf8Length() of the underlying internal string.
+  const bool dead = IsDeadCheck("v8::String::Utf8Length()");
+  if (dead) {
+    return 0;
+  }
+  i::Handle<i::String> self = Utils::OpenHandle(this);
+  return self->Utf8Length();
+}
+
+
+int String::WriteUtf8(char* buffer, int capacity) {
+  // Encodes this string as UTF-8 into |buffer| and returns the number of
+  // bytes stored, including the terminating '\0' when one is written.
+  // A |capacity| of -1 means no capacity was given: the whole string and
+  // its terminator are written without any bounds checks.
+  if (IsDeadCheck("v8::String::WriteUtf8()")) return 0;
+  LOG_API("String::WriteUtf8");
+  i::Handle<i::String> source = Utils::OpenHandle(this);
+  write_input_buffer.Reset(0, *source);
+  const int char_count = source->length();
+  const bool unbounded = (capacity == -1);
+  // While the output offset is below this limit, a character of the
+  // maximum encoded size still fits inside |capacity|, so characters can
+  // be encoded straight into the destination.
+  const int direct_limit = capacity - (unibrow::Utf8::kMaxEncodedSize - 1);
+  int consumed = 0;  // Characters taken from the input so far.
+  int offset = 0;    // Bytes stored in |buffer| so far.
+  while (consumed < char_count && (unbounded || offset < direct_limit)) {
+    i::uc32 ch = write_input_buffer.GetNext();
+    const int emitted = unibrow::Utf8::Encode(buffer + offset, ch);
+    offset += emitted;
+    consumed++;
+  }
+  if (consumed < char_count) {
+    // Close to the end of the buffer an encoded character may be longer
+    // than the space that is left. Encode each one into a scratch area
+    // first and copy it over only when it fits completely.
+    char staging[unibrow::Utf8::kMaxEncodedSize];
+    while (consumed < char_count && offset < capacity) {
+      i::uc32 ch = write_input_buffer.GetNext();
+      const int emitted = unibrow::Utf8::Encode(staging, ch);
+      // The buffer is full as far as this character is concerned; stop
+      // without counting it as consumed.
+      if (offset + emitted > capacity) break;
+      for (int k = 0; k < emitted; k++) {
+        buffer[offset + k] = staging[k];
+      }
+      offset += emitted;
+      consumed++;
+    }
+  }
+  // Terminate the output only if every character was written and there
+  // is still room for the terminator.
+  const bool wrote_everything = (consumed == char_count);
+  if (wrote_everything && (unbounded || offset < capacity)) {
+    buffer[offset++] = '\0';
+  }
+  return offset;
+}
+
+
int String::WriteAscii(char* buffer, int start, int length) {
if (IsDeadCheck("v8::String::WriteAscii()")) return 0;
LOG_API("String::WriteAscii");
}
+int String::Utf8Length() {
+  // For an ASCII string the character count is returned directly as the
+  // UTF-8 length.
+  if (is_ascii()) {
+    return length();
+  }
+  // Flattening probably does not speed up the count itself, but the
+  // string is very likely to be read again soon (for example by
+  // WriteUtf8), so attempt it now.
+  TryFlatten();
+  Access<StringInputBuffer> reader(&string_input_buffer);
+  int byte_count = 0;
+  // Sum the encoded size of every character delivered by the reader.
+  for (reader->Reset(0, this); reader->has_more(); ) {
+    byte_count += unibrow::Utf8::Length(reader->GetNext());
+  }
+  return byte_count;
+}
+
+
SmartPointer<char> String::ToCString(AllowNullsFlag allow_nulls,
RobustnessFlag robust_flag,
int offset,
RobustnessFlag robustness_flag = FAST_STRING_TRAVERSAL,
int* length_output = 0);
+ int Utf8Length();
+
// Return a 16 bit Unicode representation of the string.
// The string should be nearly flat, otherwise the performance of
// this method may be very bad. Setting robustness_flag to
TraverseFirst(flat_string, string, DEEP_ASCII_DEPTH);
}
}
+
+
+TEST(Utf8Conversion) {
+  // Smoke test for converting strings to utf-8.
+  InitializeVM();
+  v8::HandleScope handle_scope;
+  // A simple ascii string. Its length is converted to int once so that
+  // the comparison below does not mix size_t and int.
+  const char* ascii_string = "abcdef12345";
+  const int ascii_length = static_cast<int>(strlen(ascii_string));
+  int len = v8::String::New(ascii_string, ascii_length)->Utf8Length();
+  CHECK_EQ(ascii_length, len);
+  // A mixed ascii and non-ascii string
+  // U+02E4 -> CB A4
+  // U+0064 -> 64
+  // U+12E4 -> E1 8B A4
+  // U+0030 -> 30
+  // U+3045 -> E3 81 85
+  const uint16_t mixed_string[] = {0x02E4, 0x0064, 0x12E4, 0x0030, 0x3045};
+  // The characters we expect to be output. Stored as unsigned char
+  // because several of the bytes are above 0x7F and do not fit in a
+  // signed char.
+  const unsigned char as_utf8[11] = {0xCB, 0xA4, 0x64, 0xE1, 0x8B, 0xA4, 0x30,
+                                     0xE3, 0x81, 0x85, 0x00};
+  // The number of bytes expected to be written for each capacity 0..11
+  const int lengths[12] = {0, 0, 2, 3, 3, 3, 6, 7, 7, 7, 10, 11};
+  v8::Handle<v8::String> mixed = v8::String::New(mixed_string, 5);
+  CHECK_EQ(10, mixed->Utf8Length());
+  // Try encoding the string with all capacities
+  char buffer[11];
+  // Marker for untouched bytes. Comparing against a char-typed constant
+  // keeps the check valid whether plain char is signed or unsigned.
+  const char kNoChar = static_cast<char>(-1);
+  for (int i = 0; i <= 11; i++) {
+    // Clear the buffer before reusing it
+    for (int j = 0; j < 11; j++)
+      buffer[j] = kNoChar;
+    int written = mixed->WriteUtf8(buffer, i);
+    CHECK_EQ(lengths[i], written);
+    // Check that the contents are correct
+    for (int j = 0; j < lengths[i]; j++)
+      CHECK_EQ(as_utf8[j], static_cast<unsigned char>(buffer[j]));
+    // Check that the rest of the buffer hasn't been touched
+    for (int j = lengths[i]; j < 11; j++)
+      CHECK_EQ(kNoChar, buffer[j]);
+  }
+}