Avoid decoding overhead when allocating ascii strings.
author: ager@chromium.org <ager@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 21 Dec 2010 13:24:23 +0000 (13:24 +0000)
committer: ager@chromium.org <ager@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Tue, 21 Dec 2010 13:24:23 +0000 (13:24 +0000)
The assumption is that most utf8 strings allocated are actually ascii
and that if they are not we will encounter a non-ascii char pretty
quickly.

Review URL: http://codereview.chromium.org/6072004

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@6099 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/heap-inl.h
src/heap.cc
src/heap.h

index ef839988da2aeef9bdabcf07a7184e9107761d40..62e810fcb1b80b44a2b518c1c8eb460abd68534c 100644 (file)
@@ -40,6 +40,21 @@ int Heap::MaxObjectSizeInPagedSpace() {
 }
 
 
+MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> str,
+                                          PretenureFlag pretenure) {
+  // Check for ASCII first since this is the common case.
+  for (int i = 0; i < str.length(); ++i) {
+    if (static_cast<uint8_t>(str[i]) > String::kMaxAsciiCharCodeU) {
+      // Non-ASCII and we need to decode.
+      return AllocateStringFromUtf8Slow(str, pretenure);
+    }
+  }
+  // If the string is ASCII, we do not need to convert the characters
+  // since UTF8 is backwards compatible with ASCII.
+  return AllocateStringFromAscii(str, pretenure);
+}
+
+
 MaybeObject* Heap::AllocateSymbol(Vector<const char> str,
                                   int chars,
                                   uint32_t hash_field) {
index 1e9999164ccf2a9394fbca35a37737e27a4f6729..2f70ef0188dbad295cb44622708e2dec3c4a4b24 100644 (file)
@@ -3307,8 +3307,8 @@ MaybeObject* Heap::AllocateStringFromAscii(Vector<const char> string,
 }
 
 
-MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string,
-                                          PretenureFlag pretenure) {
+MaybeObject* Heap::AllocateStringFromUtf8Slow(Vector<const char> string,
+                                              PretenureFlag pretenure) {
   // V8 only supports characters in the Basic Multilingual Plane.
   const uc32 kMaxSupportedChar = 0xFFFF;
   // Count the number of characters in the UTF-8 string and check if
@@ -3317,17 +3317,11 @@ MaybeObject* Heap::AllocateStringFromUtf8(Vector<const char> string,
       decoder(ScannerConstants::utf8_decoder());
   decoder->Reset(string.start(), string.length());
   int chars = 0;
-  bool is_ascii = true;
   while (decoder->has_more()) {
-    uc32 r = decoder->GetNext();
-    if (r > String::kMaxAsciiCharCode) is_ascii = false;
+    decoder->GetNext();
     chars++;
   }
 
-  // If the string is ascii, we do not need to convert the characters
-  // since UTF8 is backwards compatible with ascii.
-  if (is_ascii) return AllocateStringFromAscii(string, pretenure);
-
   Object* result;
   { MaybeObject* maybe_result = AllocateRawTwoByteString(chars, pretenure);
     if (!maybe_result->ToObject(&result)) return maybe_result;
index c2d36e248fd7be38c91b9173178aa62a3e51d6a7..c612f2f413c0163c8b2d18f2a00cabcf51c401f8 100644 (file)
@@ -412,7 +412,10 @@ class Heap : public AllStatic {
   MUST_USE_RESULT static MaybeObject* AllocateStringFromAscii(
       Vector<const char> str,
       PretenureFlag pretenure = NOT_TENURED);
-  MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8(
+  MUST_USE_RESULT static inline MaybeObject* AllocateStringFromUtf8(
+      Vector<const char> str,
+      PretenureFlag pretenure = NOT_TENURED);
+  MUST_USE_RESULT static MaybeObject* AllocateStringFromUtf8Slow(
       Vector<const char> str,
       PretenureFlag pretenure = NOT_TENURED);
   MUST_USE_RESULT static MaybeObject* AllocateStringFromTwoByte(