Speed up quoting of JSON strings by allocating a string that is big enough
authorerik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 8 Dec 2010 16:23:25 +0000 (16:23 +0000)
committererik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Wed, 8 Dec 2010 16:23:25 +0000 (16:23 +0000)
and then trimming it when the length is known.  This way we only have to
traverse the input once.
Review URL: http://codereview.chromium.org/5556012

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5951 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

src/runtime.cc
src/spaces-inl.h
src/spaces.h

index 0f00a33..58cde3b 100644 (file)
@@ -4546,42 +4546,53 @@ static MaybeObject* Runtime_URIUnescape(Arguments args) {
 
 static const unsigned int kQuoteTableLength = 128u;
 
-static const char* const JsonQuotes[kQuoteTableLength] = {
-    "\\u0000", "\\u0001", "\\u0002", "\\u0003",
-    "\\u0004", "\\u0005", "\\u0006", "\\u0007",
-    "\\b", "\\t", "\\n", "\\u000b",
-    "\\f", "\\r", "\\u000e", "\\u000f",
-    "\\u0010", "\\u0011", "\\u0012", "\\u0013",
-    "\\u0014", "\\u0015", "\\u0016", "\\u0017",
-    "\\u0018", "\\u0019", "\\u001a", "\\u001b",
-    "\\u001c", "\\u001d", "\\u001e", "\\u001f",
-    NULL, NULL, "\\\"", NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    "\\\\", NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-    NULL, NULL, NULL, NULL,
-};
-
-
+static const int kJsonQuotesCharactersPerEntry = 8;
+static const char* const JsonQuotes =
+    "\\u0000  \\u0001  \\u0002  \\u0003  "
+    "\\u0004  \\u0005  \\u0006  \\u0007  "
+    "\\b      \\t      \\n      \\u000b  "
+    "\\f      \\r      \\u000e  \\u000f  "
+    "\\u0010  \\u0011  \\u0012  \\u0013  "
+    "\\u0014  \\u0015  \\u0016  \\u0017  "
+    "\\u0018  \\u0019  \\u001a  \\u001b  "
+    "\\u001c  \\u001d  \\u001e  \\u001f  "
+    "        !       \\\"      #       "
+    "$       %       &       '       "
+    "(       )       *       +       "
+    ",       -       .       /       "
+    "0       1       2       3       "
+    "4       5       6       7       "
+    "8       9       :       ;       "
+    "<       =       >       ?       "
+    "@       A       B       C       "
+    "D       E       F       G       "
+    "H       I       J       K       "
+    "L       M       N       O       "
+    "P       Q       R       S       "
+    "T       U       V       W       "
+    "X       Y       Z       [       "
+    "\\\\      ]       ^       _       "
+    "`       a       b       c       "
+    "d       e       f       g       "
+    "h       i       j       k       "
+    "l       m       n       o       "
+    "p       q       r       s       "
+    "t       u       v       w       "
+    "x       y       z       {       "
+    "|       }       ~       \177       ";
+
+
+// For a string that is less than 32k characters it should always be
+// possible to allocate it in new space.
+static const int kMaxGuaranteedNewSpaceString = 32 * 1024;
+
+
+// Doing JSON quoting cannot make the string more than this many times larger.
+static const int kJsonQuoteWorstCaseBlowup = 6;
+
+
+// Covers the entire ASCII range (all other characters are unchanged by JSON
+// quoting).
 static const byte JsonQuoteLengths[kQuoteTableLength] = {
     6, 6, 6, 6, 6, 6, 6, 6,
     2, 2, 2, 6, 2, 2, 6, 6,
@@ -4602,18 +4613,6 @@ static const byte JsonQuoteLengths[kQuoteTableLength] = {
 };
 
 
-template <typename Char>
-Char* WriteString(Char* dst, const char* src_string) {
-  char c;
-  for (c = *src_string; c; c = *src_string) {
-    *dst = c;
-    dst++;
-    src_string++;
-  }
-  return dst;
-}
-
-
 template <typename StringType>
 MaybeObject* AllocateRawString(int length);
 
@@ -4631,58 +4630,104 @@ MaybeObject* AllocateRawString<SeqAsciiString>(int length) {
 
 
 template <typename Char, typename StringType>
-static MaybeObject* QuoteJsonString(Vector<const Char> characters) {
+static MaybeObject* SlowQuoteJsonString(Vector<const Char> characters) {
   int length = characters.length();
-  int quoted_length = 0;
-  for (int i = 0; i < length; i++) {
-    unsigned int c = characters[i];
-    if (sizeof(Char) > 1u) {
-      quoted_length += (c >= kQuoteTableLength) ? 1 : JsonQuoteLengths[c];
+  const Char* read_cursor = characters.start();
+  const Char* end = read_cursor + length;
+  const int kSpaceForQuotes = 2;
+  int quoted_length = kSpaceForQuotes;
+  while (read_cursor < end) {
+    Char c = *(read_cursor++);
+    if (sizeof(Char) > 1u && static_cast<unsigned>(c) >= kQuoteTableLength) {
+      quoted_length++;
     } else {
-      quoted_length += JsonQuoteLengths[c];
+      quoted_length += JsonQuoteLengths[static_cast<unsigned>(c)];
     }
   }
-  Counters::quote_json_char_count.Increment(length);
-
-  // Add space for quotes.
-  quoted_length += 2;
-
   MaybeObject* new_alloc = AllocateRawString<StringType>(quoted_length);
   Object* new_object;
   if (!new_alloc->ToObject(&new_object)) {
-    Counters::quote_json_char_recount.Increment(length);
     return new_alloc;
   }
   StringType* new_string = StringType::cast(new_object);
 
+  Char* write_cursor = reinterpret_cast<Char*>(
+      new_string->address() + SeqAsciiString::kHeaderSize);
+  *(write_cursor++) = '"';
+
+  read_cursor = characters.start();
+  while (read_cursor < end) {
+    Char c = *(read_cursor++);
+    if (sizeof(Char) > 1u && static_cast<unsigned>(c) >= kQuoteTableLength) {
+      *(write_cursor++) = c;
+    } else {
+      int len = JsonQuoteLengths[static_cast<unsigned>(c)];
+      const char* replacement = JsonQuotes +
+          static_cast<unsigned>(c) * kJsonQuotesCharactersPerEntry;
+      for (int i = 0; i < len; i++) {
+        *write_cursor++ = *replacement++;
+      }
+    }
+  }
+  *(write_cursor++) = '"';
+  return new_string;
+}
+
+
+template <typename Char, typename StringType>
+static MaybeObject* QuoteJsonString(Vector<const Char> characters) {
+  int length = characters.length();
+  Counters::quote_json_char_count.Increment(length);
+  const int kSpaceForQuotes = 2;
+  int worst_case_length = length * kJsonQuoteWorstCaseBlowup + kSpaceForQuotes;
+  if (worst_case_length > kMaxGuaranteedNewSpaceString) {
+    return SlowQuoteJsonString<Char, StringType>(characters);
+  }
+
+  MaybeObject* new_alloc = AllocateRawString<StringType>(worst_case_length);
+  Object* new_object;
+  if (!new_alloc->ToObject(&new_object)) {
+    return new_alloc;
+  }
+  StringType* new_string = StringType::cast(new_object);
+  ASSERT(Heap::new_space()->Contains(new_string));
 
   STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize);
   Char* write_cursor = reinterpret_cast<Char*>(
       new_string->address() + SeqAsciiString::kHeaderSize);
   *(write_cursor++) = '"';
+
   const Char* read_cursor = characters.start();
-  if (quoted_length == length + 2) {
-    CopyChars(write_cursor, read_cursor, length);
-    write_cursor += length;
-  } else {
-    const Char* end = read_cursor + length;
-    while (read_cursor < end) {
-      Char c = *(read_cursor++);
-      if (sizeof(Char) > 1u && static_cast<unsigned>(c) >= kQuoteTableLength) {
-        *(write_cursor++) = c;
-      } else {
-        const char* replacement = JsonQuotes[static_cast<unsigned>(c)];
-        if (!replacement) {
-          *(write_cursor++) = c;
-        } else {
-          write_cursor = WriteString(write_cursor, replacement);
+  const Char* end = read_cursor + length;
+  while (read_cursor < end) {
+    Char c = *(read_cursor++);
+    if (sizeof(Char) > 1u && static_cast<unsigned>(c) >= kQuoteTableLength) {
+      *(write_cursor++) = c;
+    } else {
+      int len = JsonQuoteLengths[static_cast<unsigned>(c)];
+      const char* replacement = JsonQuotes +
+          static_cast<unsigned>(c) * kJsonQuotesCharactersPerEntry;
+      write_cursor[0] = replacement[0];
+      if (len > 1) {
+        write_cursor[1] = replacement[1];
+        if (len > 2) {
+          ASSERT(len == 6);
+          write_cursor[2] = replacement[2];
+          write_cursor[3] = replacement[3];
+          write_cursor[4] = replacement[4];
+          write_cursor[5] = replacement[5];
         }
       }
+      write_cursor += len;
     }
   }
   *(write_cursor++) = '"';
-  ASSERT_EQ(SeqAsciiString::kHeaderSize + quoted_length * sizeof(Char),
-            reinterpret_cast<Address>(write_cursor) - new_string->address());
+
+  int final_length =
+      write_cursor - reinterpret_cast<Char*>(
+          new_string->address() + SeqAsciiString::kHeaderSize);
+  Heap::new_space()->ShrinkStringAtAllocationBoundary<StringType>(new_string,
+                                                                  final_length);
   return new_string;
 }
 
index ad09ae2..aaca9e4 100644 (file)
@@ -481,7 +481,7 @@ void LargeObjectChunk::Free(Executability executable) {
 }
 
 // -----------------------------------------------------------------------------
-// LargeObjectSpace
+// NewSpace
 
 MaybeObject* NewSpace::AllocateRawInternal(int size_in_bytes,
                                            AllocationInfo* alloc_info) {
@@ -501,6 +501,18 @@ MaybeObject* NewSpace::AllocateRawInternal(int size_in_bytes,
 }
 
 
+template <typename StringType>
+void NewSpace::ShrinkStringAtAllocationBoundary(String* string, int length) {
+  ASSERT(length <= string->length());
+  ASSERT(string->IsSeqString());
+  ASSERT(string->address() + StringType::SizeFor(string->length()) ==
+         allocation_info_.top);
+  allocation_info_.top =
+      string->address() + StringType::SizeFor(length);
+  string->set_length(length);
+}
+
+
 bool FreeListNode::IsFreeListNode(HeapObject* object) {
   return object->map() == Heap::raw_unchecked_byte_array_map()
       || object->map() == Heap::raw_unchecked_one_pointer_filler_map()
index 2e85003..a1f0c6b 100644 (file)
@@ -1643,6 +1643,11 @@ class NewSpace : public Space {
 
   virtual bool ReserveSpace(int bytes);
 
+  // Resizes a sequential string which must be the most recent thing that was
+  // allocated in new space.
+  template <typename StringType>
+  inline void ShrinkStringAtAllocationBoundary(String* string, int len);
+
 #ifdef ENABLE_HEAP_PROTECTION
   // Protect/unprotect the space by marking it read-only/writable.
   virtual void Protect();