From: yangguo@chromium.org Date: Fri, 31 Aug 2012 09:28:01 +0000 (+0000) Subject: Cache results in SearchRegExpMultiple. X-Git-Tag: upstream/4.7.83~16071 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=5dd51bafef2564ece854f7fe1d54349bc738e528;p=platform%2Fupstream%2Fv8.git Cache results in SearchRegExpMultiple. BUG= Review URL: https://chromiumcodereview.appspot.com/10837290 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12416 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/include/v8.h b/include/v8.h index 83801265a..3e1b42700 100644 --- a/include/v8.h +++ b/include/v8.h @@ -4042,7 +4042,7 @@ class Internals { static const int kNullValueRootIndex = 7; static const int kTrueValueRootIndex = 8; static const int kFalseValueRootIndex = 9; - static const int kEmptySymbolRootIndex = 114; + static const int kEmptySymbolRootIndex = 115; static const int kJSObjectType = 0xaa; static const int kFirstNonstringType = 0x80; diff --git a/src/heap.cc b/src/heap.cc index f8961dffb..9ba769212 100644 --- a/src/heap.cc +++ b/src/heap.cc @@ -1002,7 +1002,8 @@ void Heap::MarkCompactPrologue() { isolate_->keyed_lookup_cache()->Clear(); isolate_->context_slot_cache()->Clear(); isolate_->descriptor_lookup_cache()->Clear(); - StringSplitCache::Clear(string_split_cache()); + RegExpResultsCache::Clear(string_split_cache()); + RegExpResultsCache::Clear(regexp_multiple_cache()); isolate_->compilation_cache()->MarkCompactPrologue(); @@ -2761,12 +2762,18 @@ bool Heap::CreateInitialObjects() { set_single_character_string_cache(FixedArray::cast(obj)); // Allocate cache for string split. - { MaybeObject* maybe_obj = - AllocateFixedArray(StringSplitCache::kStringSplitCacheSize, TENURED); + { MaybeObject* maybe_obj = AllocateFixedArray( + RegExpResultsCache::kRegExpResultsCacheSize, TENURED); if (!maybe_obj->ToObject(&obj)) return false; } set_string_split_cache(FixedArray::cast(obj)); + { MaybeObject* maybe_obj = AllocateFixedArray( + RegExpResultsCache::kRegExpResultsCacheSize, TENURED); + if (!maybe_obj->ToObject(&obj)) return false; + } + set_regexp_multiple_cache(FixedArray::cast(obj)); + // Allocate cache for external strings pointing to native source code. { MaybeObject* maybe_obj = AllocateFixedArray(Natives::GetBuiltinsCount()); if (!maybe_obj->ToObject(&obj)) return false; @@ -2792,70 +2799,98 @@ bool Heap::CreateInitialObjects() { } -Object* StringSplitCache::Lookup( - FixedArray* cache, String* string, String* pattern) { - if (!string->IsSymbol() || !pattern->IsSymbol()) return Smi::FromInt(0); - uint32_t hash = string->Hash(); - uint32_t index = ((hash & (kStringSplitCacheSize - 1)) & +Object* RegExpResultsCache::Lookup(Heap* heap, + String* key_string, + Object* key_pattern, + ResultsCacheType type) { + FixedArray* cache; + if (!key_string->IsSymbol()) return Smi::FromInt(0); + if (type == STRING_SPLIT_SUBSTRINGS) { + ASSERT(key_pattern->IsString()); + if (!key_pattern->IsSymbol()) return Smi::FromInt(0); + cache = heap->string_split_cache(); + } else { + ASSERT(type == REGEXP_MULTIPLE_INDICES); + ASSERT(key_pattern->IsFixedArray()); + cache = heap->regexp_multiple_cache(); + } + + uint32_t hash = key_string->Hash(); + uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) & ~(kArrayEntriesPerCacheEntry - 1)); - if (cache->get(index + kStringOffset) == string && - cache->get(index + kPatternOffset) == pattern) { + if (cache->get(index + kStringOffset) == key_string && + cache->get(index + kPatternOffset) == key_pattern) { return cache->get(index + kArrayOffset); } - index = ((index + kArrayEntriesPerCacheEntry) & (kStringSplitCacheSize - 1)); - if (cache->get(index + kStringOffset) == string && - cache->get(index + kPatternOffset) == pattern) { + index = + ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1)); + if (cache->get(index + kStringOffset) == key_string && + cache->get(index + kPatternOffset) == key_pattern) { return cache->get(index + kArrayOffset); } return Smi::FromInt(0); } -void StringSplitCache::Enter(Heap* heap, - FixedArray* cache, - String* string, - String* pattern, - FixedArray* array) { - if (!string->IsSymbol() || !pattern->IsSymbol()) return; - uint32_t hash = string->Hash(); - uint32_t index = ((hash & (kStringSplitCacheSize - 1)) & +void RegExpResultsCache::Enter(Heap* heap, + String* key_string, + Object* key_pattern, + FixedArray* value_array, + ResultsCacheType type) { + FixedArray* cache; + if (!key_string->IsSymbol()) return; + if (type == STRING_SPLIT_SUBSTRINGS) { + ASSERT(key_pattern->IsString()); + if (!key_pattern->IsSymbol()) return; + cache = heap->string_split_cache(); + } else { + ASSERT(type == REGEXP_MULTIPLE_INDICES); + ASSERT(key_pattern->IsFixedArray()); + cache = heap->regexp_multiple_cache(); + } + + uint32_t hash = key_string->Hash(); + uint32_t index = ((hash & (kRegExpResultsCacheSize - 1)) & ~(kArrayEntriesPerCacheEntry - 1)); if (cache->get(index + kStringOffset) == Smi::FromInt(0)) { - cache->set(index + kStringOffset, string); - cache->set(index + kPatternOffset, pattern); - cache->set(index + kArrayOffset, array); + cache->set(index + kStringOffset, key_string); + cache->set(index + kPatternOffset, key_pattern); + cache->set(index + kArrayOffset, value_array); } else { uint32_t index2 = - ((index + kArrayEntriesPerCacheEntry) & (kStringSplitCacheSize - 1)); + ((index + kArrayEntriesPerCacheEntry) & (kRegExpResultsCacheSize - 1)); if (cache->get(index2 + kStringOffset) == Smi::FromInt(0)) { - cache->set(index2 + kStringOffset, string); - cache->set(index2 + kPatternOffset, pattern); - cache->set(index2 + kArrayOffset, array); + cache->set(index2 + kStringOffset, key_string); + cache->set(index2 + kPatternOffset, key_pattern); + cache->set(index2 + kArrayOffset, value_array); } else { cache->set(index2 + kStringOffset, Smi::FromInt(0)); cache->set(index2 + kPatternOffset, Smi::FromInt(0)); cache->set(index2 + kArrayOffset, Smi::FromInt(0)); - cache->set(index + kStringOffset, string); - cache->set(index + kPatternOffset, pattern); - cache->set(index + kArrayOffset, array); + cache->set(index + kStringOffset, key_string); + cache->set(index + kPatternOffset, key_pattern); + cache->set(index + kArrayOffset, value_array); } } - if (array->length() < 100) { // Limit how many new symbols we want to make. - for (int i = 0; i < array->length(); i++) { - String* str = String::cast(array->get(i)); + // If the array is a reasonably short list of substrings, convert it into a + // list of symbols. + if (type == STRING_SPLIT_SUBSTRINGS && value_array->length() < 100) { + for (int i = 0; i < value_array->length(); i++) { + String* str = String::cast(value_array->get(i)); Object* symbol; MaybeObject* maybe_symbol = heap->LookupSymbol(str); if (maybe_symbol->ToObject(&symbol)) { - array->set(i, symbol); + value_array->set(i, symbol); } } } - array->set_map_no_write_barrier(heap->fixed_cow_array_map()); + // Convert backing store to a copy-on-write array. + value_array->set_map_no_write_barrier(heap->fixed_cow_array_map()); } -void StringSplitCache::Clear(FixedArray* cache) { - for (int i = 0; i < kStringSplitCacheSize; i++) { +void RegExpResultsCache::Clear(FixedArray* cache) { + for (int i = 0; i < kRegExpResultsCacheSize; i++) { cache->set(i, Smi::FromInt(0)); } } diff --git a/src/heap.h b/src/heap.h index 00f3f2052..ba340a24e 100644 --- a/src/heap.h +++ b/src/heap.h @@ -87,6 +87,7 @@ namespace internal { V(Object, instanceof_cache_answer, InstanceofCacheAnswer) \ V(FixedArray, single_character_string_cache, SingleCharacterStringCache) \ V(FixedArray, string_split_cache, StringSplitCache) \ + V(FixedArray, regexp_multiple_cache, RegExpMultipleCache) \ V(Object, termination_exception, TerminationException) \ V(Smi, hash_seed, HashSeed) \ V(Map, string_map, StringMap) \ @@ -2582,24 +2583,31 @@ class GCTracer BASE_EMBEDDED { }; -class StringSplitCache { +class RegExpResultsCache { public: - static Object* Lookup(FixedArray* cache, String* string, String* pattern); + enum ResultsCacheType { REGEXP_MULTIPLE_INDICES, STRING_SPLIT_SUBSTRINGS }; + + // Attempt to retrieve a cached result. On failure, 0 is returned as a Smi. + // On success, the returned result is guaranteed to be a COW-array. + static Object* Lookup(Heap* heap, + String* key_string, + Object* key_pattern, + ResultsCacheType type); + // Attempt to add value_array to the cache specified by type. On success, + // value_array is turned into a COW-array. static void Enter(Heap* heap, - FixedArray* cache, - String* string, - String* pattern, - FixedArray* array); + String* key_string, + Object* key_pattern, + FixedArray* value_array, + ResultsCacheType type); static void Clear(FixedArray* cache); - static const int kStringSplitCacheSize = 0x100; + static const int kRegExpResultsCacheSize = 0x100; private: static const int kArrayEntriesPerCacheEntry = 4; static const int kStringOffset = 0; static const int kPatternOffset = 1; static const int kArrayOffset = 2; - - static MaybeObject* WrapFixedArrayInJSArray(Object* fixed_array); }; diff --git a/src/runtime.cc b/src/runtime.cc index 3d7a12689..4ce2e1ed5 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2413,18 +2413,13 @@ class FixedArrayBuilder { return array_->length(); } - Handle ToJSArray() { - Handle result_array = FACTORY->NewJSArrayWithElements(array_); - result_array->set_length(Smi::FromInt(length_)); - return result_array; - } - Handle ToJSArray(Handle target_array) { FACTORY->SetContent(target_array, array_); target_array->set_length(Smi::FromInt(length_)); return target_array; } + private: Handle array_; int length_; @@ -2543,10 +2538,6 @@ class ReplacementStringBuilder { character_count_ += by; } - Handle GetParts() { - return array_builder_.ToJSArray(); - } - private: Handle NewRawAsciiString(int length) { return heap_->isolate()->factory()->NewRawAsciiString(length); @@ -3667,21 +3658,57 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) { // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain // separate last match info. See comment on that function. template -static int SearchRegExpMultiple( +static MaybeObject* SearchRegExpMultiple( Isolate* isolate, Handle subject, Handle regexp, Handle last_match_array, - FixedArrayBuilder* builder) { + Handle result_array) { ASSERT(subject->IsFlat()); ASSERT_NE(has_capture, regexp->CaptureCount() == 0); - RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); - if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION; - int capture_count = regexp->CaptureCount(); int subject_length = subject->length(); + static const int kMinLengthToCache = 0x1000; + + if (subject_length > kMinLengthToCache) { + Handle cached_answer(RegExpResultsCache::Lookup( + isolate->heap(), + *subject, + regexp->data(), + RegExpResultsCache::REGEXP_MULTIPLE_INDICES)); + if (*cached_answer != Smi::FromInt(0)) { + Handle cached_fixed_array = + Handle(FixedArray::cast(*cached_answer)); + // The cache FixedArray is a COW-array and can therefore be reused. + isolate->factory()->SetContent(result_array, cached_fixed_array); + // The actual length of the result array is stored in the last element of + // the backing store (the backing FixedArray may have a larger capacity). + Object* cached_fixed_array_last_element = + cached_fixed_array->get(cached_fixed_array->length() - 1); + Smi* js_array_length = Smi::cast(cached_fixed_array_last_element); + result_array->set_length(js_array_length); + RegExpImpl::SetLastMatchInfo( + last_match_array, subject, capture_count, NULL); + return *result_array; + } + } + + RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); + if (global_cache.HasException()) return Failure::Exception(); + + Handle result_elements; + if (result_array->HasFastObjectElements()) { + result_elements = + Handle(FixedArray::cast(result_array->elements())); + } + if (result_elements.is_null() || result_elements->length() < 16) { + result_elements = isolate->factory()->NewFixedArrayWithHoles(16); + } + + FixedArrayBuilder builder(result_elements); + // Position to search from. int match_start = -1; int match_end = 0; @@ -3694,9 +3721,9 @@ static int SearchRegExpMultiple( int32_t* current_match = global_cache.FetchNext(); if (current_match == NULL) break; match_start = current_match[0]; - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); + builder.EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); if (match_end < match_start) { - ReplacementStringBuilder::AddSubjectSlice(builder, + ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, match_start); } @@ -3738,19 +3765,19 @@ static int SearchRegExpMultiple( } elements->set(capture_count + 1, Smi::FromInt(match_start)); elements->set(capture_count + 2, *subject); - builder->Add(*isolate->factory()->NewJSArrayWithElements(elements)); + builder.Add(*isolate->factory()->NewJSArrayWithElements(elements)); } else { - builder->Add(*match); + builder.Add(*match); } } } - if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION; + if (global_cache.HasException()) return Failure::Exception(); if (match_start >= 0) { // Finished matching, with at least one match. if (match_end < subject_length) { - ReplacementStringBuilder::AddSubjectSlice(builder, + ReplacementStringBuilder::AddSubjectSlice(&builder, match_end, subject_length); } @@ -3758,9 +3785,23 @@ static int SearchRegExpMultiple( RegExpImpl::SetLastMatchInfo( last_match_array, subject, capture_count, NULL); - return RegExpImpl::RE_SUCCESS; + if (subject_length > kMinLengthToCache) { + // Store the length of the result array into the last element of the + // backing FixedArray. + builder.EnsureCapacity(1); + Handle fixed_array = builder.array(); + fixed_array->set(fixed_array->length() - 1, + Smi::FromInt(builder.length())); + // Cache the result and turn the FixedArray into a COW array. + RegExpResultsCache::Enter(isolate->heap(), + *subject, + regexp->data(), + *fixed_array, + RegExpResultsCache::REGEXP_MULTIPLE_INDICES); + } + return *builder.ToJSArray(result_array); } else { - return RegExpImpl::RE_FAILURE; // No matches at all. + return isolate->heap()->null_value(); // No matches at all. } } @@ -3780,29 +3821,14 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { ASSERT(last_match_info->HasFastObjectElements()); ASSERT(regexp->GetFlags().is_global()); - Handle result_elements; - if (result_array->HasFastObjectElements()) { - result_elements = - Handle(FixedArray::cast(result_array->elements())); - } - if (result_elements.is_null() || result_elements->length() < 16) { - result_elements = isolate->factory()->NewFixedArrayWithHoles(16); - } - FixedArrayBuilder builder(result_elements); - int result; if (regexp->CaptureCount() == 0) { - result = SearchRegExpMultiple( - isolate, subject, regexp, last_match_info, &builder); + return SearchRegExpMultiple( + isolate, subject, regexp, last_match_info, result_array); } else { - result = SearchRegExpMultiple( - isolate, subject, regexp, last_match_info, &builder); + return SearchRegExpMultiple( + isolate, subject, regexp, last_match_info, result_array); } - - if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array); - if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value(); - ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); - return Failure::Exception(); } @@ -6119,11 +6145,13 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { RUNTIME_ASSERT(pattern_length > 0); if (limit == 0xffffffffu) { - Handle cached_answer(StringSplitCache::Lookup( - isolate->heap()->string_split_cache(), + Handle cached_answer(RegExpResultsCache::Lookup( + isolate->heap(), *subject, - *pattern)); + *pattern, + RegExpResultsCache::STRING_SPLIT_SUBSTRINGS)); if (*cached_answer != Smi::FromInt(0)) { + // The cache FixedArray is a COW-array and can therefore be reused. Handle result = isolate->factory()->NewJSArrayWithElements( Handle::cast(cached_answer)); @@ -6183,11 +6211,11 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringSplit) { if (limit == 0xffffffffu) { if (result->HasFastObjectElements()) { - StringSplitCache::Enter(isolate->heap(), - isolate->heap()->string_split_cache(), - *subject, - *pattern, - *elements); + RegExpResultsCache::Enter(isolate->heap(), + *subject, + *pattern, + *elements, + RegExpResultsCache::STRING_SPLIT_SUBSTRINGS); } } diff --git a/test/mjsunit/regexp-results-cache.js b/test/mjsunit/regexp-results-cache.js new file mode 100644 index 000000000..7ee8c3fac --- /dev/null +++ b/test/mjsunit/regexp-results-cache.js @@ -0,0 +1,78 @@ +// Copyright 2012 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Long string to trigger caching. +var string = +"Friends, Romans, countrymen, lend me your ears! \ + I come to bury Caesar, not to praise him. \ + The evil that men do lives after them, \ + The good is oft interred with their bones; \ + So let it be with Caesar. The noble Brutus \ + Hath told you Caesar was ambitious; \ + If it were so, it was a grievous fault, \ + And grievously hath Caesar answer'd it. \ + Here, under leave of Brutus and the rest- \ + For Brutus is an honorable man; \ + So are they all, all honorable men- \ + Come I to speak in Caesar's funeral. \ + He was my friend, faithful and just to me; \ + But Brutus says he was ambitious, \ + And Brutus is an honorable man. \ + He hath brought many captives home to Rome, \ + Whose ransoms did the general coffers fill. \ + Did this in Caesar seem ambitious? \ + When that the poor have cried, Caesar hath wept; \ + Ambition should be made of sterner stuff: \ + Yet Brutus says he was ambitious, \ + And Brutus is an honorable man. \ + You all did see that on the Lupercal \ + I thrice presented him a kingly crown, \ + Which he did thrice refuse. Was this ambition? \ + Yet Brutus says he was ambitious, \ + And sure he is an honorable man. \ + I speak not to disprove what Brutus spoke, \ + But here I am to speak what I do know. \ + You all did love him once, not without cause; \ + What cause withholds you then to mourn for him? \ + O judgement, thou art fled to brutish beasts, \ + And men have lost their reason. Bear with me; \ + My heart is in the coffin there with Caesar, \ + And I must pause till it come back to me."; + +var replaced = string.replace(/\b\w+\b/g, function() { return "foo"; }); +for (var i = 0; i < 3; i++) { + assertEquals(replaced, + string.replace(/\b\w+\b/g, function() { return "foo"; })); +} + +// Check that the result is in a COW array. +var words = string.split(" "); +assertEquals("Friends,", words[0]); +words[0] = "Enemies,"; +words = string.split(" "); +assertEquals("Friends,", words[0]); +