From: sandholm@chromium.org Date: Tue, 31 Aug 2010 09:22:53 +0000 (+0000) Subject: Simplify code by removing special-casing for single-character patterns X-Git-Tag: upstream/4.7.83~21264 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=97ccb64a06bab25a96fb120948d3dc1996fd00ec;p=platform%2Fupstream%2Fv8.git Simplify code by removing special-casing for single-character patterns Review URL: http://codereview.chromium.org/3276004 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@5380 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/runtime.cc b/src/runtime.cc index 70065be..6b40a9b 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2818,40 +2818,6 @@ static int BoyerMooreIndexOf(Vector subject, } -template -static inline int SingleCharIndexOf(Vector string, - schar pattern_char, - int start_index) { - if (sizeof(schar) == 1) { - const schar* pos = reinterpret_cast( - memchr(string.start() + start_index, - pattern_char, - string.length() - start_index)); - if (pos == NULL) return -1; - return static_cast(pos - string.start()); - } - for (int i = start_index, n = string.length(); i < n; i++) { - if (pattern_char == string[i]) { - return i; - } - } - return -1; -} - - -template -static int SingleCharLastIndexOf(Vector string, - schar pattern_char, - int start_index) { - for (int i = start_index; i >= 0; i--) { - if (pattern_char == string[i]) { - return i; - } - } - return -1; -} - - // Trivial string search for shorter strings. // On return, if "complete" is set to true, the return value is the // final result of searching for the patter in the subject. @@ -2863,6 +2829,7 @@ static int SimpleIndexOf(Vector subject, Vector pattern, int idx, bool* complete) { + ASSERT(pattern.length() > 1); // Badness is a count of how much work we have done. When we have // done enough work we decide it's probably worth switching to a better // algorithm. @@ -2925,12 +2892,12 @@ static int SimpleIndexOf(Vector subject, if (subject[i] != pattern_first_char) continue; } int j = 1; - do { + while (j < pattern.length()) { if (pattern[j] != subject[i+j]) { break; } j++; - } while (j < pattern.length()); + } if (j == pattern.length()) { return i; } @@ -3032,54 +2999,15 @@ int Runtime::StringMatch(Handle sub, int subject_length = sub->length(); if (start_index + pattern_length > subject_length) return -1; - if (!sub->IsFlat()) { - FlattenString(sub); - } - - // Searching for one specific character is common. For one - // character patterns linear search is necessary, so any smart - // algorithm is unnecessary overhead. - if (pattern_length == 1) { - AssertNoAllocation no_heap_allocation; // ensure vectors stay valid - String* seq_sub = *sub; - if (seq_sub->IsConsString()) { - seq_sub = ConsString::cast(seq_sub)->first(); - } - if (seq_sub->IsAsciiRepresentation()) { - uc16 pchar = pat->Get(0); - if (pchar > String::kMaxAsciiCharCode) { - return -1; - } - Vector ascii_vector = - seq_sub->ToAsciiVector().SubVector(start_index, subject_length); - const void* pos = memchr(ascii_vector.start(), - static_cast(pchar), - static_cast(ascii_vector.length())); - if (pos == NULL) { - return -1; - } - return static_cast(reinterpret_cast(pos) - - ascii_vector.start() + start_index); - } - return SingleCharIndexOf(seq_sub->ToUC16Vector(), - pat->Get(0), - start_index); - } - - if (!pat->IsFlat()) { - FlattenString(pat); - } + if (!sub->IsFlat()) FlattenString(sub); + if (!pat->IsFlat()) FlattenString(pat); AssertNoAllocation no_heap_allocation; // ensure vectors stay valid // Extract flattened substrings of cons strings before determining asciiness. String* seq_sub = *sub; - if (seq_sub->IsConsString()) { - seq_sub = ConsString::cast(seq_sub)->first(); - } + if (seq_sub->IsConsString()) seq_sub = ConsString::cast(seq_sub)->first(); String* seq_pat = *pat; - if (seq_pat->IsConsString()) { - seq_pat = ConsString::cast(seq_pat)->first(); - } + if (seq_pat->IsConsString()) seq_pat = ConsString::cast(seq_pat)->first(); // dispatch on type of strings if (seq_pat->IsAsciiRepresentation()) { @@ -3169,30 +3097,8 @@ static Object* Runtime_StringLastIndexOf(Arguments args) { return Smi::FromInt(start_index); } - if (!sub->IsFlat()) { - FlattenString(sub); - } - - if (pat_length == 1) { - AssertNoAllocation no_heap_allocation; // ensure vectors stay valid - if (sub->IsAsciiRepresentation()) { - uc16 pchar = pat->Get(0); - if (pchar > String::kMaxAsciiCharCode) { - return Smi::FromInt(-1); - } - return Smi::FromInt(SingleCharLastIndexOf(sub->ToAsciiVector(), - static_cast(pat->Get(0)), - start_index)); - } else { - return Smi::FromInt(SingleCharLastIndexOf(sub->ToUC16Vector(), - pat->Get(0), - start_index)); - } - } - - if (!pat->IsFlat()) { - FlattenString(pat); - } + if (!sub->IsFlat()) FlattenString(sub); + if (!pat->IsFlat()) FlattenString(pat); AssertNoAllocation no_heap_allocation; // ensure vectors stay valid @@ -3370,88 +3276,6 @@ static void SetLastMatchInfoNoCaptures(Handle subject, } -template -static bool SearchCharMultiple(Vector subject, - String* pattern, - schar pattern_char, - FixedArrayBuilder* builder, - int* match_pos) { - // Position of last match. - int pos = *match_pos; - int subject_length = subject.length(); - while (pos < subject_length) { - int match_end = pos + 1; - if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) { - *match_pos = pos; - return false; - } - int new_pos = SingleCharIndexOf(subject, pattern_char, match_end); - if (new_pos >= 0) { - // Match has been found. - if (new_pos > match_end) { - ReplacementStringBuilder::AddSubjectSlice(builder, match_end, new_pos); - } - pos = new_pos; - builder->Add(pattern); - } else { - break; - } - } - if (pos + 1 < subject_length) { - ReplacementStringBuilder::AddSubjectSlice(builder, pos + 1, subject_length); - } - *match_pos = pos; - return true; -} - - -static bool SearchCharMultiple(Handle subject, - Handle pattern, - Handle last_match_info, - FixedArrayBuilder* builder) { - ASSERT(subject->IsFlat()); - ASSERT_EQ(1, pattern->length()); - uc16 pattern_char = pattern->Get(0); - // Treating position before first as initial "previous match position". - int match_pos = -1; - - for (;;) { // Break when search complete. - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); - AssertNoAllocation no_gc; - if (subject->IsAsciiRepresentation()) { - if (pattern_char > String::kMaxAsciiCharCode) { - break; - } - Vector subject_vector = subject->ToAsciiVector(); - char pattern_ascii_char = static_cast(pattern_char); - bool complete = SearchCharMultiple(subject_vector, - *pattern, - pattern_ascii_char, - builder, - &match_pos); - if (complete) break; - } else { - Vector subject_vector = subject->ToUC16Vector(); - bool complete = SearchCharMultiple(subject_vector, - *pattern, - pattern_char, - builder, - &match_pos); - if (complete) break; - } - } - - if (match_pos >= 0) { - SetLastMatchInfoNoCaptures(subject, - last_match_info, - match_pos, - match_pos + 1); - return true; - } - return false; // No matches at all. -} - - template static bool SearchStringMultiple(Vector subject, String* pattern, @@ -3529,7 +3353,6 @@ static bool SearchStringMultiple(Handle subject, FixedArrayBuilder* builder) { ASSERT(subject->IsFlat()); ASSERT(pattern->IsFlat()); - ASSERT(pattern->length() > 1); // Treating as if a previous match was before first character. int match_pos = -pattern->length(); @@ -3787,14 +3610,6 @@ static Object* Runtime_RegExpExecMultiple(Arguments args) { if (regexp->TypeTag() == JSRegExp::ATOM) { Handle pattern( String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); - int pattern_length = pattern->length(); - if (pattern_length == 1) { - if (SearchCharMultiple(subject, pattern, last_match_info, &builder)) { - return *builder.ToJSArray(result_array); - } - return Heap::null_value(); - } - if (!pattern->IsFlat()) FlattenString(pattern); if (SearchStringMultiple(subject, pattern, last_match_info, &builder)) { return *builder.ToJSArray(result_array); @@ -5396,23 +5211,6 @@ void FindStringIndices(Vector subject, } } -template -inline void FindCharIndices(Vector subject, - const schar pattern_char, - ZoneList* indices, - unsigned int limit) { - // Collect indices of pattern_char in subject, and the end-of-string index. - // Stop after finding at most limit values. - int index = 0; - while (limit > 0) { - index = SingleCharIndexOf(subject, pattern_char, index); - if (index < 0) return; - indices->Add(index); - index++; - limit--; - } -} - static Object* Runtime_StringSplit(Arguments args) { ASSERT(args.length() == 3); @@ -5438,49 +5236,33 @@ static Object* Runtime_StringSplit(Arguments args) { // Find (up to limit) indices of separator and end-of-string in subject int initial_capacity = Min(kMaxInitialListCapacity, limit); ZoneList indices(initial_capacity); - if (pattern_length == 1) { - // Special case, go directly to fast single-character split. - AssertNoAllocation nogc; - uc16 pattern_char = pattern->Get(0); - if (subject->IsTwoByteRepresentation()) { - FindCharIndices(subject->ToUC16Vector(), pattern_char, - &indices, - limit); - } else if (pattern_char <= String::kMaxAsciiCharCode) { - FindCharIndices(subject->ToAsciiVector(), - static_cast(pattern_char), - &indices, - limit); + if (!pattern->IsFlat()) FlattenString(pattern); + AssertNoAllocation nogc; + if (subject->IsAsciiRepresentation()) { + Vector subject_vector = subject->ToAsciiVector(); + if (pattern->IsAsciiRepresentation()) { + FindStringIndices(subject_vector, + pattern->ToAsciiVector(), + &indices, + limit); + } else { + FindStringIndices(subject_vector, + pattern->ToUC16Vector(), + &indices, + limit); } } else { - if (!pattern->IsFlat()) FlattenString(pattern); - AssertNoAllocation nogc; - if (subject->IsAsciiRepresentation()) { - Vector subject_vector = subject->ToAsciiVector(); - if (pattern->IsAsciiRepresentation()) { - FindStringIndices(subject_vector, - pattern->ToAsciiVector(), - &indices, - limit); - } else { - FindStringIndices(subject_vector, - pattern->ToUC16Vector(), - &indices, - limit); - } + Vector subject_vector = subject->ToUC16Vector(); + if (pattern->IsAsciiRepresentation()) { + FindStringIndices(subject_vector, + pattern->ToAsciiVector(), + &indices, + limit); } else { - Vector subject_vector = subject->ToUC16Vector(); - if (pattern->IsAsciiRepresentation()) { - FindStringIndices(subject_vector, - pattern->ToAsciiVector(), - &indices, - limit); - } else { - FindStringIndices(subject_vector, - pattern->ToUC16Vector(), - &indices, - limit); - } + FindStringIndices(subject_vector, + pattern->ToUC16Vector(), + &indices, + limit); } } if (static_cast(indices.length()) < limit) {