From: yangguo@chromium.org Date: Tue, 28 Aug 2012 09:37:41 +0000 (+0000) Subject: Reland regexp global optimizations. X-Git-Tag: upstream/4.7.83~16089 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=7cbca775eeb859ac71e0187846da8cefd31290f6;p=platform%2Fupstream%2Fv8.git Reland regexp global optimizations. BUG= Review URL: https://chromiumcodereview.appspot.com/10872010 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@12396 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc index 88178ff..d9e3a3d 100644 --- a/src/arm/code-stubs-arm.cc +++ b/src/arm/code-stubs-arm.cc @@ -4818,7 +4818,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1); __ add(r2, r2, Operand(2)); // r2 was a smi. // Check that the static offsets vector buffer is large enough. - __ cmp(r2, Operand(OffsetsVector::kStaticOffsetsVectorSize)); + __ cmp(r2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize)); __ b(hi, &runtime); // r2: Number of capture registers diff --git a/src/assembler.cc b/src/assembler.cc index 6dcd2a0..a58f77f 100644 --- a/src/assembler.cc +++ b/src/assembler.cc @@ -1092,7 +1092,7 @@ ExternalReference ExternalReference::re_word_character_map() { ExternalReference ExternalReference::address_of_static_offsets_vector( Isolate* isolate) { return ExternalReference( - OffsetsVector::static_offsets_vector_address(isolate)); + reinterpret_cast
(isolate->jsregexp_static_offsets_vector())); } ExternalReference ExternalReference::address_of_regexp_stack_memory_address( diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index 80b0f22..140db8a 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -3748,7 +3748,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1); __ add(edx, Immediate(2)); // edx was a smi. // Check that the static offsets vector buffer is large enough. - __ cmp(edx, OffsetsVector::kStaticOffsetsVectorSize); + __ cmp(edx, Isolate::kJSRegexpStaticOffsetsVectorSize); __ j(above, &runtime); // ecx: RegExp data (FixedArray) diff --git a/src/isolate.h b/src/isolate.h index f654459..3461f97 100644 --- a/src/isolate.h +++ b/src/isolate.h @@ -308,7 +308,7 @@ class ThreadLocalTop BASE_EMBEDDED { #define ISOLATE_INIT_ARRAY_LIST(V) \ /* SerializerDeserializer state. */ \ - V(int, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize) \ + V(int32_t, jsregexp_static_offsets_vector, kJSRegexpStaticOffsetsVectorSize) \ V(int, bad_char_shift_table, kUC16AlphabetSize) \ V(int, good_suffix_shift_table, (kBMMaxShift + 1)) \ V(int, suffix_table, (kBMMaxShift + 1)) \ diff --git a/src/jsregexp.cc b/src/jsregexp.cc index e730e14..ae25432 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -278,11 +278,12 @@ static void SetAtomLastCapture(FixedArray* array, } -Handle RegExpImpl::AtomExec(Handle re, - Handle subject, - int index, - Handle last_match_info) { - Isolate* isolate = re->GetIsolate(); +int RegExpImpl::AtomExecRaw(Handle regexp, + Handle subject, + int index, + int32_t* output, + int output_size) { + Isolate* isolate = regexp->GetIsolate(); ASSERT(0 <= index); ASSERT(index <= subject->length()); @@ -290,15 +291,16 @@ Handle RegExpImpl::AtomExec(Handle re, if (!subject->IsFlat()) FlattenString(subject); AssertNoAllocation no_heap_allocation; // ensure vectors stay valid - String* needle = 
String::cast(re->DataAt(JSRegExp::kAtomPatternIndex)); + String* needle = String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex)); int needle_len = needle->length(); ASSERT(needle->IsFlat()); + ASSERT_LT(0, needle_len); - if (needle_len != 0) { - if (index + needle_len > subject->length()) { - return isolate->factory()->null_value(); - } + if (index + needle_len > subject->length()) { + return RegExpImpl::RE_FAILURE; + } + for (int i = 0; i < output_size; i += 2) { String::FlatContent needle_content = needle->GetFlatContent(); String::FlatContent subject_content = subject->GetFlatContent(); ASSERT(needle_content.IsFlat()); @@ -323,15 +325,36 @@ Handle RegExpImpl::AtomExec(Handle re, subject_content.ToUC16Vector(), needle_content.ToUC16Vector(), index))); - if (index == -1) return isolate->factory()->null_value(); + if (index == -1) { + return i / 2; // Return number of matches. + } else { + output[i] = index; + output[i+1] = index + needle_len; + index += needle_len; + } } - ASSERT(last_match_info->HasFastObjectElements()); + return output_size / 2; +} - { - NoHandleAllocation no_handles; - FixedArray* array = FixedArray::cast(last_match_info->elements()); - SetAtomLastCapture(array, *subject, index, index + needle_len); - } + +Handle RegExpImpl::AtomExec(Handle re, + Handle subject, + int index, + Handle last_match_info) { + Isolate* isolate = re->GetIsolate(); + + static const int kNumRegisters = 2; + STATIC_ASSERT(kNumRegisters <= Isolate::kJSRegexpStaticOffsetsVectorSize); + int32_t* output_registers = isolate->jsregexp_static_offsets_vector(); + + int res = AtomExecRaw(re, subject, index, output_registers, kNumRegisters); + + if (res == RegExpImpl::RE_FAILURE) return isolate->factory()->null_value(); + + ASSERT_EQ(res, RegExpImpl::RE_SUCCESS); + NoHandleAllocation no_handles; + FixedArray* array = FixedArray::cast(last_match_info->elements()); + SetAtomLastCapture(array, *subject, output_registers[0], output_registers[1]); return last_match_info; } @@ -511,7 
+534,11 @@ int RegExpImpl::IrregexpPrepare(Handle regexp, #ifdef V8_INTERPRETED_REGEXP // Byte-code regexp needs space allocated for all its registers. - return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())); + // The result captures are copied to the start of the registers array + // if the match succeeds. This way those registers are not clobbered + // when we set the last match info from last successful match. + return IrregexpNumberOfRegisters(FixedArray::cast(regexp->data())) + + (IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())) + 1) * 2; #else // V8_INTERPRETED_REGEXP // Native regexp only needs room to output captures. Registers are handled // internally. @@ -520,27 +547,11 @@ int RegExpImpl::IrregexpPrepare(Handle regexp, } -int RegExpImpl::GlobalOffsetsVectorSize(Handle regexp, - int registers_per_match, - int* max_matches) { -#ifdef V8_INTERPRETED_REGEXP - // Global loop in interpreted regexp is not implemented. Therefore we choose - // the size of the offsets vector so that it can only store one match. 
- *max_matches = 1; - return registers_per_match; -#else // V8_INTERPRETED_REGEXP - int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize); - *max_matches = size / registers_per_match; - return size; -#endif // V8_INTERPRETED_REGEXP -} - - -int RegExpImpl::IrregexpExecRaw( - Handle regexp, - Handle subject, - int index, - Vector output) { +int RegExpImpl::IrregexpExecRaw(Handle regexp, + Handle subject, + int index, + int32_t* output, + int output_size) { Isolate* isolate = regexp->GetIsolate(); Handle irregexp(FixedArray::cast(regexp->data()), isolate); @@ -552,15 +563,19 @@ int RegExpImpl::IrregexpExecRaw( bool is_ascii = subject->IsAsciiRepresentationUnderneath(); #ifndef V8_INTERPRETED_REGEXP - ASSERT(output.length() >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); + ASSERT(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2); do { EnsureCompiledIrregexp(regexp, subject, is_ascii); Handle code(IrregexpNativeCode(*irregexp, is_ascii), isolate); + // The stack is used to allocate registers for the compiled regexp code. + // This means that in case of failure, the output registers array is left + // untouched and contains the capture results from the previous successful + // match. We can use that to set the last match info lazily. NativeRegExpMacroAssembler::Result res = NativeRegExpMacroAssembler::Match(code, subject, - output.start(), - output.length(), + output, + output_size, index, isolate); if (res != NativeRegExpMacroAssembler::RETRY) { @@ -587,22 +602,29 @@ int RegExpImpl::IrregexpExecRaw( return RE_EXCEPTION; #else // V8_INTERPRETED_REGEXP - ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp)); + ASSERT(output_size >= IrregexpNumberOfRegisters(*irregexp)); // We must have done EnsureCompiledIrregexp, so we can get the number of // registers. 
- int* register_vector = output.start(); int number_of_capture_registers = (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; + int32_t* raw_output = &output[number_of_capture_registers]; + // We do not touch the actual capture result registers until we know there + // has been a match so that we can use those capture results to set the + // last match info. for (int i = number_of_capture_registers - 1; i >= 0; i--) { - register_vector[i] = -1; + raw_output[i] = -1; } Handle byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate); IrregexpResult result = IrregexpInterpreter::Match(isolate, byte_codes, subject, - register_vector, + raw_output, index); + if (result == RE_SUCCESS) { + // Copy capture results to the start of the registers array. + memcpy(output, raw_output, number_of_capture_registers * sizeof(int32_t)); + } if (result == RE_EXCEPTION) { ASSERT(!isolate->has_pending_exception()); isolate->StackOverflow(); @@ -612,50 +634,44 @@ int RegExpImpl::IrregexpExecRaw( } -Handle RegExpImpl::IrregexpExec(Handle jsregexp, +Handle RegExpImpl::IrregexpExec(Handle regexp, Handle subject, int previous_index, Handle last_match_info) { - Isolate* isolate = jsregexp->GetIsolate(); - ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP); + Isolate* isolate = regexp->GetIsolate(); + ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); // Prepare space for the return values. -#ifdef V8_INTERPRETED_REGEXP -#ifdef DEBUG +#if defined(V8_INTERPRETED_REGEXP) && defined(DEBUG) if (FLAG_trace_regexp_bytecodes) { - String* pattern = jsregexp->Pattern(); + String* pattern = regexp->Pattern(); PrintF("\n\nRegexp match: /%s/\n\n", *(pattern->ToCString())); PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); } #endif -#endif - int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject); + int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject); if (required_registers < 0) { // Compiling failed with an exception. 
ASSERT(isolate->has_pending_exception()); return Handle::null(); } - OffsetsVector registers(required_registers, isolate); + int32_t* output_registers = NULL; + if (required_registers > Isolate::kJSRegexpStaticOffsetsVectorSize) { + output_registers = NewArray(required_registers); + } + SmartArrayPointer auto_release(output_registers); + if (output_registers == NULL) { + output_registers = isolate->jsregexp_static_offsets_vector(); + } - int res = RegExpImpl::IrregexpExecRaw(jsregexp, subject, previous_index, - Vector(registers.vector(), - registers.length())); + int res = RegExpImpl::IrregexpExecRaw( + regexp, subject, previous_index, output_registers, required_registers); if (res == RE_SUCCESS) { - int capture_register_count = - (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2; - last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); - AssertNoAllocation no_gc; - int* register_vector = registers.vector(); - FixedArray* array = FixedArray::cast(last_match_info->elements()); - for (int i = 0; i < capture_register_count; i += 2) { - SetCapture(array, i, register_vector[i]); - SetCapture(array, i + 1, register_vector[i + 1]); - } - SetLastCaptureCount(array, capture_register_count); - SetLastSubject(array, *subject); - SetLastInput(array, *subject); - return last_match_info; + int capture_count = + IrregexpNumberOfCaptures(FixedArray::cast(regexp->data())); + return SetLastMatchInfo( + last_match_info, subject, capture_count, output_registers); } if (res == RE_EXCEPTION) { ASSERT(isolate->has_pending_exception()); @@ -666,6 +682,145 @@ Handle RegExpImpl::IrregexpExec(Handle jsregexp, } +Handle RegExpImpl::SetLastMatchInfo(Handle last_match_info, + Handle subject, + int capture_count, + int32_t* match) { + int capture_register_count = (capture_count + 1) * 2; + last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); + AssertNoAllocation no_gc; + FixedArray* array = 
FixedArray::cast(last_match_info->elements()); + if (match != NULL) { + for (int i = 0; i < capture_register_count; i += 2) { + SetCapture(array, i, match[i]); + SetCapture(array, i + 1, match[i + 1]); + } + } + SetLastCaptureCount(array, capture_register_count); + SetLastSubject(array, *subject); + SetLastInput(array, *subject); + return last_match_info; +} + + +RegExpImpl::GlobalCache::GlobalCache(Handle regexp, + Handle subject, + bool is_global, + Isolate* isolate) { +#ifdef V8_INTERPRETED_REGEXP + bool interpreted = true; +#else + bool interpreted = false; +#endif // V8_INTERPRETED_REGEXP + + regexp_ = regexp; + subject_ = subject; + + if (regexp_->TypeTag() == JSRegExp::ATOM) { + static const int kAtomRegistersPerMatch = 2; + registers_per_match_ = kAtomRegistersPerMatch; + // There is no distinction between interpreted and native for atom regexps. + interpreted = false; + } else { + registers_per_match_ = RegExpImpl::IrregexpPrepare(regexp_, subject_); + if (registers_per_match_ < 0) { + num_matches_ = -1; // Signal exception. + return; + } + } + + if (is_global && !interpreted) { + register_array_size_ = + Max(registers_per_match_, Isolate::kJSRegexpStaticOffsetsVectorSize); + max_matches_ = register_array_size_ / registers_per_match_; + } else { + // Global loop in interpreted regexp is not implemented. We choose + // the size of the offsets vector so that it can only store one match. + register_array_size_ = registers_per_match_; + max_matches_ = 1; + } + + if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { + register_array_ = NewArray(register_array_size_); + } else { + register_array_ = isolate->jsregexp_static_offsets_vector(); + } + + // Set state so that fetching the results the first time triggers a call + // to the compiled regexp. + current_match_index_ = max_matches_ - 1; + num_matches_ = max_matches_; + ASSERT(registers_per_match_ >= 2); // Each match has at least one capture. 
+ ASSERT_GE(register_array_size_, registers_per_match_); + int32_t* last_match = + &register_array_[current_match_index_ * registers_per_match_]; + last_match[0] = -1; + last_match[1] = 0; +} + + +RegExpImpl::GlobalCache::~GlobalCache() { + // Deallocate the register array if we allocated it in the constructor + // (as opposed to using the existing jsregexp_static_offsets_vector). + if (register_array_size_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { + DeleteArray(register_array_); + } +} + + +int32_t* RegExpImpl::GlobalCache::FetchNext() { + current_match_index_++; + if (current_match_index_ >= num_matches_) { + // Current batch of results exhausted. + // Fail if last batch was not even fully filled. + if (num_matches_ < max_matches_) { + num_matches_ = 0; // Signal failed match. + return NULL; + } + + int32_t* last_match = + &register_array_[(current_match_index_ - 1) * registers_per_match_]; + int last_end_index = last_match[1]; + + if (regexp_->TypeTag() == JSRegExp::ATOM) { + num_matches_ = RegExpImpl::AtomExecRaw(regexp_, + subject_, + last_end_index, + register_array_, + register_array_size_); + } else { + int last_start_index = last_match[0]; + if (last_start_index == last_end_index) last_end_index++; + if (last_end_index > subject_->length()) { + num_matches_ = 0; // Signal failed match. + return NULL; + } + num_matches_ = RegExpImpl::IrregexpExecRaw(regexp_, + subject_, + last_end_index, + register_array_, + register_array_size_); + } + + if (num_matches_ <= 0) return NULL; + current_match_index_ = 0; + return register_array_; + } else { + return &register_array_[current_match_index_ * registers_per_match_]; + } +} + + +int32_t* RegExpImpl::GlobalCache::LastSuccessfulMatch() { + int index = current_match_index_ * registers_per_match_; + if (num_matches_ == 0) { + // After a failed match we shift back by one result. 
+ index -= registers_per_match_; + } + return &register_array_[index]; +} + + // ------------------------------------------------------------------- // Implementation of the Irregexp regular expression engine. // diff --git a/src/jsregexp.h b/src/jsregexp.h index 9a84237..96825ce 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -93,6 +93,14 @@ class RegExpImpl { JSRegExp::Flags flags, Handle match_pattern); + + static int AtomExecRaw(Handle regexp, + Handle subject, + int index, + int32_t* output, + int output_size); + + static Handle AtomExec(Handle regexp, Handle subject, int index, @@ -105,17 +113,11 @@ class RegExpImpl { // This ensures that the regexp is compiled for the subject, and that // the subject is flat. // Returns the number of integer spaces required by IrregexpExecOnce - // as its "registers" argument. If the regexp cannot be compiled, + // as its "registers" argument. If the regexp cannot be compiled, // an exception is set as pending, and this function returns negative. static int IrregexpPrepare(Handle regexp, Handle subject); - // Calculate the size of offsets vector for the case of global regexp - // and the number of matches this vector is able to store. - static int GlobalOffsetsVectorSize(Handle regexp, - int registers_per_match, - int* max_matches); - // Execute a regular expression on the subject, starting from index. // If matching succeeds, return the number of matches. This can be larger // than one in the case of global regular expressions. @@ -125,17 +127,57 @@ class RegExpImpl { static int IrregexpExecRaw(Handle regexp, Handle subject, int index, - Vector registers); + int32_t* output, + int output_size); // Execute an Irregexp bytecode pattern. // On a successful match, the result is a JSArray containing - // captured positions. On a failure, the result is the null value. + // captured positions. On a failure, the result is the null value. // Returns an empty handle in case of an exception. 
static Handle IrregexpExec(Handle regexp, Handle subject, int index, Handle lastMatchInfo); + // Set last match info. If match is NULL, then setting captures is omitted. + static Handle SetLastMatchInfo(Handle last_match_info, + Handle subject, + int capture_count, + int32_t* match); + + + class GlobalCache { + public: + GlobalCache(Handle regexp, + Handle subject, + bool is_global, + Isolate* isolate); + + ~GlobalCache(); + + // Fetch the next entry in the cache for global regexp match results. + // This does not set the last match info. Upon failure, NULL is returned. + // The cause can be checked with Result(). The previous + // result is still in available in memory when a failure happens. + int32_t* FetchNext(); + + int32_t* LastSuccessfulMatch(); + + inline bool HasException() { return num_matches_ < 0; } + + private: + int num_matches_; + int max_matches_; + int current_match_index_; + int registers_per_match_; + // Pointer to the last set of captures. + int32_t* register_array_; + int register_array_size_; + Handle regexp_; + Handle subject_; + }; + + // Array index in the lastMatchInfo array. static const int kLastCaptureCount = 0; static const int kLastSubject = 1; @@ -195,30 +237,10 @@ class RegExpImpl { static const int kRegWxpCompiledLimit = 1 * MB; private: - static String* last_ascii_string_; - static String* two_byte_cached_string_; - static bool CompileIrregexp( Handle re, Handle sample_subject, bool is_ascii); static inline bool EnsureCompiledIrregexp( Handle re, Handle sample_subject, bool is_ascii); - - - // Set the subject cache. The previous string buffer is not deleted, so the - // caller should ensure that it doesn't leak. - static void SetSubjectCache(String* subject, - char* utf8_subject, - int uft8_length, - int character_position, - int utf8_position); - - // A one element cache of the last utf8_subject string and its length. The - // subject JS String object is cached in the heap. 
We also cache a - // translation between position and utf8 position. - static char* utf8_subject_cache_; - static int utf8_length_cache_; - static int utf8_position_; - static int character_position_; }; @@ -1622,40 +1644,6 @@ class RegExpEngine: public AllStatic { }; -class OffsetsVector { - public: - inline OffsetsVector(int num_registers, Isolate* isolate) - : offsets_vector_length_(num_registers) { - if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { - vector_ = NewArray(offsets_vector_length_); - } else { - vector_ = isolate->jsregexp_static_offsets_vector(); - } - } - inline ~OffsetsVector() { - if (offsets_vector_length_ > Isolate::kJSRegexpStaticOffsetsVectorSize) { - DeleteArray(vector_); - vector_ = NULL; - } - } - inline int* vector() { return vector_; } - inline int length() { return offsets_vector_length_; } - - static const int kStaticOffsetsVectorSize = - Isolate::kJSRegexpStaticOffsetsVectorSize; - - private: - static Address static_offsets_vector_address(Isolate* isolate) { - return reinterpret_cast
(isolate->jsregexp_static_offsets_vector()); - } - - int* vector_; - int offsets_vector_length_; - - friend class ExternalReference; -}; - - } } // namespace v8::internal #endif // V8_JSREGEXP_H_ diff --git a/src/mips/code-stubs-mips.cc b/src/mips/code-stubs-mips.cc index abc82a7..a5c80b8 100644 --- a/src/mips/code-stubs-mips.cc +++ b/src/mips/code-stubs-mips.cc @@ -4977,7 +4977,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1); __ Addu(a2, a2, Operand(2)); // a2 was a smi. // Check that the static offsets vector buffer is large enough. - __ Branch(&runtime, hi, a2, Operand(OffsetsVector::kStaticOffsetsVectorSize)); + __ Branch( + &runtime, hi, a2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize)); // a2: Number of capture registers // regexp_data: RegExp data (FixedArray) diff --git a/src/runtime.cc b/src/runtime.cc index 5b0bfc3..466ee32 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -2574,28 +2574,24 @@ class ReplacementStringBuilder { class CompiledReplacement { public: explicit CompiledReplacement(Zone* zone) - : parts_(1, zone), replacement_substrings_(0, zone), - simple_hint_(false), - zone_(zone) {} + : parts_(1, zone), replacement_substrings_(0, zone), zone_(zone) {} - void Compile(Handle replacement, + // Return whether the replacement is simple. + bool Compile(Handle replacement, int capture_count, int subject_length); + // Use Apply only if Compile returned false. void Apply(ReplacementStringBuilder* builder, int match_from, int match_to, - Handle last_match_info); + int32_t* match); // Number of distinct parts of the replacement pattern. 
int parts() { return parts_.length(); } - bool simple_hint() { - return simple_hint_; - } - Zone* zone() const { return zone_; } private: @@ -2656,11 +2652,11 @@ class CompiledReplacement { }; template - static bool ParseReplacementPattern(ZoneList* parts, - Vector characters, - int capture_count, - int subject_length, - Zone* zone) { + bool ParseReplacementPattern(ZoneList* parts, + Vector characters, + int capture_count, + int subject_length, + Zone* zone) { int length = characters.length(); int last = 0; for (int i = 0; i < length; i++) { @@ -2754,7 +2750,7 @@ class CompiledReplacement { } if (length > last) { if (last == 0) { - parts->Add(ReplacementPart::ReplacementString(), zone); + // Replacement is simple. Do not use Apply to do the replacement. return true; } else { parts->Add(ReplacementPart::ReplacementSubString(last, length), zone); @@ -2765,33 +2761,35 @@ class CompiledReplacement { ZoneList parts_; ZoneList > replacement_substrings_; - bool simple_hint_; Zone* zone_; }; -void CompiledReplacement::Compile(Handle replacement, +bool CompiledReplacement::Compile(Handle replacement, int capture_count, int subject_length) { { AssertNoAllocation no_alloc; String::FlatContent content = replacement->GetFlatContent(); ASSERT(content.IsFlat()); + bool simple = false; if (content.IsAscii()) { - simple_hint_ = ParseReplacementPattern(&parts_, - content.ToAsciiVector(), - capture_count, - subject_length, - zone()); + simple = ParseReplacementPattern(&parts_, + content.ToAsciiVector(), + capture_count, + subject_length, + zone()); } else { ASSERT(content.IsTwoByte()); - simple_hint_ = ParseReplacementPattern(&parts_, - content.ToUC16Vector(), - capture_count, - subject_length, - zone()); + simple = ParseReplacementPattern(&parts_, + content.ToUC16Vector(), + capture_count, + subject_length, + zone()); } + if (simple) return true; } + Isolate* isolate = replacement->GetIsolate(); // Find substrings of replacement string and create them as String objects. 
int substring_index = 0; @@ -2811,13 +2809,15 @@ void CompiledReplacement::Compile(Handle replacement, substring_index++; } } + return false; } void CompiledReplacement::Apply(ReplacementStringBuilder* builder, int match_from, int match_to, - Handle last_match_info) { + int32_t* match) { + ASSERT_LT(0, parts_.length()); for (int i = 0, n = parts_.length(); i < n; i++) { ReplacementPart part = parts_[i]; switch (part.tag) { @@ -2833,9 +2833,8 @@ void CompiledReplacement::Apply(ReplacementStringBuilder* builder, } case SUBJECT_CAPTURE: { int capture = part.data; - FixedArray* match_info = FixedArray::cast(last_match_info->elements()); - int from = RegExpImpl::GetCapture(match_info, capture * 2); - int to = RegExpImpl::GetCapture(match_info, capture * 2 + 1); + int from = match[capture * 2]; + int to = match[capture * 2 + 1]; if (from >= 0 && to > from) { builder->AddSubjectSlice(from, to); } @@ -2957,85 +2956,19 @@ void FindStringIndicesDispatch(Isolate* isolate, } -// Two smis before and after the match, for very long strings. -const int kMaxBuilderEntriesPerRegExpMatch = 5; - - -static void SetLastMatchInfoNoCaptures(Handle subject, - Handle last_match_info, - int match_start, - int match_end) { - // Fill last_match_info with a single capture. 
- last_match_info->EnsureSize(2 + RegExpImpl::kLastMatchOverhead); - AssertNoAllocation no_gc; - FixedArray* elements = FixedArray::cast(last_match_info->elements()); - RegExpImpl::SetLastCaptureCount(elements, 2); - RegExpImpl::SetLastInput(elements, *subject); - RegExpImpl::SetLastSubject(elements, *subject); - RegExpImpl::SetCapture(elements, 0, match_start); - RegExpImpl::SetCapture(elements, 1, match_end); -} - - -template -static bool SearchStringMultiple(Isolate* isolate, - Vector subject, - Vector pattern, - String* pattern_string, - FixedArrayBuilder* builder, - int* match_pos) { - int pos = *match_pos; - int subject_length = subject.length(); - int pattern_length = pattern.length(); - int max_search_start = subject_length - pattern_length; - StringSearch search(isolate, pattern); - while (pos <= max_search_start) { - if (!builder->HasCapacity(kMaxBuilderEntriesPerRegExpMatch)) { - *match_pos = pos; - return false; - } - // Position of end of previous match. - int match_end = pos + pattern_length; - int new_pos = search.Search(subject, match_end); - if (new_pos >= 0) { - // A match. 
- if (new_pos > match_end) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - new_pos); - } - pos = new_pos; - builder->Add(pattern_string); - } else { - break; - } - } - - if (pos < max_search_start) { - ReplacementStringBuilder::AddSubjectSlice(builder, - pos + pattern_length, - subject_length); - } - *match_pos = pos; - return true; -} - - - - template MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( Isolate* isolate, Handle subject, Handle pattern_regexp, Handle replacement, - Handle last_match_info, - Zone* zone) { + Handle last_match_info) { ASSERT(subject->IsFlat()); ASSERT(replacement->IsFlat()); - ZoneScope zone_space(isolate->runtime_zone(), DELETE_ON_EXIT); - ZoneList indices(8, isolate->runtime_zone()); + Zone* zone = isolate->runtime_zone(); + ZoneScope zone_space(zone, DELETE_ON_EXIT); + ZoneList indices(8, zone); ASSERT_EQ(JSRegExp::ATOM, pattern_regexp->TypeTag()); String* pattern = String::cast(pattern_regexp->DataAt(JSRegExp::kAtomPatternIndex)); @@ -3043,8 +2976,8 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( int pattern_len = pattern->length(); int replacement_len = replacement->length(); - FindStringIndicesDispatch(isolate, *subject, pattern, &indices, 0xffffffff, - zone); + FindStringIndicesDispatch( + isolate, *subject, pattern, &indices, 0xffffffff, zone); int matches = indices.length(); if (matches == 0) return *subject; @@ -3099,10 +3032,9 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( subject_len); } - SetLastMatchInfoNoCaptures(subject, - last_match_info, - indices.at(matches - 1), - indices.at(matches - 1) + pattern_len); + int32_t match_indices[] = { indices.at(matches - 1), + indices.at(matches - 1) + pattern_len }; + RegExpImpl::SetLastMatchInfo(last_match_info, subject, 0, match_indices); return *result; } @@ -3110,139 +3042,101 @@ MUST_USE_RESULT static MaybeObject* StringReplaceAtomRegExpWithString( MUST_USE_RESULT static MaybeObject* 
StringReplaceRegExpWithString( Isolate* isolate, - String* subject, - JSRegExp* regexp, - String* replacement, - JSArray* last_match_info, - Zone* zone) { + Handle subject, + Handle regexp, + Handle replacement, + Handle last_match_info) { ASSERT(subject->IsFlat()); ASSERT(replacement->IsFlat()); - HandleScope handles(isolate); - - int length = subject->length(); - Handle subject_handle(subject); - Handle regexp_handle(regexp); - Handle replacement_handle(replacement); - Handle last_match_info_handle(last_match_info); - Handle match = RegExpImpl::Exec(regexp_handle, - subject_handle, - 0, - last_match_info_handle); - if (match.is_null()) { - return Failure::Exception(); - } - if (match->IsNull()) { - return *subject_handle; - } - - int capture_count = regexp_handle->CaptureCount(); + bool is_global = regexp->GetFlags().is_global(); + int capture_count = regexp->CaptureCount(); + int subject_length = subject->length(); // CompiledReplacement uses zone allocation. + Zone* zone = isolate->runtime_zone(); ZoneScope zonescope(zone, DELETE_ON_EXIT); CompiledReplacement compiled_replacement(zone); - - compiled_replacement.Compile(replacement_handle, - capture_count, - length); - - bool is_global = regexp_handle->GetFlags().is_global(); + bool simple_replace = compiled_replacement.Compile(replacement, + capture_count, + subject_length); // Shortcut for simple non-regexp global replacements if (is_global && - regexp_handle->TypeTag() == JSRegExp::ATOM && - compiled_replacement.simple_hint()) { - if (subject_handle->HasOnlyAsciiChars() && - replacement_handle->HasOnlyAsciiChars()) { + regexp->TypeTag() == JSRegExp::ATOM && + simple_replace) { + if (subject->HasOnlyAsciiChars() && replacement->HasOnlyAsciiChars()) { return StringReplaceAtomRegExpWithString( - isolate, - subject_handle, - regexp_handle, - replacement_handle, - last_match_info_handle, - zone); - } else { + isolate, subject, regexp, replacement, last_match_info); + } else { return 
StringReplaceAtomRegExpWithString( - isolate, - subject_handle, - regexp_handle, - replacement_handle, - last_match_info_handle, - zone); + isolate, subject, regexp, replacement, last_match_info); } } + RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate); + if (global_cache.HasException()) return Failure::Exception(); + + int32_t* current_match = global_cache.FetchNext(); + if (current_match == NULL) { + if (global_cache.HasException()) return Failure::Exception(); + return *subject; + } + // Guessing the number of parts that the final result string is built // from. Global regexps can match any number of times, so we guess // conservatively. int expected_parts = (compiled_replacement.parts() + 1) * (is_global ? 4 : 1) + 1; ReplacementStringBuilder builder(isolate->heap(), - subject_handle, + subject, expected_parts); - // Index of end of last match. - int prev = 0; - - // Number of parts added by compiled replacement plus preceeding // string and possibly suffix after last match. It is possible for // all components to use two elements when encoded as two smis. const int parts_added_per_loop = 2 * (compiled_replacement.parts() + 2); - bool matched = true; + + int prev = 0; + do { - ASSERT(last_match_info_handle->HasFastObjectElements()); - // Increase the capacity of the builder before entering local handle-scope, - // so its internal buffer can safely allocate a new handle if it grows. 
builder.EnsureCapacity(parts_added_per_loop); - HandleScope loop_scope(isolate); - int start, end; - { - AssertNoAllocation match_info_array_is_not_in_a_handle; - FixedArray* match_info_array = - FixedArray::cast(last_match_info_handle->elements()); - - ASSERT_EQ(capture_count * 2 + 2, - RegExpImpl::GetLastCaptureCount(match_info_array)); - start = RegExpImpl::GetCapture(match_info_array, 0); - end = RegExpImpl::GetCapture(match_info_array, 1); - } + int start = current_match[0]; + int end = current_match[1]; if (prev < start) { builder.AddSubjectSlice(prev, start); } - compiled_replacement.Apply(&builder, - start, - end, - last_match_info_handle); + if (simple_replace) { + builder.AddString(replacement); + } else { + compiled_replacement.Apply(&builder, + start, + end, + current_match); + } prev = end; // Only continue checking for global regexps. if (!is_global) break; - // Continue from where the match ended, unless it was an empty match. - int next = end; - if (start == end) { - next = end + 1; - if (next > length) break; - } + current_match = global_cache.FetchNext(); + } while (current_match != NULL); - match = RegExpImpl::Exec(regexp_handle, - subject_handle, - next, - last_match_info_handle); - if (match.is_null()) { - return Failure::Exception(); - } - matched = !match->IsNull(); - } while (matched); + if (global_cache.HasException()) return Failure::Exception(); - if (prev < length) { - builder.AddSubjectSlice(prev, length); + if (prev < subject_length) { + builder.EnsureCapacity(2); + builder.AddSubjectSlice(prev, subject_length); } + RegExpImpl::SetLastMatchInfo(last_match_info, + subject, + capture_count, + global_cache.LastSuccessfulMatch()); + return *(builder.ToString()); } @@ -3250,69 +3144,51 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithString( template MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( Isolate* isolate, - String* subject, - JSRegExp* regexp, - JSArray* last_match_info, - Zone* zone) { + 
Handle subject, + Handle regexp, + Handle last_match_info) { ASSERT(subject->IsFlat()); - HandleScope handles(isolate); - - Handle subject_handle(subject); - Handle regexp_handle(regexp); - Handle last_match_info_handle(last_match_info); + bool is_global = regexp->GetFlags().is_global(); // Shortcut for simple non-regexp global replacements - if (regexp_handle->GetFlags().is_global() && - regexp_handle->TypeTag() == JSRegExp::ATOM) { - Handle empty_string_handle(HEAP->empty_string()); - if (subject_handle->HasOnlyAsciiChars()) { + if (is_global && + regexp->TypeTag() == JSRegExp::ATOM) { + Handle empty_string(HEAP->empty_string()); + if (subject->HasOnlyAsciiChars()) { return StringReplaceAtomRegExpWithString( isolate, - subject_handle, - regexp_handle, - empty_string_handle, - last_match_info_handle, - zone); + subject, + regexp, + empty_string, + last_match_info); } else { return StringReplaceAtomRegExpWithString( isolate, - subject_handle, - regexp_handle, - empty_string_handle, - last_match_info_handle, - zone); + subject, + regexp, + empty_string, + last_match_info); } } - Handle match = RegExpImpl::Exec(regexp_handle, - subject_handle, - 0, - last_match_info_handle); - if (match.is_null()) return Failure::Exception(); - if (match->IsNull()) return *subject_handle; - - ASSERT(last_match_info_handle->HasFastObjectElements()); + RegExpImpl::GlobalCache global_cache(regexp, subject, is_global, isolate); + if (global_cache.HasException()) return Failure::Exception(); - int start, end; - { - AssertNoAllocation match_info_array_is_not_in_a_handle; - FixedArray* match_info_array = - FixedArray::cast(last_match_info_handle->elements()); - - start = RegExpImpl::GetCapture(match_info_array, 0); - end = RegExpImpl::GetCapture(match_info_array, 1); + int32_t* current_match = global_cache.FetchNext(); + if (current_match == NULL) { + if (global_cache.HasException()) return Failure::Exception(); + return *subject; } - bool global = regexp_handle->GetFlags().is_global(); + 
int start = current_match[0]; + int end = current_match[1]; + int capture_count = regexp->CaptureCount(); + int subject_length = subject->length(); - if (start == end && !global) return *subject_handle; + int new_length = subject_length - (end - start); + if (new_length == 0) return isolate->heap()->empty_string(); - int length = subject_handle->length(); - int new_length = length - (end - start); - if (new_length == 0) { - return isolate->heap()->empty_string(); - } Handle answer; if (ResultSeqString::kHasAsciiEncoding) { answer = Handle::cast( @@ -3322,73 +3198,55 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( isolate->factory()->NewRawTwoByteString(new_length)); } - // If the regexp isn't global, only match once. - if (!global) { - if (start > 0) { - String::WriteToFlat(*subject_handle, - answer->GetChars(), - 0, - start); - } - if (end < length) { - String::WriteToFlat(*subject_handle, - answer->GetChars() + start, - end, - length); + if (!is_global) { + RegExpImpl::SetLastMatchInfo( + last_match_info, subject, capture_count, current_match); + if (start == end) { + return *subject; + } else { + if (start > 0) { + String::WriteToFlat(*subject, answer->GetChars(), 0, start); + } + if (end < subject_length) { + String::WriteToFlat( + *subject, answer->GetChars() + start, end, subject_length); + } + return *answer; } - return *answer; } - int prev = 0; // Index of end of last match. - int next = 0; // Start of next search (prev unless last match was empty). + int prev = 0; int position = 0; do { + start = current_match[0]; + end = current_match[1]; if (prev < start) { // Add substring subject[prev;start] to answer string. - String::WriteToFlat(*subject_handle, - answer->GetChars() + position, - prev, - start); + String::WriteToFlat( + *subject, answer->GetChars() + position, prev, start); position += start - prev; } prev = end; - next = end; - // Continue from where the match ended, unless it was an empty match. 
- if (start == end) { - next++; - if (next > length) break; - } - match = RegExpImpl::Exec(regexp_handle, - subject_handle, - next, - last_match_info_handle); - if (match.is_null()) return Failure::Exception(); - if (match->IsNull()) break; - - ASSERT(last_match_info_handle->HasFastObjectElements()); - HandleScope loop_scope(isolate); - { - AssertNoAllocation match_info_array_is_not_in_a_handle; - FixedArray* match_info_array = - FixedArray::cast(last_match_info_handle->elements()); - start = RegExpImpl::GetCapture(match_info_array, 0); - end = RegExpImpl::GetCapture(match_info_array, 1); - } - } while (true); - if (prev < length) { + current_match = global_cache.FetchNext(); + } while (current_match != NULL); + + if (global_cache.HasException()) return Failure::Exception(); + + RegExpImpl::SetLastMatchInfo(last_match_info, + subject, + capture_count, + global_cache.LastSuccessfulMatch()); + + if (prev < subject_length) { // Add substring subject[prev;length] to answer string. - String::WriteToFlat(*subject_handle, - answer->GetChars() + position, - prev, - length); - position += length - prev; + String::WriteToFlat( + *subject, answer->GetChars() + position, prev, subject_length); + position += subject_length - prev; } - if (position == 0) { - return isolate->heap()->empty_string(); - } + if (position == 0) return isolate->heap()->empty_string(); // Shorten string and fill int string_size = ResultSeqString::SizeFor(position); @@ -3411,50 +3269,31 @@ MUST_USE_RESULT static MaybeObject* StringReplaceRegExpWithEmptyString( RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) { ASSERT(args.length() == 4); - CONVERT_ARG_CHECKED(String, subject, 0); - if (!subject->IsFlat()) { - Object* flat_subject; - { MaybeObject* maybe_flat_subject = subject->TryFlatten(); - if (!maybe_flat_subject->ToObject(&flat_subject)) { - return maybe_flat_subject; - } - } - subject = String::cast(flat_subject); - } + HandleScope scope(isolate); - CONVERT_ARG_CHECKED(String, 
replacement, 2); - if (!replacement->IsFlat()) { - Object* flat_replacement; - { MaybeObject* maybe_flat_replacement = replacement->TryFlatten(); - if (!maybe_flat_replacement->ToObject(&flat_replacement)) { - return maybe_flat_replacement; - } - } - replacement = String::cast(flat_replacement); - } + CONVERT_ARG_HANDLE_CHECKED(String, subject, 0); + CONVERT_ARG_HANDLE_CHECKED(String, replacement, 2); + CONVERT_ARG_HANDLE_CHECKED(JSRegExp, regexp, 1); + CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); - CONVERT_ARG_CHECKED(JSRegExp, regexp, 1); - CONVERT_ARG_CHECKED(JSArray, last_match_info, 3); + if (!subject->IsFlat()) subject = FlattenGetString(subject); + + if (!replacement->IsFlat()) replacement = FlattenGetString(replacement); ASSERT(last_match_info->HasFastObjectElements()); - Zone* zone = isolate->runtime_zone(); if (replacement->length() == 0) { if (subject->HasOnlyAsciiChars()) { return StringReplaceRegExpWithEmptyString( - isolate, subject, regexp, last_match_info, zone); + isolate, subject, regexp, last_match_info); } else { return StringReplaceRegExpWithEmptyString( - isolate, subject, regexp, last_match_info, zone); + isolate, subject, regexp, last_match_info); } } - return StringReplaceRegExpWithString(isolate, - subject, - regexp, - replacement, - last_match_info, - zone); + return StringReplaceRegExpWithString( + isolate, subject, regexp, replacement, last_match_info); } @@ -3777,46 +3616,45 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) { CONVERT_ARG_HANDLE_CHECKED(JSArray, regexp_info, 2); HandleScope handles; - Handle match = RegExpImpl::Exec(regexp, subject, 0, regexp_info); + RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); + if (global_cache.HasException()) return Failure::Exception(); - if (match.is_null()) { - return Failure::Exception(); - } - if (match->IsNull()) { - return isolate->heap()->null_value(); - } - int length = subject->length(); + int capture_count = regexp->CaptureCount(); Zone* zone = 
isolate->runtime_zone(); ZoneScope zone_space(zone, DELETE_ON_EXIT); ZoneList offsets(8, zone); - int start; - int end; - do { - { - AssertNoAllocation no_alloc; - FixedArray* elements = FixedArray::cast(regexp_info->elements()); - start = Smi::cast(elements->get(RegExpImpl::kFirstCapture))->value(); - end = Smi::cast(elements->get(RegExpImpl::kFirstCapture + 1))->value(); - } - offsets.Add(start, zone); - offsets.Add(end, zone); - if (start == end) if (++end > length) break; - match = RegExpImpl::Exec(regexp, subject, end, regexp_info); - if (match.is_null()) { - return Failure::Exception(); - } - } while (!match->IsNull()); + + while (true) { + int32_t* match = global_cache.FetchNext(); + if (match == NULL) break; + offsets.Add(match[0], zone); // start + offsets.Add(match[1], zone); // end + } + + if (global_cache.HasException()) return Failure::Exception(); + + if (offsets.length() == 0) { + // Not a single match. + return isolate->heap()->null_value(); + } + + RegExpImpl::SetLastMatchInfo(regexp_info, + subject, + capture_count, + global_cache.LastSuccessfulMatch()); + int matches = offsets.length() / 2; Handle elements = isolate->factory()->NewFixedArray(matches); - Handle substring = isolate->factory()-> - NewSubString(subject, offsets.at(0), offsets.at(1)); + Handle substring = + isolate->factory()->NewSubString(subject, offsets.at(0), offsets.at(1)); elements->set(0, *substring); - for (int i = 1; i < matches ; i++) { + for (int i = 1; i < matches; i++) { + HandleScope temp_scope(isolate); int from = offsets.at(i * 2); int to = offsets.at(i * 2 + 1); - Handle substring = isolate->factory()-> - NewProperSubString(subject, from, to); + Handle substring = + isolate->factory()->NewProperSubString(subject, from, to); elements->set(i, *substring); } Handle result = isolate->factory()->NewJSArrayWithElements(elements); @@ -3825,294 +3663,104 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringMatch) { } -static bool SearchStringMultiple(Isolate* isolate, - Handle 
subject, - Handle pattern, - Handle last_match_info, - FixedArrayBuilder* builder) { - ASSERT(subject->IsFlat()); - ASSERT(pattern->IsFlat()); - - // Treating as if a previous match was before first character. - int match_pos = -pattern->length(); - - for (;;) { // Break when search complete. - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); - AssertNoAllocation no_gc; - String::FlatContent subject_content = subject->GetFlatContent(); - String::FlatContent pattern_content = pattern->GetFlatContent(); - if (subject_content.IsAscii()) { - Vector subject_vector = subject_content.ToAsciiVector(); - if (pattern_content.IsAscii()) { - if (SearchStringMultiple(isolate, - subject_vector, - pattern_content.ToAsciiVector(), - *pattern, - builder, - &match_pos)) break; - } else { - if (SearchStringMultiple(isolate, - subject_vector, - pattern_content.ToUC16Vector(), - *pattern, - builder, - &match_pos)) break; - } - } else { - Vector subject_vector = subject_content.ToUC16Vector(); - if (pattern_content.IsAscii()) { - if (SearchStringMultiple(isolate, - subject_vector, - pattern_content.ToAsciiVector(), - *pattern, - builder, - &match_pos)) break; - } else { - if (SearchStringMultiple(isolate, - subject_vector, - pattern_content.ToUC16Vector(), - *pattern, - builder, - &match_pos)) break; - } - } - } - - if (match_pos >= 0) { - SetLastMatchInfoNoCaptures(subject, - last_match_info, - match_pos, - match_pos + pattern->length()); - return true; - } - return false; // No matches at all. 
-} - - -static int SearchRegExpNoCaptureMultiple( - Isolate* isolate, - Handle subject, - Handle regexp, - Handle last_match_array, - FixedArrayBuilder* builder) { - ASSERT(subject->IsFlat()); - ASSERT(regexp->CaptureCount() == 0); - int match_start = -1; - int match_end = 0; - int pos = 0; - int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject); - if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION; - - int max_matches; - int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp, - registers_per_match, - &max_matches); - OffsetsVector registers(num_registers, isolate); - Vector register_vector(registers.vector(), registers.length()); - int subject_length = subject->length(); - bool first = true; - for (;;) { // Break on failure, return on exception. - int num_matches = RegExpImpl::IrregexpExecRaw(regexp, - subject, - pos, - register_vector); - if (num_matches > 0) { - for (int match_index = 0; match_index < num_matches; match_index++) { - int32_t* current_match = &register_vector[match_index * 2]; - match_start = current_match[0]; - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); - if (match_end < match_start) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - match_start); - } - match_end = current_match[1]; - HandleScope loop_scope(isolate); - if (!first) { - builder->Add(*isolate->factory()->NewProperSubString(subject, - match_start, - match_end)); - } else { - builder->Add(*isolate->factory()->NewSubString(subject, - match_start, - match_end)); - first = false; - } - } - - // If we did not get the maximum number of matches, we can stop here - // since there are no matches left. 
- if (num_matches < max_matches) break; - - if (match_start != match_end) { - pos = match_end; - } else { - pos = match_end + 1; - if (pos > subject_length) break; - } - } else if (num_matches == 0) { - break; - } else { - ASSERT_EQ(num_matches, RegExpImpl::RE_EXCEPTION); - return RegExpImpl::RE_EXCEPTION; - } - } - - if (match_start >= 0) { - if (match_end < subject_length) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - subject_length); - } - SetLastMatchInfoNoCaptures(subject, - last_match_array, - match_start, - match_end); - return RegExpImpl::RE_SUCCESS; - } else { - return RegExpImpl::RE_FAILURE; // No matches at all. - } -} - - // Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain // separate last match info. See comment on that function. +template static int SearchRegExpMultiple( Isolate* isolate, Handle subject, Handle regexp, Handle last_match_array, - FixedArrayBuilder* builder, - Zone* zone) { - + FixedArrayBuilder* builder) { ASSERT(subject->IsFlat()); - int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject); - if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION; + ASSERT_NE(has_capture, regexp->CaptureCount() == 0); - int max_matches; - int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp, - registers_per_match, - &max_matches); - OffsetsVector registers(num_registers, isolate); - Vector register_vector(registers.vector(), registers.length()); - - int num_matches = RegExpImpl::IrregexpExecRaw(regexp, - subject, - 0, - register_vector); + RegExpImpl::GlobalCache global_cache(regexp, subject, true, isolate); + if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION; int capture_count = regexp->CaptureCount(); int subject_length = subject->length(); // Position to search from. - int pos = 0; - // End of previous match. Differs from pos if match was empty. 
+ int match_start = -1; int match_end = 0; bool first = true; - if (num_matches > 0) { - do { - int match_start = 0; - for (int match_index = 0; match_index < num_matches; match_index++) { - int32_t* current_match = - &register_vector[match_index * registers_per_match]; - match_start = current_match[0]; - builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); - if (match_end < match_start) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - match_start); - } - match_end = current_match[1]; - - { - // Avoid accumulating new handles inside loop. - HandleScope temp_scope(isolate); - // Arguments array to replace function is match, captures, index and - // subject, i.e., 3 + capture count in total. - Handle elements = - isolate->factory()->NewFixedArray(3 + capture_count); - Handle match; - if (!first) { - match = isolate->factory()->NewProperSubString(subject, - match_start, - match_end); - } else { - match = isolate->factory()->NewSubString(subject, - match_start, - match_end); - } - elements->set(0, *match); - for (int i = 1; i <= capture_count; i++) { - int start = current_match[i * 2]; - if (start >= 0) { - int end = current_match[i * 2 + 1]; - ASSERT(start <= end); - Handle substring; - if (!first) { - substring = - isolate->factory()->NewProperSubString(subject, start, end); - } else { - substring = - isolate->factory()->NewSubString(subject, start, end); - } - elements->set(i, *substring); - } else { - ASSERT(current_match[i * 2 + 1] < 0); - elements->set(i, isolate->heap()->undefined_value()); - } - } - elements->set(capture_count + 1, Smi::FromInt(match_start)); - elements->set(capture_count + 2, *subject); - builder->Add(*isolate->factory()->NewJSArrayWithElements(elements)); - } + // Two smis before and after the match, for very long strings. 
+ static const int kMaxBuilderEntriesPerRegExpMatch = 5; + + while (true) { + int32_t* current_match = global_cache.FetchNext(); + if (current_match == NULL) break; + match_start = current_match[0]; + builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch); + if (match_end < match_start) { + ReplacementStringBuilder::AddSubjectSlice(builder, + match_end, + match_start); + } + match_end = current_match[1]; + { + // Avoid accumulating new handles inside loop. + HandleScope temp_scope(isolate); + Handle match; + if (!first) { + match = isolate->factory()->NewProperSubString(subject, + match_start, + match_end); + } else { + match = isolate->factory()->NewSubString(subject, + match_start, + match_end); first = false; } - // If we did not get the maximum number of matches, we can stop here - // since there are no matches left. - if (num_matches < max_matches) break; - - if (match_end > match_start) { - pos = match_end; - } else { - pos = match_end + 1; - if (pos > subject_length) { - break; + if (has_capture) { + // Arguments array to replace function is match, captures, index and + // subject, i.e., 3 + capture count in total. 
+ Handle elements = + isolate->factory()->NewFixedArray(3 + capture_count); + + elements->set(0, *match); + for (int i = 1; i <= capture_count; i++) { + int start = current_match[i * 2]; + if (start >= 0) { + int end = current_match[i * 2 + 1]; + ASSERT(start <= end); + Handle substring = + isolate->factory()->NewSubString(subject, start, end); + elements->set(i, *substring); + } else { + ASSERT(current_match[i * 2 + 1] < 0); + elements->set(i, isolate->heap()->undefined_value()); + } } + elements->set(capture_count + 1, Smi::FromInt(match_start)); + elements->set(capture_count + 2, *subject); + builder->Add(*isolate->factory()->NewJSArrayWithElements(elements)); + } else { + builder->Add(*match); } + } + } - num_matches = RegExpImpl::IrregexpExecRaw(regexp, - subject, - pos, - register_vector); - } while (num_matches > 0); - - if (num_matches != RegExpImpl::RE_EXCEPTION) { - // Finished matching, with at least one match. - if (match_end < subject_length) { - ReplacementStringBuilder::AddSubjectSlice(builder, - match_end, - subject_length); - } + if (global_cache.HasException()) return RegExpImpl::RE_EXCEPTION; - int last_match_capture_count = (capture_count + 1) * 2; - int last_match_array_size = - last_match_capture_count + RegExpImpl::kLastMatchOverhead; - last_match_array->EnsureSize(last_match_array_size); - AssertNoAllocation no_gc; - FixedArray* elements = FixedArray::cast(last_match_array->elements()); - // We have to set this even though the rest of the last match array is - // ignored. - RegExpImpl::SetLastCaptureCount(elements, last_match_capture_count); - // These are also read without consulting the override. - RegExpImpl::SetLastSubject(elements, *subject); - RegExpImpl::SetLastInput(elements, *subject); - return RegExpImpl::RE_SUCCESS; + if (match_start >= 0) { + // Finished matching, with at least one match. 
+ if (match_end < subject_length) { + ReplacementStringBuilder::AddSubjectSlice(builder, + match_end, + subject_length); } + + RegExpImpl::SetLastMatchInfo( + last_match_array, subject, capture_count, NULL); + + return RegExpImpl::RE_SUCCESS; + } else { + return RegExpImpl::RE_FAILURE; // No matches at all. } - // No matches at all, return failure or exception result directly. - return num_matches; } @@ -4141,34 +3789,15 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { } FixedArrayBuilder builder(result_elements); - if (regexp->TypeTag() == JSRegExp::ATOM) { - Handle pattern( - String::cast(regexp->DataAt(JSRegExp::kAtomPatternIndex))); - ASSERT(pattern->IsFlat()); - if (SearchStringMultiple(isolate, subject, pattern, - last_match_info, &builder)) { - return *builder.ToJSArray(result_array); - } - return isolate->heap()->null_value(); - } - - ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); - int result; if (regexp->CaptureCount() == 0) { - result = SearchRegExpNoCaptureMultiple(isolate, - subject, - regexp, - last_match_info, - &builder); + result = SearchRegExpMultiple( + isolate, subject, regexp, last_match_info, &builder); } else { - result = SearchRegExpMultiple(isolate, - subject, - regexp, - last_match_info, - &builder, - isolate->runtime_zone()); + result = SearchRegExpMultiple( + isolate, subject, regexp, last_match_info, &builder); } + if (result == RegExpImpl::RE_SUCCESS) return *builder.ToJSArray(result_array); if (result == RegExpImpl::RE_FAILURE) return isolate->heap()->null_value(); ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION); diff --git a/src/unicode-inl.h b/src/unicode-inl.h index 9c0ebf9..ec9c69f 100644 --- a/src/unicode-inl.h +++ b/src/unicode-inl.h @@ -29,6 +29,7 @@ #define V8_UNICODE_INL_H_ #include "unicode.h" +#include "checks.h" namespace unibrow { @@ -144,6 +145,7 @@ uchar CharacterStream::GetNext() { } else { remaining_--; } + ASSERT(BoundsCheck(cursor_)); return result; } diff --git a/src/unicode.h b/src/unicode.h index 
94ab1b4..91b16c9 100644 --- a/src/unicode.h +++ b/src/unicode.h @@ -201,6 +201,7 @@ class CharacterStream { protected: virtual void FillBuffer() = 0; + virtual bool BoundsCheck(unsigned offset) = 0; // The number of characters left in the current buffer unsigned remaining_; // The current offset within the buffer @@ -228,6 +229,9 @@ class InputBuffer : public CharacterStream { InputBuffer() { } explicit InputBuffer(Input input) { Reset(input); } virtual void FillBuffer(); + virtual bool BoundsCheck(unsigned offset) { + return (buffer_ != util_buffer_) || (offset < kSize); + } // A custom offset that can be used by the string implementation to // mark progress within the encoded string. diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc index 832616e..3fa93b2 100644 --- a/src/x64/code-stubs-x64.cc +++ b/src/x64/code-stubs-x64.cc @@ -2791,7 +2791,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // Calculate number of capture registers (number_of_captures + 1) * 2. __ leal(rdx, Operand(rdx, rdx, times_1, 2)); // Check that the static offsets vector buffer is large enough. - __ cmpl(rdx, Immediate(OffsetsVector::kStaticOffsetsVectorSize)); + __ cmpl(rdx, Immediate(Isolate::kJSRegexpStaticOffsetsVectorSize)); __ j(above, &runtime); // rax: RegExp data (FixedArray) diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc index 50356e7..e433b92 100644 --- a/test/cctest/test-regexp.cc +++ b/test/cctest/test-regexp.cc @@ -267,6 +267,7 @@ TEST(Parser) { CHECK_PARSE_EQ("\\u003z", "'u003z'"); CHECK_PARSE_EQ("foo[z]*", "(: 'foo' (# 0 - g [z]))"); + CHECK_SIMPLE("", false); CHECK_SIMPLE("a", true); CHECK_SIMPLE("a|b", false); CHECK_SIMPLE("a\\n", false); @@ -1349,7 +1350,7 @@ TEST(MacroAssembler) { V8::Initialize(NULL); byte codes[1024]; RegExpMacroAssemblerIrregexp m(Vector(codes, 1024), - Isolate::Current()->zone()); + Isolate::Current()->runtime_zone()); // ^f(o)o. 
Label fail, fail2, start; uc16 foo_chars[3]; diff --git a/test/cctest/test-strings.cc b/test/cctest/test-strings.cc index c4f72f4..4557100 100644 --- a/test/cctest/test-strings.cc +++ b/test/cctest/test-strings.cc @@ -1,4 +1,4 @@ -// Copyright 2011 the V8 project authors. All rights reserved. +// Copyright 2012 the V8 project authors. All rights reserved. // Check that we can traverse very deep stacks of ConsStrings using // StringInputBuffer. Check that Get(int) works on very deep stacks @@ -691,3 +691,20 @@ TEST(RegExpOverflow) { CHECK(result.IsEmpty()); CHECK(context->HasOutOfMemoryException()); } + + +TEST(StringReplaceAtomTwoByteResult) { + InitializeVM(); + HandleScope scope; + LocalContext context; + v8::Local result = CompileRun( + "var subject = 'ascii~only~string~'; " + "var replace = '\x80'; " + "subject.replace(/~/g, replace); "); + CHECK(result->IsString()); + Handle string = v8::Utils::OpenHandle(v8::String::Cast(*result)); + CHECK(string->IsSeqTwoByteString()); + + v8::Local expected = v8_str("ascii\x80only\x80string\x80"); + CHECK(expected->Equals(result)); +} diff --git a/test/mjsunit/regexp-global.js b/test/mjsunit/regexp-global.js index 1652774..093dba1 100644 --- a/test/mjsunit/regexp-global.js +++ b/test/mjsunit/regexp-global.js @@ -239,4 +239,16 @@ for (var m = 0; m < 200; m++) { // Test 3a: String.match. test_match(test_3_expectation, subject, /a1/g); -} \ No newline at end of file +} + + +// Test String hashing (compiling regular expression includes hashing). +var crosscheck = "\x80"; +for (var i = 0; i < 12; i++) crosscheck += crosscheck; +new RegExp(crosscheck); + +var subject = "ascii~only~string~here~"; +var replacement = "\x80"; +var result = subject.replace(/~/g, replacement); +for (var i = 0; i < 5; i++) result += result; +new RegExp(result);