From e9e8628380329d95a2aed91a6690882443023cf2 Mon Sep 17 00:00:00 2001 From: "kasperl@chromium.org" Date: Thu, 5 Mar 2009 15:23:17 +0000 Subject: [PATCH] Revert revisions 1383, 1384, 1391, 1398, 1401, 1402, 1418, and 1419 from bleeding_edge until we have a fix for the crashers we see on the distributed test infra- structure. We know that revision 1383 is causing issues, but I had to revert some of the other recent RegExp changes in order to get this part out. Review URL: http://codereview.chromium.org/39186 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1429 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/factory.cc | 32 +- src/factory.h | 20 +- src/jsregexp.cc | 590 +++++++++++++++------------------- src/jsregexp.h | 103 ++---- src/macros.py | 21 +- src/objects-debug.cc | 15 +- src/objects-inl.h | 7 - src/objects.cc | 16 - src/objects.h | 44 +-- src/regexp-delay.js | 151 ++++----- src/runtime.cc | 23 +- src/runtime.h | 4 +- src/string.js | 157 ++++----- test/mjsunit/regexp-static.js | 5 - test/mjsunit/regexp-string-methods.js | 51 --- tools/js2c.py | 4 +- 16 files changed, 440 insertions(+), 803 deletions(-) delete mode 100644 test/mjsunit/regexp-string-methods.js diff --git a/src/factory.cc b/src/factory.cc index 2cf411e..ec52520 100644 --- a/src/factory.cc +++ b/src/factory.cc @@ -826,13 +826,12 @@ Handle Factory::ObjectLiteralMapFromCache(Handle context, } -void Factory::SetRegExpAtomData(Handle regexp, - JSRegExp::Type type, - Handle source, - JSRegExp::Flags flags, - Handle data) { - Handle store = NewFixedArray(JSRegExp::kAtomDataSize); - +void Factory::SetRegExpData(Handle regexp, + JSRegExp::Type type, + Handle source, + JSRegExp::Flags flags, + Handle data) { + Handle store = NewFixedArray(JSRegExp::kDataSize); store->set(JSRegExp::kTagIndex, Smi::FromInt(type)); store->set(JSRegExp::kSourceIndex, *source); store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags.value())); @@ -840,25 +839,6 @@ void Factory::SetRegExpAtomData(Handle regexp, regexp->set_data(*store); } -void Factory::SetRegExpIrregexpData(Handle regexp, - JSRegExp::Type type, - Handle source, - JSRegExp::Flags flags, - int capture_count) { - Handle store = NewFixedArray(JSRegExp::kIrregexpDataSize); - - store->set(JSRegExp::kTagIndex, Smi::FromInt(type)); - store->set(JSRegExp::kSourceIndex, *source); - store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags.value())); - store->set(JSRegExp::kIrregexpASCIICodeIndex, Heap::the_hole_value()); - store->set(JSRegExp::kIrregexpUC16CodeIndex, Heap::the_hole_value()); - store->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(0)); - store->set(JSRegExp::kIrregexpCaptureCountIndex, - Smi::FromInt(capture_count)); - regexp->set_data(*store); -} - - void Factory::ConfigureInstance(Handle desc, Handle instance, diff --git a/src/factory.h b/src/factory.h index 143235c..f282896 100644 --- a/src/factory.h +++ b/src/factory.h @@ -316,20 +316,12 @@ class Factory : public AllStatic { Handle keys); // Creates a new FixedArray that holds the data associated with the - // atom regexp and stores it in the regexp. - static void SetRegExpAtomData(Handle regexp, - JSRegExp::Type type, - Handle source, - JSRegExp::Flags flags, - Handle match_pattern); - - // Creates a new FixedArray that holds the data associated with the - // irregexp regexp and stores it in the regexp. - static void SetRegExpIrregexpData(Handle regexp, - JSRegExp::Type type, - Handle source, - JSRegExp::Flags flags, - int capture_count); + // regexp and stores it in the regexp. + static void SetRegExpData(Handle regexp, + JSRegExp::Type type, + Handle source, + JSRegExp::Flags flags, + Handle data); private: static Handle NewFunctionHelper(Handle name, diff --git a/src/jsregexp.cc b/src/jsregexp.cc index a2f0589..324d0f9 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -213,54 +213,55 @@ Handle RegExpImpl::Compile(Handle re, Handle result; if (in_cache) { re->set_data(*cached); - return re; - } - FlattenString(pattern); - ZoneScope zone_scope(DELETE_ON_EXIT); - RegExpCompileData parse_result; - FlatStringReader reader(pattern); - if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { - // Throw an exception if we fail to parse the pattern. - ThrowRegExpException(re, - pattern, - parse_result.error, - "malformed_regexp"); - return Handle::null(); - } - - if (parse_result.simple && !flags.is_ignore_case()) { - // Parse-tree is a single atom that is equal to the pattern. - AtomCompile(re, pattern, flags, pattern); - } else if (parse_result.tree->IsAtom() && - !flags.is_ignore_case() && - parse_result.capture_count == 0) { - RegExpAtom* atom = parse_result.tree->AsAtom(); - Vector atom_pattern = atom->data(); - Handle atom_string = Factory::NewStringFromTwoByte(atom_pattern); - AtomCompile(re, pattern, flags, atom_string); + result = re; } else { - IrregexpPrepare(re, pattern, flags, parse_result.capture_count); + FlattenString(pattern); + ZoneScope zone_scope(DELETE_ON_EXIT); + RegExpCompileData parse_result; + FlatStringReader reader(pattern); + if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { + // Throw an exception if we fail to parse the pattern. + ThrowRegExpException(re, + pattern, + parse_result.error, + "malformed_regexp"); + return Handle::null(); + } + + if (parse_result.simple && !flags.is_ignore_case()) { + // Parse-tree is a single atom that is equal to the pattern. + result = AtomCompile(re, pattern, flags, pattern); + } else if (parse_result.tree->IsAtom() && + !flags.is_ignore_case() && + parse_result.capture_count == 0) { + RegExpAtom* atom = parse_result.tree->AsAtom(); + Vector atom_pattern = atom->data(); + Handle atom_string = Factory::NewStringFromTwoByte(atom_pattern); + result = AtomCompile(re, pattern, flags, atom_string); + } else { + result = IrregexpPrepare(re, pattern, flags); + } + Object* data = re->data(); + if (data->IsFixedArray()) { + // If compilation succeeded then the data is set on the regexp + // and we can store it in the cache. + Handle data(FixedArray::cast(re->data())); + CompilationCache::PutRegExp(pattern, flags, data); + } } - ASSERT(re->data()->IsFixedArray()); - // Compilation succeeded so the data is set on the regexp - // and we can store it in the cache. - Handle data(FixedArray::cast(re->data())); - CompilationCache::PutRegExp(pattern, flags, data); - return re; + return result; } Handle RegExpImpl::Exec(Handle regexp, Handle subject, - int index, - Handle last_match_info) { + Handle index) { switch (regexp->TypeTag()) { case JSRegExp::ATOM: - return AtomExec(regexp, subject, index, last_match_info); + return AtomExec(regexp, subject, index); case JSRegExp::IRREGEXP: { - Handle result = - IrregexpExec(regexp, subject, index, last_match_info); + Handle result = IrregexpExec(regexp, subject, index); ASSERT(!result.is_null() || Top::has_pending_exception()); return result; } @@ -272,14 +273,12 @@ Handle RegExpImpl::Exec(Handle regexp, Handle RegExpImpl::ExecGlobal(Handle regexp, - Handle subject, - Handle last_match_info) { + Handle subject) { switch (regexp->TypeTag()) { case JSRegExp::ATOM: - return AtomExecGlobal(regexp, subject, last_match_info); + return AtomExecGlobal(regexp, subject); case JSRegExp::IRREGEXP: { - Handle result = - IrregexpExecGlobal(regexp, subject, last_match_info); + Handle result = IrregexpExecGlobal(regexp, subject); ASSERT(!result.is_null() || Top::has_pending_exception()); return result; } @@ -293,95 +292,60 @@ Handle RegExpImpl::ExecGlobal(Handle regexp, // RegExp Atom implementation: Simple string search using indexOf. -void RegExpImpl::AtomCompile(Handle re, - Handle pattern, - JSRegExp::Flags flags, - Handle match_pattern) { - Factory::SetRegExpAtomData(re, - JSRegExp::ATOM, - pattern, - flags, - match_pattern); -} - - -static void SetAtomLastCapture(FixedArray* array, - String* subject, - int from, - int to) { - NoHandleAllocation no_handles; - RegExpImpl::SetLastCaptureCount(array, 2); - RegExpImpl::SetLastSubject(array, subject); - RegExpImpl::SetLastInput(array, subject); - RegExpImpl::SetCapture(array, 0, from); - RegExpImpl::SetCapture(array, 1, to); +Handle RegExpImpl::AtomCompile(Handle re, + Handle pattern, + JSRegExp::Flags flags, + Handle match_pattern) { + Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); + return re; } Handle RegExpImpl::AtomExec(Handle re, Handle subject, - int index, - Handle last_match_info) { + Handle index) { Handle needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); - uint32_t start_index = index; + uint32_t start_index; + if (!Array::IndexFromObject(*index, &start_index)) { + return Handle(Smi::FromInt(-1)); + } int value = Runtime::StringMatch(subject, needle, start_index); if (value == -1) return Factory::null_value(); - ASSERT(last_match_info->HasFastElements()); - { - NoHandleAllocation no_handles; - FixedArray* array = last_match_info->elements(); - SetAtomLastCapture(array, *subject, value, value + needle->length()); - } - return last_match_info; + Handle array = Factory::NewFixedArray(2); + array->set(0, Smi::FromInt(value)); + array->set(1, Smi::FromInt(value + needle->length())); + return Factory::NewJSArrayWithElements(array); } Handle RegExpImpl::AtomExecGlobal(Handle re, - Handle subject, - Handle last_match_info) { + Handle subject) { Handle needle(String::cast(re->DataAt(JSRegExp::kAtomPatternIndex))); - ASSERT(last_match_info->HasFastElements()); Handle result = Factory::NewJSArray(1); int index = 0; int match_count = 0; int subject_length = subject->length(); int needle_length = needle->length(); - int last_value = -1; while (true) { - HandleScope scope; int value = -1; if (index + needle_length <= subject_length) { value = Runtime::StringMatch(subject, needle, index); } - if (value == -1) { - if (last_value != -1) { - Handle array(last_match_info->elements()); - SetAtomLastCapture(*array, - *subject, - last_value, - last_value + needle->length()); - } - break; - } - + if (value == -1) break; + HandleScope scope; int end = value + needle_length; - // Create an array that looks like the static last_match_info array - // that is attached to the global RegExp object. We will be returning - // an array of these. - Handle array = Factory::NewFixedArray(kFirstCapture + 2); - SetCapture(*array, 0, value); - SetCapture(*array, 1, end); - SetLastCaptureCount(*array, 2); + Handle array = Factory::NewFixedArray(2); + array->set(0, Smi::FromInt(value)); + array->set(1, Smi::FromInt(end)); Handle pair = Factory::NewJSArrayWithElements(array); SetElement(result, match_count, pair); match_count++; index = end; if (needle_length == 0) index++; - last_value = value; } return result; } @@ -390,29 +354,23 @@ Handle RegExpImpl::AtomExecGlobal(Handle re, // Irregexp implementation. -// Ensures that the regexp object contains a compiled version of the -// source for either ASCII or non-ASCII strings. -// If the compiled version doesn't already exist, it is compiled +// Retrieves a compiled version of the regexp for either ASCII or non-ASCII +// strings. If the compiled version doesn't already exist, it is compiled // from the source pattern. -// If compilation fails, an exception is thrown and this function -// returns false. -bool RegExpImpl::EnsureCompiledIrregexp(Handle re, - bool is_ascii) { - int index; - if (is_ascii) { - index = JSRegExp::kIrregexpASCIICodeIndex; - } else { - index = JSRegExp::kIrregexpUC16CodeIndex; - } - Object* entry = re->DataAt(index); - if (!entry->IsTheHole()) { - // A value has already been compiled. - if (entry->IsJSObject()) { - // If it's a JS value, it's an error. - Top::Throw(entry); - return false; - } - return true; +// Irregexp is not feature complete yet. If there is something in the +// regexp that the compiler cannot currently handle, an empty +// handle is returned, but no exception is thrown. +static Handle GetCompiledIrregexp(Handle re, + bool is_ascii) { + ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); + Handle alternatives( + FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); + ASSERT_EQ(2, alternatives->length()); + + int index = is_ascii ? 0 : 1; + Object* entry = alternatives->get(index); + if (!entry->IsNull()) { + return Handle(FixedArray::cast(entry)); } // Compile the RegExp. @@ -434,115 +392,77 @@ bool RegExpImpl::EnsureCompiledIrregexp(Handle re, pattern, compile_data.error, "malformed_regexp"); - return false; + return Handle::null(); } - RegExpEngine::CompilationResult result = + Handle compiled_entry = RegExpEngine::Compile(&compile_data, flags.is_ignore_case(), flags.is_multiline(), pattern, is_ascii); - if (result.error_message != NULL) { - // Unable to compile regexp. - Handle array = Factory::NewJSArray(2); - SetElement(array, 0, pattern); - SetElement(array, - 1, - Factory::NewStringFromUtf8(CStrVector(result.error_message))); - Handle regexp_err = - Factory::NewSyntaxError("malformed_regexp", array); - Top::Throw(*regexp_err); - re->SetDataAt(index, *regexp_err); - return false; - } - - NoHandleAllocation no_handles; - - FixedArray* data = FixedArray::cast(re->data()); - data->set(index, result.code); - int register_max = IrregexpMaxRegisterCount(data); - if (result.num_registers > register_max) { - SetIrregexpMaxRegisterCount(data, result.num_registers); + if (!compiled_entry.is_null()) { + alternatives->set(index, *compiled_entry); } - - return true; -} - - -int RegExpImpl::IrregexpMaxRegisterCount(FixedArray* re) { - return Smi::cast( - re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); -} - - -void RegExpImpl::SetIrregexpMaxRegisterCount(FixedArray* re, int value) { - re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(value)); + return compiled_entry; } -int RegExpImpl::IrregexpNumberOfCaptures(FixedArray* re) { - return Smi::cast(re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); +int RegExpImpl::IrregexpNumberOfCaptures(Handle irre) { + return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); } -int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) { - return Smi::cast(re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); +int RegExpImpl::IrregexpNumberOfRegisters(Handle irre) { + return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); } -ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) { - int index; - if (is_ascii) { - index = JSRegExp::kIrregexpASCIICodeIndex; - } else { - index = JSRegExp::kIrregexpUC16CodeIndex; - } - return ByteArray::cast(re->get(index)); +Handle RegExpImpl::IrregexpByteCode(Handle irre) { + ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() + == RegExpMacroAssembler::kBytecodeImplementation); + return Handle(ByteArray::cast(irre->get(kIrregexpCodeIndex))); } -Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) { - int index; - if (is_ascii) { - index = JSRegExp::kIrregexpASCIICodeIndex; - } else { - index = JSRegExp::kIrregexpUC16CodeIndex; - } - return Code::cast(re->get(index)); +Handle RegExpImpl::IrregexpNativeCode(Handle irre) { + ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() + != RegExpMacroAssembler::kBytecodeImplementation); + return Handle(Code::cast(irre->get(kIrregexpCodeIndex))); } -void RegExpImpl::IrregexpPrepare(Handle re, - Handle pattern, - JSRegExp::Flags flags, - int capture_count) { - // Initialize compiled code entries to null. - Factory::SetRegExpIrregexpData(re, - JSRegExp::IRREGEXP, - pattern, - flags, - capture_count); +HandleRegExpImpl::IrregexpPrepare(Handle re, + Handle pattern, + JSRegExp::Flags flags) { + // Make space for ASCII and UC16 versions. + Handle alternatives = Factory::NewFixedArray(2); + alternatives->set_null(0); + alternatives->set_null(1); + Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); + return re; } Handle RegExpImpl::IrregexpExec(Handle regexp, Handle subject, - int index, - Handle last_match_info) { + Handle index) { ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); + ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); - if (!EnsureCompiledIrregexp(regexp, is_ascii)) { + Handle irregexp = GetCompiledIrregexp(regexp, is_ascii); + if (irregexp.is_null()) { + // We can't handle the RegExp with IRRegExp. return Handle::null(); } // Prepare space for the return values. - Handle re_data(FixedArray::cast(regexp->data())); - int number_of_capture_registers = - (IrregexpNumberOfCaptures(*re_data) + 1) * 2; - OffsetsVector offsets(number_of_capture_registers); + int number_of_registers = IrregexpNumberOfRegisters(irregexp); + OffsetsVector offsets(number_of_registers); - int previous_index = index; + int num_captures = IrregexpNumberOfCaptures(irregexp); + + int previous_index = static_cast(DoubleToInteger(index->Number())); #ifdef DEBUG if (FLAG_trace_regexp_bytecodes) { @@ -556,11 +476,8 @@ Handle RegExpImpl::IrregexpExec(Handle regexp, FlattenString(subject); } - last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); - - return IrregexpExecOnce(re_data, - number_of_capture_registers, - last_match_info, + return IrregexpExecOnce(irregexp, + num_captures, subject, previous_index, offsets.vector(), @@ -569,33 +486,29 @@ Handle RegExpImpl::IrregexpExec(Handle regexp, Handle RegExpImpl::IrregexpExecGlobal(Handle regexp, - Handle subject, - Handle last_match_info) { + Handle subject) { ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); - Handle irregexp(FixedArray::cast(regexp->data())); bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); - if (!EnsureCompiledIrregexp(regexp, is_ascii)) { + Handle irregexp = GetCompiledIrregexp(regexp, is_ascii); + if (irregexp.is_null()) { return Handle::null(); } // Prepare space for the return values. - int number_of_capture_registers = - (IrregexpNumberOfCaptures(*irregexp) + 1) * 2; - OffsetsVector offsets(number_of_capture_registers); + int number_of_registers = IrregexpNumberOfRegisters(irregexp); + OffsetsVector offsets(number_of_registers); int previous_index = 0; Handle result = Factory::NewJSArray(0); - int result_length = 0; + int i = 0; Handle matches; if (!subject->IsFlat(StringShape(*subject))) { FlattenString(subject); } - last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); - while (true) { if (previous_index > subject->length() || previous_index < 0) { // Per ECMA-262 15.10.6.2, if the previous index is greater than the @@ -610,10 +523,8 @@ Handle RegExpImpl::IrregexpExecGlobal(Handle regexp, PrintF("\n\nSubject string: '%s'\n\n", *(subject->ToCString())); } #endif - HandleScope scope; matches = IrregexpExecOnce(irregexp, - number_of_capture_registers, - last_match_info, + IrregexpNumberOfCaptures(irregexp), subject, previous_index, offsets.vector(), @@ -625,25 +536,12 @@ Handle RegExpImpl::IrregexpExecGlobal(Handle regexp, } if (matches->IsJSArray()) { - // Create an array that looks like the static last_match_info array - // that is attached to the global RegExp object. We will be returning - // an array of these. - Handle matches_array(JSArray::cast(*matches)->elements()); - Handle latest_match = - Factory::NewJSArray(kFirstCapture + number_of_capture_registers); - Handle latest_match_array(latest_match->elements()); - - for (int i = 0; i < number_of_capture_registers; i++) { - SetCapture(*latest_match_array, i, GetCapture(*matches_array, i)); - } - SetLastCaptureCount(*latest_match_array, number_of_capture_registers); - - SetElement(result, result_length, latest_match); - result_length++; - previous_index = GetCapture(*matches_array, 1); - if (GetCapture(*matches_array, 0) == previous_index) + SetElement(result, i, matches); + i++; + previous_index = offsets.vector()[1]; + if (offsets.vector()[0] == offsets.vector()[1]) { previous_index++; - + } } else { ASSERT(matches->IsNull()); return result; @@ -653,124 +551,131 @@ Handle RegExpImpl::IrregexpExecGlobal(Handle regexp, } -Handle RegExpImpl::IrregexpExecOnce(Handle regexp, - int number_of_capture_registers, - Handle last_match_info, +Handle RegExpImpl::IrregexpExecOnce(Handle irregexp, + int num_captures, Handle subject, int previous_index, int* offsets_vector, int offsets_vector_length) { - StringShape shape(*subject); - ASSERT(subject->IsFlat(shape)); - bool is_ascii = shape.IsAsciiRepresentation(); + ASSERT(subject->IsFlat(StringShape(*subject))); bool rc; - if (FLAG_regexp_native) { + int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); + + switch (tag) { + case RegExpMacroAssembler::kIA32Implementation: { #ifndef ARM - Handle code(IrregexpNativeCode(*regexp, is_ascii)); - - // Character offsets into string. - int start_offset = previous_index; - int end_offset = subject->length(shape); - - if (shape.IsCons()) { - subject = Handle(ConsString::cast(*subject)->first()); - } else if (shape.IsSliced()) { - SlicedString* slice = SlicedString::cast(*subject); - start_offset += slice->start(); - end_offset += slice->start(); - subject = Handle(slice->buffer()); - } + Handle code = IrregexpNativeCode(irregexp); - // String is now either Sequential or External - StringShape flatshape(*subject); - bool is_ascii = flatshape.IsAsciiRepresentation(); - int char_size_shift = is_ascii ? 0 : 1; + StringShape shape(*subject); - RegExpMacroAssemblerIA32::Result res; + // Character offsets into string. + int start_offset = previous_index; + int end_offset = subject->length(shape); - if (flatshape.IsExternal()) { - const byte* address; - if (is_ascii) { - ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); - address = reinterpret_cast(ext->resource()->data()); - } else { - ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); - address = reinterpret_cast(ext->resource()->data()); + if (shape.IsCons()) { + subject = Handle(ConsString::cast(*subject)->first()); + } else if (shape.IsSliced()) { + SlicedString* slice = SlicedString::cast(*subject); + start_offset += slice->start(); + end_offset += slice->start(); + subject = Handle(slice->buffer()); } - res = RegExpMacroAssemblerIA32::Execute( - *code, - const_cast(&address), - start_offset << char_size_shift, - end_offset << char_size_shift, - offsets_vector, - previous_index == 0); - } else { // Sequential string - ASSERT(StringShape(*subject).IsSequential()); - Address char_address = - is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() - : SeqTwoByteString::cast(*subject)->GetCharsAddress(); - int byte_offset = char_address - reinterpret_cast
(*subject); - res = RegExpMacroAssemblerIA32::Execute( - *code, - reinterpret_cast(subject.location()), - byte_offset + (start_offset << char_size_shift), - byte_offset + (end_offset << char_size_shift), - offsets_vector, - previous_index == 0); - } - if (res == RegExpMacroAssemblerIA32::EXCEPTION) { - ASSERT(Top::has_pending_exception()); - return Handle::null(); - } - rc = (res == RegExpMacroAssemblerIA32::SUCCESS); + // String is now either Sequential or External + StringShape flatshape(*subject); + bool is_ascii = flatshape.IsAsciiRepresentation(); + int char_size_shift = is_ascii ? 0 : 1; + + RegExpMacroAssemblerIA32::Result res; - if (rc) { - // Capture values are relative to start_offset only. - for (int i = 0; i < offsets_vector_length; i++) { - if (offsets_vector[i] >= 0) { - offsets_vector[i] += previous_index; + if (flatshape.IsExternal()) { + const byte* address; + if (is_ascii) { + ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); + address = reinterpret_cast(ext->resource()->data()); + } else { + ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); + address = reinterpret_cast(ext->resource()->data()); } + res = RegExpMacroAssemblerIA32::Execute( + *code, + const_cast(&address), + start_offset << char_size_shift, + end_offset << char_size_shift, + offsets_vector, + previous_index == 0); + } else { // Sequential string + ASSERT(StringShape(*subject).IsSequential()); + Address char_address = + is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() + : SeqTwoByteString::cast(*subject)->GetCharsAddress(); + int byte_offset = char_address - reinterpret_cast
(*subject); + res = RegExpMacroAssemblerIA32::Execute( + *code, + reinterpret_cast(subject.location()), + byte_offset + (start_offset << char_size_shift), + byte_offset + (end_offset << char_size_shift), + offsets_vector, + previous_index == 0); } - } - } else { + + if (res == RegExpMacroAssemblerIA32::EXCEPTION) { + ASSERT(Top::has_pending_exception()); + return Handle::null(); + } + rc = (res == RegExpMacroAssemblerIA32::SUCCESS); + + if (rc) { + // Capture values are relative to start_offset only. + for (int i = 0; i < offsets_vector_length; i++) { + if (offsets_vector[i] >= 0) { + offsets_vector[i] += previous_index; + } + } + } + break; #else - // Unimplemented on ARM, fall through to bytecode. - } - { + UNIMPLEMENTED(); + rc = false; + break; #endif - for (int i = number_of_capture_registers - 1; i >= 0; i--) { - offsets_vector[i] = -1; } - Handle byte_codes(IrregexpByteCode(*regexp, is_ascii)); + case RegExpMacroAssembler::kBytecodeImplementation: { + for (int i = (num_captures + 1) * 2 - 1; i >= 0; i--) { + offsets_vector[i] = -1; + } + Handle byte_codes = IrregexpByteCode(irregexp); - rc = IrregexpInterpreter::Match(byte_codes, - subject, - offsets_vector, - previous_index); + rc = IrregexpInterpreter::Match(byte_codes, + subject, + offsets_vector, + previous_index); + break; + } + case RegExpMacroAssembler::kARMImplementation: + default: + UNREACHABLE(); + rc = false; + break; } if (!rc) { return Factory::null_value(); } - FixedArray* array = last_match_info->elements(); - ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); + Handle array = Factory::NewFixedArray(2 * (num_captures+1)); // The captures come in (start, end+1) pairs. - for (int i = 0; i < number_of_capture_registers; i += 2) { - SetCapture(array, i, offsets_vector[i]); - SetCapture(array, i + 1, offsets_vector[i + 1]); + for (int i = 0; i < 2 * (num_captures + 1); i += 2) { + array->set(i, Smi::FromInt(offsets_vector[i])); + array->set(i + 1, Smi::FromInt(offsets_vector[i + 1])); } - SetLastCaptureCount(array, number_of_capture_registers); - SetLastSubject(array, *subject); - SetLastInput(array, *subject); - return last_match_info; + return Factory::NewJSArrayWithElements(array); } // ------------------------------------------------------------------- -// Implementation of the Irregexp regular expression engine. +// Implmentation of the Irregexp regular expression engine. // // The Irregexp regular expression engine is intended to be a complete // implementation of ECMAScript regular expressions. It generates either @@ -987,10 +892,10 @@ class RegExpCompiler { return next_register_++; } - RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, - RegExpNode* start, - int capture_count, - Handle pattern); + Handle Assemble(RegExpMacroAssembler* assembler, + RegExpNode* start, + int capture_count, + Handle pattern); inline void AddWork(RegExpNode* node) { work_list_->Add(node); } @@ -1035,8 +940,15 @@ class RecursionCheck { }; -static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { - return RegExpEngine::CompilationResult("RegExp too big"); +static Handle IrregexpRegExpTooBig(Handle pattern) { + Handle array = Factory::NewJSArray(2); + SetElement(array, 0, pattern); + const char* message = "RegExp too big"; + SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message))); + Handle regexp_err = + Factory::NewSyntaxError("malformed_regexp", array); + Top::Throw(*regexp_err); + return Handle(); } @@ -1054,7 +966,7 @@ RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) } -RegExpEngine::CompilationResult RegExpCompiler::Assemble( +Handle RegExpCompiler::Assemble( RegExpMacroAssembler* macro_assembler, RegExpNode* start, int capture_count, @@ -1076,17 +988,24 @@ RegExpEngine::CompilationResult RegExpCompiler::Assemble( while (!work_list.is_empty()) { work_list.RemoveLast()->Emit(this, &new_trace); } - if (reg_exp_too_big_) return IrregexpRegExpTooBig(); - + if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern); + Handle array = + Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); + array->set(RegExpImpl::kIrregexpImplementationIndex, + Smi::FromInt(macro_assembler_->Implementation())); + array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, + Smi::FromInt(next_register_)); + array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, + Smi::FromInt(capture_count)); Handle code = macro_assembler_->GetCode(pattern); - + array->set(RegExpImpl::kIrregexpCodeIndex, *code); work_list_ = NULL; #ifdef DEBUG if (FLAG_trace_regexp_assembler) { delete macro_assembler_; } #endif - return RegExpEngine::CompilationResult(*code, next_register_); + return array; } @@ -3804,6 +3723,9 @@ RegExpNode* RegExpQuantifier::ToNode(int min, // | // [if r >= f] \----> ... // + // + // TODO(someone): clear captures on repetition and handle empty + // matches. // 15.10.2.5 RepeatMatcher algorithm. // The parser has already eliminated the case where max is 0. In the case @@ -4670,13 +4592,13 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) { } -RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, - bool ignore_case, - bool is_multiline, - Handle pattern, - bool is_ascii) { +Handle RegExpEngine::Compile(RegExpCompileData* data, + bool ignore_case, + bool is_multiline, + Handle pattern, + bool is_ascii) { if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { - return IrregexpRegExpTooBig(); + return IrregexpRegExpTooBig(pattern); } RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); // Wrap the body of the regexp in capture #0. diff --git a/src/jsregexp.h b/src/jsregexp.h index bf2e250..fbacff3 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -51,7 +51,6 @@ class RegExpImpl { // Parses the RegExp pattern and prepares the JSRegExp object with // generic data and choice of implementation - as well as what // the implementation wants to store in the data field. - // Returns false if compilation fails. static Handle Compile(Handle re, Handle pattern, Handle flags); @@ -60,45 +59,38 @@ class RegExpImpl { // This function calls the garbage collector if necessary. static Handle Exec(Handle regexp, Handle subject, - int index, - Handle lastMatchInfo); + Handle index); // Call RegExp.prototyp.exec(string) in a loop. // Used by String.prototype.match and String.prototype.replace. // This function calls the garbage collector if necessary. static Handle ExecGlobal(Handle regexp, - Handle subject, - Handle lastMatchInfo); + Handle subject); // Prepares a JSRegExp object with Irregexp-specific data. - static void IrregexpPrepare(Handle re, - Handle pattern, - JSRegExp::Flags flags, - int capture_register_count); + static Handle IrregexpPrepare(Handle re, + Handle pattern, + JSRegExp::Flags flags); - static void AtomCompile(Handle re, - Handle pattern, - JSRegExp::Flags flags, - Handle match_pattern); + static Handle AtomCompile(Handle re, + Handle pattern, + JSRegExp::Flags flags, + Handle match_pattern); static Handle AtomExec(Handle regexp, Handle subject, - int index, - Handle lastMatchInfo); + Handle index); static Handle AtomExecGlobal(Handle regexp, - Handle subject, - Handle lastMatchInfo); + Handle subject); // Execute an Irregexp bytecode pattern. static Handle IrregexpExec(Handle regexp, Handle subject, - int index, - Handle lastMatchInfo); + Handle index); static Handle IrregexpExecGlobal(Handle regexp, - Handle subject, - Handle lastMatchInfo); + Handle subject); static void NewSpaceCollectionPrologue(); static void OldSpaceCollectionPrologue(); @@ -109,49 +101,26 @@ class RegExpImpl { static Handle StringToTwoByte(Handle pattern); static Handle CachedStringToTwoByte(Handle pattern); - // Offsets in the lastMatchInfo array. - static const int kLastCaptureCount = 0; - static const int kLastSubject = 1; - static const int kLastInput = 2; - static const int kFirstCapture = 1; - static const int kLastMatchOverhead = 3; - static int GetCapture(FixedArray* array, int index) { - return Smi::cast(array->get(index + kFirstCapture))->value(); - } - static void SetLastCaptureCount(FixedArray* array, int to) { - array->set(kLastCaptureCount, Smi::FromInt(to)); - } - static void SetLastSubject(FixedArray* array, String* to) { - int capture_count = GetLastCaptureCount(array); - array->set(capture_count + kLastSubject, to); - } - static void SetLastInput(FixedArray* array, String* to) { - int capture_count = GetLastCaptureCount(array); - array->set(capture_count + kLastInput, to); - } - static void SetCapture(FixedArray* array, int index, int to) { - array->set(index + kFirstCapture, Smi::FromInt(to)); - } + static const int kIrregexpImplementationIndex = 0; + static const int kIrregexpNumberOfCapturesIndex = 1; + static const int kIrregexpNumberOfRegistersIndex = 2; + static const int kIrregexpCodeIndex = 3; + static const int kIrregexpDataLength = 4; private: static String* last_ascii_string_; static String* two_byte_cached_string_; - static bool EnsureCompiledIrregexp(Handle re, bool is_ascii); - - static int IrregexpMaxRegisterCount(FixedArray* re); - static void SetIrregexpMaxRegisterCount(FixedArray* re, int value); - static int IrregexpNumberOfCaptures(FixedArray* re); - static int IrregexpNumberOfRegisters(FixedArray* re); - static ByteArray* IrregexpByteCode(FixedArray* re, bool is_ascii); - static Code* IrregexpNativeCode(FixedArray* re, bool is_ascii); + static int IrregexpNumberOfCaptures(Handle re); + static int IrregexpNumberOfRegisters(Handle re); + static Handle IrregexpByteCode(Handle re); + static Handle IrregexpNativeCode(Handle re); // On a successful match, the result is a JSArray containing // captured positions. On a failure, the result is the null value. // Returns an empty handle in case of an exception. static Handle IrregexpExecOnce(Handle regexp, int num_captures, - Handle lastMatchInfo, Handle subject16, int previous_index, int* ovector, @@ -165,10 +134,6 @@ class RegExpImpl { int character_position, int utf8_position); - // Used to access the lastMatchInfo array. - static int GetLastCaptureCount(FixedArray* array) { - return Smi::cast(array->get(kLastCaptureCount))->value(); - } // A one element cache of the last utf8_subject string and its length. The // subject JS String object is cached in the heap. We also cache a // translation between position and utf8 position. @@ -1354,25 +1319,11 @@ struct RegExpCompileData { class RegExpEngine: public AllStatic { public: - struct CompilationResult { - explicit CompilationResult(const char* error_message) - : error_message(error_message), - code(Heap::the_hole_value()), - num_registers(0) {} - CompilationResult(Object* code, int registers) - : error_message(NULL), - code(code), - num_registers(registers) {} - const char* error_message; - Object* code; - int num_registers; - }; - - static CompilationResult Compile(RegExpCompileData* input, - bool ignore_case, - bool multiline, - Handle pattern, - bool is_ascii); + static Handle Compile(RegExpCompileData* input, + bool ignore_case, + bool multiline, + Handle pattern, + bool is_ascii); static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); }; diff --git a/src/macros.py b/src/macros.py index a3db5f9..b036c63 100644 --- a/src/macros.py +++ b/src/macros.py @@ -1,4 +1,4 @@ -# Copyright 2006-2009 the V8 project authors. All rights reserved. +# Copyright 2006-2008 the V8 project authors. All rights reserved. # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: @@ -99,22 +99,3 @@ python macro CHAR_CODE(str) = ord(str[1]); # Accessors for original global properties that ensure they have been loaded. const ORIGINAL_REGEXP = (global.RegExp, $RegExp); const ORIGINAL_DATE = (global.Date, $Date); - -# Constants used on an array to implement the properties of the RegExp object. -const REGEXP_NUMBER_OF_CAPTURES = 0; -const REGEXP_FIRST_CAPTURE = 1; - -# We can't put macros in macros so we use constants here. -# REGEXP_NUMBER_OF_CAPTURES -macro NUMBER_OF_CAPTURES(array) = ((array)[0]); - -# Last input and last subject are after the captures so we can omit them on -# results returned from global searches. Beware - these evaluate their -# arguments twice. -macro LAST_SUBJECT(array) = ((array)[(array)[0] + 1]); -macro LAST_INPUT(array) = ((array)[(array)[0] + 2]); - -# REGEXP_FIRST_CAPTURE -macro CAPTURE(index) = (1 + (index)); -const CAPTURE0 = 1; -const CAPTURE1 = 2; diff --git a/src/objects-debug.cc b/src/objects-debug.cc index 5b1e0b3..2161238 100644 --- a/src/objects-debug.cc +++ b/src/objects-debug.cc @@ -696,20 +696,9 @@ void JSRegExp::JSRegExpVerify() { break; } case JSRegExp::IRREGEXP: { - bool is_native = FLAG_regexp_native; -#ifdef ARM - // No native regexp on arm yet. - is_native = false; -#endif FixedArray* arr = FixedArray::cast(data()); - Object* ascii_data = arr->get(JSRegExp::kIrregexpASCIICodeIndex); - ASSERT(ascii_data->IsTheHole() - || (is_native ? ascii_data->IsCode() : ascii_data->IsByteArray())); - Object* uc16_data = arr->get(JSRegExp::kIrregexpUC16CodeIndex); - ASSERT(uc16_data->IsTheHole() - || (is_native ? uc16_data->IsCode() : uc16_data->IsByteArray())); - ASSERT(arr->get(JSRegExp::kIrregexpCaptureCountIndex)->IsSmi()); - ASSERT(arr->get(JSRegExp::kIrregexpMaxRegisterCountIndex)->IsSmi()); + Object* irregexp_data = arr->get(JSRegExp::kIrregexpDataIndex); + ASSERT(irregexp_data->IsFixedArray()); break; } default: diff --git a/src/objects-inl.h b/src/objects-inl.h index 18017c3..9705b75 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -2337,13 +2337,6 @@ Object* JSRegExp::DataAt(int index) { } -void JSRegExp::SetDataAt(int index, Object* value) { - ASSERT(TypeTag() != NOT_COMPILED); - ASSERT(index >= kDataIndex); // Only implementation data can be set this way. - FixedArray::cast(data())->set(index, value); -} - - bool JSObject::HasFastElements() { return !elements()->IsDictionary(); } diff --git a/src/objects.cc b/src/objects.cc index 5e6a2db..583dca2 100644 --- a/src/objects.cc +++ b/src/objects.cc @@ -4882,22 +4882,6 @@ Object* JSArray::Initialize(int capacity) { } -void JSArray::EnsureSize(int required_size) { - Handle self(this); - ASSERT(HasFastElements()); - if (elements()->length() >= required_size) return; - Handle old_backing(elements()); - int old_size = old_backing->length(); - // Doubling in size would be overkill, but leave some slack to avoid - // constantly growing. - int new_size = required_size + (required_size >> 3); - Handle new_backing = Factory::NewFixedArray(new_size); - // Can't use this any more now because we may have had a GC! - for (int i = 0; i < old_size; i++) new_backing->set(i, old_backing->get(i)); - self->SetContent(*new_backing); -} - - // Computes the new capacity when expanding the elements of a JSObject. static int NewElementsCapacity(int old_capacity) { // (old_capacity + 50%) + 16 diff --git a/src/objects.h b/src/objects.h index 4b2a5f2..2c966d6 100644 --- a/src/objects.h +++ b/src/objects.h @@ -2957,19 +2957,6 @@ class JSValue: public JSObject { }; // Regular expressions -// The regular expression holds a single reference to a FixedArray in -// the kDataOffset field. -// The FixedArray contains the following data: -// - tag : type of regexp implementation (not compiled yet, atom or irregexp) -// - reference to the original source string -// - reference to the original flag string -// If it is an atom regexp -// - a reference to a literal string to search for -// If it is an irregexp regexp: -// - a reference to code for ASCII inputs (bytecode or compiled). -// - a reference to code for UC16 inputs (bytecode or compiled). -// - max number of registers used by irregexp implementations. -// - number of capture registers (output values) of the regexp. class JSRegExp: public JSObject { public: // Meaning of Type: @@ -2997,8 +2984,6 @@ class JSRegExp: public JSObject { inline Flags GetFlags(); inline String* Pattern(); inline Object* DataAt(int index); - // Set implementation data after the object has been prepared. - inline void SetDataAt(int index, Object* value); static inline JSRegExp* cast(Object* obj); @@ -3010,29 +2995,14 @@ class JSRegExp: public JSObject { static const int kDataOffset = JSObject::kHeaderSize; static const int kSize = kDataOffset + kIntSize; - // Indices in the data array. static const int kTagIndex = 0; static const int kSourceIndex = kTagIndex + 1; static const int kFlagsIndex = kSourceIndex + 1; - static const int kDataIndex = kFlagsIndex + 1; - // The data fields are used in different ways depending on the - // value of the tag. - // Atom regexps (literal strings). - static const int kAtomPatternIndex = kDataIndex; - - static const int kAtomDataSize = kAtomPatternIndex + 1; - - // Irregexp compiled code or bytecode for ASCII. - static const int kIrregexpASCIICodeIndex = kDataIndex; - // Irregexp compiled code or bytecode for UC16. - static const int kIrregexpUC16CodeIndex = kDataIndex + 1; - // Maximal number of registers used by either ASCII or UC16. - // Only used to check that there is enough stack space - static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 2; - // Number of captures in the compiled regexp. - static const int kIrregexpCaptureCountIndex = kDataIndex + 3; - - static const int kIrregexpDataSize = kIrregexpCaptureCountIndex + 1; + // These two are the same since the same entry is shared for + // different purposes in different types of regexps. + static const int kAtomPatternIndex = kFlagsIndex + 1; + static const int kIrregexpDataIndex = kFlagsIndex + 1; + static const int kDataSize = kAtomPatternIndex + 1; }; @@ -3827,10 +3797,6 @@ class JSArray: public JSObject { // Casting. static inline JSArray* cast(Object* obj); - // Uses handles. Ensures that the fixed array backing the JSArray has at - // least the stated size. - void EnsureSize(int minimum_size_of_backing_fixed_array); - // Dispatched behavior. #ifdef DEBUG void JSArrayPrint(); diff --git a/src/regexp-delay.js b/src/regexp-delay.js index 098e557..f1ded9d 100644 --- a/src/regexp-delay.js +++ b/src/regexp-delay.js @@ -1,4 +1,4 @@ -// Copyright 2006-2009 the V8 project authors. All rights reserved. +// Copyright 2006-2008 the V8 project authors. All rights reserved. // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions are // met: @@ -52,7 +52,7 @@ function DoConstructRegExp(object, pattern, flags, isConstructorCall) { var multiline = false; for (var i = 0; i < flags.length; i++) { - var c = StringCharAt.call(flags, i); + var c = flags.charAt(i); switch (c) { case 'g': // Allow duplicate flags to be consistent with JSC and others. @@ -117,15 +117,15 @@ function RegExpConstructor(pattern, flags) { // Deprecated RegExp.prototype.compile method. We behave like the constructor // were called again. In SpiderMonkey, this method returns the regexp object. -// In JSC, it returns undefined. For compatibility with JSC, we match their +// In KJS, it returns undefined. For compatibility with KJS, we match their // behavior. function CompileRegExp(pattern, flags) { - // Both JSC and SpiderMonkey treat a missing pattern argument as the + // Both KJS and SpiderMonkey treat a missing pattern argument as the // empty subject string, and an actual undefined value passed as the - // pattern as the string 'undefined'. Note that JSC is inconsistent + // patter as the string 'undefined'. Note that KJS is inconsistent // here, treating undefined values differently in // RegExp.prototype.compile and in the constructor, where they are - // the empty string. For compatibility with JSC, we match their + // the empty string. For compatibility with KJS, we match their // behavior. if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { DoConstructRegExp(this, 'undefined', flags, false); @@ -135,20 +135,32 @@ function CompileRegExp(pattern, flags) { } +// DoRegExpExec and DoRegExpExecGlobal are wrappers around the runtime +// %RegExp and %RegExpGlobal functions that ensure that the static +// properties of the RegExp constructor are set. function DoRegExpExec(regexp, string, index) { - return %RegExpExec(regexp, string, index, lastMatchInfo); + var matchIndices = %RegExpExec(regexp, string, index); + if (!IS_NULL(matchIndices)) { + regExpCaptures = matchIndices; + regExpSubject = regExpInput = string; + } + return matchIndices; } function DoRegExpExecGlobal(regexp, string) { - // Returns an array of arrays of substring indices. - return %RegExpExecGlobal(regexp, string, lastMatchInfo); + // Here, matchIndices is an array of arrays of substring indices. + var matchIndices = %RegExpExecGlobal(regexp, string); + if (matchIndices.length != 0) { + regExpCaptures = matchIndices[matchIndices.length - 1]; + regExpSubject = regExpInput = string; + } + return matchIndices; } function RegExpExec(string) { if (%_ArgumentsLength() == 0) { - var regExpInput = LAST_INPUT(lastMatchInfo); if (IS_UNDEFINED(regExpInput)) { throw MakeError('no_input_to_regexp', [this]); } @@ -165,21 +177,23 @@ function RegExpExec(string) { } %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); - // matchIndices is either null or the lastMatchInfo array. - var matchIndices = %RegExpExec(this, s, i, lastMatchInfo); + // matchIndices is an array of integers with length of captures*2, + // each pair of integers specified the start and the end of index + // in the string. + var matchIndices = DoRegExpExec(this, s, i); if (matchIndices == null) { if (this.global) this.lastIndex = 0; return matchIndices; // no match } - var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; + var numResults = matchIndices.length >> 1; var result = new $Array(numResults); for (var i = 0; i < numResults; i++) { - var matchStart = lastMatchInfo[CAPTURE(i << 1)]; - var matchEnd = lastMatchInfo[CAPTURE((i << 1) + 1)]; + var matchStart = matchIndices[2*i]; + var matchEnd = matchIndices[2*i + 1]; if (matchStart != -1 && matchEnd != -1) { - result[i] = SubString(s, matchStart, matchEnd); + result[i] = s.slice(matchStart, matchEnd); } else { // Make sure the element is present. Avoid reading the undefined // property from the global object since this may change. @@ -188,46 +202,16 @@ function RegExpExec(string) { } if (this.global) - this.lastIndex = lastMatchInfo[CAPTURE1]; - result.index = lastMatchInfo[CAPTURE0]; + this.lastIndex = matchIndices[1]; + result.index = matchIndices[0]; result.input = s; return result; } -// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be -// that test is defined in terms of String.prototype.exec even if the method is -// called on a non-RegExp object. However, it probably means the original -// value of String.prototype.exec, which is what everybody else implements. function RegExpTest(string) { - if (%_ArgumentsLength() == 0) { - var regExpInput = LAST_INPUT(lastMatchInfo); - if (IS_UNDEFINED(regExpInput)) { - throw MakeError('no_input_to_regexp', [this]); - } - string = regExpInput; - } - var s = ToString(string); - var length = s.length; - var lastIndex = this.lastIndex; - var i = this.global ? TO_INTEGER(lastIndex) : 0; - - if (i < 0 || i > s.length) { - this.lastIndex = 0; - return false; - } - - %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); - // matchIndices is either null or the lastMatchInfo array. - var matchIndices = %RegExpExec(this, s, i, lastMatchInfo); - - if (matchIndices == null) { - if (this.global) this.lastIndex = 0; - return false; - } - - if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1]; - return true; + var result = (%_ArgumentsLength() == 0) ? this.exec() : this.exec(string); + return result != null; } @@ -252,69 +236,56 @@ function RegExpToString() { // on the captures array of the last successful match and the subject string // of the last successful match. function RegExpGetLastMatch() { - var regExpSubject = LAST_SUBJECT(lastMatchInfo); - return SubString(regExpSubject, - lastMatchInfo[CAPTURE0], - lastMatchInfo[CAPTURE1]); + return regExpSubject.slice(regExpCaptures[0], regExpCaptures[1]); } function RegExpGetLastParen() { - var length = NUMBER_OF_CAPTURES(lastMatchInfo); - if (length <= 2) return ''; // There were no captures. + var length = regExpCaptures.length; + if (length <= 2) return ''; // There were no captures. // We match the SpiderMonkey behavior: return the substring defined by the // last pair (after the first pair) of elements of the capture array even if // it is empty. - var regExpSubject = LAST_SUBJECT(lastMatchInfo); - return SubString(regExpSubject, - lastMatchInfo[CAPTURE(length - 2)], - lastMatchInfo[CAPTURE(length - 1)]); + return regExpSubject.slice(regExpCaptures[length - 2], + regExpCaptures[length - 1]); } function RegExpGetLeftContext() { - return SubString(LAST_SUBJECT(lastMatchInfo), - 0, - lastMatchInfo[CAPTURE0]); + return regExpSubject.slice(0, regExpCaptures[0]); } function RegExpGetRightContext() { - var subject = LAST_SUBJECT(lastMatchInfo); - return SubString(subject, - lastMatchInfo[CAPTURE1], - subject.length); + return regExpSubject.slice(regExpCaptures[1], regExpSubject.length); } // The properties $1..$9 are the first nine capturing substrings of the last // successful match, or ''. The function RegExpMakeCaptureGetter will be -// called with indeces from 1 to 9. +// called with an index greater than or equal to 1 but it actually works for +// any non-negative index. function RegExpMakeCaptureGetter(n) { return function() { var index = n * 2; - if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; - var matchStart = lastMatchInfo[CAPTURE(index)]; - var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; + if (index >= regExpCaptures.length) return ''; + var matchStart = regExpCaptures[index]; + var matchEnd = regExpCaptures[index + 1]; if (matchStart == -1 || matchEnd == -1) return ''; - return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); + return regExpSubject.slice(matchStart, matchEnd); }; } -// Property of the builtins object for recording the result of the last -// regexp match. The property lastMatchInfo includes the matchIndices -// array of the last successful regexp match (an array of start/end index -// pairs for the match and all the captured substrings), the invariant is -// that there are at least two capture indeces. The array also contains -// the subject string for the last successful match. -var lastMatchInfo = [ - 2, // REGEXP_NUMBER_OF_CAPTURES - 0, // REGEXP_FIRST_CAPTURE + 0 - 0, // REGEXP_FIRST_CAPTURE + 1 - "", // Last subject. - void 0, // Last input - settable with RegExpSetInput. -]; +// Properties of the builtins object for recording the result of the last +// regexp match. The property regExpCaptures is the matchIndices array of the +// last successful regexp match (an array of start/end index pairs for the +// match and all the captured substrings), the invariant is that there is at +// least two elements. The property regExpSubject is the subject string for +// the last successful match. +var regExpCaptures = [0, 0]; +var regExpSubject = ''; +var regExpInput; // ------------------------------------------------------------------- @@ -332,23 +303,19 @@ function SetupRegExp() { )); // The spec says nothing about the length of exec and test, but - // SpiderMonkey and JSC have length equal to 0. + // SpiderMonkey and KJS have length equal to 0. %FunctionSetLength($RegExp.prototype.exec, 0); %FunctionSetLength($RegExp.prototype.test, 0); // The length of compile is 1 in SpiderMonkey. %FunctionSetLength($RegExp.prototype.compile, 1); // The properties input, $input, and $_ are aliases for each other. When this - // value is set the value it is set to is coerced to a string. + // value is set the value it is set to is coerced to a string. // Getter and setter for the input. function RegExpGetInput() { - var regExpInput = LAST_INPUT(lastMatchInfo); return IS_UNDEFINED(regExpInput) ? "" : regExpInput; } - function RegExpSetInput(string) { - lastMatchInfo[lastMatchInfo[REGEXP_NUMBER_OF_CAPTURES] + 2] = - ToString(string); - }; + function RegExpSetInput(string) { regExpInput = ToString(string); } %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE); %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE); diff --git a/src/runtime.cc b/src/runtime.cc index 06c7049..0a99141 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -858,21 +858,14 @@ static Object* Runtime_InitializeConstContextSlot(Arguments args) { static Object* Runtime_RegExpExec(Arguments args) { HandleScope scope; - ASSERT(args.length() == 4); + ASSERT(args.length() == 3); CONVERT_CHECKED(JSRegExp, raw_regexp, args[0]); Handle regexp(raw_regexp); CONVERT_CHECKED(String, raw_subject, args[1]); Handle subject(raw_subject); - // Due to the way the JS files are constructed this must be less than the - // length of a string, i.e. it is always a Smi. We check anyway for security. - CONVERT_CHECKED(Smi, index, args[2]); - CONVERT_CHECKED(JSArray, raw_last_match_info, args[3]); - Handle last_match_info(raw_last_match_info); - CHECK(last_match_info->HasFastElements()); - Handle result = RegExpImpl::Exec(regexp, - subject, - index->value(), - last_match_info); + Handle index(args[2]); + ASSERT(index->IsNumber()); + Handle result = RegExpImpl::Exec(regexp, subject, index); if (result.is_null()) return Failure::Exception(); return *result; } @@ -880,16 +873,12 @@ static Object* Runtime_RegExpExec(Arguments args) { static Object* Runtime_RegExpExecGlobal(Arguments args) { HandleScope scope; - ASSERT(args.length() == 3); + ASSERT(args.length() == 2); CONVERT_CHECKED(JSRegExp, raw_regexp, args[0]); Handle regexp(raw_regexp); CONVERT_CHECKED(String, raw_subject, args[1]); Handle subject(raw_subject); - CONVERT_CHECKED(JSArray, raw_last_match_info, args[2]); - Handle last_match_info(raw_last_match_info); - CHECK(last_match_info->HasFastElements()); - Handle result = - RegExpImpl::ExecGlobal(regexp, subject, last_match_info); + Handle result = RegExpImpl::ExecGlobal(regexp, subject); if (result.is_null()) return Failure::Exception(); return *result; } diff --git a/src/runtime.h b/src/runtime.h index 5377e42..34aa4b9 100644 --- a/src/runtime.h +++ b/src/runtime.h @@ -137,8 +137,8 @@ namespace v8 { namespace internal { \ /* Regular expressions */ \ F(RegExpCompile, 3) \ - F(RegExpExec, 4) \ - F(RegExpExecGlobal, 3) \ + F(RegExpExec, 3) \ + F(RegExpExecGlobal, 2) \ \ /* Strings */ \ F(StringCharCodeAt, 2) \ diff --git a/src/string.js b/src/string.js index 57735f0..18d47ca 100644 --- a/src/string.js +++ b/src/string.js @@ -165,9 +165,8 @@ function StringMatch(regexp) { // Build the result array. var result = new $Array(match_string); for (var i = 0; i < matches.length; ++i) { - var matchInfo = matches[i]; - var match_string = subject.slice(matchInfo[CAPTURE0], - matchInfo[CAPTURE1]); + var match = matches[i]; + var match_string = subject.slice(match[0], match[1]); result[i] = match_string; } @@ -219,9 +218,7 @@ function StringReplace(search, replace) { if (IS_FUNCTION(replace)) { builder.add(replace.call(null, search, start, subject)); } else { - reusableMatchInfo[CAPTURE0] = start; - reusableMatchInfo[CAPTURE1] = end; - ExpandReplacement(ToString(replace), subject, reusableMatchInfo, builder); + ExpandReplacement(ToString(replace), subject, [ start, end ], builder); } // suffix @@ -231,15 +228,6 @@ function StringReplace(search, replace) { } -// This has the same size as the lastMatchInfo array, and can be used for -// functions that expect that structure to be returned. It is used when the -// needle is a string rather than a regexp. In this case we can't update -// lastMatchArray without erroneously affecting the properties on the global -// RegExp object. -var reusableMatchInfo = [2, -1, -1, "", ""]; -var reusableMatchArray = [ void 0 ]; - - // Helper function for regular expressions in String.prototype.replace. function StringReplaceRegExp(subject, regexp, replace) { // Compute an array of matches; each match is really a list of @@ -249,10 +237,9 @@ function StringReplaceRegExp(subject, regexp, replace) { matches = DoRegExpExecGlobal(regexp, subject); if (matches.length == 0) return subject; } else { - var lastMatchInfo = DoRegExpExec(regexp, subject, 0); - if (IS_NULL(lastMatchInfo)) return subject; - reusableMatchArray[0] = lastMatchInfo; - matches = reusableMatchArray; + var captures = DoRegExpExec(regexp, subject, 0); + if (IS_NULL(captures)) return subject; + matches = [ captures ]; } // Determine the number of matches. @@ -266,17 +253,17 @@ function StringReplaceRegExp(subject, regexp, replace) { replace = ToString(replace); if (%StringIndexOf(replace, "$", 0) < 0) { for (var i = 0; i < length; i++) { - var matchInfo = matches[i]; - result.addSpecialSlice(previous, matchInfo[CAPTURE0]); + var captures = matches[i]; + result.addSpecialSlice(previous, captures[0]); result.add(replace); - previous = matchInfo[CAPTURE1]; // continue after match + previous = captures[1]; // continue after match } } else { for (var i = 0; i < length; i++) { - var matchInfo = matches[i]; - result.addSpecialSlice(previous, matchInfo[CAPTURE0]); - ExpandReplacement(replace, subject, matchInfo, result); - previous = matchInfo[CAPTURE1]; // continue after match + var captures = matches[i]; + result.addSpecialSlice(previous, captures[0]); + ExpandReplacement(replace, subject, captures, result); + previous = captures[1]; // continue after match } } result.addSpecialSlice(previous, subject.length); @@ -286,7 +273,7 @@ function StringReplaceRegExp(subject, regexp, replace) { // Expand the $-expressions in the string and return a new string with // the result. -function ExpandReplacement(string, subject, matchInfo, builder) { +function ExpandReplacement(string, subject, captures, builder) { var next = %StringIndexOf(string, '$', 0); if (next < 0) { builder.add(string); @@ -294,12 +281,11 @@ function ExpandReplacement(string, subject, matchInfo, builder) { } // Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102. - var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match. + var m = captures.length >> 1; // includes the match if (next > 0) builder.add(SubString(string, 0, next)); var length = string.length; - while (true) { var expansion = '$'; var position = next + 1; @@ -313,14 +299,13 @@ function ExpandReplacement(string, subject, matchInfo, builder) { builder.add('$'); } else if (peek == 38) { // $& - match ++position; - builder.addSpecialSlice(matchInfo[CAPTURE0], - matchInfo[CAPTURE1]); + builder.addSpecialSlice(captures[0], captures[1]); } else if (peek == 96) { // $` - prefix ++position; - builder.addSpecialSlice(0, matchInfo[CAPTURE0]); + builder.addSpecialSlice(0, captures[0]); } else if (peek == 39) { // $' - suffix ++position; - builder.addSpecialSlice(matchInfo[CAPTURE1], subject.length); + builder.addSpecialSlice(captures[1], subject.length); } else if (peek >= 48 && peek <= 57) { // $n, 0 <= n <= 9 ++position; var n = peek - 48; @@ -344,7 +329,7 @@ function ExpandReplacement(string, subject, matchInfo, builder) { } } if (0 < n && n < m) { - addCaptureString(builder, matchInfo, n); + addCaptureString(builder, captures, n); } else { // Because of the captures range check in the parsing of two // digit capture references, we can only enter here when a @@ -376,27 +361,26 @@ function ExpandReplacement(string, subject, matchInfo, builder) { }; -// Compute the string of a given regular expression capture. -function CaptureString(string, lastCaptureInfo, index) { +// Compute the string of a given PCRE capture. +function CaptureString(string, captures, index) { // Scale the index. var scaled = index << 1; // Compute start and end. - var start = lastCaptureInfo[CAPTURE(scaled)]; - var end = lastCaptureInfo[CAPTURE(scaled + 1)]; + var start = captures[scaled]; + var end = captures[scaled + 1]; // If either start or end is missing return undefined. if (start < 0 || end < 0) return; return SubString(string, start, end); }; -// Add the string of a given regular expression capture to the -// ReplaceResultBuilder -function addCaptureString(builder, matchInfo, index) { +// Add the string of a given PCRE capture to the ReplaceResultBuilder +function addCaptureString(builder, captures, index) { // Scale the index. var scaled = index << 1; // Compute start and end. - var start = matchInfo[CAPTURE(scaled)]; - var end = matchInfo[CAPTURE(scaled + 1)]; + var start = captures[scaled]; + var end = captures[scaled + 1]; // If either start or end is missing return. if (start < 0 || end <= start) return; builder.addSpecialSlice(start, end); @@ -412,8 +396,10 @@ function addCaptureString(builder, matchInfo, index) { // should be 'abcd' and not 'dddd' (or anything else). function StringReplaceRegExpWithFunction(subject, regexp, replace) { var result = new ReplaceResultBuilder(subject); - var lastMatchInfo = DoRegExpExec(regexp, subject, 0); - if (IS_NULL(lastMatchInfo)) return subject; + // Captures is an array of pairs of (start, end) indices for the match and + // any captured substrings. + var captures = DoRegExpExec(regexp, subject, 0); + if (IS_NULL(captures)) return subject; // There's at least one match. If the regexp is global, we have to loop // over all matches. The loop is not in C++ code here like the one in @@ -423,16 +409,13 @@ function StringReplaceRegExpWithFunction(subject, regexp, replace) { if (regexp.global) { var previous = 0; do { - result.addSpecialSlice(previous, lastMatchInfo[CAPTURE0]); - var startOfMatch = lastMatchInfo[CAPTURE0]; - previous = lastMatchInfo[CAPTURE1]; - result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject)); - // Can't use lastMatchInfo any more from here, since the function could - // overwrite it. + result.addSpecialSlice(previous, captures[0]); + result.add(ApplyReplacementFunction(replace, captures, subject)); // Continue with the next match. + previous = captures[1]; // Increment previous if we matched an empty string, as per ECMA-262 // 15.5.4.10. - if (previous == startOfMatch) { + if (previous == captures[0]) { // Add the skipped character to the output, if any. if (previous < subject.length) { result.addSpecialSlice(previous, previous + 1); @@ -442,22 +425,19 @@ function StringReplaceRegExpWithFunction(subject, regexp, replace) { // Per ECMA-262 15.10.6.2, if the previous index is greater than the // string length, there is no match - lastMatchInfo = (previous > subject.length) + captures = (previous > subject.length) ? null : DoRegExpExec(regexp, subject, previous); - } while (!IS_NULL(lastMatchInfo)); + } while (!IS_NULL(captures)); // Tack on the final right substring after the last match, if necessary. if (previous < subject.length) { result.addSpecialSlice(previous, subject.length); } } else { // Not a global regexp, no need to loop. - result.addSpecialSlice(0, lastMatchInfo[CAPTURE0]); - var endOfMatch = lastMatchInfo[CAPTURE1]; - result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject)); - // Can't use lastMatchInfo any more from here, since the function could - // overwrite it. - result.addSpecialSlice(endOfMatch, subject.length); + result.addSpecialSlice(0, captures[0]); + result.add(ApplyReplacementFunction(replace, captures, subject)); + result.addSpecialSlice(captures[1], subject.length); } return result.generate(); @@ -465,20 +445,20 @@ function StringReplaceRegExpWithFunction(subject, regexp, replace) { // Helper function to apply a string replacement function once. -function ApplyReplacementFunction(replace, lastMatchInfo, subject) { +function ApplyReplacementFunction(replace, captures, subject) { // Compute the parameter list consisting of the match, captures, index, // and subject for the replace function invocation. - var index = lastMatchInfo[CAPTURE0]; + var index = captures[0]; // The number of captures plus one for the match. - var m = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; + var m = captures.length >> 1; if (m == 1) { - var s = CaptureString(subject, lastMatchInfo, 0); + var s = CaptureString(subject, captures, 0); // Don't call directly to avoid exposing the built-in global object. return ToString(replace.call(null, s, index, subject)); } var parameters = $Array(m + 2); for (var j = 0; j < m; j++) { - parameters[j] = CaptureString(subject, lastMatchInfo, j); + parameters[j] = CaptureString(subject, captures, j); } parameters[j] = index; parameters[j + 1] = subject; @@ -579,14 +559,14 @@ function StringSplit(separator, limit) { return result; } - var lastMatchInfo = splitMatch(sep, subject, currentIndex, startIndex); + var match = splitMatch(sep, subject, currentIndex, startIndex); - if (IS_NULL(lastMatchInfo)) { + if (IS_NULL(match)) { result[result.length] = subject.slice(currentIndex, length); return result; } - var endIndex = lastMatchInfo[CAPTURE1]; + var endIndex = match[0]; // We ignore a zero-length match at the currentIndex. if (startIndex === endIndex && endIndex === currentIndex) { @@ -594,20 +574,11 @@ function StringSplit(separator, limit) { continue; } - result[result.length] = - SubString(subject, currentIndex, lastMatchInfo[CAPTURE0]); + result[result.length] = match[1]; if (result.length === lim) return result; - for (var i = 2; i < NUMBER_OF_CAPTURES(lastMatchInfo); i += 2) { - var start = lastMatchInfo[CAPTURE(i)]; - var end = lastMatchInfo[CAPTURE(i + 1)]; - if (start != -1 && end != -1) { - result[result.length] = SubString(subject, - lastMatchInfo[CAPTURE(i)], - lastMatchInfo[CAPTURE(i + 1)]); - } else { - result[result.length] = void 0; - } + for (var i = 2; i < match.length; i++) { + result[result.length] = match[i]; if (result.length === lim) return result; } @@ -617,24 +588,32 @@ function StringSplit(separator, limit) { // ECMA-262 section 15.5.4.14 -// Helper function used by split. This version returns the lastMatchInfo -// instead of allocating a new array with basically the same information. +// Helper function used by split. function splitMatch(separator, subject, current_index, start_index) { if (IS_REGEXP(separator)) { - var lastMatchInfo = DoRegExpExec(separator, subject, start_index); - if (lastMatchInfo == null) return null; + var ovector = DoRegExpExec(separator, subject, start_index); + if (ovector == null) return null; + var nof_results = ovector.length >> 1; + var result = new $Array(nof_results + 1); // Section 15.5.4.14 paragraph two says that we do not allow zero length // matches at the end of the string. - if (lastMatchInfo[CAPTURE0] === subject.length) return null; - return lastMatchInfo; + if (ovector[0] === subject.length) return null; + result[0] = ovector[1]; + result[1] = subject.slice(current_index, ovector[0]); + for (var i = 1; i < nof_results; i++) { + var matching_start = ovector[2*i]; + var matching_end = ovector[2*i + 1]; + if (matching_start != -1 && matching_end != -1) { + result[i + 1] = subject.slice(matching_start, matching_end); + } + } + return result; } var separatorIndex = subject.indexOf(separator, start_index); if (separatorIndex === -1) return null; - reusableMatchInfo[CAPTURE0] = separatorIndex; - reusableMatchInfo[CAPTURE1] = separatorIndex + separator.length; - return reusableMatchInfo; + return [ separatorIndex + separator.length, subject.slice(current_index, separatorIndex) ]; }; diff --git a/test/mjsunit/regexp-static.js b/test/mjsunit/regexp-static.js index 49fe05d..5db9fe2 100644 --- a/test/mjsunit/regexp-static.js +++ b/test/mjsunit/regexp-static.js @@ -132,8 +132,3 @@ for (var i = 4; i < 10; ++i) { re = /(.)/g; function f() { return RegExp.$1; }; assertEquals('abcd', 'abcd'.replace(re, f)); - -RegExp.multiline = "foo"; -assertTrue(typeof RegExp.multiline == typeof Boolean(), "RegExp.multiline coerces values to booleans"); -RegExp.input = Number(); -assertTrue(typeof RegExp.input == typeof String(), "RegExp.input coerces values to booleans"); diff --git a/test/mjsunit/regexp-string-methods.js b/test/mjsunit/regexp-string-methods.js deleted file mode 100644 index ef3bf6e..0000000 --- a/test/mjsunit/regexp-string-methods.js +++ /dev/null @@ -1,51 +0,0 @@ -// Copyright 2009 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// Regexp shouldn't use String.prototype.slice() -var s = new String("foo"); -assertEquals("f", s.slice(0,1)); -String.prototype.slice = function() { return "x"; } -assertEquals("x", s.slice(0,1)); -assertEquals("g", /g/.exec("gg")); - -// Regexp shouldn't use String.prototype.charAt() -var f1 = new RegExp("f", "i"); -assertEquals("F", f1.exec("F")); -assertEquals("f", "foo".charAt(0)); -String.prototype.charAt = function(idx) { return 'g'; }; -assertEquals("g", "foo".charAt(0)); -var f2 = new RegExp("[g]", "i"); -assertEquals("G", f2.exec("G")); -assertTrue(f2.ignoreCase); - -// On the other hand test is defined in a semi-coherent way as a call to exec. -// 15.10.6.3 -// We match other browsers in using the original value of RegExp.prototype.exec. -// I.e., RegExp.prototype.test shouldn't use the current value of -// RegExp.prototype.exec. -RegExp.prototype.exec = function(string) { return 'x'; } -assertFalse(/f/.test('x')); diff --git a/tools/js2c.py b/tools/js2c.py index 52fe35c..0ae1ad9 100755 --- a/tools/js2c.py +++ b/tools/js2c.py @@ -104,7 +104,7 @@ def ExpandConstants(lines, constants): def ExpandMacros(lines, macros): for name, macro in macros.items(): - start = lines.find(name + '(', 0) + start = lines.find(name, 0) while start != -1: # Scan over the arguments assert lines[start + len(name)] == '(' @@ -132,7 +132,7 @@ def ExpandMacros(lines, macros): result = macro.expand(mapping) # Replace the occurrence of the macro with the expansion lines = lines[:start] + result + lines[end:] - start = lines.find(name + '(', end) + start = lines.find(name, end) return lines class TextMacro: -- 2.7.4