From 50e042dfcdaf148ebe0b7f242c8790252f7d8bcd Mon Sep 17 00:00:00 2001 From: "lrn@chromium.org" Date: Mon, 2 Mar 2009 13:58:37 +0000 Subject: [PATCH] All RegExp data are set on a single FixedArray instead of nesting them three deep. git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1398 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/factory.cc | 32 +++- src/factory.h | 20 ++- src/jsregexp.cc | 462 +++++++++++++++++++++++++++------------------------ src/jsregexp.h | 55 +++--- src/objects-debug.cc | 14 +- src/objects-inl.h | 7 + src/objects.h | 40 ++++- 7 files changed, 373 insertions(+), 257 deletions(-) diff --git a/src/factory.cc b/src/factory.cc index ec52520..2cf411e 100644 --- a/src/factory.cc +++ b/src/factory.cc @@ -826,12 +826,13 @@ Handle Factory::ObjectLiteralMapFromCache(Handle context, } -void Factory::SetRegExpData(Handle regexp, - JSRegExp::Type type, - Handle source, - JSRegExp::Flags flags, - Handle data) { - Handle store = NewFixedArray(JSRegExp::kDataSize); +void Factory::SetRegExpAtomData(Handle regexp, + JSRegExp::Type type, + Handle source, + JSRegExp::Flags flags, + Handle data) { + Handle store = NewFixedArray(JSRegExp::kAtomDataSize); + store->set(JSRegExp::kTagIndex, Smi::FromInt(type)); store->set(JSRegExp::kSourceIndex, *source); store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags.value())); @@ -839,6 +840,25 @@ void Factory::SetRegExpData(Handle regexp, regexp->set_data(*store); } +void Factory::SetRegExpIrregexpData(Handle regexp, + JSRegExp::Type type, + Handle source, + JSRegExp::Flags flags, + int capture_count) { + Handle store = NewFixedArray(JSRegExp::kIrregexpDataSize); + + store->set(JSRegExp::kTagIndex, Smi::FromInt(type)); + store->set(JSRegExp::kSourceIndex, *source); + store->set(JSRegExp::kFlagsIndex, Smi::FromInt(flags.value())); + store->set(JSRegExp::kIrregexpASCIICodeIndex, Heap::the_hole_value()); + store->set(JSRegExp::kIrregexpUC16CodeIndex, Heap::the_hole_value()); + 
store->set(JSRegExp::kIrregexpMaxRegisterCountIndex, Smi::FromInt(0)); + store->set(JSRegExp::kIrregexpCaptureCountIndex, + Smi::FromInt(capture_count)); + regexp->set_data(*store); +} + + void Factory::ConfigureInstance(Handle desc, Handle instance, diff --git a/src/factory.h b/src/factory.h index f282896..143235c 100644 --- a/src/factory.h +++ b/src/factory.h @@ -316,12 +316,20 @@ class Factory : public AllStatic { Handle keys); // Creates a new FixedArray that holds the data associated with the - // regexp and stores it in the regexp. - static void SetRegExpData(Handle regexp, - JSRegExp::Type type, - Handle source, - JSRegExp::Flags flags, - Handle data); + // atom regexp and stores it in the regexp. + static void SetRegExpAtomData(Handle regexp, + JSRegExp::Type type, + Handle source, + JSRegExp::Flags flags, + Handle match_pattern); + + // Creates a new FixedArray that holds the data associated with the + // irregexp regexp and stores it in the regexp. + static void SetRegExpIrregexpData(Handle regexp, + JSRegExp::Type type, + Handle source, + JSRegExp::Flags flags, + int capture_count); private: static Handle NewFunctionHelper(Handle name, diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 12572d3..9658a21 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -213,44 +213,41 @@ Handle RegExpImpl::Compile(Handle re, Handle result; if (in_cache) { re->set_data(*cached); - result = re; - } else { - FlattenString(pattern); - ZoneScope zone_scope(DELETE_ON_EXIT); - RegExpCompileData parse_result; - FlatStringReader reader(pattern); - if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { - // Throw an exception if we fail to parse the pattern. 
- ThrowRegExpException(re, - pattern, - parse_result.error, - "malformed_regexp"); - return Handle::null(); - } + return re; + } + FlattenString(pattern); + ZoneScope zone_scope(DELETE_ON_EXIT); + RegExpCompileData parse_result; + FlatStringReader reader(pattern); + if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) { + // Throw an exception if we fail to parse the pattern. + ThrowRegExpException(re, + pattern, + parse_result.error, + "malformed_regexp"); + return Handle::null(); + } - if (parse_result.simple && !flags.is_ignore_case()) { - // Parse-tree is a single atom that is equal to the pattern. - result = AtomCompile(re, pattern, flags, pattern); - } else if (parse_result.tree->IsAtom() && - !flags.is_ignore_case() && - parse_result.capture_count == 0) { - RegExpAtom* atom = parse_result.tree->AsAtom(); - Vector atom_pattern = atom->data(); - Handle atom_string = Factory::NewStringFromTwoByte(atom_pattern); - result = AtomCompile(re, pattern, flags, atom_string); - } else { - result = IrregexpPrepare(re, pattern, flags); - } - Object* data = re->data(); - if (data->IsFixedArray()) { - // If compilation succeeded then the data is set on the regexp - // and we can store it in the cache. - Handle data(FixedArray::cast(re->data())); - CompilationCache::PutRegExp(pattern, flags, data); - } + if (parse_result.simple && !flags.is_ignore_case()) { + // Parse-tree is a single atom that is equal to the pattern. 
+ AtomCompile(re, pattern, flags, pattern); + } else if (parse_result.tree->IsAtom() && + !flags.is_ignore_case() && + parse_result.capture_count == 0) { + RegExpAtom* atom = parse_result.tree->AsAtom(); + Vector atom_pattern = atom->data(); + Handle atom_string = Factory::NewStringFromTwoByte(atom_pattern); + AtomCompile(re, pattern, flags, atom_string); + } else { + IrregexpPrepare(re, pattern, flags, parse_result.capture_count); } + ASSERT(re->data()->IsFixedArray()); + // Compilation succeeded so the data is set on the regexp + // and we can store it in the cache. + Handle data(FixedArray::cast(re->data())); + CompilationCache::PutRegExp(pattern, flags, data); - return result; + return re; } @@ -275,8 +272,8 @@ Handle RegExpImpl::Exec(Handle regexp, Handle RegExpImpl::ExecGlobal(Handle regexp, - Handle subject, - Handle last_match_info) { + Handle subject, + Handle last_match_info) { switch (regexp->TypeTag()) { case JSRegExp::ATOM: return AtomExecGlobal(regexp, subject, last_match_info); @@ -296,12 +293,15 @@ Handle RegExpImpl::ExecGlobal(Handle regexp, // RegExp Atom implementation: Simple string search using indexOf. -Handle RegExpImpl::AtomCompile(Handle re, - Handle pattern, - JSRegExp::Flags flags, - Handle match_pattern) { - Factory::SetRegExpData(re, JSRegExp::ATOM, pattern, flags, match_pattern); - return re; +void RegExpImpl::AtomCompile(Handle re, + Handle pattern, + JSRegExp::Flags flags, + Handle match_pattern) { + Factory::SetRegExpAtomData(re, + JSRegExp::ATOM, + pattern, + flags, + match_pattern); } @@ -386,23 +386,29 @@ Handle RegExpImpl::AtomExecGlobal(Handle re, // Irregexp implementation. -// Retrieves a compiled version of the regexp for either ASCII or non-ASCII -// strings. If the compiled version doesn't already exist, it is compiled +// Ensures that the regexp object contains a compiled version of the +// source for either ASCII or non-ASCII strings. 
+// If the compiled version doesn't already exist, it is compiled // from the source pattern. -// Irregexp is not feature complete yet. If there is something in the -// regexp that the compiler cannot currently handle, an empty -// handle is returned, but no exception is thrown. -static Handle GetCompiledIrregexp(Handle re, - bool is_ascii) { - ASSERT(re->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); - Handle alternatives( - FixedArray::cast(re->DataAt(JSRegExp::kIrregexpDataIndex))); - ASSERT_EQ(2, alternatives->length()); - - int index = is_ascii ? 0 : 1; - Object* entry = alternatives->get(index); - if (!entry->IsNull()) { - return Handle(FixedArray::cast(entry)); +// If compilation fails, an exception is thrown and this function +// returns false. +bool RegExpImpl::EnsureCompiledIrregexp(Handle re, + bool is_ascii) { + int index; + if (is_ascii) { + index = JSRegExp::kIrregexpASCIICodeIndex; + } else { + index = JSRegExp::kIrregexpUC16CodeIndex; + } + Object* entry = re->DataAt(index); + if (!entry->IsTheHole()) { + // A value has already been compiled. + if (entry->IsJSObject()) { + // If it's a JS value, it's an error. + Top::Throw(entry); + return false; + } + return true; } // Compile the RegExp. @@ -424,54 +430,101 @@ static Handle GetCompiledIrregexp(Handle re, pattern, compile_data.error, "malformed_regexp"); - return Handle::null(); + return false; } - Handle compiled_entry = + RegExpEngine::CompilationResult result = RegExpEngine::Compile(&compile_data, flags.is_ignore_case(), flags.is_multiline(), pattern, is_ascii); - if (!compiled_entry.is_null()) { - alternatives->set(index, *compiled_entry); + if (result.error_message != NULL) { + // Unable to compile regexp. 
+ Handle array = Factory::NewJSArray(2); + SetElement(array, 0, pattern); + SetElement(array, + 1, + Factory::NewStringFromUtf8(CStrVector(result.error_message))); + Handle regexp_err = + Factory::NewSyntaxError("malformed_regexp", array); + Top::Throw(*regexp_err); + re->SetDataAt(index, *regexp_err); + return false; + } + + Handle data(FixedArray::cast(re->data())); + data->set(index, result.code); + int register_max = IrregexpMaxRegisterCount(data); + if (result.num_registers > register_max) { + SetIrregexpMaxRegisterCount(data, result.num_registers); } - return compiled_entry; + + return true; } -int RegExpImpl::IrregexpNumberOfCaptures(Handle irre) { - return Smi::cast(irre->get(kIrregexpNumberOfCapturesIndex))->value(); +int RegExpImpl::IrregexpMaxRegisterCount(Handle re) { + return Smi::cast( + re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); } -int RegExpImpl::IrregexpNumberOfRegisters(Handle irre) { - return Smi::cast(irre->get(kIrregexpNumberOfRegistersIndex))->value(); +void RegExpImpl::SetIrregexpMaxRegisterCount(Handle re, int value) { + re->set(JSRegExp::kIrregexpMaxRegisterCountIndex, + Smi::FromInt(value)); } -Handle RegExpImpl::IrregexpByteCode(Handle irre) { - ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() - == RegExpMacroAssembler::kBytecodeImplementation); - return Handle(ByteArray::cast(irre->get(kIrregexpCodeIndex))); +int RegExpImpl::IrregexpNumberOfCaptures(Handle re) { + return Smi::cast( + re->get(JSRegExp::kIrregexpCaptureCountIndex))->value(); } -Handle RegExpImpl::IrregexpNativeCode(Handle irre) { - ASSERT(Smi::cast(irre->get(kIrregexpImplementationIndex))->value() - != RegExpMacroAssembler::kBytecodeImplementation); - return Handle(Code::cast(irre->get(kIrregexpCodeIndex))); +int RegExpImpl::IrregexpNumberOfRegisters(Handle re) { + return Smi::cast( + re->get(JSRegExp::kIrregexpMaxRegisterCountIndex))->value(); } -HandleRegExpImpl::IrregexpPrepare(Handle re, - Handle pattern, - JSRegExp::Flags flags) { - 
// Make space for ASCII and UC16 versions. - Handle alternatives = Factory::NewFixedArray(2); - alternatives->set_null(0); - alternatives->set_null(1); - Factory::SetRegExpData(re, JSRegExp::IRREGEXP, pattern, flags, alternatives); - return re; +Handle RegExpImpl::IrregexpByteCode(Handle re, + bool is_ascii) { + int index; + if (is_ascii) { + index = JSRegExp::kIrregexpASCIICodeIndex; + } else { + index = JSRegExp::kIrregexpUC16CodeIndex; + } + Object* value = re->get(index); + ASSERT(value->IsByteArray()); + return Handle(ByteArray::cast(value)); +} + + +Handle RegExpImpl::IrregexpNativeCode(Handle re, + bool is_ascii) { + int index; + if (is_ascii) { + index = JSRegExp::kIrregexpASCIICodeIndex; + } else { + index = JSRegExp::kIrregexpUC16CodeIndex; + } + Object* value = re->get(index); + ASSERT(value->IsCode()); + return Handle(Code::cast(value)); +} + + +void RegExpImpl::IrregexpPrepare(Handle re, + Handle pattern, + JSRegExp::Flags flags, + int capture_count) { + // Initialize compiled code entries to the hole value. + Factory::SetRegExpIrregexpData(re, + JSRegExp::IRREGEXP, + pattern, + flags, + capture_count); } @@ -480,18 +533,16 @@ Handle RegExpImpl::IrregexpExec(Handle regexp, int index, Handle last_match_info) { ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); - ASSERT(regexp->DataAt(JSRegExp::kIrregexpDataIndex)->IsFixedArray()); bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); - Handle irregexp = GetCompiledIrregexp(regexp, is_ascii); - if (irregexp.is_null()) { - // We can't handle the RegExp with IRRegExp. + if (!EnsureCompiledIrregexp(regexp, is_ascii)) { return Handle::null(); } // Prepare space for the return values. 
+ Handle re_data(FixedArray::cast(regexp->data())); int number_of_capture_registers = - (IrregexpNumberOfCaptures(irregexp) + 1) * 2; + (IrregexpNumberOfCaptures(re_data) + 1) * 2; OffsetsVector offsets(number_of_capture_registers); int previous_index = index; @@ -510,7 +561,7 @@ Handle RegExpImpl::IrregexpExec(Handle regexp, last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead); - return IrregexpExecOnce(irregexp, + return IrregexpExecOnce(re_data, number_of_capture_registers, last_match_info, subject, @@ -524,10 +575,10 @@ Handle RegExpImpl::IrregexpExecGlobal(Handle regexp, Handle subject, Handle last_match_info) { ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP); + Handle irregexp(FixedArray::cast(regexp->data())); bool is_ascii = StringShape(*subject).IsAsciiRepresentation(); - Handle irregexp = GetCompiledIrregexp(regexp, is_ascii); - if (irregexp.is_null()) { + if (!EnsureCompiledIrregexp(regexp, is_ascii)) { return Handle::null(); } @@ -605,135 +656,124 @@ Handle RegExpImpl::IrregexpExecGlobal(Handle regexp, } -Handle RegExpImpl::IrregexpExecOnce(Handle irregexp, +Handle RegExpImpl::IrregexpExecOnce(Handle regexp, int number_of_capture_registers, Handle last_match_info, Handle subject, int previous_index, int* offsets_vector, int offsets_vector_length) { - ASSERT(subject->IsFlat(StringShape(*subject))); + StringShape shape(*subject); + ASSERT(subject->IsFlat(shape)); + bool is_ascii = shape.IsAsciiRepresentation(); bool rc; - int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value(); - - switch (tag) { - case RegExpMacroAssembler::kIA32Implementation: { + if (FLAG_regexp_native) { #ifndef ARM - Handle code = IrregexpNativeCode(irregexp); + Handle code(IrregexpNativeCode(regexp, is_ascii)); + + // Character offsets into string. 
+ int start_offset = previous_index; + int end_offset = subject->length(shape); + + if (shape.IsCons()) { + subject = Handle(ConsString::cast(*subject)->first()); + } else if (shape.IsSliced()) { + SlicedString* slice = SlicedString::cast(*subject); + start_offset += slice->start(); + end_offset += slice->start(); + subject = Handle(slice->buffer()); + } - StringShape shape(*subject); + // String is now either Sequential or External + StringShape flatshape(*subject); + bool is_ascii = flatshape.IsAsciiRepresentation(); + int char_size_shift = is_ascii ? 0 : 1; - // Character offsets into string. - int start_offset = previous_index; - int end_offset = subject->length(shape); + RegExpMacroAssemblerIA32::Result res; - if (shape.IsCons()) { - subject = Handle(ConsString::cast(*subject)->first()); - } else if (shape.IsSliced()) { - SlicedString* slice = SlicedString::cast(*subject); - start_offset += slice->start(); - end_offset += slice->start(); - subject = Handle(slice->buffer()); + if (flatshape.IsExternal()) { + const byte* address; + if (is_ascii) { + ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); + address = reinterpret_cast(ext->resource()->data()); + } else { + ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); + address = reinterpret_cast(ext->resource()->data()); } + res = RegExpMacroAssemblerIA32::Execute( + *code, + const_cast(&address), + start_offset << char_size_shift, + end_offset << char_size_shift, + offsets_vector, + previous_index == 0); + } else { // Sequential string + ASSERT(StringShape(*subject).IsSequential()); + Address char_address = + is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() + : SeqTwoByteString::cast(*subject)->GetCharsAddress(); + int byte_offset = char_address - reinterpret_cast
(*subject); + res = RegExpMacroAssemblerIA32::Execute( + *code, + reinterpret_cast(subject.location()), + byte_offset + (start_offset << char_size_shift), + byte_offset + (end_offset << char_size_shift), + offsets_vector, + previous_index == 0); + } - // String is now either Sequential or External - StringShape flatshape(*subject); - bool is_ascii = flatshape.IsAsciiRepresentation(); - int char_size_shift = is_ascii ? 0 : 1; - - RegExpMacroAssemblerIA32::Result res; + if (res == RegExpMacroAssemblerIA32::EXCEPTION) { + ASSERT(Top::has_pending_exception()); + return Handle::null(); + } + rc = (res == RegExpMacroAssemblerIA32::SUCCESS); - if (flatshape.IsExternal()) { - const byte* address; - if (is_ascii) { - ExternalAsciiString* ext = ExternalAsciiString::cast(*subject); - address = reinterpret_cast(ext->resource()->data()); - } else { - ExternalTwoByteString* ext = ExternalTwoByteString::cast(*subject); - address = reinterpret_cast(ext->resource()->data()); + if (rc) { + // Capture values are relative to start_offset only. + for (int i = 0; i < offsets_vector_length; i++) { + if (offsets_vector[i] >= 0) { + offsets_vector[i] += previous_index; } - res = RegExpMacroAssemblerIA32::Execute( - *code, - const_cast(&address), - start_offset << char_size_shift, - end_offset << char_size_shift, - offsets_vector, - previous_index == 0); - } else { // Sequential string - ASSERT(StringShape(*subject).IsSequential()); - Address char_address = - is_ascii ? SeqAsciiString::cast(*subject)->GetCharsAddress() - : SeqTwoByteString::cast(*subject)->GetCharsAddress(); - int byte_offset = char_address - reinterpret_cast
(*subject); - res = RegExpMacroAssemblerIA32::Execute( - *code, - reinterpret_cast(subject.location()), - byte_offset + (start_offset << char_size_shift), - byte_offset + (end_offset << char_size_shift), - offsets_vector, - previous_index == 0); - } - - if (res == RegExpMacroAssemblerIA32::EXCEPTION) { - ASSERT(Top::has_pending_exception()); - return Handle::null(); } - rc = (res == RegExpMacroAssemblerIA32::SUCCESS); - - if (rc) { - // Capture values are relative to start_offset only. - for (int i = 0; i < offsets_vector_length; i++) { - if (offsets_vector[i] >= 0) { - offsets_vector[i] += previous_index; - } - } - } - break; + } + } else { #else - UNIMPLEMENTED(); - rc = false; - break; + // Unimplemented on ARM, fall through to bytecode. + } + { #endif + for (int i = number_of_capture_registers - 1; i >= 0; i--) { + offsets_vector[i] = -1; } - case RegExpMacroAssembler::kBytecodeImplementation: { - for (int i = number_of_capture_registers - 1; i >= 0; i--) { - offsets_vector[i] = -1; - } - Handle byte_codes = IrregexpByteCode(irregexp); + Handle byte_codes = IrregexpByteCode(regexp, is_ascii); - rc = IrregexpInterpreter::Match(byte_codes, - subject, - offsets_vector, - previous_index); - break; - } - case RegExpMacroAssembler::kARMImplementation: - default: - UNREACHABLE(); - rc = false; - break; + rc = IrregexpInterpreter::Match(byte_codes, + subject, + offsets_vector, + previous_index); } if (!rc) { return Factory::null_value(); } - Handle array(last_match_info->elements()); + FixedArray* array = last_match_info->elements(); + ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead); // The captures come in (start, end+1) pairs. 
for (int i = 0; i < number_of_capture_registers; i += 2) { - SetCapture(*array, i, offsets_vector[i]); - SetCapture(*array, i + 1, offsets_vector[i + 1]); + SetCapture(array, i, offsets_vector[i]); + SetCapture(array, i + 1, offsets_vector[i + 1]); } - SetLastCaptureCount(*array, number_of_capture_registers); - SetLastSubject(*array, *subject); - SetLastInput(*array, *subject); + SetLastCaptureCount(array, number_of_capture_registers); + SetLastSubject(array, *subject); + SetLastInput(array, *subject); return last_match_info; } // ------------------------------------------------------------------- -// Implmentation of the Irregexp regular expression engine. +// Implementation of the Irregexp regular expression engine. // // The Irregexp regular expression engine is intended to be a complete // implementation of ECMAScript regular expressions. It generates either @@ -950,10 +990,10 @@ class RegExpCompiler { return next_register_++; } - Handle Assemble(RegExpMacroAssembler* assembler, - RegExpNode* start, - int capture_count, - Handle pattern); + RegExpEngine::CompilationResult Assemble(RegExpMacroAssembler* assembler, + RegExpNode* start, + int capture_count, + Handle pattern); inline void AddWork(RegExpNode* node) { work_list_->Add(node); } @@ -998,15 +1038,8 @@ class RecursionCheck { }; -static Handle IrregexpRegExpTooBig(Handle pattern) { - Handle array = Factory::NewJSArray(2); - SetElement(array, 0, pattern); - const char* message = "RegExp too big"; - SetElement(array, 1, Factory::NewStringFromUtf8(CStrVector(message))); - Handle regexp_err = - Factory::NewSyntaxError("malformed_regexp", array); - Top::Throw(*regexp_err); - return Handle(); +static RegExpEngine::CompilationResult IrregexpRegExpTooBig() { + return RegExpEngine::CompilationResult("RegExp too big"); } @@ -1024,7 +1057,7 @@ RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii) } -Handle RegExpCompiler::Assemble( +RegExpEngine::CompilationResult RegExpCompiler::Assemble( 
RegExpMacroAssembler* macro_assembler, RegExpNode* start, int capture_count, @@ -1046,24 +1079,17 @@ Handle RegExpCompiler::Assemble( while (!work_list.is_empty()) { work_list.RemoveLast()->Emit(this, &new_trace); } - if (reg_exp_too_big_) return IrregexpRegExpTooBig(pattern); - Handle array = - Factory::NewFixedArray(RegExpImpl::kIrregexpDataLength); - array->set(RegExpImpl::kIrregexpImplementationIndex, - Smi::FromInt(macro_assembler_->Implementation())); - array->set(RegExpImpl::kIrregexpNumberOfRegistersIndex, - Smi::FromInt(next_register_)); - array->set(RegExpImpl::kIrregexpNumberOfCapturesIndex, - Smi::FromInt(capture_count)); + if (reg_exp_too_big_) return IrregexpRegExpTooBig(); + Handle code = macro_assembler_->GetCode(pattern); - array->set(RegExpImpl::kIrregexpCodeIndex, *code); + work_list_ = NULL; #ifdef DEBUG if (FLAG_trace_regexp_assembler) { delete macro_assembler_; } #endif - return array; + return RegExpEngine::CompilationResult(*code, next_register_); } @@ -4647,13 +4673,13 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) { } -Handle RegExpEngine::Compile(RegExpCompileData* data, - bool ignore_case, - bool is_multiline, - Handle pattern, - bool is_ascii) { +RegExpEngine::CompilationResult RegExpEngine::Compile(RegExpCompileData* data, + bool ignore_case, + bool is_multiline, + Handle pattern, + bool is_ascii) { if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) { - return IrregexpRegExpTooBig(pattern); + return IrregexpRegExpTooBig(); } RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii); // Wrap the body of the regexp in capture #0. diff --git a/src/jsregexp.h b/src/jsregexp.h index 84e5b95..9b2f0ad 100644 --- a/src/jsregexp.h +++ b/src/jsregexp.h @@ -51,6 +51,7 @@ class RegExpImpl { // Parses the RegExp pattern and prepares the JSRegExp object with // generic data and choice of implementation - as well as what // the implementation wants to store in the data field. 
+ // Returns an empty handle if compilation fails. static Handle Compile(Handle re, Handle pattern, Handle flags); @@ -70,15 +71,16 @@ class RegExpImpl { Handle lastMatchInfo); // Prepares a JSRegExp object with Irregexp-specific data. - static Handle IrregexpPrepare(Handle re, - Handle pattern, - JSRegExp::Flags flags); + static void IrregexpPrepare(Handle re, + Handle pattern, + JSRegExp::Flags flags, + int capture_register_count); - static Handle AtomCompile(Handle re, - Handle pattern, - JSRegExp::Flags flags, - Handle match_pattern); + static void AtomCompile(Handle re, + Handle pattern, + JSRegExp::Flags flags, + Handle match_pattern); static Handle AtomExec(Handle regexp, Handle subject, int index, @@ -107,12 +109,6 @@ class RegExpImpl { static Handle StringToTwoByte(Handle pattern); static Handle CachedStringToTwoByte(Handle pattern); - static const int kIrregexpImplementationIndex = 0; - static const int kIrregexpNumberOfCapturesIndex = 1; - static const int kIrregexpNumberOfRegistersIndex = 2; - static const int kIrregexpCodeIndex = 3; - static const int kIrregexpDataLength = 4; - // Offsets in the lastMatchInfo array. static const int kLastCaptureCount = 0; static const int kLastSubject = 1; @@ -141,10 +137,15 @@ class RegExpImpl { static String* last_ascii_string_; static String* two_byte_cached_string_; + static bool EnsureCompiledIrregexp(Handle re, bool is_ascii); + + static int IrregexpMaxRegisterCount(Handle re); + static void SetIrregexpMaxRegisterCount(Handle re, int value); static int IrregexpNumberOfCaptures(Handle re); static int IrregexpNumberOfRegisters(Handle re); - static Handle IrregexpByteCode(Handle re); - static Handle IrregexpNativeCode(Handle re); + static Handle IrregexpByteCode(Handle re, + bool is_ascii); + static Handle IrregexpNativeCode(Handle re, bool is_ascii); // On a successful match, the result is a JSArray containing // captured positions. On a failure, the result is the null value. 
@@ -1354,11 +1355,25 @@ struct RegExpCompileData { class RegExpEngine: public AllStatic { public: - static Handle Compile(RegExpCompileData* input, - bool ignore_case, - bool multiline, - Handle pattern, - bool is_ascii); + struct CompilationResult { + explicit CompilationResult(const char* error_message) + : error_message(error_message), + code(Heap::the_hole_value()), + num_registers(0) {} + CompilationResult(Object* code, int registers) + : error_message(NULL), + code(code), + num_registers(registers) {} + const char* error_message; + Object* code; + int num_registers; + }; + + static CompilationResult Compile(RegExpCompileData* input, + bool ignore_case, + bool multiline, + Handle pattern, + bool is_ascii); static void DotPrint(const char* label, RegExpNode* node, bool ignore_case); }; diff --git a/src/objects-debug.cc b/src/objects-debug.cc index 0362a15..f8350ed 100644 --- a/src/objects-debug.cc +++ b/src/objects-debug.cc @@ -697,8 +697,18 @@ void JSRegExp::JSRegExpVerify() { } case JSRegExp::IRREGEXP: { FixedArray* arr = FixedArray::cast(data()); - Object* irregexp_data = arr->get(JSRegExp::kIrregexpDataIndex); - ASSERT(irregexp_data->IsFixedArray()); + Object* ascii_data = arr->get(JSRegExp::kIrregexpASCIICodeIndex); + ASSERT(ascii_data->IsTheHole() + || (FLAG_regexp_native ? + ascii_data->IsCode() + : ascii_data->IsByteArray())); + Object* uc16_data = arr->get(JSRegExp::kIrregexpUC16CodeIndex); + ASSERT(uc16_data->IsTheHole() + || (FLAG_regexp_native ? 
+ uc16_data->IsCode() + : uc16_data->IsByteArray())); + ASSERT(arr->get(JSRegExp::kIrregexpCaptureCountIndex)->IsSmi()); + ASSERT(arr->get(JSRegExp::kIrregexpMaxRegisterCountIndex)->IsSmi()); break; } default: diff --git a/src/objects-inl.h b/src/objects-inl.h index 9705b75..18017c3 100644 --- a/src/objects-inl.h +++ b/src/objects-inl.h @@ -2337,6 +2337,13 @@ Object* JSRegExp::DataAt(int index) { } +void JSRegExp::SetDataAt(int index, Object* value) { + ASSERT(TypeTag() != NOT_COMPILED); + ASSERT(index >= kDataIndex); // Only implementation data can be set this way. + FixedArray::cast(data())->set(index, value); +} + + bool JSObject::HasFastElements() { return !elements()->IsDictionary(); } diff --git a/src/objects.h b/src/objects.h index 65dba63..bf1ff05 100644 --- a/src/objects.h +++ b/src/objects.h @@ -2946,6 +2946,19 @@ class JSValue: public JSObject { }; // Regular expressions +// The regular expression holds a single reference to a FixedArray in +// the kDataOffset field. +// The FixedArray contains the following data: +// - tag : type of regexp implementation (not compiled yet, atom or irregexp) +// - reference to the original source string +// - the regexp flags, stored as a Smi +// If it is an atom regexp +// - a reference to a literal string to search for +// If it is an irregexp regexp: +// - a reference to code for ASCII inputs (bytecode or compiled). +// - a reference to code for UC16 inputs (bytecode or compiled). +// - max number of registers used by irregexp implementations. +// - number of captures in the regexp. class JSRegExp: public JSObject { public: // Meaning of Type: @@ -2973,6 +2986,8 @@ class JSRegExp: public JSObject { inline Flags GetFlags(); inline String* Pattern(); inline Object* DataAt(int index); + // Set implementation data after the object has been prepared. 
+ inline void SetDataAt(int index, Object* value); static inline JSRegExp* cast(Object* obj); @@ -2984,14 +2999,29 @@ class JSRegExp: public JSObject { static const int kDataOffset = JSObject::kHeaderSize; static const int kSize = kDataOffset + kIntSize; + // Indices in the data array. static const int kTagIndex = 0; static const int kSourceIndex = kTagIndex + 1; static const int kFlagsIndex = kSourceIndex + 1; - // These two are the same since the same entry is shared for - // different purposes in different types of regexps. - static const int kAtomPatternIndex = kFlagsIndex + 1; - static const int kIrregexpDataIndex = kFlagsIndex + 1; - static const int kDataSize = kAtomPatternIndex + 1; + static const int kDataIndex = kFlagsIndex + 1; + // The data fields are used in different ways depending on the + // value of the tag. + // Atom regexps (literal strings). + static const int kAtomPatternIndex = kDataIndex; + + static const int kAtomDataSize = kAtomPatternIndex + 1; + + // Irregexp compiled code or bytecode for ASCII. + static const int kIrregexpASCIICodeIndex = kDataIndex; + // Irregexp compiled code or bytecode for UC16. + static const int kIrregexpUC16CodeIndex = kDataIndex + 1; + // Maximal number of registers used by either ASCII or UC16. + // Only used to check that there is enough stack space + static const int kIrregexpMaxRegisterCountIndex = kDataIndex + 2; + // Number of captures in the compiled regexp. + static const int kIrregexpCaptureCountIndex = kDataIndex + 3; + + static const int kIrregexpDataSize = kIrregexpCaptureCountIndex + 1; }; -- 2.7.4