From 55849e95d7ea299e65cac13e1046aaac2920f295 Mon Sep 17 00:00:00 2001 From: "yangguo@chromium.org" Date: Mon, 29 Aug 2011 09:36:58 +0000 Subject: [PATCH] MIPS: port Tentative implementation of string slices (hidden under the flag --string-slices). Ported r9027 (e9a3514) BUG= TEST= Review URL: http://codereview.chromium.org/7736010 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@9038 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/mips/code-stubs-mips.cc | 97 ++++++++++++++++++++++++--------- src/mips/regexp-macro-assembler-mips.cc | 27 ++++++--- 2 files changed, 92 insertions(+), 32 deletions(-) diff --git a/src/mips/code-stubs-mips.cc b/src/mips/code-stubs-mips.cc index 8f12ff4..2526a6a 100644 --- a/src/mips/code-stubs-mips.cc +++ b/src/mips/code-stubs-mips.cc @@ -4517,6 +4517,9 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ Addu(a2, a2, Operand(RegExpImpl::kLastMatchOverhead)); __ sra(at, a0, kSmiTagSize); // Untag length for comparison. __ Branch(&runtime, gt, a2, Operand(at)); + + // Reset offset for possibly sliced string. + __ mov(t0, zero_reg); // subject: Subject string // regexp_data: RegExp data (FixedArray) // Check the representation and encoding of the subject string. @@ -4531,22 +4534,34 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // subject: Subject string // a0: instance type if Subject string // regexp_data: RegExp data (FixedArray) - // Check for flat cons string. + // Check for flat cons string or sliced string. // A flat cons string is a cons string where the second part is the empty // string. In that case the subject string is just the first part of the cons // string. Also in this case the first part of the cons string is known to be // a sequential string or an external string. 
- STATIC_ASSERT(kExternalStringTag != 0); - STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0); - __ And(at, a0, Operand(kIsNotStringMask | kExternalStringTag)); - __ Branch(&runtime, ne, at, Operand(zero_reg)); + // In the case of a sliced string its offset has to be taken into account. + Label cons_string, check_encoding; + STATIC_ASSERT((kConsStringTag < kExternalStringTag)); + STATIC_ASSERT((kSlicedStringTag > kExternalStringTag)); + __ Branch(&cons_string, lt, at, Operand(kExternalStringTag)); + __ Branch(&runtime, eq, at, Operand(kExternalStringTag)); + + // String is sliced. + __ lw(t0, FieldMemOperand(subject, SlicedString::kOffsetOffset)); + __ sra(t0, t0, kSmiTagSize); + __ lw(subject, FieldMemOperand(subject, SlicedString::kParentOffset)); + // t0: offset of sliced string, untagged. + __ jmp(&check_encoding); + // String is a cons string, check whether it is flat. + __ bind(&cons_string); __ lw(a0, FieldMemOperand(subject, ConsString::kSecondOffset)); __ LoadRoot(a1, Heap::kEmptyStringRootIndex); __ Branch(&runtime, ne, a0, Operand(a1)); __ lw(subject, FieldMemOperand(subject, ConsString::kFirstOffset)); + // Is first part of cons or parent of slice a flat string? + __ bind(&check_encoding); __ lw(a0, FieldMemOperand(subject, HeapObject::kMapOffset)); __ lbu(a0, FieldMemOperand(a0, Map::kInstanceTypeOffset)); - // Is first part a flat string? STATIC_ASSERT(kSeqStringTag == 0); __ And(at, a0, Operand(kStringRepresentationMask)); __ Branch(&runtime, ne, at, Operand(zero_reg)); @@ -4562,8 +4577,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ And(a0, a0, Operand(kStringEncodingMask)); // Non-zero for ascii. __ lw(t9, FieldMemOperand(regexp_data, JSRegExp::kDataAsciiCodeOffset)); __ sra(a3, a0, 2); // a3 is 1 for ascii, 0 for UC16 (usyed below). - __ lw(t0, FieldMemOperand(regexp_data, JSRegExp::kDataUC16CodeOffset)); - __ movz(t9, t0, a0); // If UC16 (a0 is 0), replace t9 w/kDataUC16CodeOffset. 
+ __ lw(t1, FieldMemOperand(regexp_data, JSRegExp::kDataUC16CodeOffset)); + __ movz(t9, t1, a0); // If UC16 (a0 is 0), replace t9 w/kDataUC16CodeOffset. // Check that the irregexp code has been generated for the actual string // encoding. If it has, the field contains a code object otherwise it contains @@ -4630,23 +4645,32 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // For arguments 4 and 3 get string length, calculate start of string data // and calculate the shift of the index (0 for ASCII and 1 for two byte). - __ lw(a0, FieldMemOperand(subject, String::kLengthOffset)); - __ sra(a0, a0, kSmiTagSize); STATIC_ASSERT(SeqAsciiString::kHeaderSize == SeqTwoByteString::kHeaderSize); - __ Addu(t0, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag)); + __ Addu(t2, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag)); __ Xor(a3, a3, Operand(1)); // 1 for 2-byte str, 0 for 1-byte. - // Argument 4 (a3): End of string data - // Argument 3 (a2): Start of string data + // Load the length from the original subject string from the previous stack + // frame. Therefore we have to use fp, which points exactly to two pointer + // sizes below the previous sp. (Because creating a new stack frame pushes + // the previous fp onto the stack and moves up sp by 2 * kPointerSize.) + __ lw(a0, MemOperand(fp, kSubjectOffset + 2 * kPointerSize)); + // If slice offset is not 0, load the length from the original sliced string. + // Argument 4, a3: End of string data + // Argument 3, a2: Start of string data + // Prepare start and end index of the input. + __ sllv(t1, t0, a3); + __ addu(t0, t2, t1); __ sllv(t1, a1, a3); __ addu(a2, t0, t1); - __ sllv(t1, a0, a3); - __ addu(a3, t0, t1); + __ lw(t2, FieldMemOperand(a0, String::kLengthOffset)); + __ sra(t2, t2, kSmiTagSize); + __ sllv(t1, t2, a3); + __ addu(a3, t0, t1); // Argument 2 (a1): Previous index. // Already there // Argument 1 (a0): Subject string. 
- __ mov(a0, subject); + // Already there // Locate the code entry and call it. __ Addu(t9, t9, Operand(Code::kHeaderSize - kHeapObjectTag)); @@ -4663,11 +4687,14 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // Check the result. Label success; - __ Branch(&success, eq, v0, Operand(NativeRegExpMacroAssembler::SUCCESS)); + __ Branch(&success, eq, + subject, Operand(NativeRegExpMacroAssembler::SUCCESS)); Label failure; - __ Branch(&failure, eq, v0, Operand(NativeRegExpMacroAssembler::FAILURE)); + __ Branch(&failure, eq, + subject, Operand(NativeRegExpMacroAssembler::FAILURE)); // If not exception it can only be retry. Handle that in the runtime system. - __ Branch(&runtime, ne, v0, Operand(NativeRegExpMacroAssembler::EXCEPTION)); + __ Branch(&runtime, ne, + subject, Operand(NativeRegExpMacroAssembler::EXCEPTION)); // Result must now be exception. If there is no pending exception already a // stack overflow (on the backtrack stack) was detected in RegExp code but // haven't created the exception yet. Handle that in the runtime system. @@ -4678,16 +4705,16 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ li(a2, Operand(ExternalReference(Isolate::k_pending_exception_address, masm->isolate()))); __ lw(v0, MemOperand(a2, 0)); - __ Branch(&runtime, eq, v0, Operand(a1)); + __ Branch(&runtime, eq, subject, Operand(a1)); __ sw(a1, MemOperand(a2, 0)); // Clear pending exception. // Check if the exception is a termination. If so, throw as uncatchable. __ LoadRoot(a0, Heap::kTerminationExceptionRootIndex); Label termination_exception; - __ Branch(&termination_exception, eq, v0, Operand(a0)); + __ Branch(&termination_exception, eq, subject, Operand(a0)); - __ Throw(a0); // Expects thrown value in v0. + __ Throw(subject); // Expects thrown value in v0. __ bind(&termination_exception); __ ThrowUncatchable(TERMINATION, v0); // Expects thrown value in v0. 
@@ -4963,6 +4990,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) { Label flat_string; Label ascii_string; Label got_char_code; + Label sliced_string; ASSERT(!t0.is(scratch_)); ASSERT(!t0.is(index_)); @@ -4996,23 +5024,37 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) { __ Branch(&flat_string, eq, t0, Operand(zero_reg)); // Handle non-flat strings. - __ And(t0, result_, Operand(kIsConsStringMask)); - __ Branch(&call_runtime_, eq, t0, Operand(zero_reg)); + __ And(result_, result_, Operand(kStringRepresentationMask)); + STATIC_ASSERT((kConsStringTag < kExternalStringTag)); + STATIC_ASSERT((kSlicedStringTag > kExternalStringTag)); + __ Branch(&sliced_string, gt, result_, Operand(kExternalStringTag)); + __ Branch(&call_runtime_, eq, result_, Operand(kExternalStringTag)); // ConsString. // Check whether the right hand side is the empty string (i.e. if // this is really a flat string in a cons string). If that is not // the case we would rather go to the runtime system now to flatten // the string. + Label assure_seq_string; __ lw(result_, FieldMemOperand(object_, ConsString::kSecondOffset)); __ LoadRoot(t0, Heap::kEmptyStringRootIndex); __ Branch(&call_runtime_, ne, result_, Operand(t0)); // Get the first of the two strings and load its instance type. __ lw(object_, FieldMemOperand(object_, ConsString::kFirstOffset)); + __ jmp(&assure_seq_string); + + // SlicedString, unpack and add offset. + __ bind(&sliced_string); + __ lw(result_, FieldMemOperand(object_, SlicedString::kOffsetOffset)); + __ addu(scratch_, scratch_, result_); + __ lw(object_, FieldMemOperand(object_, SlicedString::kParentOffset)); + + // Assure that we are dealing with a sequential string. Go to runtime if not. + __ bind(&assure_seq_string); __ lw(result_, FieldMemOperand(object_, HeapObject::kMapOffset)); __ lbu(result_, FieldMemOperand(result_, Map::kInstanceTypeOffset)); - // If the first cons component is also non-flat, then go to runtime. 
+ // Check that parent is not an external string. Go to runtime otherwise. STATIC_ASSERT(kSeqStringTag == 0); __ And(t0, result_, Operand(kStringRepresentationMask)); @@ -5583,6 +5625,11 @@ void SubStringStub::Generate(MacroAssembler* masm) { Register to = t2; Register from = t3; + if (FLAG_string_slices) { + __ nop(); // Jumping as first instruction would crash the code generation. + __ jmp(&sub_string_runtime); + } + // Check bounds and smi-ness. __ lw(to, MemOperand(sp, kToOffset)); __ lw(from, MemOperand(sp, kFromOffset)); diff --git a/src/mips/regexp-macro-assembler-mips.cc b/src/mips/regexp-macro-assembler-mips.cc index 9935ef9..45d3963 100644 --- a/src/mips/regexp-macro-assembler-mips.cc +++ b/src/mips/regexp-macro-assembler-mips.cc @@ -1036,12 +1036,12 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address, } // Prepare for possible GC. - HandleScope handles; + HandleScope handles(isolate); Handle code_handle(re_code); Handle subject(frame_entry(re_frame, kInputString)); // Current string. - bool is_ascii = subject->IsAsciiRepresentation(); + bool is_ascii = subject->IsAsciiRepresentationUnderneath(); ASSERT(re_code->instruction_start() <= *return_address); ASSERT(*return_address <= @@ -1059,8 +1059,20 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address, return EXCEPTION; } + Handle subject_tmp = subject; + int slice_offset = 0; + + // Extract the underlying string and the slice offset. + if (StringShape(*subject_tmp).IsCons()) { + subject_tmp = Handle(ConsString::cast(*subject_tmp)->first()); + } else if (StringShape(*subject_tmp).IsSliced()) { + SlicedString* slice = SlicedString::cast(*subject_tmp); + subject_tmp = Handle(slice->parent()); + slice_offset = slice->offset(); + } + // String might have changed. 
- if (subject->IsAsciiRepresentation() != is_ascii) { + if (subject_tmp->IsAsciiRepresentation() != is_ascii) { // If we changed between an ASCII and an UC16 string, the specialized // code cannot be used, and we need to restart regexp matching from // scratch (including, potentially, compiling a new version of the code). @@ -1071,8 +1083,8 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address, // be a sequential or external string with the same content. // Update the start and end pointers in the stack frame to the current // location (whether it has actually moved or not). - ASSERT(StringShape(*subject).IsSequential() || - StringShape(*subject).IsExternal()); + ASSERT(StringShape(*subject_tmp).IsSequential() || + StringShape(*subject_tmp).IsExternal()); // The original start address of the characters to match. const byte* start_address = frame_entry(re_frame, kInputStart); @@ -1080,13 +1092,14 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address, // Find the current start address of the same character at the current string // position. int start_index = frame_entry(re_frame, kStartIndex); - const byte* new_address = StringCharacterPosition(*subject, start_index); + const byte* new_address = StringCharacterPosition(*subject_tmp, + start_index + slice_offset); if (start_address != new_address) { // If there is a difference, update the object pointer and start and end // addresses in the RegExp stack frame to match the new value. const byte* end_address = frame_entry(re_frame, kInputEnd); - int byte_length = end_address - start_address; + int byte_length = static_cast(end_address - start_address); frame_entry(re_frame, kInputString) = *subject; frame_entry(re_frame, kInputStart) = new_address; frame_entry(re_frame, kInputEnd) = new_address + byte_length; -- 2.7.4