MIPS: port Tentative implementation of string slices (hidden under the flag --string-slices).

author yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 29 Aug 2011 09:36:58 +0000 (09:36 +0000)

committer yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Mon, 29 Aug 2011 09:36:58 +0000 (09:36 +0000)
author yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 29 Aug 2011 09:36:58 +0000 (09:36 +0000)
committer yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Mon, 29 Aug 2011 09:36:58 +0000 (09:36 +0000)
diff --git a/src/mips/code-stubs-mips.cc b/src/mips/code-stubs-mips.cc

index 8f12ff4f2bc90143c44b636600b7d9edef76df29..2526a6a28b203bfaa1703403c388bc2131962e2b 100644 (file)
--- a/src/mips/code-stubs-mips.cc
+++ b/src/mips/code-stubs-mips.cc
@@ -4517,6 +4517,9 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
    __ Addu(a2, a2, Operand(RegExpImpl::kLastMatchOverhead));
    __ sra(at, a0, kSmiTagSize);  // Untag length for comparison.
    __ Branch(&runtime, gt, a2, Operand(at));
+
+  // Reset offset for possibly sliced string.
+  __ mov(t0, zero_reg);
    // subject: Subject string
    // regexp_data: RegExp data (FixedArray)
    // Check the representation and encoding of the subject string.
@@ -4531,22 +4534,34 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
    // subject: Subject string
    // a0: instance type if Subject string
    // regexp_data: RegExp data (FixedArray)
-  // Check for flat cons string.
+  // Check for flat cons string or sliced string.
    // A flat cons string is a cons string where the second part is the empty
    // string. In that case the subject string is just the first part of the cons
    // string. Also in this case the first part of the cons string is known to be
    // a sequential string or an external string.
-  STATIC_ASSERT(kExternalStringTag != 0);
-  STATIC_ASSERT((kConsStringTag & kExternalStringTag) == 0);
-  __ And(at, a0, Operand(kIsNotStringMask | kExternalStringTag));
-  __ Branch(&runtime, ne, at, Operand(zero_reg));
+  // In the case of a sliced string its offset has to be taken into account.
+  Label cons_string, check_encoding;
+  STATIC_ASSERT((kConsStringTag < kExternalStringTag));
+  STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
+  __ Branch(&cons_string, lt, at, Operand(kExternalStringTag));
+  __ Branch(&runtime, eq, at, Operand(kExternalStringTag));
+
+  // String is sliced.
+  __ lw(t0, FieldMemOperand(subject, SlicedString::kOffsetOffset));
+  __ sra(t0, t0, kSmiTagSize);
+  __ lw(subject, FieldMemOperand(subject, SlicedString::kParentOffset));
+  // t0: offset of sliced string, untagged.
+  __ jmp(&check_encoding);
+  // String is a cons string, check whether it is flat.
+  __ bind(&cons_string);
    __ lw(a0, FieldMemOperand(subject, ConsString::kSecondOffset));
    __ LoadRoot(a1, Heap::kEmptyStringRootIndex);
    __ Branch(&runtime, ne, a0, Operand(a1));
    __ lw(subject, FieldMemOperand(subject, ConsString::kFirstOffset));
+  // Is first part of cons or parent of slice a flat string?
+  __ bind(&check_encoding);
    __ lw(a0, FieldMemOperand(subject, HeapObject::kMapOffset));
    __ lbu(a0, FieldMemOperand(a0, Map::kInstanceTypeOffset));
-  // Is first part a flat string?
    STATIC_ASSERT(kSeqStringTag == 0);
    __ And(at, a0, Operand(kStringRepresentationMask));
    __ Branch(&runtime, ne, at, Operand(zero_reg));
@@ -4562,8 +4577,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
    __ And(a0, a0, Operand(kStringEncodingMask));  // Non-zero for ascii.
    __ lw(t9, FieldMemOperand(regexp_data, JSRegExp::kDataAsciiCodeOffset));
    __ sra(a3, a0, 2);  // a3 is 1 for ascii, 0 for UC16 (used below).
-  __ lw(t0, FieldMemOperand(regexp_data, JSRegExp::kDataUC16CodeOffset));
-  __ movz(t9, t0, a0);  // If UC16 (a0 is 0), replace t9 w/kDataUC16CodeOffset.
+  __ lw(t1, FieldMemOperand(regexp_data, JSRegExp::kDataUC16CodeOffset));
+  __ movz(t9, t1, a0);  // If UC16 (a0 is 0), replace t9 w/kDataUC16CodeOffset.
  
    // Check that the irregexp code has been generated for the actual string
    // encoding. If it has, the field contains a code object otherwise it contains
@@ -4630,23 +4645,32 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
  
    // For arguments 4 and 3 get string length, calculate start of string data
    // and calculate the shift of the index (0 for ASCII and 1 for two byte).
-  __ lw(a0, FieldMemOperand(subject, String::kLengthOffset));
-  __ sra(a0, a0, kSmiTagSize);
    STATIC_ASSERT(SeqAsciiString::kHeaderSize == SeqTwoByteString::kHeaderSize);
-  __ Addu(t0, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
+  __ Addu(t2, subject, Operand(SeqAsciiString::kHeaderSize - kHeapObjectTag));
    __ Xor(a3, a3, Operand(1));  // 1 for 2-byte str, 0 for 1-byte.
-  // Argument 4 (a3): End of string data
-  // Argument 3 (a2): Start of string data
+  // Load the length from the original subject string from the previous stack
+  // frame. Therefore we have to use fp, which points exactly to two pointer
+  // sizes below the previous sp. (Because creating a new stack frame pushes
+  // the previous fp onto the stack and moves up sp by 2 * kPointerSize.)
+  __ lw(a0, MemOperand(fp, kSubjectOffset + 2 * kPointerSize));
+  // If slice offset is not 0, load the length from the original sliced string.
+  // Argument 4, a3: End of string data
+  // Argument 3, a2: Start of string data
+  // Prepare start and end index of the input.
+  __ sllv(t1, t0, a3);
+  __ addu(t0, t2, t1);
    __ sllv(t1, a1, a3);
    __ addu(a2, t0, t1);
-  __ sllv(t1, a0, a3);
-  __ addu(a3, t0, t1);
  
+  __ lw(t2, FieldMemOperand(a0, String::kLengthOffset));
+  __ sra(t2, t2, kSmiTagSize);
+  __ sllv(t1, t2, a3);
+  __ addu(a3, t0, t1);
    // Argument 2 (a1): Previous index.
    // Already there
  
    // Argument 1 (a0): Subject string.
-  __ mov(a0, subject);
+  // Already there
  
    // Locate the code entry and call it.
    __ Addu(t9, t9, Operand(Code::kHeaderSize - kHeapObjectTag));
@@ -4663,11 +4687,14 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
    // Check the result.
  
    Label success;
-  __ Branch(&success, eq, v0, Operand(NativeRegExpMacroAssembler::SUCCESS));
+  __ Branch(&success, eq,
+            subject, Operand(NativeRegExpMacroAssembler::SUCCESS));
    Label failure;
-  __ Branch(&failure, eq, v0, Operand(NativeRegExpMacroAssembler::FAILURE));
+  __ Branch(&failure, eq,
+            subject, Operand(NativeRegExpMacroAssembler::FAILURE));
    // If not exception it can only be retry. Handle that in the runtime system.
-  __ Branch(&runtime, ne, v0, Operand(NativeRegExpMacroAssembler::EXCEPTION));
+  __ Branch(&runtime, ne,
+            subject, Operand(NativeRegExpMacroAssembler::EXCEPTION));
    // Result must now be exception. If there is no pending exception already a
    // stack overflow (on the backtrack stack) was detected in RegExp code but
    // haven't created the exception yet. Handle that in the runtime system.
@@ -4678,16 +4705,16 @@ void RegExpExecStub::Generate(MacroAssembler* masm) {
    __ li(a2, Operand(ExternalReference(Isolate::k_pending_exception_address,
                                        masm->isolate())));
    __ lw(v0, MemOperand(a2, 0));
-  __ Branch(&runtime, eq, v0, Operand(a1));
+  __ Branch(&runtime, eq, subject, Operand(a1));
  
    __ sw(a1, MemOperand(a2, 0));  // Clear pending exception.
  
    // Check if the exception is a termination. If so, throw as uncatchable.
    __ LoadRoot(a0, Heap::kTerminationExceptionRootIndex);
    Label termination_exception;
-  __ Branch(&termination_exception, eq, v0, Operand(a0));
+  __ Branch(&termination_exception, eq, subject, Operand(a0));
  
-  __ Throw(a0);  // Expects thrown value in v0.
+  __ Throw(subject);  // Expects thrown value in v0.
  
    __ bind(&termination_exception);
    __ ThrowUncatchable(TERMINATION, v0);  // Expects thrown value in v0.
@@ -4963,6 +4990,7 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
    Label flat_string;
    Label ascii_string;
    Label got_char_code;
+  Label sliced_string;
  
    ASSERT(!t0.is(scratch_));
    ASSERT(!t0.is(index_));
@@ -4996,23 +5024,37 @@ void StringCharCodeAtGenerator::GenerateFast(MacroAssembler* masm) {
    __ Branch(&flat_string, eq, t0, Operand(zero_reg));
  
    // Handle non-flat strings.
-  __ And(t0, result_, Operand(kIsConsStringMask));
-  __ Branch(&call_runtime_, eq, t0, Operand(zero_reg));
+  __ And(result_, result_, Operand(kStringRepresentationMask));
+  STATIC_ASSERT((kConsStringTag < kExternalStringTag));
+  STATIC_ASSERT((kSlicedStringTag > kExternalStringTag));
+  __ Branch(&sliced_string, gt, result_, Operand(kExternalStringTag));
+  __ Branch(&call_runtime_, eq, result_, Operand(kExternalStringTag));
  
    // ConsString.
    // Check whether the right hand side is the empty string (i.e. if
    // this is really a flat string in a cons string). If that is not
    // the case we would rather go to the runtime system now to flatten
    // the string.
+  Label assure_seq_string;
    __ lw(result_, FieldMemOperand(object_, ConsString::kSecondOffset));
    __ LoadRoot(t0, Heap::kEmptyStringRootIndex);
    __ Branch(&call_runtime_, ne, result_, Operand(t0));
  
    // Get the first of the two strings and load its instance type.
    __ lw(object_, FieldMemOperand(object_, ConsString::kFirstOffset));
+  __ jmp(&assure_seq_string);
+
+  // SlicedString, unpack and add offset.
+  __ bind(&sliced_string);
+  __ lw(result_, FieldMemOperand(object_, SlicedString::kOffsetOffset));
+  __ addu(scratch_, scratch_, result_);
+  __ lw(object_, FieldMemOperand(object_, SlicedString::kParentOffset));
+
+  // Assure that we are dealing with a sequential string. Go to runtime if not.
+  __ bind(&assure_seq_string);
    __ lw(result_, FieldMemOperand(object_, HeapObject::kMapOffset));
    __ lbu(result_, FieldMemOperand(result_, Map::kInstanceTypeOffset));
-  // If the first cons component is also non-flat, then go to runtime.
+  // Check that parent is not an external string. Go to runtime otherwise.
    STATIC_ASSERT(kSeqStringTag == 0);
  
    __ And(t0, result_, Operand(kStringRepresentationMask));
@@ -5583,6 +5625,11 @@ void SubStringStub::Generate(MacroAssembler* masm) {
    Register to = t2;
    Register from = t3;
  
+  if (FLAG_string_slices) {
+    __ nop();  // Jumping as first instruction would crash the code generation.
+    __ jmp(&sub_string_runtime);
+  }
+
    // Check bounds and smi-ness.
    __ lw(to, MemOperand(sp, kToOffset));
    __ lw(from, MemOperand(sp, kFromOffset));
diff --git a/src/mips/regexp-macro-assembler-mips.cc b/src/mips/regexp-macro-assembler-mips.cc

index 9935ef9b5bfbb33fb429fc4c10df39a99d07839d..45d39639727e51b0c5f017754594c95c4e89f1c1 100644 (file)
--- a/src/mips/regexp-macro-assembler-mips.cc
+++ b/src/mips/regexp-macro-assembler-mips.cc
@@ -1036,12 +1036,12 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
    }
  
    // Prepare for possible GC.
-  HandleScope handles;
+  HandleScope handles(isolate);
    Handle<Code> code_handle(re_code);
  
    Handle<String> subject(frame_entry<String*>(re_frame, kInputString));
    // Current string.
-  bool is_ascii = subject->IsAsciiRepresentation();
+  bool is_ascii = subject->IsAsciiRepresentationUnderneath();
  
    ASSERT(re_code->instruction_start() <= *return_address);
    ASSERT(*return_address <=
@@ -1059,8 +1059,20 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
      return EXCEPTION;
    }
  
+  Handle<String> subject_tmp = subject;
+  int slice_offset = 0;
+
+  // Extract the underlying string and the slice offset.
+  if (StringShape(*subject_tmp).IsCons()) {
+    subject_tmp = Handle<String>(ConsString::cast(*subject_tmp)->first());
+  } else if (StringShape(*subject_tmp).IsSliced()) {
+    SlicedString* slice = SlicedString::cast(*subject_tmp);
+    subject_tmp = Handle<String>(slice->parent());
+    slice_offset = slice->offset();
+  }
+
    // String might have changed.
-  if (subject->IsAsciiRepresentation() != is_ascii) {
+  if (subject_tmp->IsAsciiRepresentation() != is_ascii) {
      // If we changed between an ASCII and an UC16 string, the specialized
      // code cannot be used, and we need to restart regexp matching from
      // scratch (including, potentially, compiling a new version of the code).
@@ -1071,8 +1083,8 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
    // be a sequential or external string with the same content.
    // Update the start and end pointers in the stack frame to the current
    // location (whether it has actually moved or not).
-  ASSERT(StringShape(*subject).IsSequential() ||
-      StringShape(*subject).IsExternal());
+  ASSERT(StringShape(*subject_tmp).IsSequential() ||
+      StringShape(*subject_tmp).IsExternal());
  
    // The original start address of the characters to match.
    const byte* start_address = frame_entry<const byte*>(re_frame, kInputStart);
@@ -1080,13 +1092,14 @@ int RegExpMacroAssemblerMIPS::CheckStackGuardState(Address* return_address,
    // Find the current start address of the same character at the current string
    // position.
    int start_index = frame_entry<int>(re_frame, kStartIndex);
-  const byte* new_address = StringCharacterPosition(*subject, start_index);
+  const byte* new_address = StringCharacterPosition(*subject_tmp,
+                                                    start_index + slice_offset);
  
    if (start_address != new_address) {
      // If there is a difference, update the object pointer and start and end
      // addresses in the RegExp stack frame to match the new value.
      const byte* end_address = frame_entry<const byte* >(re_frame, kInputEnd);
-    int byte_length = end_address - start_address;
+    int byte_length = static_cast<int>(end_address - start_address);
      frame_entry<const String*>(re_frame, kInputString) = *subject;
      frame_entry<const byte*>(re_frame, kInputStart) = new_address;
      frame_entry<const byte*>(re_frame, kInputEnd) = new_address + byte_length;
author	yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 29 Aug 2011 09:36:58 +0000 (09:36 +0000)
committer	yangguo@chromium.org <yangguo@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Mon, 29 Aug 2011 09:36:58 +0000 (09:36 +0000)
src/mips/code-stubs-mips.cc		patch \| blob \| history
src/mips/regexp-macro-assembler-mips.cc		patch \| blob \| history