From: yangguo@chromium.org Date: Tue, 26 Feb 2013 09:53:07 +0000 (+0000) Subject: Refactor RegExpStub to check lazily. X-Git-Tag: upstream/4.7.83~15032 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=72b802f27a5cf767d894458272e2f2e3d3c3ebaf;p=platform%2Fupstream%2Fv8.git Refactor RegExpStub to check lazily. R=ulan@chromium.org BUG= Review URL: https://chromiumcodereview.appspot.com/12210143 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@13727 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc index b1f3c08..a7e9611 100644 --- a/src/arm/code-stubs-arm.cc +++ b/src/arm/code-stubs-arm.cc @@ -5082,8 +5082,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { const int kSubjectOffset = 2 * kPointerSize; const int kJSRegExpOffset = 3 * kPointerSize; - Label runtime, invoke_regexp; - + Label runtime; // Allocation of registers for this function. These are in callee save // registers and will be preserved by the call to the native RegExp code, as // this code is called using the normal C calling convention. When calling @@ -5130,68 +5129,48 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // Check that the number of captures fit in the static offsets vector buffer. __ ldr(r2, FieldMemOperand(regexp_data, JSRegExp::kIrregexpCaptureCountOffset)); - // Calculate number of capture registers (number_of_captures + 1) * 2. This - // uses the asumption that smis are 2 * their untagged value. + // Check (number_of_captures + 1) * 2 <= offsets vector size + // Or number_of_captures * 2 <= offsets vector size - 2 + // Multiplying by 2 comes for free since r2 is smi-tagged. STATIC_ASSERT(kSmiTag == 0); STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1); - __ add(r2, r2, Operand(2)); // r2 was a smi. - // Check that the static offsets vector buffer is large enough. 
- __ cmp(r2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize)); + STATIC_ASSERT(Isolate::kJSRegexpStaticOffsetsVectorSize >= 2); + __ cmp(r2, Operand(Isolate::kJSRegexpStaticOffsetsVectorSize - 2)); __ b(hi, &runtime); - // r2: Number of capture registers - // regexp_data: RegExp data (FixedArray) - // Check that the second argument is a string. - __ ldr(subject, MemOperand(sp, kSubjectOffset)); - __ JumpIfSmi(subject, &runtime); - Condition is_string = masm->IsObjectStringType(subject, r0); - __ b(NegateCondition(is_string), &runtime); - // Get the length of the string to r3. - __ ldr(r3, FieldMemOperand(subject, String::kLengthOffset)); - - // r2: Number of capture registers - // r3: Length of subject string as a smi - // subject: Subject string - // regexp_data: RegExp data (FixedArray) - // Check that the third argument is a positive smi less than the subject - // string length. A negative value will be greater (unsigned comparison). - __ ldr(r0, MemOperand(sp, kPreviousIndexOffset)); - __ JumpIfNotSmi(r0, &runtime); - __ cmp(r3, Operand(r0)); - __ b(ls, &runtime); - - // r2: Number of capture registers - // subject: Subject string - // regexp_data: RegExp data (FixedArray) - // Check that the fourth object is a JSArray object. - __ ldr(r0, MemOperand(sp, kLastMatchInfoOffset)); - __ JumpIfSmi(r0, &runtime); - __ CompareObjectType(r0, r1, r1, JS_ARRAY_TYPE); - __ b(ne, &runtime); - // Check that the JSArray is in fast case. - __ ldr(last_match_info_elements, - FieldMemOperand(r0, JSArray::kElementsOffset)); - __ ldr(r0, FieldMemOperand(last_match_info_elements, HeapObject::kMapOffset)); - __ CompareRoot(r0, Heap::kFixedArrayMapRootIndex); - __ b(ne, &runtime); - // Check that the last match info has space for the capture registers and the - // additional information. 
- __ ldr(r0, - FieldMemOperand(last_match_info_elements, FixedArray::kLengthOffset)); - __ add(r2, r2, Operand(RegExpImpl::kLastMatchOverhead)); - __ cmp(r2, Operand(r0, ASR, kSmiTagSize)); - __ b(gt, &runtime); - // Reset offset for possibly sliced string. __ mov(r9, Operand::Zero()); - // subject: Subject string - // regexp_data: RegExp data (FixedArray) - // Check the representation and encoding of the subject string. - Label seq_string; + __ ldr(subject, MemOperand(sp, kSubjectOffset)); + __ JumpIfSmi(subject, &runtime); + __ mov(r3, subject); // Make a copy of the original subject string. __ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset)); __ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset)); - // First check for flat string. None of the following string type tests will - // succeed if subject is not a string or a short external string. + // subject: subject string + // r3: subject string + // r0: subject string instance type + // regexp_data: RegExp data (FixedArray) + // Handle subject string according to its encoding and representation: + // (1) Sequential string? If yes, go to (5). + // (2) Anything but sequential or cons? If yes, go to (6). + // (3) Cons string. If the string is flat, replace subject with first string. + // Otherwise bailout. + // (4) Is subject external? If yes, go to (7). + // (5) Sequential string. Load regexp code according to encoding. + // (E) Carry on. + /// [...] + + // Deferred code at the end of the stub: + // (6) Not a long external string? If yes, go to (8). + // (7) External string. Make it, offset-wise, look like a sequential string. + // Go to (5). + // (8) Short external string or not a string? If yes, bail out to runtime. + // (9) Sliced string. Replace subject with parent. Go to (4). + + Label seq_string /* 5 */, external_string /* 7 */, + check_underlying /* 4 */, not_seq_nor_cons /* 6 */, + not_long_external /* 8 */; + + // (1) Sequential string? If yes, go to (5). 
__ and_(r1, r0, Operand(kIsNotStringMask | @@ -5199,77 +5178,62 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { kShortExternalStringMask), SetCC); STATIC_ASSERT((kStringTag | kSeqStringTag) == 0); - __ b(eq, &seq_string); + __ b(eq, &seq_string); // Go to (5). - // subject: Subject string - // regexp_data: RegExp data (FixedArray) - // r1: whether subject is a string and if yes, its string representation - // Check for flat cons string or sliced string. - // A flat cons string is a cons string where the second part is the empty - // string. In that case the subject string is just the first part of the cons - // string. Also in this case the first part of the cons string is known to be - // a sequential string or an external string. - // In the case of a sliced string its offset has to be taken into account. - Label cons_string, external_string, check_encoding; + // (2) Anything but sequential or cons? If yes, go to (6). STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmp(r1, Operand(kExternalStringTag)); - __ b(lt, &cons_string); - __ b(eq, &external_string); - - // Catch non-string subject or short external string. - STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0); - __ tst(r1, Operand(kIsNotStringMask | kShortExternalStringMask)); - __ b(ne, &runtime); + __ b(ge, &not_seq_nor_cons); // Go to (6). - // String is sliced. - __ ldr(r9, FieldMemOperand(subject, SlicedString::kOffsetOffset)); - __ mov(r9, Operand(r9, ASR, kSmiTagSize)); - __ ldr(subject, FieldMemOperand(subject, SlicedString::kParentOffset)); - // r9: offset of sliced string, smi-tagged. - __ jmp(&check_encoding); - // String is a cons string, check whether it is flat. - __ bind(&cons_string); + // (3) Cons string. Check that it's flat. + // Replace subject with first string and reload instance type. 
__ ldr(r0, FieldMemOperand(subject, ConsString::kSecondOffset)); __ CompareRoot(r0, Heap::kEmptyStringRootIndex); __ b(ne, &runtime); __ ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset)); - // Is first part of cons or parent of slice a flat string? - __ bind(&check_encoding); + + // (4) Is subject external? If yes, go to (7). + __ bind(&check_underlying); __ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset)); __ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset)); STATIC_ASSERT(kSeqStringTag == 0); __ tst(r0, Operand(kStringRepresentationMask)); - __ b(ne, &external_string); + // The underlying external string is never a short external string. + STATIC_CHECK(ExternalString::kMaxShortLength < ConsString::kMinLength); + STATIC_CHECK(ExternalString::kMaxShortLength < SlicedString::kMinLength); + __ b(ne, &external_string); // Go to (7). + // (5) Sequential string. Load regexp code according to encoding. __ bind(&seq_string); - // subject: Subject string - // regexp_data: RegExp data (FixedArray) - // r0: Instance type of subject string + // subject: sequential subject string (or look-alike, external string) + // r3: original subject string + // Load previous index and check range before r3 is overwritten. We have to + // use r3 instead of subject here because subject might have been only made + // to look like a sequential string when it actually is an external string. + __ ldr(r1, MemOperand(sp, kPreviousIndexOffset)); + __ JumpIfNotSmi(r1, &runtime); + __ ldr(r3, FieldMemOperand(r3, String::kLengthOffset)); + __ cmp(r3, Operand(r1)); + __ b(ls, &runtime); + __ mov(r1, Operand(r1, ASR, kSmiTagSize)); + STATIC_ASSERT(4 == kOneByteStringTag); STATIC_ASSERT(kTwoByteStringTag == 0); - // Find the code object based on the assumptions above. 
__ and_(r0, r0, Operand(kStringEncodingMask)); __ mov(r3, Operand(r0, ASR, 2), SetCC); __ ldr(r7, FieldMemOperand(regexp_data, JSRegExp::kDataAsciiCodeOffset), ne); __ ldr(r7, FieldMemOperand(regexp_data, JSRegExp::kDataUC16CodeOffset), eq); + // (E) Carry on. String handling is done. + // r7: irregexp code // Check that the irregexp code has been generated for the actual string // encoding. If it has, the field contains a code object otherwise it contains // a smi (code flushing support). __ JumpIfSmi(r7, &runtime); - // r3: encoding of subject string (1 if ASCII, 0 if two_byte); - // r7: code - // subject: Subject string - // regexp_data: RegExp data (FixedArray) - // Load used arguments before starting to push arguments for call to native - // RegExp code to avoid handling changing stack height. - __ ldr(r1, MemOperand(sp, kPreviousIndexOffset)); - __ mov(r1, Operand(r1, ASR, kSmiTagSize)); - // r1: previous index // r3: encoding of subject string (1 if ASCII, 0 if two_byte); // r7: code @@ -5349,10 +5313,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // subject: subject string (callee saved) // regexp_data: RegExp data (callee saved) // last_match_info_elements: Last match info elements (callee saved) - // Check the result. Label success; - __ cmp(r0, Operand(1)); // We expect exactly one result since we force the called regexp to behave // as non-global. @@ -5398,10 +5360,29 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ ldr(r1, FieldMemOperand(regexp_data, JSRegExp::kIrregexpCaptureCountOffset)); // Calculate number of capture registers (number_of_captures + 1) * 2. + // Multiplying by 2 comes for free since r1 is smi-tagged. STATIC_ASSERT(kSmiTag == 0); STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1); __ add(r1, r1, Operand(2)); // r1 was a smi. 
+ __ ldr(r0, MemOperand(sp, kLastMatchInfoOffset)); + __ JumpIfSmi(r0, &runtime); + __ CompareObjectType(r0, r2, r2, JS_ARRAY_TYPE); + __ b(ne, &runtime); + // Check that the JSArray is in fast case. + __ ldr(last_match_info_elements, + FieldMemOperand(r0, JSArray::kElementsOffset)); + __ ldr(r0, FieldMemOperand(last_match_info_elements, HeapObject::kMapOffset)); + __ CompareRoot(r0, Heap::kFixedArrayMapRootIndex); + __ b(ne, &runtime); + // Check that the last match info has space for the capture registers and the + // additional information. + __ ldr(r0, + FieldMemOperand(last_match_info_elements, FixedArray::kLengthOffset)); + __ add(r2, r1, Operand(RegExpImpl::kLastMatchOverhead)); + __ cmp(r2, Operand(r0, ASR, kSmiTagSize)); + __ b(gt, &runtime); + // r1: number of capture registers // r4: subject string // Store the capture count. @@ -5415,10 +5396,11 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ mov(r2, subject); __ RecordWriteField(last_match_info_elements, RegExpImpl::kLastSubjectOffset, - r2, + subject, r7, kLRHasNotBeenSaved, kDontSaveFPRegs); + __ mov(subject, r2); __ str(subject, FieldMemOperand(last_match_info_elements, RegExpImpl::kLastInputOffset)); @@ -5458,8 +5440,17 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ add(sp, sp, Operand(4 * kPointerSize)); __ Ret(); - // External string. Short external strings have already been ruled out. - // r0: scratch + // Do the runtime call to execute the regexp. + __ bind(&runtime); + __ TailCallRuntime(Runtime::kRegExpExec, 4, 1); + + // Deferred code for string handling. + // (6) Not a long external string? If yes, go to (8). + __ bind(&not_seq_nor_cons); + // Compare flags are still set. + __ b(gt, &not_long_external); // Go to (8). + + // (7) External string. Make it, offset-wise, look like a sequential string. 
__ bind(&external_string); __ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset)); __ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset)); @@ -5476,11 +5467,20 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ sub(subject, subject, Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); - __ jmp(&seq_string); + __ jmp(&seq_string); // Go to (5). - // Do the runtime call to execute the regexp. - __ bind(&runtime); - __ TailCallRuntime(Runtime::kRegExpExec, 4, 1); + // (8) Short external string or not a string? If yes, bail out to runtime. + __ bind(&not_long_external); + STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0); + __ tst(r1, Operand(kIsNotStringMask | kShortExternalStringMask)); + __ b(ne, &runtime); + + // (9) Sliced string. Replace subject with parent. Go to (4). + // Load offset into r9 and replace subject string with parent. + __ ldr(r9, FieldMemOperand(subject, SlicedString::kOffsetOffset)); + __ mov(r9, Operand(r9, ASR, kSmiTagSize)); + __ ldr(subject, FieldMemOperand(subject, SlicedString::kParentOffset)); + __ jmp(&check_underlying); // Go to (4). #endif // V8_INTERPRETED_REGEXP } @@ -7532,7 +7532,6 @@ struct AheadOfTimeWriteBarrierStubList { static const AheadOfTimeWriteBarrierStubList kAheadOfTime[] = { // Used in RegExpExecStub. { REG(r6), REG(r4), REG(r7), EMIT_REMEMBERED_SET }, - { REG(r6), REG(r2), REG(r7), EMIT_REMEMBERED_SET }, // Used in CompileArrayPushCall. // Also used in StoreIC::GenerateNormal via GenerateDictionaryStore. // Also used in KeyedStoreIC::GenerateGeneric. 
diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index e7e4d4f..0e2db0c 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -3826,7 +3826,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { static const int kSubjectOffset = 3 * kPointerSize; static const int kJSRegExpOffset = 4 * kPointerSize; - Label runtime, invoke_regexp; + Label runtime; + Factory* factory = masm->isolate()->factory(); // Ensure that a RegExp stack is allocated. ExternalReference address_of_regexp_stack_memory_address = @@ -3844,6 +3845,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ JumpIfSmi(eax, &runtime); __ CmpObjectType(eax, JS_REGEXP_TYPE, ecx); __ j(not_equal, &runtime); + // Check that the RegExp has been compiled (data contains a fixed array). __ mov(ecx, FieldOperand(eax, JSRegExp::kDataOffset)); if (FLAG_debug_code) { @@ -3862,156 +3864,124 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // ecx: RegExp data (FixedArray) // Check that the number of captures fit in the static offsets vector buffer. __ mov(edx, FieldOperand(ecx, JSRegExp::kIrregexpCaptureCountOffset)); - // Calculate number of capture registers (number_of_captures + 1) * 2. This - // uses the asumption that smis are 2 * their untagged value. + // Check (number_of_captures + 1) * 2 <= offsets vector size + // Or number_of_captures * 2 <= offsets vector size - 2 + // Multiplying by 2 comes for free since edx is smi-tagged. STATIC_ASSERT(kSmiTag == 0); STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1); - __ add(edx, Immediate(2)); // edx was a smi. - // Check that the static offsets vector buffer is large enough. - __ cmp(edx, Isolate::kJSRegexpStaticOffsetsVectorSize); + STATIC_ASSERT(Isolate::kJSRegexpStaticOffsetsVectorSize >= 2); + __ cmp(edx, Isolate::kJSRegexpStaticOffsetsVectorSize - 2); __ j(above, &runtime); - // ecx: RegExp data (FixedArray) - // edx: Number of capture registers - // Check that the second argument is a string. 
- __ mov(eax, Operand(esp, kSubjectOffset)); - __ JumpIfSmi(eax, &runtime); - Condition is_string = masm->IsObjectStringType(eax, ebx, ebx); - __ j(NegateCondition(is_string), &runtime); - // Get the length of the string to ebx. - __ mov(ebx, FieldOperand(eax, String::kLengthOffset)); - - // ebx: Length of subject string as a smi - // ecx: RegExp data (FixedArray) - // edx: Number of capture registers - // Check that the third argument is a positive smi less than the subject - // string length. A negative value will be greater (unsigned comparison). - __ mov(eax, Operand(esp, kPreviousIndexOffset)); - __ JumpIfNotSmi(eax, &runtime); - __ cmp(eax, ebx); - __ j(above_equal, &runtime); - - // ecx: RegExp data (FixedArray) - // edx: Number of capture registers - // Check that the fourth object is a JSArray object. - __ mov(eax, Operand(esp, kLastMatchInfoOffset)); - __ JumpIfSmi(eax, &runtime); - __ CmpObjectType(eax, JS_ARRAY_TYPE, ebx); - __ j(not_equal, &runtime); - // Check that the JSArray is in fast case. - __ mov(ebx, FieldOperand(eax, JSArray::kElementsOffset)); - __ mov(eax, FieldOperand(ebx, HeapObject::kMapOffset)); - Factory* factory = masm->isolate()->factory(); - __ cmp(eax, factory->fixed_array_map()); - __ j(not_equal, &runtime); - // Check that the last match info has space for the capture registers and the - // additional information. - __ mov(eax, FieldOperand(ebx, FixedArray::kLengthOffset)); - __ SmiUntag(eax); - __ add(edx, Immediate(RegExpImpl::kLastMatchOverhead)); - __ cmp(edx, eax); - __ j(greater, &runtime); - // Reset offset for possibly sliced string. __ Set(edi, Immediate(0)); - // ecx: RegExp data (FixedArray) - // Check the representation and encoding of the subject string. - Label seq_ascii_string, seq_two_byte_string, check_code; __ mov(eax, Operand(esp, kSubjectOffset)); + __ JumpIfSmi(eax, &runtime); + __ mov(edx, eax); // Make a copy of the original subject string. 
__ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset)); __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); - // First check for flat two byte string. + + // eax: subject string + // edx: subject string + // ebx: subject string instance type + // ecx: RegExp data (FixedArray) + // Handle subject string according to its encoding and representation: + // (1) Sequential two byte? If yes, go to (9). + // (2) Sequential one byte? If yes, go to (6). + // (3) Anything but sequential or cons? If yes, go to (7). + // (4) Cons string. If the string is flat, replace subject with first string. + // Otherwise bailout. + // (5a) Is subject sequential two byte? If yes, go to (9). + // (5b) Is subject external? If yes, go to (8). + // (6) One byte sequential. Load regexp code for one byte. + // (E) Carry on. + /// [...] + + // Deferred code at the end of the stub: + // (7) Not a long external string? If yes, go to (10). + // (8) External string. Make it, offset-wise, look like a sequential string. + // (8a) Is the external string one byte? If yes, go to (6). + // (9) Two byte sequential. Load regexp code for one byte. Go to (E). + // (10) Short external string or not a string? If yes, bail out to runtime. + // (11) Sliced string. Replace subject with parent. Go to (5a). + + Label seq_one_byte_string /* 6 */, seq_two_byte_string /* 9 */, + external_string /* 8 */, check_underlying /* 5a */, + not_seq_nor_cons /* 7 */, check_code /* E */, + not_long_external /* 10 */; + + // (1) Sequential two byte? If yes, go to (9). __ and_(ebx, kIsNotStringMask | kStringRepresentationMask | kStringEncodingMask | kShortExternalStringMask); STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0); - __ j(zero, &seq_two_byte_string, Label::kNear); - // Any other flat string must be a flat ASCII string. None of the following - // string type tests will succeed if subject is not a string or a short - // external string. + __ j(zero, &seq_two_byte_string); // Go to (9). 
+ + // (2) Sequential one byte? If yes, go to (6). + // Any other sequential string must be one byte. __ and_(ebx, Immediate(kIsNotStringMask | kStringRepresentationMask | kShortExternalStringMask)); - __ j(zero, &seq_ascii_string, Label::kNear); - - // ebx: whether subject is a string and if yes, its string representation - // Check for flat cons string or sliced string. - // A flat cons string is a cons string where the second part is the empty - // string. In that case the subject string is just the first part of the cons - // string. Also in this case the first part of the cons string is known to be - // a sequential string or an external string. - // In the case of a sliced string its offset has to be taken into account. - Label cons_string, external_string, check_encoding; + __ j(zero, &seq_one_byte_string, Label::kNear); // Go to (6). + + // (3) Anything but sequential or cons? If yes, go to (7). + // We check whether the subject string is a cons, since sequential strings + // have already been covered. STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmp(ebx, Immediate(kExternalStringTag)); - __ j(less, &cons_string); - __ j(equal, &external_string); + __ j(greater_equal, &not_seq_nor_cons); // Go to (7). - // Catch non-string subject or short external string. - STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0); - __ test(ebx, Immediate(kIsNotStringMask | kShortExternalStringTag)); - __ j(not_zero, &runtime); - - // String is sliced. - __ mov(edi, FieldOperand(eax, SlicedString::kOffsetOffset)); - __ mov(eax, FieldOperand(eax, SlicedString::kParentOffset)); - // edi: offset of sliced string, smi-tagged. - // eax: parent string. - __ jmp(&check_encoding, Label::kNear); - // String is a cons string, check whether it is flat. - __ bind(&cons_string); + // (4) Cons string. 
Check that it's flat. + // Replace subject with first string and reload instance type. __ cmp(FieldOperand(eax, ConsString::kSecondOffset), factory->empty_string()); __ j(not_equal, &runtime); __ mov(eax, FieldOperand(eax, ConsString::kFirstOffset)); - __ bind(&check_encoding); + __ bind(&check_underlying); __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset)); - // eax: first part of cons string or parent of sliced string. - // ebx: map of first part of cons string or map of parent of sliced string. - // Is first part of cons or parent of slice a flat two byte string? - __ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset), - kStringRepresentationMask | kStringEncodingMask); + __ mov(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); + + // (5a) Is subject sequential two byte? If yes, go to (9). + __ test_b(ebx, kStringRepresentationMask | kStringEncodingMask); STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0); - __ j(zero, &seq_two_byte_string, Label::kNear); - // Any other flat string must be sequential ASCII or external. - __ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset), - kStringRepresentationMask); - __ j(not_zero, &external_string); - - __ bind(&seq_ascii_string); - // eax: subject string (flat ASCII) + __ j(zero, &seq_two_byte_string); // Go to (9). + // (5b) Is subject external? If yes, go to (8). + __ test_b(ebx, kStringRepresentationMask); + // The underlying external string is never a short external string. + STATIC_CHECK(ExternalString::kMaxShortLength < ConsString::kMinLength); + STATIC_CHECK(ExternalString::kMaxShortLength < SlicedString::kMinLength); + __ j(not_zero, &external_string); // Go to (8). + + // eax: sequential subject string (or look-alike, external string) + // edx: original subject string // ecx: RegExp data (FixedArray) + // (6) One byte sequential. Load regexp code for one byte. + __ bind(&seq_one_byte_string); + // Load previous index and check range before edx is overwritten. 
We have + // to use edx instead of eax here because it might have been only made to + // look like a sequential string when it actually is an external string. + __ mov(ebx, Operand(esp, kPreviousIndexOffset)); + __ JumpIfNotSmi(ebx, &runtime); + __ cmp(ebx, FieldOperand(edx, String::kLengthOffset)); + __ j(above_equal, &runtime); __ mov(edx, FieldOperand(ecx, JSRegExp::kDataAsciiCodeOffset)); - __ Set(ecx, Immediate(1)); // Type is ASCII. - __ jmp(&check_code, Label::kNear); - - __ bind(&seq_two_byte_string); - // eax: subject string (flat two byte) - // ecx: RegExp data (FixedArray) - __ mov(edx, FieldOperand(ecx, JSRegExp::kDataUC16CodeOffset)); - __ Set(ecx, Immediate(0)); // Type is two byte. + __ Set(ecx, Immediate(1)); // Type is one byte. + // (E) Carry on. String handling is done. __ bind(&check_code); + // edx: irregexp code // Check that the irregexp code has been generated for the actual string // encoding. If it has, the field contains a code object otherwise it contains // a smi (code flushing support). __ JumpIfSmi(edx, &runtime); // eax: subject string + // ebx: previous index (smi) // edx: code // ecx: encoding of subject string (1 if ASCII, 0 if two_byte); - // Load used arguments before starting to push arguments for call to native - // RegExp code to avoid handling changing stack height. - __ mov(ebx, Operand(esp, kPreviousIndexOffset)); - __ SmiUntag(ebx); // Previous index from smi. - - // eax: subject string - // ebx: previous index - // edx: code - // ecx: encoding of subject string (1 if ASCII 0 if two_byte); // All checks done. Now push arguments for native regexp code. Counters* counters = masm->isolate()->counters(); __ IncrementCounter(counters->regexp_entry_native(), 1); @@ -4042,6 +4012,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { masm->isolate()))); // Argument 2: Previous index. + __ SmiUntag(ebx); __ mov(Operand(esp, 1 * kPointerSize), ebx); // Argument 1: Original subject string. 
@@ -4151,8 +4122,23 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // edx: Number of capture registers // Load last_match_info which is still known to be a fast case JSArray. + // Check that the fourth object is a JSArray object. __ mov(eax, Operand(esp, kLastMatchInfoOffset)); + __ JumpIfSmi(eax, &runtime); + __ CmpObjectType(eax, JS_ARRAY_TYPE, ebx); + __ j(not_equal, &runtime); + // Check that the JSArray is in fast case. __ mov(ebx, FieldOperand(eax, JSArray::kElementsOffset)); + __ mov(eax, FieldOperand(ebx, HeapObject::kMapOffset)); + __ cmp(eax, factory->fixed_array_map()); + __ j(not_equal, &runtime); + // Check that the last match info has space for the capture registers and the + // additional information. + __ mov(eax, FieldOperand(ebx, FixedArray::kLengthOffset)); + __ SmiUntag(eax); + __ sub(eax, Immediate(RegExpImpl::kLastMatchOverhead)); + __ cmp(edx, eax); + __ j(greater, &runtime); // ebx: last_match_info backing store (FixedArray) // edx: number of capture registers @@ -4162,13 +4148,14 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ SmiUntag(edx); // Number of capture registers back from smi. // Store last subject and last input. __ mov(eax, Operand(esp, kSubjectOffset)); + __ mov(ecx, eax); __ mov(FieldOperand(ebx, RegExpImpl::kLastSubjectOffset), eax); __ RecordWriteField(ebx, RegExpImpl::kLastSubjectOffset, eax, edi, kDontSaveFPRegs); - __ mov(eax, Operand(esp, kSubjectOffset)); + __ mov(eax, ecx); __ mov(FieldOperand(ebx, RegExpImpl::kLastInputOffset), eax); __ RecordWriteField(ebx, RegExpImpl::kLastInputOffset, @@ -4206,10 +4193,19 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ mov(eax, Operand(esp, kLastMatchInfoOffset)); __ ret(4 * kPointerSize); - // External string. Short external strings have already been ruled out. - // eax: subject string (expected to be external) - // ebx: scratch + // Do the runtime call to execute the regexp. 
+ __ bind(&runtime); + __ TailCallRuntime(Runtime::kRegExpExec, 4, 1); + + // Deferred code for string handling. + // (7) Not a long external string? If yes, go to (10). + __ bind(&not_seq_nor_cons); + // Compare flags are still set from (3). + __ j(greater, &not_long_external, Label::kNear); // Go to (10). + + // (8) External string. Short external strings have been ruled out. __ bind(&external_string); + // Reload instance type. __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset)); __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); if (FLAG_debug_code) { @@ -4223,13 +4219,38 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqOneByteString::kHeaderSize); __ sub(eax, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); STATIC_ASSERT(kTwoByteStringTag == 0); + // (8a) Is the external string one byte? If yes, go to (6). __ test_b(ebx, kStringEncodingMask); - __ j(not_zero, &seq_ascii_string); - __ jmp(&seq_two_byte_string); + __ j(not_zero, &seq_one_byte_string); // Goto (6). - // Do the runtime call to execute the regexp. - __ bind(&runtime); - __ TailCallRuntime(Runtime::kRegExpExec, 4, 1); + // eax: sequential subject string (or look-alike, external string) + // edx: original subject string + // ecx: RegExp data (FixedArray) + // (9) Two byte sequential. Load regexp code for one byte. Go to (E). + __ bind(&seq_two_byte_string); + // Load previous index and check range before edx is overwritten. We have + // to use edx instead of eax here because it might have been only made to + // look like a sequential string when it actually is an external string. + __ mov(ebx, Operand(esp, kPreviousIndexOffset)); + __ JumpIfNotSmi(ebx, &runtime); + __ cmp(ebx, FieldOperand(edx, String::kLengthOffset)); + __ j(above_equal, &runtime); + __ mov(edx, FieldOperand(ecx, JSRegExp::kDataUC16CodeOffset)); + __ Set(ecx, Immediate(0)); // Type is two byte. + __ jmp(&check_code); // Go to (E).
+ + // (10) Not a string or a short external string? If yes, bail out to runtime. + __ bind(&not_long_external); + // Catch non-string subject or short external string. + STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0); + __ test(ebx, Immediate(kIsNotStringMask | kShortExternalStringTag)); + __ j(not_zero, &runtime); + + // (11) Sliced string. Replace subject with parent. Go to (5a). + // Load offset into edi and replace subject string with parent. + __ mov(edi, FieldOperand(eax, SlicedString::kOffsetOffset)); + __ mov(eax, FieldOperand(eax, SlicedString::kParentOffset)); + __ jmp(&check_underlying); // Go to (5a). #endif // V8_INTERPRETED_REGEXP } diff --git a/src/jsregexp.cc b/src/jsregexp.cc index 83722c3..61ab2db 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ -687,6 +687,7 @@ Handle<JSArray> RegExpImpl::SetLastMatchInfo(Handle<JSArray> last_match_info, Handle<String> subject, int capture_count, int32_t* match) { + CHECK(last_match_info->HasFastObjectElements()); int capture_register_count = (capture_count + 1) * 2; last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead); AssertNoAllocation no_gc; diff --git a/src/objects.h b/src/objects.h index e15eee3..f2407f8 100644 --- a/src/objects.h +++ b/src/objects.h @@ -7784,6 +7784,9 @@ class ExternalString: public String { static const int kResourceDataOffset = kResourceOffset + kPointerSize; static const int kSize = kResourceDataOffset + kPointerSize; + static const int kMaxShortLength = + (kShortSize - SeqString::kHeaderSize) / kCharSize; + // Return whether external string is short (data pointer is not cached). inline bool is_short(); diff --git a/src/runtime.cc b/src/runtime.cc index 0406b52..95872a1 100644 --- a/src/runtime.cc +++ b/src/runtime.cc @@ -1725,7 +1725,6 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExec) { // length of a string, i.e. it is always a Smi. We check anyway for security.
CONVERT_SMI_ARG_CHECKED(index, 2); CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 3); - RUNTIME_ASSERT(last_match_info->HasFastObjectElements()); RUNTIME_ASSERT(index >= 0); RUNTIME_ASSERT(index <= subject->length()); isolate->counters()->regexp_entry_runtime()->Increment(); @@ -3225,8 +3224,6 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_StringReplaceRegExpWithString) { if (!replacement->IsFlat()) replacement = FlattenGetString(replacement); - ASSERT(last_match_info->HasFastObjectElements()); - if (replacement->length() == 0) { if (subject->IsOneByteConvertible()) { return StringReplaceRegExpWithEmptyString( @@ -3775,7 +3772,6 @@ RUNTIME_FUNCTION(MaybeObject*, Runtime_RegExpExecMultiple) { CONVERT_ARG_HANDLE_CHECKED(JSArray, last_match_info, 2); CONVERT_ARG_HANDLE_CHECKED(JSArray, result_array, 3); - ASSERT(last_match_info->HasFastObjectElements()); ASSERT(regexp->GetFlags().is_global()); if (regexp->CaptureCount() == 0) { diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc index 21fa4f8..cd4fa07 100644 --- a/src/x64/code-stubs-x64.cc +++ b/src/x64/code-stubs-x64.cc @@ -2955,6 +2955,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ JumpIfSmi(rax, &runtime); __ CmpObjectType(rax, JS_REGEXP_TYPE, kScratchRegister); __ j(not_equal, &runtime); + // Check that the RegExp has been compiled (data contains a fixed array). __ movq(rax, FieldOperand(rax, JSRegExp::kDataOffset)); if (FLAG_debug_code) { @@ -2975,149 +2976,121 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // Check that the number of captures fit in the static offsets vector buffer. __ SmiToInteger32(rdx, FieldOperand(rax, JSRegExp::kIrregexpCaptureCountOffset)); - // Calculate number of capture registers (number_of_captures + 1) * 2. - __ leal(rdx, Operand(rdx, rdx, times_1, 2)); - // Check that the static offsets vector buffer is large enough. 
- __ cmpl(rdx, Immediate(Isolate::kJSRegexpStaticOffsetsVectorSize)); + // Check (number_of_captures + 1) * 2 <= offsets vector size + // Or number_of_captures <= offsets vector size / 2 - 1 + STATIC_ASSERT(Isolate::kJSRegexpStaticOffsetsVectorSize >= 2); + __ cmpl(rdx, Immediate(Isolate::kJSRegexpStaticOffsetsVectorSize / 2 - 1)); __ j(above, &runtime); - // rax: RegExp data (FixedArray) - // rdx: Number of capture registers - // Check that the second argument is a string. - __ movq(rdi, Operand(rsp, kSubjectOffset)); - __ JumpIfSmi(rdi, &runtime); - Condition is_string = masm->IsObjectStringType(rdi, rbx, rbx); - __ j(NegateCondition(is_string), &runtime); - - // rdi: Subject string. - // rax: RegExp data (FixedArray). - // rdx: Number of capture registers. - // Check that the third argument is a positive smi less than the string - // length. A negative value will be greater (unsigned comparison). - __ movq(rbx, Operand(rsp, kPreviousIndexOffset)); - __ JumpIfNotSmi(rbx, &runtime); - __ SmiCompare(rbx, FieldOperand(rdi, String::kLengthOffset)); - __ j(above_equal, &runtime); - - // rax: RegExp data (FixedArray) - // rdx: Number of capture registers - // Check that the fourth object is a JSArray object. - __ movq(rdi, Operand(rsp, kLastMatchInfoOffset)); - __ JumpIfSmi(rdi, &runtime); - __ CmpObjectType(rdi, JS_ARRAY_TYPE, kScratchRegister); - __ j(not_equal, &runtime); - // Check that the JSArray is in fast case. - __ movq(rbx, FieldOperand(rdi, JSArray::kElementsOffset)); - __ movq(rdi, FieldOperand(rbx, HeapObject::kMapOffset)); - __ CompareRoot(FieldOperand(rbx, HeapObject::kMapOffset), - Heap::kFixedArrayMapRootIndex); - __ j(not_equal, &runtime); - // Check that the last match info has space for the capture registers and the - // additional information. Ensure no overflow in add. 
- STATIC_ASSERT(FixedArray::kMaxLength < kMaxInt - FixedArray::kLengthOffset); - __ SmiToInteger32(rdi, FieldOperand(rbx, FixedArray::kLengthOffset)); - __ addl(rdx, Immediate(RegExpImpl::kLastMatchOverhead)); - __ cmpl(rdx, rdi); - __ j(greater, &runtime); - // Reset offset for possibly sliced string. __ Set(r14, 0); - // rax: RegExp data (FixedArray) - // Check the representation and encoding of the subject string. - Label seq_ascii_string, seq_two_byte_string, check_code; __ movq(rdi, Operand(rsp, kSubjectOffset)); - // Make a copy of the original subject string. - __ movq(r15, rdi); + __ JumpIfSmi(rdi, &runtime); + __ movq(r15, rdi); // Make a copy of the original subject string. __ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset)); __ movzxbl(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset)); - // First check for flat two byte string. + // rax: RegExp data (FixedArray) + // rdi: subject string + // r15: subject string + // Handle subject string according to its encoding and representation: + // (1) Sequential two byte? If yes, go to (9). + // (2) Sequential one byte? If yes, go to (6). + // (3) Anything but sequential or cons? If yes, go to (7). + // (4) Cons string. If the string is flat, replace subject with first string. + // Otherwise bailout. + // (5a) Is subject sequential two byte? If yes, go to (9). + // (5b) Is subject external? If yes, go to (8). + // (6) One byte sequential. Load regexp code for one byte. + // (E) Carry on. + /// [...] + + // Deferred code at the end of the stub: + // (7) Not a long external string? If yes, go to (10). + // (8) External string. Make it, offset-wise, look like a sequential string. + // (8a) Is the external string one byte? If yes, go to (6). + // (9) Two byte sequential. Load regexp code for one byte. Go to (E). + // (10) Short external string or not a string? If yes, bail out to runtime. + // (11) Sliced string. Replace subject with parent. Go to (5a). 
+ + Label seq_one_byte_string /* 6 */, seq_two_byte_string /* 9 */, + external_string /* 8 */, check_underlying /* 5a */, + not_seq_nor_cons /* 7 */, check_code /* E */, + not_long_external /* 10 */; + + // (1) Sequential two byte? If yes, go to (9). __ andb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask | kStringEncodingMask | kShortExternalStringMask)); STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0); - __ j(zero, &seq_two_byte_string, Label::kNear); - // Any other flat string must be a flat ASCII string. None of the following - // string type tests will succeed if subject is not a string or a short - // external string. + __ j(zero, &seq_two_byte_string); // Go to (9). + + // (2) Sequential one byte? If yes, go to (6). + // Any other sequential string must be one byte. __ andb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask | kShortExternalStringMask)); - __ j(zero, &seq_ascii_string, Label::kNear); - - // rbx: whether subject is a string and if yes, its string representation - // Check for flat cons string or sliced string. - // A flat cons string is a cons string where the second part is the empty - // string. In that case the subject string is just the first part of the cons - // string. Also in this case the first part of the cons string is known to be - // a sequential string or an external string. - // In the case of a sliced string its offset has to be taken into account. - Label cons_string, external_string, check_encoding; + __ j(zero, &seq_one_byte_string, Label::kNear); // Go to (6). + + // (3) Anything but sequential or cons? If yes, go to (7). + // We check whether the subject string is a cons, since sequential strings + // have already been covered. 
STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmpq(rbx, Immediate(kExternalStringTag)); - __ j(less, &cons_string, Label::kNear); - __ j(equal, &external_string); - - // Catch non-string subject or short external string. - STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0); - __ testb(rbx, Immediate(kIsNotStringMask | kShortExternalStringMask)); - __ j(not_zero, &runtime); + __ j(greater_equal, &not_seq_nor_cons); // Go to (7). - // String is sliced. - __ SmiToInteger32(r14, FieldOperand(rdi, SlicedString::kOffsetOffset)); - __ movq(rdi, FieldOperand(rdi, SlicedString::kParentOffset)); - // r14: slice offset - // r15: original subject string - // rdi: parent string - __ jmp(&check_encoding, Label::kNear); - // String is a cons string, check whether it is flat. - __ bind(&cons_string); + // (4) Cons string. Check that it's flat. + // Replace subject with first string and reload instance type. __ CompareRoot(FieldOperand(rdi, ConsString::kSecondOffset), Heap::kEmptyStringRootIndex); __ j(not_equal, &runtime); __ movq(rdi, FieldOperand(rdi, ConsString::kFirstOffset)); - // rdi: first part of cons string or parent of sliced string. - // rbx: map of first part of cons string or map of parent of sliced string. - // Is first part of cons or parent of slice a flat two byte string? - __ bind(&check_encoding); + __ bind(&check_underlying); __ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset)); - __ testb(FieldOperand(rbx, Map::kInstanceTypeOffset), - Immediate(kStringRepresentationMask | kStringEncodingMask)); + __ movq(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset)); + + // (5a) Is subject sequential two byte? If yes, go to (9).
+ __ testb(rbx, Immediate(kStringRepresentationMask | kStringEncodingMask)); STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0); - __ j(zero, &seq_two_byte_string, Label::kNear); - // Any other flat string must be sequential ASCII or external. - __ testb(FieldOperand(rbx, Map::kInstanceTypeOffset), - Immediate(kStringRepresentationMask)); - __ j(not_zero, &external_string); - - __ bind(&seq_ascii_string); - // rdi: subject string (sequential ASCII) + __ j(zero, &seq_two_byte_string); // Go to (9). + // (5b) Is subject external? If yes, go to (8). + __ testb(rbx, Immediate(kStringRepresentationMask)); + // The underlying external string is never a short external string. + STATIC_CHECK(ExternalString::kMaxShortLength < ConsString::kMinLength); + STATIC_CHECK(ExternalString::kMaxShortLength < SlicedString::kMinLength); + __ j(not_zero, &external_string); // Go to (8) + + // (6) One byte sequential. Load regexp code for one byte. + __ bind(&seq_one_byte_string); // rax: RegExp data (FixedArray) __ movq(r11, FieldOperand(rax, JSRegExp::kDataAsciiCodeOffset)); - __ Set(rcx, 1); // Type is ASCII. - __ jmp(&check_code, Label::kNear); - - __ bind(&seq_two_byte_string); - // rdi: subject string (flat two-byte) - // rax: RegExp data (FixedArray) - __ movq(r11, FieldOperand(rax, JSRegExp::kDataUC16CodeOffset)); - __ Set(rcx, 0); // Type is two byte. + __ Set(rcx, 1); // Type is one byte. + // (E) Carry on. String handling is done. __ bind(&check_code); + // r11: irregexp code // Check that the irregexp code has been generated for the actual string // encoding. 
If it has, the field contains a code object otherwise it contains // smi (code flushing support) __ JumpIfSmi(r11, &runtime); - // rdi: subject string + // rdi: sequential subject string (or look-alike, external string) + // r15: original subject string // rcx: encoding of subject string (1 if ASCII, 0 if two_byte); // r11: code // Load used arguments before starting to push arguments for call to native // RegExp code to avoid handling changing stack height. - __ SmiToInteger64(rbx, Operand(rsp, kPreviousIndexOffset)); + // We have to use r15 instead of rdi to load the length because rdi might + // have been only made to look like a sequential string when it actually + // is an external string. + __ movq(rbx, Operand(rsp, kPreviousIndexOffset)); + __ JumpIfNotSmi(rbx, &runtime); + __ SmiCompare(rbx, FieldOperand(r15, String::kLengthOffset)); + __ j(above_equal, &runtime); + __ SmiToInteger64(rbx, rbx); // rdi: subject string // rbx: previous index @@ -3256,9 +3229,23 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ leal(rdx, Operand(rax, rax, times_1, 2)); // rdx: Number of capture registers - // Load last_match_info which is still known to be a fast case JSArray. - __ movq(rax, Operand(rsp, kLastMatchInfoOffset)); - __ movq(rbx, FieldOperand(rax, JSArray::kElementsOffset)); + // Check that the fourth object is a JSArray object. + __ movq(r15, Operand(rsp, kLastMatchInfoOffset)); + __ JumpIfSmi(r15, &runtime); + __ CmpObjectType(r15, JS_ARRAY_TYPE, kScratchRegister); + __ j(not_equal, &runtime); + // Check that the JSArray is in fast case. + __ movq(rbx, FieldOperand(r15, JSArray::kElementsOffset)); + __ movq(rax, FieldOperand(rbx, HeapObject::kMapOffset)); + __ CompareRoot(rax, Heap::kFixedArrayMapRootIndex); + __ j(not_equal, &runtime); + // Check that the last match info has space for the capture registers and the + // additional information. Ensure no overflow in add. 
+ STATIC_ASSERT(FixedArray::kMaxLength < kMaxInt - FixedArray::kLengthOffset); + __ SmiToInteger32(rax, FieldOperand(rbx, FixedArray::kLengthOffset)); + __ subl(rax, Immediate(RegExpImpl::kLastMatchOverhead)); + __ cmpl(rdx, rax); + __ j(greater, &runtime); // rbx: last_match_info backing store (FixedArray) // rdx: number of capture registers @@ -3269,12 +3256,13 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // Store last subject and last input. __ movq(rax, Operand(rsp, kSubjectOffset)); __ movq(FieldOperand(rbx, RegExpImpl::kLastSubjectOffset), rax); + __ movq(rcx, rax); __ RecordWriteField(rbx, RegExpImpl::kLastSubjectOffset, rax, rdi, kDontSaveFPRegs); - __ movq(rax, Operand(rsp, kSubjectOffset)); + __ movq(rax, rcx); __ movq(FieldOperand(rbx, RegExpImpl::kLastInputOffset), rax); __ RecordWriteField(rbx, RegExpImpl::kLastInputOffset, @@ -3308,7 +3296,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ bind(&done); // Return last match info. - __ movq(rax, Operand(rsp, kLastMatchInfoOffset)); + __ movq(rax, r15); __ ret(4 * kPointerSize); __ bind(&exception); @@ -3334,9 +3322,17 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ bind(&termination_exception); __ ThrowUncatchable(rax); - // External string. Short external strings have already been ruled out. - // rdi: subject string (expected to be external) - // rbx: scratch + // Do the runtime call to execute the regexp. + __ bind(&runtime); + __ TailCallRuntime(Runtime::kRegExpExec, 4, 1); + + // Deferred code for string handling. + // (7) Not a long external string? If yes, go to (10). + __ bind(&not_seq_nor_cons); + // Compare flags are still set from (3). + __ j(greater, &not_long_external, Label::kNear); // Go to (10). + + // (8) External string. Short external strings have been ruled out.
__ bind(&external_string); __ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset)); __ movzxbl(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset)); @@ -3351,13 +3347,30 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqOneByteString::kHeaderSize); __ subq(rdi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); STATIC_ASSERT(kTwoByteStringTag == 0); + // (8a) Is the external string one byte? If yes, go to (6). __ testb(rbx, Immediate(kStringEncodingMask)); - __ j(not_zero, &seq_ascii_string); - __ jmp(&seq_two_byte_string); + __ j(not_zero, &seq_one_byte_string); // Goto (6). - // Do the runtime call to execute the regexp. - __ bind(&runtime); - __ TailCallRuntime(Runtime::kRegExpExec, 4, 1); + // rdi: subject string (flat two-byte) + // rax: RegExp data (FixedArray) + // (9) Two byte sequential. Load regexp code for one byte. Go to (E). + __ bind(&seq_two_byte_string); + __ movq(r11, FieldOperand(rax, JSRegExp::kDataUC16CodeOffset)); + __ Set(rcx, 0); // Type is two byte. + __ jmp(&check_code); // Go to (E). + + // (10) Not a string or a short external string? If yes, bail out to runtime. + __ bind(&not_long_external); + // Catch non-string subject or short external string. + STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0); + __ testb(rbx, Immediate(kIsNotStringMask | kShortExternalStringMask)); + __ j(not_zero, &runtime); + + // (11) Sliced string. Replace subject with parent. Go to (5a). + // Load offset into r14 and replace subject string with parent. + __ SmiToInteger32(r14, FieldOperand(rdi, SlicedString::kOffsetOffset)); + __ movq(rdi, FieldOperand(rdi, SlicedString::kParentOffset)); + __ jmp(&check_underlying); #endif // V8_INTERPRETED_REGEXP }