From ce86c1bfb1a1bf96b42c40c3c77de552412b2e9e Mon Sep 17 00:00:00 2001 From: "yangguo@chromium.org" Date: Fri, 9 Dec 2011 10:04:58 +0000 Subject: [PATCH] Avoid bailing out to runtime for short substrings. This significantly improves the speed for creating short substrings (less than 13 characters) from slices, flat cons strings and external strings. TEST=string-external-cached.js, string-slices.js Review URL: http://codereview.chromium.org/8889012 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@10221 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/ia32/code-stubs-ia32.cc | 189 +++++++++++++++++++-------------- test/mjsunit/string-external-cached.js | 6 +- test/mjsunit/string-slices.js | 17 +++ 3 files changed, 129 insertions(+), 83 deletions(-) diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index 32ed1f9..eabf201 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -6093,20 +6093,23 @@ void SubStringStub::Generate(MacroAssembler* masm) { __ JumpIfNotSmi(edx, &runtime); __ sub(ecx, edx); __ cmp(ecx, FieldOperand(eax, String::kLengthOffset)); - Label return_eax; - __ j(equal, &return_eax); + Label not_original_string; + __ j(not_equal, ¬_original_string, Label::kNear); + Counters* counters = masm->isolate()->counters(); + __ IncrementCounter(counters->sub_string_native(), 1); + __ ret(3 * kPointerSize); + __ bind(¬_original_string); // Special handling of sub-strings of length 1 and 2. One character strings // are handled in the runtime system (looked up in the single character // cache). Two character strings are looked for in the symbol cache. - __ SmiUntag(ecx); // Result length is no longer smi. - __ cmp(ecx, 2); + __ cmp(ecx, Immediate(Smi::FromInt(2))); __ j(greater, &result_longer_than_two); __ j(less, &runtime); // Sub string of length 2 requested. // eax: string // ebx: instance type - // ecx: sub string length (value is 2) + // ecx: sub string length (smi, value is 2) // edx: from index (smi) __ JumpIfInstanceTypeIsNotSequentialAscii(ebx, ebx, &runtime); @@ -6121,6 +6124,7 @@ void SubStringStub::Generate(MacroAssembler* masm) { StringHelper::GenerateTwoCharacterSymbolTableProbe( masm, ebx, ecx, eax, edx, edi, &make_two_character_string, &make_two_character_string); + __ IncrementCounter(counters->sub_string_native(), 1); __ ret(3 * kPointerSize); __ bind(&make_two_character_string); @@ -6128,55 +6132,61 @@ void SubStringStub::Generate(MacroAssembler* masm) { __ mov(eax, Operand(esp, 3 * kPointerSize)); __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset)); __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); - __ Set(ecx, Immediate(2)); + __ Set(ecx, Immediate(Smi::FromInt(2))); + __ mov(edx, Operand(esp, 2 * kPointerSize)); // Load index. + + __ bind(&result_longer_than_two); + // eax: string + // ebx: instance type + // ecx: sub string length (smi) + // edx: from index (smi) + // Deal with different string types: update the index if necessary + // and put the underlying string into edi. + Label underlying_unpacked, sliced_string, seq_or_external_string; + // If the string is not indirect, it can only be sequential or external. + STATIC_ASSERT(kIsIndirectStringMask == (kSlicedStringTag & kConsStringTag)); + STATIC_ASSERT(kIsIndirectStringMask != 0); + __ test(ebx, Immediate(kIsIndirectStringMask)); + __ j(zero, &seq_or_external_string, Label::kNear); + + Factory* factory = masm->isolate()->factory(); + __ test(ebx, Immediate(kSlicedNotConsMask)); + __ j(not_zero, &sliced_string, Label::kNear); + // Cons string. Check whether it is flat, then fetch first part. + // Flat cons strings have an empty second part. + __ cmp(FieldOperand(eax, ConsString::kSecondOffset), + factory->empty_string()); + __ j(not_equal, &runtime); + __ mov(edi, FieldOperand(eax, ConsString::kFirstOffset)); + // Update instance type. + __ mov(ebx, FieldOperand(edi, HeapObject::kMapOffset)); + __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); + __ jmp(&underlying_unpacked, Label::kNear); + + __ bind(&sliced_string); + // Sliced string. Fetch parent and adjust start index by offset. + __ add(edx, FieldOperand(eax, SlicedString::kOffsetOffset)); + __ mov(edi, FieldOperand(eax, SlicedString::kParentOffset)); + // Update instance type. + __ mov(ebx, FieldOperand(edi, HeapObject::kMapOffset)); + __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); + __ jmp(&underlying_unpacked, Label::kNear); + + __ bind(&seq_or_external_string); + // Sequential or external string. Just move string to the expected register. + __ mov(edi, eax); + + __ bind(&underlying_unpacked); if (FLAG_string_slices) { Label copy_routine; - // If coming from the make_two_character_string path, the string - // is too short to be sliced anyways. - STATIC_ASSERT(2 < SlicedString::kMinLength); - __ jmp(©_routine); - __ bind(&result_longer_than_two); - - // eax: string - // ebx: instance type - // ecx: sub string length - // edx: from index (smi) - Label allocate_slice, sliced_string, seq_or_external_string; - __ cmp(ecx, SlicedString::kMinLength); - // Short slice. Copy instead of slicing. - __ j(less, ©_routine); - // If the string is not indirect, it can only be sequential or external. - STATIC_ASSERT(kIsIndirectStringMask == (kSlicedStringTag & kConsStringTag)); - STATIC_ASSERT(kIsIndirectStringMask != 0); - __ test(ebx, Immediate(kIsIndirectStringMask)); - __ j(zero, &seq_or_external_string, Label::kNear); - - Factory* factory = masm->isolate()->factory(); - __ test(ebx, Immediate(kSlicedNotConsMask)); - __ j(not_zero, &sliced_string, Label::kNear); - // Cons string. Check whether it is flat, then fetch first part. - __ cmp(FieldOperand(eax, ConsString::kSecondOffset), - factory->empty_string()); - __ j(not_equal, &runtime); - __ mov(edi, FieldOperand(eax, ConsString::kFirstOffset)); - __ jmp(&allocate_slice, Label::kNear); - - __ bind(&sliced_string); - // Sliced string. Fetch parent and correct start index by offset. - __ add(edx, FieldOperand(eax, SlicedString::kOffsetOffset)); - __ mov(edi, FieldOperand(eax, SlicedString::kParentOffset)); - __ jmp(&allocate_slice, Label::kNear); - - __ bind(&seq_or_external_string); - // Sequential or external string. Just move string to the correct register. - __ mov(edi, eax); - - __ bind(&allocate_slice); // edi: underlying subject string // ebx: instance type of original subject string - // edx: offset - // ecx: length + // edx: adjusted start index (smi) + // ecx: length (smi) + __ cmp(ecx, Immediate(Smi::FromInt(SlicedString::kMinLength))); + // Short slice. Copy instead of slicing. + __ j(less, ©_routine); // Allocate new sliced string. At this point we do not reload the instance // type including the string encoding because we simply rely on the info // provided by the original string. It does not matter if the original @@ -6193,27 +6203,50 @@ void SubStringStub::Generate(MacroAssembler* masm) { __ AllocateTwoByteSlicedString(eax, ebx, no_reg, &runtime); __ bind(&set_slice_header); __ mov(FieldOperand(eax, SlicedString::kOffsetOffset), edx); - __ SmiTag(ecx); __ mov(FieldOperand(eax, SlicedString::kLengthOffset), ecx); __ mov(FieldOperand(eax, SlicedString::kParentOffset), edi); __ mov(FieldOperand(eax, SlicedString::kHashFieldOffset), Immediate(String::kEmptyHashField)); - __ jmp(&return_eax); + __ IncrementCounter(counters->sub_string_native(), 1); + __ ret(3 * kPointerSize); __ bind(©_routine); - } else { - __ bind(&result_longer_than_two); } - // eax: string - // ebx: instance type - // ecx: result string length - // Check for flat ascii string - Label non_ascii_flat; - __ JumpIfInstanceTypeIsNotSequentialAscii(ebx, ebx, &non_ascii_flat); + // edi: underlying subject string + // ebx: instance type of original subject string + // edx: adjusted start index (smi) + // ecx: length (smi) + // The subject string can only be external or sequential string of either + // encoding at this point. + Label two_byte_sequential, runtime_drop_two, sequential_string; + STATIC_ASSERT(kExternalStringTag != 0); + STATIC_ASSERT(kSeqStringTag == 0); + __ test_b(ebx, kExternalStringTag); + __ j(zero, &sequential_string); + + // Handle external string. + Label ascii_external, done; + // Rule out short external strings. + STATIC_CHECK(kShortExternalStringTag != 0); + __ test_b(ebx, kShortExternalStringMask); + __ j(not_zero, &runtime); + __ mov(edi, FieldOperand(edi, ExternalString::kResourceDataOffset)); + // Move the pointer so that offset-wise, it looks like a sequential string. + STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize); + __ sub(edi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); + + __ bind(&sequential_string); + // Stash away (adjusted) index and (underlying) string. + __ push(edx); + __ push(edi); + __ SmiUntag(ecx); + STATIC_ASSERT((kAsciiStringTag & kStringEncodingMask) != 0); + __ test_b(ebx, kStringEncodingMask); + __ j(zero, &two_byte_sequential); - // Allocate the result. - __ AllocateAsciiString(eax, ecx, ebx, edx, edi, &runtime); + // Sequential ascii string. Allocate the result. + __ AllocateAsciiString(eax, ecx, ebx, edx, edi, &runtime_drop_two); // eax: result string // ecx: result string length @@ -6222,11 +6255,10 @@ void SubStringStub::Generate(MacroAssembler* masm) { __ mov(edi, eax); __ add(edi, Immediate(SeqAsciiString::kHeaderSize - kHeapObjectTag)); // Load string argument and locate character of sub string start. - __ mov(esi, Operand(esp, 3 * kPointerSize)); - __ add(esi, Immediate(SeqAsciiString::kHeaderSize - kHeapObjectTag)); - __ mov(ebx, Operand(esp, 2 * kPointerSize)); // from + __ pop(esi); + __ pop(ebx); __ SmiUntag(ebx); - __ add(esi, ebx); + __ lea(esi, FieldOperand(esi, ebx, times_1, SeqAsciiString::kHeaderSize)); // eax: result string // ecx: result length @@ -6235,20 +6267,12 @@ void SubStringStub::Generate(MacroAssembler* masm) { // esi: character of sub string start StringHelper::GenerateCopyCharactersREP(masm, edi, esi, ecx, ebx, true); __ mov(esi, edx); // Restore esi. - Counters* counters = masm->isolate()->counters(); __ IncrementCounter(counters->sub_string_native(), 1); __ ret(3 * kPointerSize); - __ bind(&non_ascii_flat); - // eax: string - // ebx: instance type & kStringRepresentationMask | kStringEncodingMask - // ecx: result string length - // Check for flat two byte string - __ cmp(ebx, kSeqStringTag | kTwoByteStringTag); - __ j(not_equal, &runtime); - - // Allocate the result. - __ AllocateTwoByteString(eax, ecx, ebx, edx, edi, &runtime); + __ bind(&two_byte_sequential); + // Sequential two-byte string. Allocate the result. + __ AllocateTwoByteString(eax, ecx, ebx, edx, edi, &runtime_drop_two); // eax: result string // ecx: result string length @@ -6258,14 +6282,13 @@ void SubStringStub::Generate(MacroAssembler* masm) { __ add(edi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); // Load string argument and locate character of sub string start. - __ mov(esi, Operand(esp, 3 * kPointerSize)); - __ add(esi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); - __ mov(ebx, Operand(esp, 2 * kPointerSize)); // from + __ pop(esi); + __ pop(ebx); // As from is a smi it is 2 times the value which matches the size of a two // byte character. STATIC_ASSERT(kSmiTag == 0); STATIC_ASSERT(kSmiTagSize + kSmiShiftSize == 1); - __ add(esi, ebx); + __ lea(esi, FieldOperand(esi, ebx, times_1, SeqTwoByteString::kHeaderSize)); // eax: result string // ecx: result length @@ -6274,11 +6297,13 @@ void SubStringStub::Generate(MacroAssembler* masm) { // esi: character of sub string start StringHelper::GenerateCopyCharactersREP(masm, edi, esi, ecx, ebx, false); __ mov(esi, edx); // Restore esi. - - __ bind(&return_eax); __ IncrementCounter(counters->sub_string_native(), 1); __ ret(3 * kPointerSize); + // Drop pushed values on the stack before tail call. + __ bind(&runtime_drop_two); + __ Drop(2); + // Just jump to runtime to create the sub string. __ bind(&runtime); __ TailCallRuntime(Runtime::kSubString, 3, 1); diff --git a/test/mjsunit/string-external-cached.js b/test/mjsunit/string-external-cached.js index 12312ac..0a95830 100644 --- a/test/mjsunit/string-external-cached.js +++ b/test/mjsunit/string-external-cached.js @@ -59,7 +59,7 @@ function test() { } catch (ex) { } assertEquals("1", charat_short.charAt(1)); - // Test regexp. + // Test regexp and short substring. var re = /(A|B)/; var rere = /(T.{1,2}B)/; var ascii = "ABCDEFGHIJKLMNOPQRST"; @@ -81,6 +81,10 @@ function test() { assertEquals(["A", "A"], re.exec(twobyte)); assertEquals(["B", "B"], re.exec(twobyte_slice)); assertEquals(["T_AB", "T_AB"], rere.exec(twobyte_cons)); + assertEquals("DEFG", ascii_slice.substr(2, 4)); + assertEquals("DEFG", twobyte_slice.substr(2, 4)); + assertEquals("DEFG", ascii_cons.substr(3, 4)); + assertEquals("DEFG", twobyte_cons.substr(4, 4)); } } diff --git a/test/mjsunit/string-slices.js b/test/mjsunit/string-slices.js index 3eb30f1..5b1dc36 100755 --- a/test/mjsunit/string-slices.js +++ b/test/mjsunit/string-slices.js @@ -160,6 +160,23 @@ for ( var i = 0; i < 1000; i++) { f(flat, cons, slice, i); } +// Short substrings. +flat = "abcdefghijkl12345"; +cons = flat + flat.toUpperCase(); +/x/.exec(cons); // Flatten cons +slice = "abcdefghijklmn12345".slice(1, -1); +assertEquals("cdefg", flat.substr(2, 5)); +assertEquals("cdefg", cons.substr(2, 5)); +assertEquals("cdefg", slice.substr(1, 5)); + +flat = "abc\u1234defghijkl12345"; +cons = flat + flat.toUpperCase(); +/x/.exec(cons); // Flatten cons +slice = "abc\u1234defghijklmn12345".slice(1, -1); +assertEquals("c\u1234def", flat.substr(2, 5)); +assertEquals("c\u1234def", cons.substr(2, 5)); +assertEquals("c\u1234def", slice.substr(1, 5)); + // Concatenate substrings. var ascii = 'abcdefghijklmnop'; var utf = '\u03B1\u03B2\u03B3\u03B4\u03B5\u03B6\u03B7\u03B8\u03B9\u03BA\u03BB'; -- 2.7.4