From 3077e8aa2f8309969616e0b0adb2d4e8bd50cefb Mon Sep 17 00:00:00 2001 From: "yangguo@chromium.org" Date: Tue, 30 Aug 2011 08:22:41 +0000 Subject: [PATCH] Generated code for substring slices in ia32. Review URL: http://codereview.chromium.org/7744052 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@9064 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/lithium-codegen-arm.cc | 2 - src/ia32/code-stubs-ia32.cc | 81 ++++++++++++++++++++++++++++++++++++++-- src/ia32/lithium-codegen-ia32.cc | 2 - src/ia32/macro-assembler-ia32.cc | 36 ++++++++++++++++++ src/ia32/macro-assembler-ia32.h | 11 ++++++ src/objects.h | 5 +++ src/x64/lithium-codegen-x64.cc | 2 - test/mjsunit/string-slices.js | 7 ++-- 8 files changed, 133 insertions(+), 13 deletions(-) diff --git a/src/arm/lithium-codegen-arm.cc b/src/arm/lithium-codegen-arm.cc index ffe4b78..976576b 100644 --- a/src/arm/lithium-codegen-arm.cc +++ b/src/arm/lithium-codegen-arm.cc @@ -3473,8 +3473,6 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) { // Dispatch on the indirect string shape: slice or cons. Label cons_string; - const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag; - ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0); __ tst(result, Operand(kSlicedNotConsMask)); __ b(eq, &cons_string); diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index 4f205eb..d648f54 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -5642,9 +5642,6 @@ void StringHelper::GenerateHashGetHash(MacroAssembler* masm, void SubStringStub::Generate(MacroAssembler* masm) { Label runtime; - if (FLAG_string_slices) { - __ jmp(&runtime); - } // Stack frame on entry. 
// esp[0]: return address // esp[4]: to @@ -5706,7 +5703,83 @@ void SubStringStub::Generate(MacroAssembler* masm) { __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); __ Set(ecx, Immediate(2)); - __ bind(&result_longer_than_two); + if (FLAG_string_slices) { + Label copy_routine; + // If coming from the make_two_character_string path, the string + // is too short to be sliced anyways. + STATIC_ASSERT(2 < SlicedString::kMinLength); + __ jmp(&copy_routine); + __ bind(&result_longer_than_two); + + // eax: string + // ebx: instance type + // ecx: sub string length + // edx: from index (smi) + Label allocate_slice, sliced_string, seq_string; + __ cmp(ecx, SlicedString::kMinLength); + // Short slice. Copy instead of slicing. + __ j(less, &copy_routine); + STATIC_ASSERT(kSeqStringTag == 0); + __ test(ebx, Immediate(kStringRepresentationMask)); + __ j(zero, &seq_string, Label::kNear); + STATIC_ASSERT(kIsIndirectStringMask == (kSlicedStringTag & kConsStringTag)); + STATIC_ASSERT(kIsIndirectStringMask != 0); + __ test(ebx, Immediate(kIsIndirectStringMask)); + // External string. Jump to runtime. + __ j(zero, &runtime); + + Factory* factory = masm->isolate()->factory(); + __ test(ebx, Immediate(kSlicedNotConsMask)); + __ j(not_zero, &sliced_string, Label::kNear); + // Cons string. Check whether it is flat, then fetch first part. + __ cmp(FieldOperand(eax, ConsString::kSecondOffset), + factory->empty_string()); + __ j(not_equal, &runtime); + __ mov(edi, FieldOperand(eax, ConsString::kFirstOffset)); + __ jmp(&allocate_slice, Label::kNear); + + __ bind(&sliced_string); + // Sliced string. Fetch parent and correct start index by offset. + __ add(edx, FieldOperand(eax, SlicedString::kOffsetOffset)); + __ mov(edi, FieldOperand(eax, SlicedString::kParentOffset)); + __ jmp(&allocate_slice, Label::kNear); + + __ bind(&seq_string); + // Sequential string. Just move string to the right register. 
+ __ mov(edi, eax); + + __ bind(&allocate_slice); + // edi: underlying subject string + // ebx: instance type of original subject string + // edx: offset + // ecx: length + // Allocate new sliced string. At this point we do not reload the instance + // type including the string encoding because we simply rely on the info + // provided by the original string. It does not matter if the original + // string's encoding is wrong because we always have to recheck encoding of + // the newly created string's parent anyways due to externalized strings. + Label two_byte_slice, set_slice_header; + STATIC_ASSERT(kAsciiStringTag != 0); + __ test(ebx, Immediate(kAsciiStringTag)); + __ j(zero, &two_byte_slice, Label::kNear); + __ AllocateAsciiSlicedString(eax, ebx, no_reg, &runtime); + __ jmp(&set_slice_header, Label::kNear); + __ bind(&two_byte_slice); + __ AllocateSlicedString(eax, ebx, no_reg, &runtime); + __ bind(&set_slice_header); + __ mov(FieldOperand(eax, SlicedString::kOffsetOffset), edx); + __ SmiTag(ecx); + __ mov(FieldOperand(eax, SlicedString::kLengthOffset), ecx); + __ mov(FieldOperand(eax, SlicedString::kParentOffset), edi); + __ mov(FieldOperand(eax, SlicedString::kHashFieldOffset), + Immediate(String::kEmptyHashField)); + __ jmp(&return_eax); + + __ bind(&copy_routine); + } else { + __ bind(&result_longer_than_two); + } + // eax: string // ebx: instance type // ecx: result string length diff --git a/src/ia32/lithium-codegen-ia32.cc b/src/ia32/lithium-codegen-ia32.cc index 5f67038..32e3074 100644 --- a/src/ia32/lithium-codegen-ia32.cc +++ b/src/ia32/lithium-codegen-ia32.cc @@ -3234,8 +3234,6 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) { // Dispatch on the indirect string shape: slice or cons. 
Label cons_string; - const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag; - ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0); __ test(result, Immediate(kSlicedNotConsMask)); __ j(zero, &cons_string, Label::kNear); diff --git a/src/ia32/macro-assembler-ia32.cc b/src/ia32/macro-assembler-ia32.cc index 5394ae0..dff174c 100644 --- a/src/ia32/macro-assembler-ia32.cc +++ b/src/ia32/macro-assembler-ia32.cc @@ -1208,6 +1208,42 @@ void MacroAssembler::AllocateAsciiConsString(Register result, } +void MacroAssembler::AllocateSlicedString(Register result, + Register scratch1, + Register scratch2, + Label* gc_required) { + // Allocate the sliced string object in new space. + AllocateInNewSpace(SlicedString::kSize, + result, + scratch1, + scratch2, + gc_required, + TAG_OBJECT); + + // Set the map. The other fields are left uninitialized. + mov(FieldOperand(result, HeapObject::kMapOffset), + Immediate(isolate()->factory()->sliced_string_map())); +} + + +void MacroAssembler::AllocateAsciiSlicedString(Register result, + Register scratch1, + Register scratch2, + Label* gc_required) { + // Allocate the sliced string object in new space. + AllocateInNewSpace(SlicedString::kSize, + result, + scratch1, + scratch2, + gc_required, + TAG_OBJECT); + + // Set the map. The other fields are left uninitialized. + mov(FieldOperand(result, HeapObject::kMapOffset), + Immediate(isolate()->factory()->sliced_ascii_string_map())); +} + + // Copy memory, byte-by-byte, from source to destination. Not optimized for // long or aligned copies. The contents of scratch and length are destroyed. // Source and destination are incremented by length. diff --git a/src/ia32/macro-assembler-ia32.h b/src/ia32/macro-assembler-ia32.h index 550a557..de9361d 100644 --- a/src/ia32/macro-assembler-ia32.h +++ b/src/ia32/macro-assembler-ia32.h @@ -446,6 +446,17 @@ class MacroAssembler: public Assembler { Register scratch2, Label* gc_required); + // Allocate a raw sliced string object. 
Only the map field of the result is + // initialized. + void AllocateSlicedString(Register result, + Register scratch1, + Register scratch2, + Label* gc_required); + void AllocateAsciiSlicedString(Register result, + Register scratch1, + Register scratch2, + Label* gc_required); + // Copy memory, byte-by-byte, from source to destination. Not optimized for // long or aligned copies. // The contents of index and scratch are destroyed. diff --git a/src/objects.h b/src/objects.h index 6c8888b..53ba981 100644 --- a/src/objects.h +++ b/src/objects.h @@ -496,6 +496,11 @@ STATIC_ASSERT( STATIC_ASSERT( (kSlicedStringTag & kIsIndirectStringMask) == kIsIndirectStringTag); +// Use this mask to distinguish between cons and slice only after making +// sure that the string is one of the two (an indirect string). +const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag; +STATIC_ASSERT(IS_POWER_OF_TWO(kSlicedNotConsMask) && kSlicedNotConsMask != 0); + // If bit 7 is clear, then bit 3 indicates whether this two-byte // string actually contains ascii data. const uint32_t kAsciiDataHintMask = 0x08; diff --git a/src/x64/lithium-codegen-x64.cc b/src/x64/lithium-codegen-x64.cc index 76a9453..c182413 100644 --- a/src/x64/lithium-codegen-x64.cc +++ b/src/x64/lithium-codegen-x64.cc @@ -3217,8 +3217,6 @@ void LCodeGen::DoStringCharCodeAt(LStringCharCodeAt* instr) { // Dispatch on the indirect string shape: slice or cons. Label cons_string; - const uint32_t kSlicedNotConsMask = kSlicedStringTag & ~kConsStringTag; - ASSERT(IsPowerOf2(kSlicedNotConsMask) && kSlicedNotConsMask != 0); __ testb(result, Immediate(kSlicedNotConsMask)); __ j(zero, &cons_string, Label::kNear); diff --git a/test/mjsunit/string-slices.js b/test/mjsunit/string-slices.js index b0b05ec..f629ca9 100755 --- a/test/mjsunit/string-slices.js +++ b/test/mjsunit/string-slices.js @@ -72,7 +72,7 @@ for (var i = 0; i < 25; i++) { } /x/.exec(x); // Try to force a flatten. 
for (var i = 5; i < 25; i++) { - for (var j = 12; j < 25; j++) { + for (var j = 0; j < 25; j++) { var z = x.substring(i, i+j); var w = Math.random() * 42; // Allocate something new in new-space. assertEquals(j, z.length); @@ -110,7 +110,7 @@ x += x; x += x; var xl = x.length; var cache = []; -for (var i = 0; i < 10000; i++) { +for (var i = 0; i < 1000; i++) { var z = x.substring(i % xl); assertEquals(xl - (i % xl), z.length); cache.push(z); @@ -129,7 +129,7 @@ x += x; x += x; var xl = x.length; var cache = []; -for (var i = 0; i < 10000; i++) { +for (var i = 0; i < 1000; i++) { var z = x.substring(i % xl); assertEquals(xl - (i % xl), z.length); cache.push(z); @@ -149,6 +149,7 @@ for (var i = 63; i >= 0; i--) { var z = cache.pop(); assertTrue(/\u2028123456789ABCDEF/.test(z)); assertEquals(xl - offset, z.length); + assertEquals(x.charAt(i*(i+1)/2), z.charAt(0)); offset -= i; } -- 2.7.4