From d542a2fb75c3082a7e104dc650f7852811beb401 Mon Sep 17 00:00:00 2001 From: "yangguo@chromium.org" Date: Fri, 25 Nov 2011 14:04:47 +0000 Subject: [PATCH] Add external strings support to regexp in generated code. TEST=test/mjsunit/string-external-cached.js Review URL: http://codereview.chromium.org/8680010 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@10070 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/arm/code-stubs-arm.cc | 46 +++++++++++++---- src/ia32/code-stubs-ia32.cc | 49 ++++++++++++++---- src/x64/code-stubs-x64.cc | 49 ++++++++++++++---- test/mjsunit/string-external-cached.js | 94 ++++++++++++++++++++++++++++++++++ test/mjsunit/string-externalize.js | 30 ----------- 5 files changed, 206 insertions(+), 62 deletions(-) create mode 100644 test/mjsunit/string-external-cached.js diff --git a/src/arm/code-stubs-arm.cc b/src/arm/code-stubs-arm.cc index 00872e6..a881335 100644 --- a/src/arm/code-stubs-arm.cc +++ b/src/arm/code-stubs-arm.cc @@ -4612,8 +4612,13 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset)); __ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset)); // First check for flat string. None of the following string type tests will - // succeed if kIsNotStringTag is set. - __ and_(r1, r0, Operand(kIsNotStringMask | kStringRepresentationMask), SetCC); + // succeed if subject is not a string or a short external string. + __ and_(r1, + r0, + Operand(kIsNotStringMask | + kStringRepresentationMask | + kShortExternalStringMask), + SetCC); STATIC_ASSERT((kStringTag | kSeqStringTag) == 0); __ b(eq, &seq_string); @@ -4626,17 +4631,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // string. Also in this case the first part of the cons string is known to be // a sequential string or an external string. // In the case of a sliced string its offset has to be taken into account. 
- Label cons_string, check_encoding; + Label cons_string, external_string, check_encoding; STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); + STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmp(r1, Operand(kExternalStringTag)); __ b(lt, &cons_string); - __ b(eq, &runtime); + __ b(eq, &external_string); - // Catch non-string subject (should already have been guarded against). - STATIC_ASSERT(kNotStringTag != 0); - __ tst(r1, Operand(kIsNotStringMask)); + // Catch non-string subject or short external string. + STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0); + __ tst(r1, Operand(kIsNotStringMask | kShortExternalStringMask)); __ b(ne, &runtime); // String is sliced. @@ -4648,8 +4654,7 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // String is a cons string, check whether it is flat. __ bind(&cons_string); __ ldr(r0, FieldMemOperand(subject, ConsString::kSecondOffset)); - __ LoadRoot(r1, Heap::kEmptyStringRootIndex); - __ cmp(r0, r1); + __ CompareRoot(r0, Heap::kEmptyStringRootIndex); __ b(ne, &runtime); __ ldr(subject, FieldMemOperand(subject, ConsString::kFirstOffset)); // Is first part of cons or parent of slice a flat string? @@ -4658,7 +4663,8 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset)); STATIC_ASSERT(kSeqStringTag == 0); __ tst(r0, Operand(kStringRepresentationMask)); - __ b(ne, &runtime); + __ b(ne, &external_string); + __ bind(&seq_string); // subject: Subject string // regexp_data: RegExp data (FixedArray) @@ -4866,6 +4872,26 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ add(sp, sp, Operand(4 * kPointerSize)); __ Ret(); + // External string. Short external strings have already been ruled out. 
+ // r0: scratch + __ bind(&external_string); + __ ldr(r0, FieldMemOperand(subject, HeapObject::kMapOffset)); + __ ldrb(r0, FieldMemOperand(r0, Map::kInstanceTypeOffset)); + if (FLAG_debug_code) { + // Assert that we do not have a cons or slice (indirect strings) here. + // Sequential strings have already been ruled out. + __ tst(r0, Operand(kIsIndirectStringMask)); + __ Assert(eq, "external string expected, but not found"); + } + __ ldr(subject, + FieldMemOperand(subject, ExternalString::kResourceDataOffset)); + // Move the pointer so that offset-wise, it looks like a sequential string. + STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize); + __ sub(subject, + subject, + Operand(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); + __ jmp(&seq_string); + // Do the runtime call to execute the regexp. __ bind(&runtime); __ TailCallRuntime(Runtime::kRegExpExec, 4, 1); diff --git a/src/ia32/code-stubs-ia32.cc b/src/ia32/code-stubs-ia32.cc index 179bf0d..68eebd3 100644 --- a/src/ia32/code-stubs-ia32.cc +++ b/src/ia32/code-stubs-ia32.cc @@ -3611,13 +3611,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset)); __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); // First check for flat two byte string. - __ and_(ebx, - kIsNotStringMask | kStringRepresentationMask | kStringEncodingMask); + __ and_(ebx, kIsNotStringMask | + kStringRepresentationMask | + kStringEncodingMask | + kShortExternalStringMask); STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0); __ j(zero, &seq_two_byte_string, Label::kNear); // Any other flat string must be a flat ascii string. None of the following - // string type tests will succeed if kIsNotStringTag is set. - __ and_(ebx, Immediate(kIsNotStringMask | kStringRepresentationMask)); + // string type tests will succeed if subject is not a string or a short + // external string. 
+ __ and_(ebx, Immediate(kIsNotStringMask | + kStringRepresentationMask | + kShortExternalStringMask)); __ j(zero, &seq_ascii_string, Label::kNear); // ebx: whether subject is a string and if yes, its string representation @@ -3627,17 +3632,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // string. Also in this case the first part of the cons string is known to be // a sequential string or an external string. // In the case of a sliced string its offset has to be taken into account. - Label cons_string, check_encoding; + Label cons_string, external_string, check_encoding; STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); + STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmp(ebx, Immediate(kExternalStringTag)); __ j(less, &cons_string); - __ j(equal, &runtime); + __ j(equal, &external_string); - // Catch non-string subject (should already have been guarded against). - STATIC_ASSERT(kNotStringTag != 0); - __ test(ebx, Immediate(kIsNotStringMask)); + // Catch non-string subject or short external string. + STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0); + __ test(ebx, Immediate(kIsNotStringMask | kShortExternalStringMask)); __ j(not_zero, &runtime); // String is sliced. @@ -3660,10 +3666,10 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { kStringRepresentationMask | kStringEncodingMask); STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0); __ j(zero, &seq_two_byte_string, Label::kNear); - // Any other flat string must be ascii. + // Any other flat string must be sequential ascii or external. 
__ test_b(FieldOperand(ebx, Map::kInstanceTypeOffset), kStringRepresentationMask); - __ j(not_zero, &runtime); + __ j(not_zero, &external_string); __ bind(&seq_ascii_string); // eax: subject string (flat ascii) @@ -3884,6 +3890,27 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ mov(eax, Operand(esp, kLastMatchInfoOffset)); __ ret(4 * kPointerSize); + // External string. Short external strings have already been ruled out. + // eax: subject string (expected to be external) + // ebx: scratch + __ bind(&external_string); + __ mov(ebx, FieldOperand(eax, HeapObject::kMapOffset)); + __ movzx_b(ebx, FieldOperand(ebx, Map::kInstanceTypeOffset)); + if (FLAG_debug_code) { + // Assert that we do not have a cons or slice (indirect strings) here. + // Sequential strings have already been ruled out. + __ test_b(ebx, kIsIndirectStringMask); + __ Assert(zero, "external string expected, but not found"); + } + __ mov(eax, FieldOperand(eax, ExternalString::kResourceDataOffset)); + // Move the pointer so that offset-wise, it looks like a sequential string. + STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize); + __ sub(eax, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); + STATIC_ASSERT(kTwoByteStringTag == 0); + __ test_b(ebx, kStringEncodingMask); + __ j(not_zero, &seq_ascii_string); + __ jmp(&seq_two_byte_string); + // Do the runtime call to execute the regexp. __ bind(&runtime); __ TailCallRuntime(Runtime::kRegExpExec, 4, 1); diff --git a/src/x64/code-stubs-x64.cc b/src/x64/code-stubs-x64.cc index 2281c4c..96f70bf 100644 --- a/src/x64/code-stubs-x64.cc +++ b/src/x64/code-stubs-x64.cc @@ -2658,13 +2658,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset)); __ movzxbl(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset)); // First check for flat two byte string. 
- __ andb(rbx, Immediate( - kIsNotStringMask | kStringRepresentationMask | kStringEncodingMask)); + __ andb(rbx, Immediate(kIsNotStringMask | + kStringRepresentationMask | + kStringEncodingMask | + kShortExternalStringMask)); STATIC_ASSERT((kStringTag | kSeqStringTag | kTwoByteStringTag) == 0); __ j(zero, &seq_two_byte_string, Label::kNear); // Any other flat string must be a flat ascii string. None of the following - // string type tests will succeed if kIsNotStringTag is set. - __ andb(rbx, Immediate(kIsNotStringMask | kStringRepresentationMask)); + // string type tests will succeed if subject is not a string or a short + // external string. + __ andb(rbx, Immediate(kIsNotStringMask | + kStringRepresentationMask | + kShortExternalStringMask)); __ j(zero, &seq_ascii_string, Label::kNear); // rbx: whether subject is a string and if yes, its string representation @@ -2674,17 +2679,18 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { // string. Also in this case the first part of the cons string is known to be // a sequential string or an external string. // In the case of a sliced string its offset has to be taken into account. - Label cons_string, check_encoding; + Label cons_string, external_string, check_encoding; STATIC_ASSERT(kConsStringTag < kExternalStringTag); STATIC_ASSERT(kSlicedStringTag > kExternalStringTag); STATIC_ASSERT(kIsNotStringMask > kExternalStringTag); + STATIC_ASSERT(kShortExternalStringTag > kExternalStringTag); __ cmpq(rbx, Immediate(kExternalStringTag)); __ j(less, &cons_string, Label::kNear); - __ j(equal, &runtime); + __ j(equal, &external_string); - // Catch non-string subject (should already have been guarded against). - STATIC_ASSERT(kNotStringTag != 0); - __ testb(rbx, Immediate(kIsNotStringMask)); + // Catch non-string subject or short external string. 
+ STATIC_ASSERT(kNotStringTag != 0 && kShortExternalStringTag !=0); + __ testb(rbx, Immediate(kIsNotStringMask | kShortExternalStringMask)); __ j(not_zero, &runtime); // String is sliced. @@ -2709,10 +2715,10 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { Immediate(kStringRepresentationMask | kStringEncodingMask)); STATIC_ASSERT((kSeqStringTag | kTwoByteStringTag) == 0); __ j(zero, &seq_two_byte_string, Label::kNear); - // Any other flat string must be ascii. + // Any other flat string must be sequential ascii or external. __ testb(FieldOperand(rbx, Map::kInstanceTypeOffset), Immediate(kStringRepresentationMask)); - __ j(not_zero, &runtime); + __ j(not_zero, &external_string); __ bind(&seq_ascii_string); // rdi: subject string (sequential ascii) @@ -2946,6 +2952,27 @@ void RegExpExecStub::Generate(MacroAssembler* masm) { __ bind(&termination_exception); __ ThrowUncatchable(TERMINATION, rax); + // External string. Short external strings have already been ruled out. + // rdi: subject string (expected to be external) + // rbx: scratch + __ bind(&external_string); + __ movq(rbx, FieldOperand(rdi, HeapObject::kMapOffset)); + __ movzxbl(rbx, FieldOperand(rbx, Map::kInstanceTypeOffset)); + if (FLAG_debug_code) { + // Assert that we do not have a cons or slice (indirect strings) here. + // Sequential strings have already been ruled out. + __ testb(rbx, Immediate(kIsIndirectStringMask)); + __ Assert(zero, "external string expected, but not found"); + } + __ movq(rdi, FieldOperand(rdi, ExternalString::kResourceDataOffset)); + // Move the pointer so that offset-wise, it looks like a sequential string. + STATIC_ASSERT(SeqTwoByteString::kHeaderSize == SeqAsciiString::kHeaderSize); + __ subq(rdi, Immediate(SeqTwoByteString::kHeaderSize - kHeapObjectTag)); + STATIC_ASSERT(kTwoByteStringTag == 0); + __ testb(rbx, Immediate(kStringEncodingMask)); + __ j(not_zero, &seq_ascii_string); + __ jmp(&seq_two_byte_string); + // Do the runtime call to execute the regexp. 
__ bind(&runtime); __ TailCallRuntime(Runtime::kRegExpExec, 4, 1); diff --git a/test/mjsunit/string-external-cached.js b/test/mjsunit/string-external-cached.js new file mode 100644 index 0000000..12312ac --- /dev/null +++ b/test/mjsunit/string-external-cached.js @@ -0,0 +1,94 @@ +// Copyright 2010 the V8 project authors. All rights reserved. +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are +// met: +// +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// * Neither the name of Google Inc. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Flags: --expose-externalize-string --expose-gc +// Test data pointer caching of external strings. 
+ +function test() { + // Test string.charAt. + var charat_str = new Array(5); + charat_str[0] = "0123456789ABCDEF0123456789ABCDEF\ +0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\ +0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\ +0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\ +0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"; + charat_str[1] = "0123456789ABCDEF"; + for (var i = 0; i < 6; i++) charat_str[1] += charat_str[1]; + try { // String can only be externalized once + externalizeString(charat_str[0], false); + externalizeString(charat_str[1], true); + } catch (ex) { } + charat_str[2] = charat_str[0].slice(0, -1); + charat_str[3] = charat_str[1].slice(0, -1); + charat_str[4] = charat_str[0] + charat_str[0]; + + for (var i = 0; i < 5; i++) { + assertEquals('B', charat_str[i].charAt(6*16 + 11)); + assertEquals('C', charat_str[i].charAt(6*16 + 12)); + assertEquals('A', charat_str[i].charAt(3*16 + 10)); + assertEquals('B', charat_str[i].charAt(3*16 + 11)); + } + + charat_short = "012"; + try { // String can only be externalized once + externalizeString(charat_short, true); + } catch (ex) { } + assertEquals("1", charat_short.charAt(1)); + + // Test regexp. 
+ var re = /(A|B)/; + var rere = /(T.{1,2}B)/; + var ascii = "ABCDEFGHIJKLMNOPQRST"; + var twobyte = "_ABCDEFGHIJKLMNOPQRST"; + try { + externalizeString(ascii, false); + externalizeString(twobyte, true); + } catch (ex) { } + assertTrue(isAsciiString(ascii)); + assertFalse(isAsciiString(twobyte)); + var ascii_slice = ascii.slice(1,-1); + var twobyte_slice = twobyte.slice(2,-1); + var ascii_cons = ascii + ascii; + var twobyte_cons = twobyte + twobyte; + for (var i = 0; i < 2; i++) { + assertEquals(["A", "A"], re.exec(ascii)); + assertEquals(["B", "B"], re.exec(ascii_slice)); + assertEquals(["TAB", "TAB"], rere.exec(ascii_cons)); + assertEquals(["A", "A"], re.exec(twobyte)); + assertEquals(["B", "B"], re.exec(twobyte_slice)); + assertEquals(["T_AB", "T_AB"], rere.exec(twobyte_cons)); + } +} + +// Run the test many times to ensure IC-s don't break things. +for (var i = 0; i < 10; i++) { + test(); +} + +// Clean up string to make Valgrind happy. +gc(); +gc(); diff --git a/test/mjsunit/string-externalize.js b/test/mjsunit/string-externalize.js index a68711b..d52a7e2 100644 --- a/test/mjsunit/string-externalize.js +++ b/test/mjsunit/string-externalize.js @@ -87,36 +87,6 @@ function test() { // Flattened string should still be two-byte. assertFalse(isAsciiString(str2)); - - // Test buffered external strings. 
- var charat_str = new Array(5); - charat_str[0] = "0123456789ABCDEF0123456789ABCDEF\ -0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\ -0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\ -0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF\ -0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF0123456789ABCDEF"; - charat_str[1] = "0123456789ABCDEF"; - for (var i = 0; i < 6; i++) charat_str[1] += charat_str[1]; - try { // String can only be externalized once - externalizeString(charat_str[0], false); - externalizeString(charat_str[1], true); - } catch (ex) { } - charat_str[2] = charat_str[0].slice(0, -1); - charat_str[3] = charat_str[1].slice(0, -1); - charat_str[4] = charat_str[0] + charat_str[0]; - - for (var i = 0; i < 5; i++) { - assertEquals('B', charat_str[i].charAt(6*16 + 11)); - assertEquals('C', charat_str[i].charAt(6*16 + 12)); - assertEquals('A', charat_str[i].charAt(3*16 + 10)); - assertEquals('B', charat_str[i].charAt(3*16 + 11)); - } - - charat_short = "012"; - try { // String can only be externalized once - externalizeString(charat_short, true); - } catch (ex) { } - assertEquals("1", charat_short.charAt(1)); } // Run the test many times to ensure IC-s don't break things. -- 2.7.4