From 49d05495d8b854d6deb9f3431564c6e3e620abb1 Mon Sep 17 00:00:00 2001 From: "erik.corry@gmail.com" Date: Fri, 28 Nov 2008 10:37:06 +0000 Subject: [PATCH] * Remove an unused layer of abstraction by not having both a macro assembler and an assembler when compiling to bytecode. This fixes http://code.google.com/p/v8/issues/detail?id=165 * Preload the 'current character' register when starting a match (byte code only at the moment). Review URL: http://codereview.chromium.org/10995 git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@865 ce2b1a6d-e550-0410-aec6-3dcde31c8c00 --- src/SConscript | 28 +- src/assembler-irregexp.cc | 357 ------------------ src/assembler-irregexp.h | 164 -------- src/assembler.h | 2 +- src/interpreter-irregexp.cc | 14 +- src/jsregexp.cc | 6 +- ... => regexp-macro-assembler-irregexp-inl.h} | 20 +- src/regexp-macro-assembler-irregexp.cc | 195 +++++++--- src/regexp-macro-assembler-irregexp.h | 37 +- test/cctest/test-regexp.cc | 142 +------ tools/visual_studio/v8_base.vcproj | 10 +- 11 files changed, 214 insertions(+), 761 deletions(-) delete mode 100644 src/assembler-irregexp.cc delete mode 100644 src/assembler-irregexp.h rename src/{assembler-irregexp-inl.h => regexp-macro-assembler-irregexp-inl.h} (84%) diff --git a/src/SConscript b/src/SConscript index de584ad15..d9858655c 100644 --- a/src/SConscript +++ b/src/SConscript @@ -35,20 +35,20 @@ Import('context') SOURCES = { 'all': [ - 'accessors.cc', 'allocation.cc', 'api.cc', 'assembler.cc', - 'assembler-irregexp.cc', 'ast.cc', 'bootstrapper.cc', 'builtins.cc', - 'checks.cc', 'code-stubs.cc', 'codegen.cc', 'compilation-cache.cc', - 'compiler.cc', 'contexts.cc', 'conversions.cc', 'counters.cc', - 'dateparser.cc', 'debug.cc', 'disassembler.cc', 'execution.cc', - 'factory.cc', 'flags.cc', 'frames.cc', 'global-handles.cc', - 'handles.cc', 'hashmap.cc', 'heap.cc', 'ic.cc', 'interpreter-irregexp.cc', - 'jsregexp.cc', 'log.cc', 'mark-compact.cc', 'messages.cc', - 'objects.cc', 'parser.cc', 
'property.cc', 'regexp-macro-assembler.cc', - 'regexp-macro-assembler-irregexp.cc', 'rewriter.cc', 'runtime.cc', 'scanner.cc', - 'scopeinfo.cc', 'scopes.cc', 'serialize.cc', 'snapshot-common.cc', - 'spaces.cc', 'string-stream.cc', 'stub-cache.cc', 'token.cc', 'top.cc', - 'unicode.cc', 'usage-analyzer.cc', 'utils.cc', 'v8-counters.cc', - 'v8.cc', 'v8threads.cc', 'variables.cc', 'zone.cc' + 'accessors.cc', 'allocation.cc', 'api.cc', 'assembler.cc', 'ast.cc', + 'bootstrapper.cc', 'builtins.cc', 'checks.cc', 'code-stubs.cc', + 'codegen.cc', 'compilation-cache.cc', 'compiler.cc', 'contexts.cc', + 'conversions.cc', 'counters.cc', 'dateparser.cc', 'debug.cc', + 'disassembler.cc', 'execution.cc', 'factory.cc', 'flags.cc', 'frames.cc', + 'global-handles.cc', 'handles.cc', 'hashmap.cc', 'heap.cc', 'ic.cc', + 'interpreter-irregexp.cc', 'jsregexp.cc', 'log.cc', 'mark-compact.cc', + 'messages.cc', 'objects.cc', 'parser.cc', 'property.cc', + 'regexp-macro-assembler.cc', 'regexp-macro-assembler-irregexp.cc', + 'rewriter.cc', 'runtime.cc', 'scanner.cc', 'scopeinfo.cc', 'scopes.cc', + 'serialize.cc', 'snapshot-common.cc', 'spaces.cc', 'string-stream.cc', + 'stub-cache.cc', 'token.cc', 'top.cc', 'unicode.cc', 'usage-analyzer.cc', + 'utils.cc', 'v8-counters.cc', 'v8.cc', 'v8threads.cc', 'variables.cc', + 'zone.cc' ], 'arch:arm': ['assembler-arm.cc', 'builtins-arm.cc', 'codegen-arm.cc', 'cpu-arm.cc', 'disasm-arm.cc', 'frames-arm.cc', 'ic-arm.cc', diff --git a/src/assembler-irregexp.cc b/src/assembler-irregexp.cc deleted file mode 100644 index df434be2d..000000000 --- a/src/assembler-irregexp.cc +++ /dev/null @@ -1,357 +0,0 @@ -// Copyright 2008 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. 
-// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// A light-weight assembler for the Irregexp byte code. 
- - -#include "v8.h" -#include "ast.h" -#include "bytecodes-irregexp.h" -#include "assembler-irregexp.h" - -#include "assembler-irregexp-inl.h" - - -namespace v8 { namespace internal { - - -IrregexpAssembler::IrregexpAssembler(Vector buffer) - : buffer_(buffer), - pc_(0), - own_buffer_(false) { -} - - -IrregexpAssembler::~IrregexpAssembler() { - if (own_buffer_) { - buffer_.Dispose(); - } -} - - -void IrregexpAssembler::PushCurrentPosition(int cp_offset) { - ASSERT(cp_offset >= 0); - Emit(BC_PUSH_CP); - Emit32(cp_offset); -} - - -void IrregexpAssembler::PushBacktrack(Label* l) { - Emit(BC_PUSH_BT); - EmitOrLink(l); -} - - -void IrregexpAssembler::PushRegister(int index) { - ASSERT(index >= 0); - Emit(BC_PUSH_REGISTER); - Emit(index); -} - - -void IrregexpAssembler::WriteCurrentPositionToRegister(int index, - int cp_offset) { - ASSERT(cp_offset >= 0); - ASSERT(index >= 0); - Emit(BC_SET_REGISTER_TO_CP); - Emit(index); - Emit32(cp_offset); -} - - -void IrregexpAssembler::ReadCurrentPositionFromRegister(int index) { - ASSERT(index >= 0); - Emit(BC_SET_CP_TO_REGISTER); - Emit(index); -} - - -void IrregexpAssembler::WriteStackPointerToRegister(int index) { - ASSERT(index >= 0); - Emit(BC_SET_REGISTER_TO_SP); - Emit(index); -} - - -void IrregexpAssembler::ReadStackPointerFromRegister(int index) { - ASSERT(index >= 0); - Emit(BC_SET_SP_TO_REGISTER); - Emit(index); -} - - -void IrregexpAssembler::SetRegister(int index, int value) { - ASSERT(index >= 0); - Emit(BC_SET_REGISTER); - Emit(index); - Emit32(value); -} - - -void IrregexpAssembler::AdvanceRegister(int index, int by) { - ASSERT(index >= 0); - Emit(BC_ADVANCE_REGISTER); - Emit(index); - Emit32(by); -} - - -void IrregexpAssembler::PopCurrentPosition() { - Emit(BC_POP_CP); -} - - -void IrregexpAssembler::PopBacktrack() { - Emit(BC_POP_BT); -} - - -void IrregexpAssembler::PopRegister(int index) { - Emit(BC_POP_REGISTER); - Emit(index); -} - - -void IrregexpAssembler::Fail() { - Emit(BC_FAIL); -} - - -void 
IrregexpAssembler::Break() { - Emit(BC_BREAK); -} - - -void IrregexpAssembler::Succeed() { - Emit(BC_SUCCEED); -} - - -void IrregexpAssembler::Bind(Label* l) { - ASSERT(!l->is_bound()); - if (l->is_linked()) { - int pos = l->pos(); - while (pos != 0) { - int fixup = pos; - pos = Load32(buffer_.start() + fixup); - Store32(buffer_.start() + fixup, pc_); - } - } - l->bind_to(pc_); -} - - -void IrregexpAssembler::AdvanceCP(int cp_offset) { - Emit(BC_ADVANCE_CP); - Emit32(cp_offset); -} - - -void IrregexpAssembler::GoTo(Label* l) { - Emit(BC_GOTO); - EmitOrLink(l); -} - - -void IrregexpAssembler::LoadCurrentChar(int cp_offset, Label* on_end) { - Emit(BC_LOAD_CURRENT_CHAR); - Emit32(cp_offset); - EmitOrLink(on_end); -} - - -void IrregexpAssembler::CheckCharacter(uc16 c, Label* on_match) { - Emit(BC_CHECK_CHAR); - Emit16(c); - EmitOrLink(on_match); -} - - -void IrregexpAssembler::CheckNotCharacter(uc16 c, Label* on_mismatch) { - Emit(BC_CHECK_NOT_CHAR); - Emit16(c); - EmitOrLink(on_mismatch); -} - -void IrregexpAssembler::OrThenCheckNotCharacter(uc16 c, - uc16 mask, - Label* on_mismatch) { - Emit(BC_OR_CHECK_NOT_CHAR); - Emit16(c); - Emit16(mask); - EmitOrLink(on_mismatch); -} - - -void IrregexpAssembler::MinusOrThenCheckNotCharacter(uc16 c, - uc16 mask, - Label* on_mismatch) { - Emit(BC_MINUS_OR_CHECK_NOT_CHAR); - Emit16(c); - Emit16(mask); - EmitOrLink(on_mismatch); -} - - -void IrregexpAssembler::CheckCharacterLT(uc16 limit, Label* on_less) { - Emit(BC_CHECK_LT); - Emit16(limit); - EmitOrLink(on_less); -} - - -void IrregexpAssembler::CheckCharacterGT(uc16 limit, Label* on_greater) { - Emit(BC_CHECK_GT); - Emit16(limit); - EmitOrLink(on_greater); -} - - -void IrregexpAssembler::CheckNotBackReference(int capture_index, - Label* on_mismatch) { - Emit(BC_CHECK_NOT_BACK_REF); - Emit(capture_index); - EmitOrLink(on_mismatch); -} - - -void IrregexpAssembler::CheckNotBackReferenceNoCase(int capture_index, - Label* on_mismatch) { - Emit(BC_CHECK_NOT_BACK_REF_NO_CASE); - 
Emit(capture_index); - EmitOrLink(on_mismatch); -} - - -void IrregexpAssembler::CheckNotRegistersEqual(int reg1, - int reg2, - Label* on_not_equal) { - Emit(BC_CHECK_NOT_REGS_EQUAL); - Emit(reg1); - Emit(reg2); - EmitOrLink(on_not_equal); -} - - -void IrregexpAssembler::CheckRegister(int byte_code, - int reg_index, - uint16_t vs, - Label* on_true) { - Emit(byte_code); - Emit(reg_index); - Emit16(vs); - EmitOrLink(on_true); -} - - -void IrregexpAssembler::CheckRegisterLT(int reg_index, - uint16_t vs, - Label* on_less_than) { - CheckRegister(BC_CHECK_REGISTER_LT, reg_index, vs, on_less_than); -} - - -void IrregexpAssembler::CheckRegisterGE(int reg_index, - uint16_t vs, - Label* on_greater_than_equal) { - CheckRegister(BC_CHECK_REGISTER_GE, reg_index, vs, on_greater_than_equal); -} - - -void IrregexpAssembler::LookupMap1(uc16 start, Label* bit_map, Label* on_zero) { - Emit(BC_LOOKUP_MAP1); - Emit16(start); - EmitOrLink(bit_map); - EmitOrLink(on_zero); -} - - -void IrregexpAssembler::LookupMap2(uc16 start, - Label* half_nibble_map, - const Vector& table) { - Emit(BC_LOOKUP_MAP2); - Emit16(start); - EmitOrLink(half_nibble_map); - ASSERT(table.length() > 0); - ASSERT(table.length() <= 4); - for (int i = 0; i < table.length(); i++) { - EmitOrLink(table[i]); - } -} - - -void IrregexpAssembler::LookupMap8(uc16 start, - Label* byte_map, - const Vector& table) { - Emit(BC_LOOKUP_MAP8); - Emit16(start); - EmitOrLink(byte_map); - ASSERT(table.length() > 0); - ASSERT(table.length() <= 256); - for (int i = 0; i < table.length(); i++) { - EmitOrLink(table[i]); - } -} - - -void IrregexpAssembler::LookupHighMap8(byte start, - Label* byte_map, - const Vector& table) { - Emit(BC_LOOKUP_HI_MAP8); - Emit(start); - EmitOrLink(byte_map); - ASSERT(table.length() > 0); - ASSERT(table.length() <= 256); - for (int i = 0; i < table.length(); i++) { - EmitOrLink(table[i]); - } -} - - -int IrregexpAssembler::length() { - return pc_; -} - - -void IrregexpAssembler::Copy(Address a) { - memcpy(a, 
buffer_.start(), length()); -} - - -void IrregexpAssembler::Expand() { - bool old_buffer_was_our_own = own_buffer_; - Vector old_buffer = buffer_; - buffer_ = Vector::New(old_buffer.length() * 2); - own_buffer_ = true; - memcpy(buffer_.start(), old_buffer.start(), old_buffer.length()); - if (old_buffer_was_our_own) { - old_buffer.Dispose(); - } -} - - -} } // namespace v8::internal diff --git a/src/assembler-irregexp.h b/src/assembler-irregexp.h deleted file mode 100644 index 63bffe89a..000000000 --- a/src/assembler-irregexp.h +++ /dev/null @@ -1,164 +0,0 @@ -// Copyright 2006-2008 the V8 project authors. All rights reserved. -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions are -// met: -// -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above -// copyright notice, this list of conditions and the following -// disclaimer in the documentation and/or other materials provided -// with the distribution. -// * Neither the name of Google Inc. nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS -// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT -// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR -// A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT -// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -// A light-weight assembler for the Irregexp byte code. - -#ifndef V8_ASSEMBLER_IRREGEXP_H_ -#define V8_ASSEMBLER_IRREGEXP_H_ - -namespace v8 { namespace internal { - - -class IrregexpAssembler { - public: - // Create an assembler. Instructions and relocation information are emitted - // into a buffer, with the instructions starting from the beginning and the - // relocation information starting from the end of the buffer. See CodeDesc - // for a detailed comment on the layout (globals.h). - // - // If the provided buffer is NULL, the assembler allocates and grows its own - // buffer, and buffer_size determines the initial buffer size. The buffer is - // owned by the assembler and deallocated upon destruction of the assembler. - // - // If the provided buffer is not NULL, the assembler uses the provided buffer - // for code generation and assumes its size to be buffer_size. If the buffer - // is too small, a fatal error occurs. No deallocation of the buffer is done - // upon destruction of the assembler. - explicit IrregexpAssembler(Vector); - ~IrregexpAssembler(); - - // CP = current position in source. - // BT = backtrack label. - - // Stack. 
- void PushCurrentPosition(int cp_offset = 0); - void PushBacktrack(Label* l); - void PushRegister(int index); - void WriteCurrentPositionToRegister(int index, int cp_offset = 0); - void ReadCurrentPositionFromRegister(int index); - void WriteStackPointerToRegister(int index); - void ReadStackPointerFromRegister(int index); - void SetRegister(int index, int value); - void AdvanceRegister(int index, int by); - - void PopCurrentPosition(); - void PopBacktrack(); - void PopRegister(int index); - - void Fail(); - void Succeed(); - - // This instruction will cause a fatal VM error if hit. - void Break(); - - // Binds an unbound label L to the current code posn. - void Bind(Label* l); - - void AdvanceCP(int by); - - void GoTo(Label* l); - - // Loads current char into a machine register. Jumps to the label if we - // reached the end of the subject string. Fall through otherwise. - void LoadCurrentChar(int cp_offset, Label* on_end); - - // Checks current char register against a singleton. - void CheckCharacter(uc16 c, Label* on_match); - void CheckNotCharacter(uc16 c, Label* on_mismatch); - void OrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch); - void MinusOrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch); - - // Used to check current char register against a range. - void CheckCharacterLT(uc16 limit, Label* on_less); - void CheckCharacterGT(uc16 limit, Label* on_greater); - - // Checks current position for a match against a previous capture. Advances - // current position by the length of the capture iff it matches. The capture - // is stored in a given register and the register after. If a register - // contains -1 then the other register must always contain -1 and the - // on_mismatch label will never be called. 
- void CheckNotBackReference(int capture_index, Label* on_mismatch); - void CheckNotBackReferenceNoCase(int capture_index, Label* on_mismatch); - void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal); - - // Checks a register for strictly-less-than or greater-than-or-equal. - void CheckRegisterLT(int reg_index, uint16_t vs, Label* on_less_than); - void CheckRegisterGE(int reg_index, uint16_t vs, Label* on_greater_equal); - - // Subtracts a 16 bit value from the current character, uses the result to - // look up in a bit array, uses the result of that to decide whether to fall - // though (on 1) or jump to the on_zero label (on 0). - void LookupMap1(uc16 start, Label* bit_map, Label* on_zero); - - // Subtracts a 16 bit value from the current character, uses the result to - // look up in a 2-bit array, uses the result of that to look up in a label - // table and jumps to the label. - void LookupMap2(uc16 start, - Label* half_nibble_map, - const Vector& table); - - // Subtracts a 16 bit value from the current character, uses the result to - // look up in a byte array, uses the result of that to look up in a label - // array and jumps to the label. - void LookupMap8(uc16 start, Label* byte_map, const Vector& table); - - // Takes the high byte of the current character, uses the result to - // look up in a byte array, uses the result of that to look up in a label - // array and jumps to the label. - void LookupHighMap8(byte start, Label* byte_map, const Vector& table); - - // Code and bitmap emission. - inline void Emit32(uint32_t x); - inline void Emit16(uint32_t x); - inline void Emit(uint32_t x); - - // Bytecode buffer. - int length(); - void Copy(Address a); - - inline void EmitOrLink(Label* l); - private: - inline void CheckRegister(int byte_code, - int reg_index, - uint16_t vs, - Label* on_true); - void Expand(); - - // The buffer into which code and relocation info are generated. - Vector buffer_; - // The program counter. 
- int pc_; - // True if the assembler owns the buffer, false if buffer is external. - bool own_buffer_; - - DISALLOW_IMPLICIT_CONSTRUCTORS(IrregexpAssembler); -}; - - -} } // namespace v8::internal - -#endif // V8_ASSEMBLER_IRREGEXP_H_ diff --git a/src/assembler.h b/src/assembler.h index c04141727..971b95022 100644 --- a/src/assembler.h +++ b/src/assembler.h @@ -85,7 +85,7 @@ class Label : public ZoneObject { // LabelShadows are dynamically allocated. friend class RegexpAssembler; friend class Displacement; friend class LabelShadow; - friend class IrregexpAssembler; + friend class RegExpMacroAssemblerIrregexp; }; diff --git a/src/interpreter-irregexp.cc b/src/interpreter-irregexp.cc index a8720456a..0acbfeb69 100644 --- a/src/interpreter-irregexp.cc +++ b/src/interpreter-irregexp.cc @@ -99,13 +99,13 @@ static void TraceInterpreter(const byte* code_base, static bool RawMatch(const byte* code_base, Vector subject, int* registers, - int current) { + int current, + int current_char) { const byte* pc = code_base; static const int kBacktrackStackSize = 10000; int backtrack_stack[kBacktrackStackSize]; int backtrack_stack_space = kBacktrackStackSize; int* backtrack_sp = backtrack_stack; - int current_char = -1; #ifdef DEBUG if (FLAG_trace_regexp_bytecodes) { PrintF("\n\nStart bytecode interpreter\n\n"); @@ -382,11 +382,15 @@ bool IrregexpInterpreter::Match(Handle code_array, AssertNoAllocation a; const byte* code_base = code_array->GetDataStartAddress(); + uc16 previous_char = '\n'; + Vector subject_vector = + Vector(subject16->GetTwoByteData(), subject16->length()); + if (start_position != 0) previous_char = subject_vector[start_position - 1]; return RawMatch(code_base, - Vector(subject16->GetTwoByteData(), - subject16->length()), + subject_vector, registers, - start_position); + start_position, + previous_char); } } } // namespace v8::internal diff --git a/src/jsregexp.cc b/src/jsregexp.cc index bfefe421e..43148a9b0 100644 --- a/src/jsregexp.cc +++ b/src/jsregexp.cc @@ 
-40,7 +40,6 @@ #include "compilation-cache.h" #include "string-stream.h" #include "parser.h" -#include "assembler-irregexp.h" #include "regexp-macro-assembler.h" #include "regexp-macro-assembler-tracer.h" #include "regexp-macro-assembler-irregexp.h" @@ -2730,9 +2729,8 @@ Handle RegExpEngine::Compile(RegExpParseResult* input, input->capture_count); #endif } - byte codes[1024]; - IrregexpAssembler assembler(Vector(codes, 1024)); - RegExpMacroAssemblerIrregexp macro_assembler(&assembler); + EmbeddedVector codes; + RegExpMacroAssemblerIrregexp macro_assembler(codes); return compiler.Assemble(&macro_assembler, node, input->capture_count); diff --git a/src/assembler-irregexp-inl.h b/src/regexp-macro-assembler-irregexp-inl.h similarity index 84% rename from src/assembler-irregexp-inl.h rename to src/regexp-macro-assembler-irregexp-inl.h index eb54b8178..faf12da66 100644 --- a/src/assembler-irregexp-inl.h +++ b/src/regexp-macro-assembler-irregexp-inl.h @@ -31,13 +31,12 @@ #include "v8.h" #include "ast.h" #include "bytecodes-irregexp.h" -#include "assembler-irregexp.h" namespace v8 { namespace internal { -void IrregexpAssembler::Emit(uint32_t byte) { +void RegExpMacroAssemblerIrregexp::Emit(uint32_t byte) { ASSERT(pc_ <= buffer_.length()); if (pc_ == buffer_.length()) { Expand(); @@ -46,7 +45,7 @@ void IrregexpAssembler::Emit(uint32_t byte) { } -void IrregexpAssembler::Emit16(uint32_t word) { +void RegExpMacroAssemblerIrregexp::Emit16(uint32_t word) { ASSERT(pc_ <= buffer_.length()); if (pc_ + 1 >= buffer_.length()) { Expand(); @@ -56,7 +55,7 @@ void IrregexpAssembler::Emit16(uint32_t word) { } -void IrregexpAssembler::Emit32(uint32_t word) { +void RegExpMacroAssemblerIrregexp::Emit32(uint32_t word) { ASSERT(pc_ <= buffer_.length()); if (pc_ + 3 >= buffer_.length()) { Expand(); @@ -66,17 +65,4 @@ void IrregexpAssembler::Emit32(uint32_t word) { } -void IrregexpAssembler::EmitOrLink(Label* l) { - if (l->is_bound()) { - Emit32(l->pos()); - } else { - int pos = 0; - if 
(l->is_linked()) { - pos = l->pos(); - } - l->link_to(pc_); - Emit32(pos); - } -} - } } // namespace v8::internal diff --git a/src/regexp-macro-assembler-irregexp.cc b/src/regexp-macro-assembler-irregexp.cc index ce7357cc1..91923ca8f 100644 --- a/src/regexp-macro-assembler-irregexp.cc +++ b/src/regexp-macro-assembler-irregexp.cc @@ -28,15 +28,21 @@ #include "v8.h" #include "ast.h" #include "bytecodes-irregexp.h" -#include "assembler-irregexp.h" -#include "assembler-irregexp-inl.h" #include "regexp-macro-assembler.h" #include "regexp-macro-assembler-irregexp.h" +#include "regexp-macro-assembler-irregexp-inl.h" namespace v8 { namespace internal { +RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Vector buffer) + : buffer_(buffer), + pc_(0), + own_buffer_(false) { +} + + RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() { } @@ -48,96 +54,136 @@ RegExpMacroAssemblerIrregexp::Implementation() { void RegExpMacroAssemblerIrregexp::Bind(Label* l) { - assembler_->Bind(l); + ASSERT(!l->is_bound()); + if (l->is_linked()) { + int pos = l->pos(); + while (pos != 0) { + int fixup = pos; + pos = Load32(buffer_.start() + fixup); + Store32(buffer_.start() + fixup, pc_); + } + } + l->bind_to(pc_); } void RegExpMacroAssemblerIrregexp::EmitOrLink(Label* l) { - assembler_->EmitOrLink(l); + if (l->is_bound()) { + Emit32(l->pos()); + } else { + int pos = 0; + if (l->is_linked()) { + pos = l->pos(); + } + l->link_to(pc_); + Emit32(pos); + } } void RegExpMacroAssemblerIrregexp::PopRegister(int register_index) { - assembler_->PopRegister(register_index); + Emit(BC_POP_REGISTER); + Emit(register_index); } void RegExpMacroAssemblerIrregexp::PushRegister(int register_index) { - assembler_->PushRegister(register_index); + ASSERT(register_index >= 0); + Emit(BC_PUSH_REGISTER); + Emit(register_index); } void RegExpMacroAssemblerIrregexp::WriteCurrentPositionToRegister( int register_index) { - assembler_->WriteCurrentPositionToRegister(register_index); + ASSERT(register_index 
>= 0); + Emit(BC_SET_REGISTER_TO_CP); + Emit(register_index); + Emit32(0); // Current position offset. } void RegExpMacroAssemblerIrregexp::ReadCurrentPositionFromRegister( int register_index) { - assembler_->ReadCurrentPositionFromRegister(register_index); + ASSERT(register_index >= 0); + Emit(BC_SET_CP_TO_REGISTER); + Emit(register_index); } void RegExpMacroAssemblerIrregexp::WriteStackPointerToRegister( int register_index) { - assembler_->WriteStackPointerToRegister(register_index); + ASSERT(register_index >= 0); + Emit(BC_SET_REGISTER_TO_SP); + Emit(register_index); } void RegExpMacroAssemblerIrregexp::ReadStackPointerFromRegister( int register_index) { - assembler_->ReadStackPointerFromRegister(register_index); + ASSERT(register_index >= 0); + Emit(BC_SET_SP_TO_REGISTER); + Emit(register_index); } void RegExpMacroAssemblerIrregexp::SetRegister(int register_index, int to) { - assembler_->SetRegister(register_index, to); + ASSERT(register_index >= 0); + Emit(BC_SET_REGISTER); + Emit(register_index); + Emit32(to); } void RegExpMacroAssemblerIrregexp::AdvanceRegister(int register_index, int by) { - assembler_->AdvanceRegister(register_index, by); + ASSERT(register_index >= 0); + Emit(BC_ADVANCE_REGISTER); + Emit(register_index); + Emit32(by); } void RegExpMacroAssemblerIrregexp::PopCurrentPosition() { - assembler_->PopCurrentPosition(); + Emit(BC_POP_CP); } void RegExpMacroAssemblerIrregexp::PushCurrentPosition() { - assembler_->PushCurrentPosition(); + Emit(BC_PUSH_CP); + Emit32(0); // Current position offset. 
} void RegExpMacroAssemblerIrregexp::Backtrack() { - assembler_->PopBacktrack(); + Emit(BC_POP_BT); } void RegExpMacroAssemblerIrregexp::GoTo(Label* l) { - assembler_->GoTo(l); + Emit(BC_GOTO); + EmitOrLink(l); } void RegExpMacroAssemblerIrregexp::PushBacktrack(Label* l) { - assembler_->PushBacktrack(l); + Emit(BC_PUSH_BT); + EmitOrLink(l); } void RegExpMacroAssemblerIrregexp::Succeed() { - assembler_->Succeed(); + Emit(BC_SUCCEED); } void RegExpMacroAssemblerIrregexp::Fail() { - assembler_->Fail(); + Emit(BC_FAIL); } void RegExpMacroAssemblerIrregexp::AdvanceCurrentPosition(int by) { - assembler_->AdvanceCP(by); + Emit(BC_ADVANCE_CP); + Emit32(by); } @@ -145,43 +191,57 @@ void RegExpMacroAssemblerIrregexp::CheckCurrentPosition( int register_index, Label* on_equal) { // TODO(erikcorry): Implement. - UNREACHABLE(); + UNIMPLEMENTED(); } void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset, Label* on_failure) { - assembler_->LoadCurrentChar(cp_offset, on_failure); + Emit(BC_LOAD_CURRENT_CHAR); + Emit32(cp_offset); + EmitOrLink(on_failure); } void RegExpMacroAssemblerIrregexp::CheckCharacterLT(uc16 limit, Label* on_less) { - assembler_->CheckCharacterLT(limit, on_less); + Emit(BC_CHECK_LT); + Emit16(limit); + EmitOrLink(on_less); } void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit, Label* on_greater) { - assembler_->CheckCharacterGT(limit, on_greater); + Emit(BC_CHECK_GT); + Emit16(limit); + EmitOrLink(on_greater); } void RegExpMacroAssemblerIrregexp::CheckCharacter(uc16 c, Label* on_equal) { - assembler_->CheckCharacter(c, on_equal); + Emit(BC_CHECK_CHAR); + Emit16(c); + EmitOrLink(on_equal); } void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uc16 c, Label* on_not_equal) { - assembler_->CheckNotCharacter(c, on_not_equal); + Emit(BC_CHECK_NOT_CHAR); + Emit16(c); + EmitOrLink(on_not_equal); } -void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterOr(uc16 c, - uc16 mask, - Label* on_not_equal) { - 
assembler_->OrThenCheckNotCharacter(c, mask, on_not_equal); +void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterOr( + uc16 c, + uc16 mask, + Label* on_not_equal) { + Emit(BC_OR_CHECK_NOT_CHAR); + Emit16(c); + Emit16(mask); + EmitOrLink(on_not_equal); } @@ -189,34 +249,44 @@ void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusOr( uc16 c, uc16 mask, Label* on_not_equal) { - assembler_->MinusOrThenCheckNotCharacter(c, mask, on_not_equal); + Emit(BC_MINUS_OR_CHECK_NOT_CHAR); + Emit16(c); + Emit16(mask); + EmitOrLink(on_not_equal); } void RegExpMacroAssemblerIrregexp::CheckNotBackReference(int start_reg, Label* on_not_equal) { - assembler_->CheckNotBackReference(start_reg, on_not_equal); + Emit(BC_CHECK_NOT_BACK_REF); + Emit(start_reg); + EmitOrLink(on_not_equal); } void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase( int start_reg, Label* on_not_equal) { - assembler_->CheckNotBackReferenceNoCase(start_reg, on_not_equal); + Emit(BC_CHECK_NOT_BACK_REF_NO_CASE); + Emit(start_reg); + EmitOrLink(on_not_equal); } void RegExpMacroAssemblerIrregexp::CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal) { - assembler_->CheckNotRegistersEqual(reg1, reg2, on_not_equal); + Emit(BC_CHECK_NOT_REGS_EQUAL); + Emit(reg1); + Emit(reg2); + EmitOrLink(on_not_equal); } void RegExpMacroAssemblerIrregexp::CheckBitmap(uc16 start, Label* bitmap, Label* on_zero) { - assembler_->LookupMap1(start, bitmap, on_zero); + UNIMPLEMENTED(); } @@ -224,7 +294,7 @@ void RegExpMacroAssemblerIrregexp::DispatchHalfNibbleMap( uc16 start, Label* half_nibble_map, const Vector& table) { - assembler_->LookupMap2(start, half_nibble_map, table); + UNIMPLEMENTED(); } @@ -232,7 +302,7 @@ void RegExpMacroAssemblerIrregexp::DispatchByteMap( uc16 start, Label* byte_map, const Vector& table) { - assembler_->LookupMap8(start, byte_map, table); + UNIMPLEMENTED(); } @@ -240,7 +310,7 @@ void RegExpMacroAssemblerIrregexp::DispatchHighByteMap( byte start, Label* byte_map, const Vector& 
table) { - assembler_->LookupHighMap8(start, byte_map, table); + UNIMPLEMENTED(); } @@ -249,32 +319,65 @@ void RegExpMacroAssemblerIrregexp::CheckCharacters( int cp_offset, Label* on_failure) { for (int i = str.length() - 1; i >= 0; i--) { - assembler_->LoadCurrentChar(cp_offset + i, on_failure); - assembler_->CheckNotCharacter(str[i], on_failure); + Emit(BC_LOAD_CURRENT_CHAR); + Emit32(cp_offset + i); + EmitOrLink(on_failure); + Emit(BC_CHECK_NOT_CHAR); + Emit16(str[i]); + EmitOrLink(on_failure); } } void RegExpMacroAssemblerIrregexp::IfRegisterLT(int register_index, int comparand, - Label* if_less_than) { + Label* on_less_than) { ASSERT(comparand >= 0 && comparand <= 65535); - assembler_->CheckRegisterLT(register_index, comparand, if_less_than); + Emit(BC_CHECK_REGISTER_LT); + Emit(register_index); + Emit16(comparand); + EmitOrLink(on_less_than); } void RegExpMacroAssemblerIrregexp::IfRegisterGE(int register_index, int comparand, - Label* if_greater_or_equal) { + Label* on_greater_or_equal) { ASSERT(comparand >= 0 && comparand <= 65535); - assembler_->CheckRegisterGE(register_index, comparand, if_greater_or_equal); + Emit(BC_CHECK_REGISTER_GE); + Emit(register_index); + Emit16(comparand); + EmitOrLink(on_greater_or_equal); } Handle RegExpMacroAssemblerIrregexp::GetCode() { - Handle array = Factory::NewByteArray(assembler_->length()); - assembler_->Copy(array->GetDataStartAddress()); + Handle array = Factory::NewByteArray(length()); + Copy(array->GetDataStartAddress()); return array; } + +int RegExpMacroAssemblerIrregexp::length() { + return pc_; +} + + +void RegExpMacroAssemblerIrregexp::Copy(Address a) { + memcpy(a, buffer_.start(), length()); +} + + +void RegExpMacroAssemblerIrregexp::Expand() { + bool old_buffer_was_our_own = own_buffer_; + Vector old_buffer = buffer_; + buffer_ = Vector::New(old_buffer.length() * 2); + own_buffer_ = true; + memcpy(buffer_.start(), old_buffer.start(), old_buffer.length()); + if (old_buffer_was_our_own) { + 
old_buffer.Dispose(); + } +} + + } } // namespace v8::internal diff --git a/src/regexp-macro-assembler-irregexp.h b/src/regexp-macro-assembler-irregexp.h index 56692d9ba..8b572a901 100644 --- a/src/regexp-macro-assembler-irregexp.h +++ b/src/regexp-macro-assembler-irregexp.h @@ -33,9 +33,20 @@ namespace v8 { namespace internal { class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { public: - explicit RegExpMacroAssemblerIrregexp(IrregexpAssembler* assembler) - : assembler_(assembler) { - } + // Create an assembler. Instructions and relocation information are emitted + // into a buffer, with the instructions starting from the beginning and the + // relocation information starting from the end of the buffer. See CodeDesc + // for a detailed comment on the layout (globals.h). + // + // If the provided buffer is NULL, the assembler allocates and grows its own + // buffer, and buffer_size determines the initial buffer size. The buffer is + // owned by the assembler and deallocated upon destruction of the assembler. + // + // If the provided buffer is not NULL, the assembler uses the provided buffer + // for code generation and assumes its size to be buffer_size. If the buffer + // is too small, a fatal error occurs. No deallocation of the buffer is done + // upon destruction of the assembler. + explicit RegExpMacroAssemblerIrregexp(Vector); virtual ~RegExpMacroAssemblerIrregexp(); virtual void Bind(Label* label); virtual void EmitOrLink(Label* label); @@ -88,7 +99,25 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler { virtual IrregexpImplementation Implementation(); virtual Handle GetCode(); private: - IrregexpAssembler* assembler_; + void Expand(); + // Code and bitmap emission. + inline void Emit32(uint32_t x); + inline void Emit16(uint32_t x); + inline void Emit(uint32_t x); + // Bytecode buffer. + int length(); + void Copy(Address a); + + + + // The buffer into which code and relocation info are generated. 
+ Vector buffer_; + // The program counter. + int pc_; + // True if the assembler owns the buffer, false if buffer is external. + bool own_buffer_; + + DISALLOW_IMPLICIT_CONSTRUCTORS(RegExpMacroAssemblerIrregexp); }; } } // namespace v8::internal diff --git a/test/cctest/test-regexp.cc b/test/cctest/test-regexp.cc index 5aab5fb26..4f075a1fb 100644 --- a/test/cctest/test-regexp.cc +++ b/test/cctest/test-regexp.cc @@ -36,7 +36,6 @@ #include "parser.h" #include "ast.h" #include "jsregexp-inl.h" -#include "assembler-irregexp.h" #include "regexp-macro-assembler.h" #include "regexp-macro-assembler-irregexp.h" #ifdef ARM @@ -519,146 +518,10 @@ TEST(DispatchTableConstruction) { } -TEST(Assembler) { - V8::Initialize(NULL); - byte codes[1024]; - IrregexpAssembler assembler(Vector(codes, 1024)); -#define __ assembler. - Label advance; - Label look_for_foo; - Label fail; - __ GoTo(&look_for_foo); - __ Bind(&advance); - __ AdvanceCP(1); - __ Bind(&look_for_foo); - __ LoadCurrentChar(0, &fail); - __ CheckNotCharacter('f', &advance); - __ LoadCurrentChar(1, &fail); - __ CheckNotCharacter('o', &advance); - __ LoadCurrentChar(2, &fail); - __ CheckNotCharacter('o', &advance); - __ WriteCurrentPositionToRegister(0); - __ WriteCurrentPositionToRegister(1, 2); - __ Succeed(); - __ Bind(&fail); - __ Fail(); - - v8::HandleScope scope; - Handle array = Factory::NewByteArray(assembler.length()); - assembler.Copy(array->GetDataStartAddress()); - int captures[2]; - - Handle f1 = - Factory::NewStringFromAscii(CStrVector("Now is the time")); - Handle f1_16 = RegExpImpl::StringToTwoByte(f1); - CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0)); - - Handle f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz")); - Handle f2_16 = RegExpImpl::StringToTwoByte(f2); - CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0)); - CHECK_EQ(0, captures[0]); - CHECK_EQ(2, captures[1]); - - Handle f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery")); - Handle f3_16 = 
RegExpImpl::StringToTwoByte(f3); - CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0)); - CHECK_EQ(3, captures[0]); - CHECK_EQ(5, captures[1]); -} - - -TEST(Assembler2) { - V8::Initialize(NULL); - byte codes[1024]; - IrregexpAssembler assembler(Vector(codes, 1024)); -#define __ assembler. - // /^.*foo/ - Label more_dots; - Label unwind_dot; - Label failure; - Label foo; - Label foo_failed; - Label dot_match; - // ^ - __ PushCurrentPosition(); - __ PushRegister(0); - __ WriteCurrentPositionToRegister(0); - __ PushBacktrack(&failure); - __ GoTo(&dot_match); - // .* - __ Bind(&more_dots); - __ AdvanceCP(1); - __ Bind(&dot_match); - __ PushCurrentPosition(); - __ PushBacktrack(&unwind_dot); - __ LoadCurrentChar(0, &foo); - __ CheckNotCharacter('\n', &more_dots); - // foo - __ Bind(&foo); - __ CheckNotCharacter('f', &foo_failed); - __ LoadCurrentChar(1, &foo_failed); - __ CheckNotCharacter('o', &foo_failed); - __ LoadCurrentChar(2, &foo_failed); - __ CheckNotCharacter('o', &foo_failed); - __ WriteCurrentPositionToRegister(1, 2); - __ Succeed(); - __ Break(); - - __ Bind(&foo_failed); - __ PopBacktrack(); - __ Break(); - - __ Bind(&unwind_dot); - __ PopCurrentPosition(); - __ LoadCurrentChar(0, &foo_failed); - __ GoTo(&foo); - - __ Bind(&failure); - __ PopRegister(0); - __ PopCurrentPosition(); - __ Fail(); - - v8::HandleScope scope; - Handle array = Factory::NewByteArray(assembler.length()); - assembler.Copy(array->GetDataStartAddress()); - int captures[2]; - - Handle f1 = - Factory::NewStringFromAscii(CStrVector("Now is the time")); - Handle f1_16 = RegExpImpl::StringToTwoByte(f1); - CHECK(!IrregexpInterpreter::Match(array, f1_16, captures, 0)); - - Handle f2 = Factory::NewStringFromAscii(CStrVector("foo bar baz")); - Handle f2_16 = RegExpImpl::StringToTwoByte(f2); - CHECK(IrregexpInterpreter::Match(array, f2_16, captures, 0)); - CHECK_EQ(0, captures[0]); - CHECK_EQ(2, captures[1]); - - Handle f3 = Factory::NewStringFromAscii(CStrVector("tomfoolery")); - 
Handle f3_16 = RegExpImpl::StringToTwoByte(f3); - CHECK(IrregexpInterpreter::Match(array, f3_16, captures, 0)); - CHECK_EQ(0, captures[0]); - CHECK_EQ(5, captures[1]); - - Handle f4 = - Factory::NewStringFromAscii(CStrVector("football buffoonery")); - Handle f4_16 = RegExpImpl::StringToTwoByte(f4); - CHECK(IrregexpInterpreter::Match(array, f4_16, captures, 0)); - CHECK_EQ(0, captures[0]); - CHECK_EQ(14, captures[1]); - - Handle f5 = - Factory::NewStringFromAscii(CStrVector("walking\nbarefoot")); - Handle f5_16 = RegExpImpl::StringToTwoByte(f5); - CHECK(!IrregexpInterpreter::Match(array, f5_16, captures, 0)); -} - - TEST(MacroAssembler) { V8::Initialize(NULL); byte codes[1024]; - IrregexpAssembler assembler(Vector(codes, 1024)); - RegExpMacroAssemblerIrregexp m(&assembler); + RegExpMacroAssemblerIrregexp m(Vector(codes, 1024)); // ^f(o)o. Label fail, fail2, start; uc16 foo_chars[3]; @@ -695,8 +558,7 @@ TEST(MacroAssembler) { v8::HandleScope scope; - Handle array = Factory::NewByteArray(assembler.length()); - assembler.Copy(array->GetDataStartAddress()); + Handle array = Handle::cast(m.GetCode()); int captures[5]; Handle f1 = diff --git a/tools/visual_studio/v8_base.vcproj b/tools/visual_studio/v8_base.vcproj index f8f8041da..912fca09d 100644 --- a/tools/visual_studio/v8_base.vcproj +++ b/tools/visual_studio/v8_base.vcproj @@ -297,15 +297,7 @@ > - - - -