__ IncrementCounter(isolate->counters()->regexp_entry_native(), 1, r0, r2);
// Isolates: note we add an additional parameter here (isolate pointer).
- const int kRegExpExecuteArguments = 8;
+ const int kRegExpExecuteArguments = 9;
const int kParameterRegisters = 4;
__ EnterExitFrame(false, kRegExpExecuteArguments - kParameterRegisters);
// Stack pointer now points to cell where return address is to be written.
// Arguments are before that on the stack or in registers.
- // Argument 8 (sp[16]): Pass current isolate address.
+ // Argument 9 (sp[20]): Pass current isolate address.
__ mov(r0, Operand(ExternalReference::isolate_address()));
- __ str(r0, MemOperand(sp, 4 * kPointerSize));
+ __ str(r0, MemOperand(sp, 5 * kPointerSize));
- // Argument 7 (sp[12]): Indicate that this is a direct call from JavaScript.
+ // Argument 8 (sp[16]): Indicate that this is a direct call from JavaScript.
__ mov(r0, Operand(1));
- __ str(r0, MemOperand(sp, 3 * kPointerSize));
+ __ str(r0, MemOperand(sp, 4 * kPointerSize));
- // Argument 6 (sp[8]): Start (high end) of backtracking stack memory area.
+ // Argument 7 (sp[12]): Start (high end) of backtracking stack memory area.
__ mov(r0, Operand(address_of_regexp_stack_memory_address));
__ ldr(r0, MemOperand(r0, 0));
__ mov(r2, Operand(address_of_regexp_stack_memory_size));
__ ldr(r2, MemOperand(r2, 0));
__ add(r0, r0, Operand(r2));
+ __ str(r0, MemOperand(sp, 3 * kPointerSize));
+
+ // Argument 6: Set the number of capture registers to zero to force global
+ // regexps to behave as non-global. This does not affect non-global regexps.
+ __ mov(r0, Operand(0));
__ str(r0, MemOperand(sp, 2 * kPointerSize));
// Argument 5 (sp[4]): static offsets vector buffer.
// Check the result.
Label success;
- __ cmp(r0, Operand(NativeRegExpMacroAssembler::SUCCESS));
+ __ cmp(r0, Operand(1));
+ // We expect exactly one result since we force the called regexp to behave
+ // as non-global.
__ b(eq, &success);
Label failure;
__ cmp(r0, Operand(NativeRegExpMacroAssembler::FAILURE));
-// Copyright 2009 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
#ifndef V8_INTERPRETED_REGEXP
/*
* This assembler uses the following register assignment convention
+ * - r4 : Temporarily stores the index of capture start after a matching pass
+ * for a global regexp.
* - r5 : Pointer to current code object (Code*) including heap object tag.
* - r6 : Current position in input, as negative offset from end of string.
* Please notice that this is the byte offset, not the character offset!
* - r7 : Currently loaded character. Must be loaded using
* LoadCurrentCharacter before using any of the dispatch methods.
- * - r8 : points to tip of backtrack stack
+ * - r8 : Points to tip of backtrack stack
* - r9 : Unused, might be used by C code and expected unchanged.
* - r10 : End of input (points to byte after last character in input).
* - r11 : Frame pointer. Used to access arguments, local variables and
* RegExp registers.
* - r12 : IP register, used by assembler. Very volatile.
- * - r13/sp : points to tip of C stack.
+ * - r13/sp : Points to tip of C stack.
*
* The remaining registers are free for computations.
* Each call to a public method should retain this convention.
*
* The stack will have the following structure:
- * - fp[52] Isolate* isolate (Address of the current isolate)
- * - fp[48] direct_call (if 1, direct call from JavaScript code,
- * if 0, call through the runtime system).
- * - fp[44] stack_area_base (High end of the memory area to use as
- * backtracking stack).
+ * - fp[56] Isolate* isolate (address of the current isolate)
+ * - fp[52] direct_call (if 1, direct call from JavaScript code,
+ * if 0, call through the runtime system).
+ * - fp[48] stack_area_base (high end of the memory area to use as
+ * backtracking stack).
+ * - fp[44] capture array size (may fit multiple sets of matches)
* - fp[40] int* capture_array (int[num_saved_registers_], for output).
* - fp[36] secondary link/return address used by native call.
* --- sp when called ---
- * - fp[32] return address (lr).
- * - fp[28] old frame pointer (r11).
+ * - fp[32] return address (lr).
+ * - fp[28] old frame pointer (r11).
* - fp[0..24] backup of registers r4..r10.
* --- frame pointer ----
- * - fp[-4] end of input (Address of end of string).
- * - fp[-8] start of input (Address of first character in string).
+ * - fp[-4] end of input (address of end of string).
+ * - fp[-8] start of input (address of first character in string).
* - fp[-12] start index (character index of start).
* - fp[-16] void* input_string (location of a handle containing the string).
- * - fp[-20] Offset of location before start of input (effectively character
+ * - fp[-20] success counter (only for global regexps to count matches).
+ * - fp[-24] Offset of location before start of input (effectively character
* position -1). Used to initialize capture registers to a
* non-position.
- * - fp[-24] At start (if 1, we are starting at the start of the
+ * - fp[-28] At start (if 1, we are starting at the start of the
* string, otherwise 0)
- * - fp[-28] register 0 (Only positions must be stored in the first
+ * - fp[-32] register 0 (Only positions must be stored in the first
* - register 1 num_saved_registers_ registers)
* - ...
* - register num_registers-1
void RegExpMacroAssemblerARM::CheckAtStart(Label* on_at_start) {
Label not_at_start;
// Did we start the match at the start of the string at all?
- __ ldr(r0, MemOperand(frame_pointer(), kAtStart));
+ __ ldr(r0, MemOperand(frame_pointer(), kStartIndex));
__ cmp(r0, Operand(0, RelocInfo::NONE));
- BranchOrBacktrack(eq, ¬_at_start);
+ BranchOrBacktrack(ne, ¬_at_start);
// If we did, are we still at the start of the input?
__ ldr(r1, MemOperand(frame_pointer(), kInputStart));
void RegExpMacroAssemblerARM::CheckNotAtStart(Label* on_not_at_start) {
// Did we start the match at the start of the string at all?
- __ ldr(r0, MemOperand(frame_pointer(), kAtStart));
+ __ ldr(r0, MemOperand(frame_pointer(), kStartIndex));
__ cmp(r0, Operand(0, RelocInfo::NONE));
- BranchOrBacktrack(eq, on_not_at_start);
+ BranchOrBacktrack(ne, on_not_at_start);
// If we did, are we still at the start of the input?
__ ldr(r1, MemOperand(frame_pointer(), kInputStart));
__ add(r0, end_of_input_address(), Operand(current_input_offset()));
Handle<HeapObject> RegExpMacroAssemblerARM::GetCode(Handle<String> source) {
+ Label return_r0;
// Finalize code - write the entry point code now we know how many
// registers we need.
// Set frame pointer in space for it if this is not a direct call
// from generated code.
__ add(frame_pointer(), sp, Operand(4 * kPointerSize));
+ __ mov(r0, Operand(0, RelocInfo::NONE));
+ __ push(r0); // Make room for success counter and initialize it to 0.
__ push(r0); // Make room for "position - 1" constant (value is irrelevant).
- __ push(r0); // Make room for "at start" constant (value is irrelevant).
// Check if we have space on the stack for registers.
Label stack_limit_hit;
Label stack_ok;
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ mov(r0, Operand(EXCEPTION));
- __ jmp(&exit_label_);
+ __ jmp(&return_r0);
__ bind(&stack_limit_hit);
CallCheckStackGuardState(r0);
__ cmp(r0, Operand(0, RelocInfo::NONE));
// If returned value is non-zero, we exit with the returned value as result.
- __ b(ne, &exit_label_);
+ __ b(ne, &return_r0);
__ bind(&stack_ok);
// position registers.
__ str(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
- // Determine whether the start index is zero, that is at the start of the
- // string, and store that value in a local variable.
- __ cmp(r1, Operand(0));
- __ mov(r1, Operand(1), LeaveCC, eq);
- __ mov(r1, Operand(0, RelocInfo::NONE), LeaveCC, ne);
- __ str(r1, MemOperand(frame_pointer(), kAtStart));
+ // Initialize code pointer register
+ __ mov(code_pointer(), Operand(masm_->CodeObject()));
+
+ Label load_char_start_regexp, start_regexp;
+ // Load newline if index is at start, previous character otherwise.
+ __ cmp(r1, Operand(0, RelocInfo::NONE));
+ __ b(ne, &load_char_start_regexp);
+ __ mov(current_character(), Operand('\n'), LeaveCC, eq);
+ __ jmp(&start_regexp);
+
+ // Global regexp restarts matching here.
+ __ bind(&load_char_start_regexp);
+ // Load previous char as initial value of current character register.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&start_regexp);
+ // Initialize on-stack registers.
if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
// Fill saved registers with initial value = start offset - 1
-
- // Address of register 0.
- __ add(r1, frame_pointer(), Operand(kRegisterZero));
- __ mov(r2, Operand(num_saved_registers_));
- Label init_loop;
- __ bind(&init_loop);
- __ str(r0, MemOperand(r1, kPointerSize, NegPostIndex));
- __ sub(r2, r2, Operand(1), SetCC);
- __ b(ne, &init_loop);
+ if (num_saved_registers_ > 8) {
+ // Address of register 0.
+ __ add(r1, frame_pointer(), Operand(kRegisterZero));
+ __ mov(r2, Operand(num_saved_registers_));
+ Label init_loop;
+ __ bind(&init_loop);
+ __ str(r0, MemOperand(r1, kPointerSize, NegPostIndex));
+ __ sub(r2, r2, Operand(1), SetCC);
+ __ b(ne, &init_loop);
+ } else {
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ str(r0, register_location(i));
+ }
+ }
}
// Initialize backtrack stack pointer.
__ ldr(backtrack_stackpointer(), MemOperand(frame_pointer(), kStackHighEnd));
- // Initialize code pointer register
- __ mov(code_pointer(), Operand(masm_->CodeObject()));
- // Load previous char as initial value of current character register.
- Label at_start;
- __ ldr(r0, MemOperand(frame_pointer(), kAtStart));
- __ cmp(r0, Operand(0, RelocInfo::NONE));
- __ b(ne, &at_start);
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
- __ jmp(&start_label_);
- __ bind(&at_start);
- __ mov(current_character(), Operand('\n'));
- __ jmp(&start_label_);
+ __ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
for (int i = 0; i < num_saved_registers_; i += 2) {
__ ldr(r2, register_location(i));
__ ldr(r3, register_location(i + 1));
+ if (global()) {
+ // Keep capture start in r4 for the zero-length check later.
+ __ mov(r4, r2);
+ }
if (mode_ == UC16) {
__ add(r2, r1, Operand(r2, ASR, 1));
__ add(r3, r1, Operand(r3, ASR, 1));
__ str(r3, MemOperand(r0, kPointerSize, PostIndex));
}
}
- __ mov(r0, Operand(SUCCESS));
+
+ if (global()) {
+ // Restart matching if the regular expression is flagged as global.
+ __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ __ ldr(r1, MemOperand(frame_pointer(), kNumOutputRegisters));
+ __ ldr(r2, MemOperand(frame_pointer(), kRegisterOutput));
+ // Increment success counter.
+ __ add(r0, r0, Operand(1));
+ __ str(r0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ // Capture results have been stored, so the number of remaining global
+ // output registers is reduced by the number of stored captures.
+ __ sub(r1, r1, Operand(num_saved_registers_));
+ // Check whether we have enough room for another set of capture results.
+ __ cmp(r1, Operand(num_saved_registers_));
+ __ b(lt, &return_r0);
+
+ __ str(r1, MemOperand(frame_pointer(), kNumOutputRegisters));
+ // Advance the location for output.
+ __ add(r2, r2, Operand(num_saved_registers_ * kPointerSize));
+ __ str(r2, MemOperand(frame_pointer(), kRegisterOutput));
+
+ // Prepare r0 to initialize registers with its value in the next run.
+ __ ldr(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
+ // Special case for zero-length matches.
+ // r4: capture start index
+ __ cmp(current_input_offset(), r4);
+ // Not a zero-length match, restart.
+ __ b(ne, &load_char_start_regexp);
+ // Offset from the end is zero if we already reached the end.
+ __ cmp(current_input_offset(), Operand(0));
+ __ b(eq, &exit_label_);
+ // Advance current position after a zero-length match.
+ __ add(current_input_offset(),
+ current_input_offset(),
+ Operand((mode_ == UC16) ? 2 : 1));
+ __ b(&load_char_start_regexp);
+ } else {
+ __ mov(r0, Operand(SUCCESS));
+ }
}
+
// Exit and return r0
__ bind(&exit_label_);
+ if (global()) {
+ __ ldr(r0, MemOperand(frame_pointer(), kSuccessfulCaptures));
+ }
+
+ __ bind(&return_r0);
// Skip sp past regexp registers and local variables..
__ mov(sp, frame_pointer());
// Restore registers r4..r11 and return (restoring lr to pc).
__ cmp(r0, Operand(0, RelocInfo::NONE));
// If returning non-zero, we should end execution with the given
// result as return value.
- __ b(ne, &exit_label_);
+ __ b(ne, &return_r0);
// String might have moved: Reload end of string from frame.
__ ldr(end_of_input_address(), MemOperand(frame_pointer(), kInputEnd));
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ mov(r0, Operand(EXCEPTION));
- __ jmp(&exit_label_);
+ __ jmp(&return_r0);
}
CodeDesc code_desc;
}
-void RegExpMacroAssemblerARM::Succeed() {
+bool RegExpMacroAssemblerARM::Succeed() {
__ jmp(&success_label_);
+ return global();
}
int characters) {
Register offset = current_input_offset();
if (cp_offset != 0) {
- __ add(r0, current_input_offset(), Operand(cp_offset * char_size()));
- offset = r0;
+ // r4 is not being used to store the capture start index at this point.
+ __ add(r4, current_input_offset(), Operand(cp_offset * char_size()));
+ offset = r4;
}
// The ldr, str, ldrh, strh instructions can do unaligned accesses, if the CPU
// and the operating system running on the target allow it.
-// Copyright 2006-2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
static const int kSecondaryReturnAddress = kReturnAddress + kPointerSize;
// Stack parameters placed by caller.
static const int kRegisterOutput = kSecondaryReturnAddress + kPointerSize;
- static const int kStackHighEnd = kRegisterOutput + kPointerSize;
+ static const int kNumOutputRegisters = kRegisterOutput + kPointerSize;
+ static const int kStackHighEnd = kNumOutputRegisters + kPointerSize;
static const int kDirectCall = kStackHighEnd + kPointerSize;
static const int kIsolate = kDirectCall + kPointerSize;
static const int kInputString = kStartIndex - kPointerSize;
// When adding local variables remember to push space for them in
// the frame in GetCode.
- static const int kInputStartMinusOne = kInputString - kPointerSize;
- static const int kAtStart = kInputStartMinusOne - kPointerSize;
+ static const int kSuccessfulCaptures = kInputString - kPointerSize;
+ static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
- static const int kRegisterZero = kAtStart - kPointerSize;
+ static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
// Initial size of code buffer.
static const size_t kRegExpCodeSize = 1024;
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
(entry(p0, p1, p2, p3, p4))
typedef int (*arm_regexp_matcher)(String*, int, const byte*, const byte*,
- void*, int*, Address, int, Isolate*);
+ void*, int*, int, Address, int, Isolate*);
// Call the generated regexp code directly. The code at the entry address
// should act as a function matching the type arm_regexp_matcher.
// The fifth argument is a dummy that reserves the space used for
// the return address added by the ExitFrame in native calls.
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
(FUNCTION_CAST<arm_regexp_matcher>(entry)( \
- p0, p1, p2, p3, NULL, p4, p5, p6, p7))
+ p0, p1, p2, p3, NULL, p4, p5, p6, p7, p8))
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
reinterpret_cast<TryCatch*>(try_catch_address)
reinterpret_cast<Object*>(Simulator::current(Isolate::Current())->Call( \
FUNCTION_ADDR(entry), 5, p0, p1, p2, p3, p4))
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
Simulator::current(Isolate::Current())->Call( \
- entry, 9, p0, p1, p2, p3, NULL, p4, p5, p6, p7)
+ entry, 10, p0, p1, p2, p3, NULL, p4, p5, p6, p7, p8)
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
try_catch_address == NULL ? \
__ IncrementCounter(counters->regexp_entry_native(), 1);
// Isolates: note we add an additional parameter here (isolate pointer).
- static const int kRegExpExecuteArguments = 8;
+ static const int kRegExpExecuteArguments = 9;
__ EnterApiExitFrame(kRegExpExecuteArguments);
- // Argument 8: Pass current isolate address.
- __ mov(Operand(esp, 7 * kPointerSize),
+ // Argument 9: Pass current isolate address.
+ __ mov(Operand(esp, 8 * kPointerSize),
Immediate(ExternalReference::isolate_address()));
- // Argument 7: Indicate that this is a direct call from JavaScript.
- __ mov(Operand(esp, 6 * kPointerSize), Immediate(1));
+ // Argument 8: Indicate that this is a direct call from JavaScript.
+ __ mov(Operand(esp, 7 * kPointerSize), Immediate(1));
- // Argument 6: Start (high end) of backtracking stack memory area.
+ // Argument 7: Start (high end) of backtracking stack memory area.
__ mov(esi, Operand::StaticVariable(address_of_regexp_stack_memory_address));
__ add(esi, Operand::StaticVariable(address_of_regexp_stack_memory_size));
- __ mov(Operand(esp, 5 * kPointerSize), esi);
+ __ mov(Operand(esp, 6 * kPointerSize), esi);
+
+ // Argument 6: Set the number of capture registers to zero to force global
+ // regexps to behave as non-global. This does not affect non-global regexps.
+ __ mov(Operand(esp, 5 * kPointerSize), Immediate(0));
// Argument 5: static offsets vector buffer.
__ mov(Operand(esp, 4 * kPointerSize),
// Check the result.
Label success;
- __ cmp(eax, NativeRegExpMacroAssembler::SUCCESS);
+ __ cmp(eax, 1);
+ // We expect exactly one result since we force the called regexp to behave
+ // as non-global.
__ j(equal, &success);
Label failure;
__ cmp(eax, NativeRegExpMacroAssembler::FAILURE);
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
#ifndef V8_INTERPRETED_REGEXP
/*
* This assembler uses the following register assignment convention
- * - edx : current character. Must be loaded using LoadCurrentCharacter
- * before using any of the dispatch methods.
- * - edi : current position in input, as negative offset from end of string.
+ * - edx : Current character. Must be loaded using LoadCurrentCharacter
+ * before using any of the dispatch methods. Temporarily stores the
+ * index of capture start after a matching pass for a global regexp.
+ * - edi : Current position in input, as negative offset from end of string.
* Please notice that this is the byte offset, not the character offset!
* - esi : end of input (points to byte after last character in input).
- * - ebp : frame pointer. Used to access arguments, local variables and
+ * - ebp : Frame pointer. Used to access arguments, local variables and
* RegExp registers.
- * - esp : points to tip of C stack.
- * - ecx : points to tip of backtrack stack
+ * - esp : Points to tip of C stack.
+ * - ecx : Points to tip of backtrack stack
*
* The registers eax and ebx are free to use for computations.
*
* Each call to a public method should retain this convention.
* The stack will have the following structure:
- * - Isolate* isolate (Address of the current isolate)
+ * - Isolate* isolate (address of the current isolate)
* - direct_call (if 1, direct call from JavaScript code, if 0
* call through the runtime system)
- * - stack_area_base (High end of the memory area to use as
+ * - stack_area_base (high end of the memory area to use as
* backtracking stack)
+ * - capture array size (may fit multiple sets of matches)
* - int* capture_array (int[num_saved_registers_], for output).
- * - end of input (Address of end of string)
- * - start of input (Address of first character in string)
+ * - end of input (address of end of string)
+ * - start of input (address of first character in string)
* - start index (character index of start)
* - String* input_string (location of a handle containing the string)
* --- frame alignment (if applicable) ---
* - backup of caller esi
* - backup of caller edi
* - backup of caller ebx
+ * - success counter (only for global regexps to count matches).
* - Offset of location before start of input (effectively character
* position -1). Used to initialize capture registers to a non-position.
- * - register 0 ebp[-4] (Only positions must be stored in the first
+ * - register 0 ebp[-4] (only positions must be stored in the first
* - register 1 ebp[-8] num_saved_registers_ registers)
* - ...
*
void RegExpMacroAssemblerIA32::Fail() {
- ASSERT(FAILURE == 0); // Return value for failure is zero.
- __ Set(eax, Immediate(0));
+ STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
+ if (!global()) {
+ __ Set(eax, Immediate(FAILURE));
+ }
__ jmp(&exit_label_);
}
Handle<HeapObject> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
+ Label return_eax;
// Finalize code - write the entry point code now we know how many
// registers we need.
__ push(esi);
__ push(edi);
__ push(ebx); // Callee-save on MacOS.
+ __ push(Immediate(0)); // Number of successful matches in a global regexp.
__ push(Immediate(0)); // Make room for "input start - 1" constant.
// Check if we have space on the stack for registers.
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ mov(eax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_eax);
__ bind(&stack_limit_hit);
CallCheckStackGuardState(ebx);
__ or_(eax, eax);
// If returned value is non-zero, we exit with the returned value as result.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_eax);
__ bind(&stack_ok);
// Load start index for later use.
// position registers.
__ mov(Operand(ebp, kInputStartMinusOne), eax);
+ Label load_char_start_regexp, start_regexp;
+ // Load newline if index is at start, previous character otherwise.
+ __ cmp(Operand(ebp, kStartIndex), Immediate(0));
+ __ j(not_equal, &load_char_start_regexp, Label::kNear);
+ __ mov(current_character(), '\n');
+ __ jmp(&start_regexp, Label::kNear);
+
+ // Global regexp restarts matching here.
+ __ bind(&load_char_start_regexp);
+ // Load previous char as initial value of current character register.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&start_regexp);
+
+ // Initialize on-stack registers.
if (num_saved_registers_ > 0) { // Always is, if generated from a regexp.
// Fill saved registers with initial value = start offset - 1
// Fill in stack push order, to avoid accessing across an unwritten
// page (a problem on Windows).
- __ mov(ecx, kRegisterZero);
- Label init_loop;
- __ bind(&init_loop);
- __ mov(Operand(ebp, ecx, times_1, +0), eax);
- __ sub(ecx, Immediate(kPointerSize));
- __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize);
- __ j(greater, &init_loop);
- }
- // Ensure that we have written to each stack page, in order. Skipping a page
- // on Windows can cause segmentation faults. Assuming page size is 4k.
- const int kPageSize = 4096;
- const int kRegistersPerPage = kPageSize / kPointerSize;
- for (int i = num_saved_registers_ + kRegistersPerPage - 1;
- i < num_registers_;
- i += kRegistersPerPage) {
- __ mov(register_location(i), eax); // One write every page.
+ if (num_saved_registers_ > 8) {
+ __ mov(ecx, kRegisterZero);
+ Label init_loop;
+ __ bind(&init_loop);
+ __ mov(Operand(ebp, ecx, times_1, 0), eax);
+ __ sub(ecx, Immediate(kPointerSize));
+ __ cmp(ecx, kRegisterZero - num_saved_registers_ * kPointerSize);
+ __ j(greater, &init_loop);
+ } else { // Unroll the loop.
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ mov(register_location(i), eax);
+ }
+ }
}
-
// Initialize backtrack stack pointer.
__ mov(backtrack_stackpointer(), Operand(ebp, kStackHighEnd));
- // Load previous char as initial value of current-character.
- Label at_start;
- __ cmp(Operand(ebp, kStartIndex), Immediate(0));
- __ j(equal, &at_start);
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
- __ jmp(&start_label_);
- __ bind(&at_start);
- __ mov(current_character(), '\n');
- __ jmp(&start_label_);
+ __ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
}
for (int i = 0; i < num_saved_registers_; i++) {
__ mov(eax, register_location(i));
+ if (i == 0 && global()) {
+ // Keep capture start in edx for the zero-length check later.
+ __ mov(edx, eax);
+ }
// Convert to index from start of string, not end.
__ add(eax, ecx);
if (mode_ == UC16) {
__ mov(Operand(ebx, i * kPointerSize), eax);
}
}
- __ mov(eax, Immediate(SUCCESS));
+
+ if (global()) {
+ // Restart matching if the regular expression is flagged as global.
+ // Increment success counter.
+ __ inc(Operand(ebp, kSuccessfulCaptures));
+ // Capture results have been stored, so the number of remaining global
+ // output registers is reduced by the number of stored captures.
+ __ mov(ecx, Operand(ebp, kNumOutputRegisters));
+ __ sub(ecx, Immediate(num_saved_registers_));
+ // Check whether we have enough room for another set of capture results.
+ __ cmp(ecx, Immediate(num_saved_registers_));
+ __ j(less, &exit_label_);
+
+ __ mov(Operand(ebp, kNumOutputRegisters), ecx);
+ // Advance the location for output.
+ __ add(Operand(ebp, kRegisterOutput),
+ Immediate(num_saved_registers_ * kPointerSize));
+
+ // Prepare eax to initialize registers with its value in the next run.
+ __ mov(eax, Operand(ebp, kInputStartMinusOne));
+
+ // Special case for zero-length matches.
+ // edx: capture start index
+ __ cmp(edi, edx);
+ // Not a zero-length match, restart.
+ __ j(not_equal, &load_char_start_regexp);
+ // edi (offset from the end) is zero if we already reached the end.
+ __ test(edi, edi);
+ __ j(zero, &exit_label_, Label::kNear);
+ // Advance current position after a zero-length match.
+ if (mode_ == UC16) {
+ __ add(edi, Immediate(2));
+ } else {
+ __ inc(edi);
+ }
+ __ jmp(&load_char_start_regexp);
+ } else {
+ __ mov(eax, Immediate(SUCCESS));
+ }
}
- // Exit and return eax
+
__ bind(&exit_label_);
+ if (global()) {
+ // Return the number of successful captures.
+ __ mov(eax, Operand(ebp, kSuccessfulCaptures));
+ }
+
+ __ bind(&return_eax);
// Skip esp past regexp registers.
__ lea(esp, Operand(ebp, kBackup_ebx));
// Restore callee-save registers.
__ or_(eax, eax);
// If returning non-zero, we should end execution with the given
// result as return value.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_eax);
__ pop(edi);
__ pop(backtrack_stackpointer());
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ mov(eax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_eax);
}
CodeDesc code_desc;
}
-void RegExpMacroAssemblerIA32::Succeed() {
+bool RegExpMacroAssemblerIA32::Succeed() {
__ jmp(&success_label_);
+ return global();
}
-// Copyright 2008-2009 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
static const int kInputStart = kStartIndex + kPointerSize;
static const int kInputEnd = kInputStart + kPointerSize;
static const int kRegisterOutput = kInputEnd + kPointerSize;
- static const int kStackHighEnd = kRegisterOutput + kPointerSize;
+ // For the case of global regular expression, we have room to store at least
+ // one set of capture results. For the case of non-global regexp, we ignore
+ // this value.
+ static const int kNumOutputRegisters = kRegisterOutput + kPointerSize;
+ static const int kStackHighEnd = kNumOutputRegisters + kPointerSize;
static const int kDirectCall = kStackHighEnd + kPointerSize;
static const int kIsolate = kDirectCall + kPointerSize;
// Below the frame pointer - local stack variables.
static const int kBackup_esi = kFramePointer - kPointerSize;
static const int kBackup_edi = kBackup_esi - kPointerSize;
static const int kBackup_ebx = kBackup_edi - kPointerSize;
- static const int kInputStartMinusOne = kBackup_ebx - kPointerSize;
+ static const int kSuccessfulCaptures = kBackup_ebx - kPointerSize;
+ static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
typedef int (*regexp_matcher)(String*, int, const byte*,
- const byte*, int*, Address, int, Isolate*);
+ const byte*, int*, int, Address, int, Isolate*);
// Call the generated regexp code directly. The code at the entry address should
// expect eight int/pointer sized arguments and return an int.
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
- (FUNCTION_CAST<regexp_matcher>(entry)(p0, p1, p2, p3, p4, p5, p6, p7))
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
+ (FUNCTION_CAST<regexp_matcher>(entry)(p0, p1, p2, p3, p4, p5, p6, p7, p8))
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
// SerializerDeserializer state.
static const int kPartialSnapshotCacheCapacity = 1400;
- static const int kJSRegexpStaticOffsetsVectorSize = 50;
+ static const int kJSRegexpStaticOffsetsVectorSize = 128;
Address external_callback() {
return thread_local_top_.external_callback_;
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
RegExpEngine::CompilationResult result =
RegExpEngine::Compile(&compile_data,
flags.is_ignore_case(),
+ flags.is_global(),
flags.is_multiline(),
pattern,
sample_subject,
}
-RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(
+int RegExpImpl::GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
+ int registers_per_match,
+ int* max_matches) {
+#ifdef V8_INTERPRETED_REGEXP
+ // Global loop in interpreted regexp is not implemented. Therefore we choose
+ // the size of the offsets vector so that it can only store one match.
+ *max_matches = 1;
+ return registers_per_match;
+#else // V8_INTERPRETED_REGEXP
+ int size = Max(registers_per_match, OffsetsVector::kStaticOffsetsVectorSize);
+ *max_matches = size / registers_per_match;
+ return size;
+#endif // V8_INTERPRETED_REGEXP
+}
+
+
+int RegExpImpl::IrregexpExecRaw(
Handle<JSRegExp> regexp,
Handle<String> subject,
int index,
OffsetsVector registers(required_registers, isolate);
- IrregexpResult res = RegExpImpl::IrregexpExecOnce(
+ int res = RegExpImpl::IrregexpExecRaw(
jsregexp, subject, previous_index, Vector<int>(registers.vector(),
registers.length()));
if (res == RE_SUCCESS) {
RegExpEngine::CompilationResult RegExpEngine::Compile(
RegExpCompileData* data,
bool ignore_case,
+ bool is_global,
bool is_multiline,
Handle<String> pattern,
Handle<String> sample_subject,
macro_assembler.SetCurrentPositionFromEnd(max_length);
}
+ macro_assembler.set_global(is_global);
+
return compiler.Assemble(¯o_assembler,
node,
data->capture_count,
static int IrregexpPrepare(Handle<JSRegExp> regexp,
Handle<String> subject);
- // Execute a regular expression once on the subject, starting from
- // character "index".
- // If successful, returns RE_SUCCESS and set the capture positions
- // in the first registers.
+ // Calculate the size of offsets vector for the case of global regexp
+ // and the number of matches this vector is able to store.
+ static int GlobalOffsetsVectorSize(Handle<JSRegExp> regexp,
+ int registers_per_match,
+ int* max_matches);
+
+ // Execute a regular expression on the subject, starting from index.
+ // If matching succeeds, return the number of matches. This can be larger
+ // than one in the case of global regular expressions.
+ // The captures and subcaptures are stored into the registers vector.
// If matching fails, returns RE_FAILURE.
// If execution fails, sets a pending exception and returns RE_EXCEPTION.
- static IrregexpResult IrregexpExecOnce(Handle<JSRegExp> regexp,
- Handle<String> subject,
- int index,
- Vector<int> registers);
+ static int IrregexpExecRaw(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ int index,
+ Vector<int> registers);
// Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
static CompilationResult Compile(RegExpCompileData* input,
bool ignore_case,
+ bool global,
bool multiline,
Handle<String> pattern,
Handle<String> sample_subject,
inline int* vector() { return vector_; }
inline int length() { return offsets_vector_length_; }
- static const int kStaticOffsetsVectorSize = 50;
+ static const int kStaticOffsetsVectorSize =
+ Isolate::kJSRegexpStaticOffsetsVectorSize;
private:
static Address static_offsets_vector_address(Isolate* isolate) {
}
-void RegExpMacroAssemblerIrregexp::Succeed() {
+bool RegExpMacroAssemblerIrregexp::Succeed() {
Emit(BC_SUCCEED, 0);
+ return false; // Restart matching for global regexp not supported.
}
-// Copyright 2008-2009 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
virtual void Backtrack();
virtual void GoTo(Label* label);
virtual void PushBacktrack(Label* label);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void Fail();
virtual void PopRegister(int register_index);
virtual void PushRegister(int register_index,
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
}
-void RegExpMacroAssemblerTracer::Succeed() {
- PrintF(" Succeed();\n");
- assembler_->Succeed();
+bool RegExpMacroAssemblerTracer::Succeed() {
+ bool restart = assembler_->Succeed();
+ PrintF(" Succeed();%s\n", restart ? " [restart for global match]" : "");
+ return restart;
}
void RegExpMacroAssemblerTracer::Fail() {
- PrintF(" Fail();\n");
+ PrintF(" Fail();");
assembler_->Fail();
}
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
namespace v8 {
namespace internal {
-RegExpMacroAssembler::RegExpMacroAssembler() : slow_safe_compiler_(false) {
+RegExpMacroAssembler::RegExpMacroAssembler()
+ : slow_safe_compiler_(false),
+ global_(false) {
}
input_start,
input_end,
offsets_vector,
+ offsets_vector_length,
isolate);
return res;
}
const byte* input_start,
const byte* input_end,
int* output,
+ int output_size,
Isolate* isolate) {
ASSERT(isolate == Isolate::Current());
// Ensure that the minimum stack has been allocated.
input_start,
input_end,
output,
+ output_size,
stack_base,
direct_call,
isolate);
- ASSERT(result <= SUCCESS);
ASSERT(result >= RETRY);
if (result == EXCEPTION && !isolate->has_pending_exception()) {
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
virtual void ReadStackPointerFromRegister(int reg) = 0;
virtual void SetCurrentPositionFromEnd(int by) = 0;
virtual void SetRegister(int register_index, int to) = 0;
- virtual void Succeed() = 0;
+ // Return whether the matching (with a global regexp) will be restarted.
+ virtual bool Succeed() = 0;
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset) = 0;
virtual void ClearRegisters(int reg_from, int reg_to) = 0;
virtual void WriteStackPointerToRegister(int reg) = 0;
void set_slow_safe(bool ssc) { slow_safe_compiler_ = ssc; }
bool slow_safe() { return slow_safe_compiler_; }
+ // Set whether the regular expression has the global flag. Exiting due to
+ // a failure in a global regexp may still mean success overall.
+ void set_global(bool global) { global_ = global; }
+ bool global() { return global_; }
+
private:
bool slow_safe_compiler_;
+ bool global_;
};
const byte* input_start,
const byte* input_end,
int* output,
+ int output_size,
Isolate* isolate);
};
}
-static RegExpImpl::IrregexpResult SearchRegExpNoCaptureMultiple(
+static int SearchRegExpNoCaptureMultiple(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> regexp,
Handle<JSArray> last_match_array,
FixedArrayBuilder* builder) {
ASSERT(subject->IsFlat());
+ ASSERT(regexp->CaptureCount() == 0);
int match_start = -1;
int match_end = 0;
int pos = 0;
- int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
- if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
-
- OffsetsVector registers(required_registers, isolate);
+ int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
+ if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
+
+ int max_matches;
+ int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
+ registers_per_match,
+ &max_matches);
+ OffsetsVector registers(num_registers, isolate);
Vector<int32_t> register_vector(registers.vector(), registers.length());
int subject_length = subject->length();
bool first = true;
-
for (;;) { // Break on failure, return on exception.
- RegExpImpl::IrregexpResult result =
- RegExpImpl::IrregexpExecOnce(regexp,
- subject,
- pos,
- register_vector);
- if (result == RegExpImpl::RE_SUCCESS) {
- match_start = register_vector[0];
- builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
- if (match_end < match_start) {
- ReplacementStringBuilder::AddSubjectSlice(builder,
- match_end,
- match_start);
- }
- match_end = register_vector[1];
- HandleScope loop_scope(isolate);
- if (!first) {
- builder->Add(*isolate->factory()->NewProperSubString(subject,
- match_start,
- match_end));
- } else {
- builder->Add(*isolate->factory()->NewSubString(subject,
- match_start,
- match_end));
+ int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
+ subject,
+ pos,
+ register_vector);
+ if (num_matches > 0) {
+ for (int match_index = 0; match_index < num_matches; match_index++) {
+ int32_t* current_match = ®ister_vector[match_index * 2];
+ match_start = current_match[0];
+ builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+ if (match_end < match_start) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ match_start);
+ }
+ match_end = current_match[1];
+ HandleScope loop_scope(isolate);
+ if (!first) {
+ builder->Add(*isolate->factory()->NewProperSubString(subject,
+ match_start,
+ match_end));
+ } else {
+ builder->Add(*isolate->factory()->NewSubString(subject,
+ match_start,
+ match_end));
+ first = false;
+ }
}
+
+ // If we did not get the maximum number of matches, we can stop here
+ // since there are no matches left.
+ if (num_matches < max_matches) break;
+
if (match_start != match_end) {
pos = match_end;
} else {
pos = match_end + 1;
if (pos > subject_length) break;
}
- } else if (result == RegExpImpl::RE_FAILURE) {
+ } else if (num_matches == 0) {
break;
} else {
- ASSERT_EQ(result, RegExpImpl::RE_EXCEPTION);
- return result;
+ ASSERT_EQ(num_matches, RegExpImpl::RE_EXCEPTION);
+ return RegExpImpl::RE_EXCEPTION;
}
- first = false;
}
if (match_start >= 0) {
// Only called from Runtime_RegExpExecMultiple so it doesn't need to maintain
// separate last match info. See comment on that function.
-static RegExpImpl::IrregexpResult SearchRegExpMultiple(
+static int SearchRegExpMultiple(
Isolate* isolate,
Handle<String> subject,
Handle<JSRegExp> regexp,
FixedArrayBuilder* builder) {
ASSERT(subject->IsFlat());
- int required_registers = RegExpImpl::IrregexpPrepare(regexp, subject);
- if (required_registers < 0) return RegExpImpl::RE_EXCEPTION;
-
- OffsetsVector registers(required_registers, isolate);
+ int registers_per_match = RegExpImpl::IrregexpPrepare(regexp, subject);
+ if (registers_per_match < 0) return RegExpImpl::RE_EXCEPTION;
+
+ int max_matches;
+ int num_registers = RegExpImpl::GlobalOffsetsVectorSize(regexp,
+ registers_per_match,
+ &max_matches);
+ OffsetsVector registers(num_registers, isolate);
Vector<int32_t> register_vector(registers.vector(), registers.length());
- RegExpImpl::IrregexpResult result =
- RegExpImpl::IrregexpExecOnce(regexp,
- subject,
- 0,
- register_vector);
+ int num_matches = RegExpImpl::IrregexpExecRaw(regexp,
+ subject,
+ 0,
+ register_vector);
int capture_count = regexp->CaptureCount();
int subject_length = subject->length();
int pos = 0;
// End of previous match. Differs from pos if match was empty.
int match_end = 0;
- if (result == RegExpImpl::RE_SUCCESS) {
- bool first = true;
+ bool first = true;
+
+ if (num_matches > 0) {
do {
- int match_start = register_vector[0];
- builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
- if (match_end < match_start) {
- ReplacementStringBuilder::AddSubjectSlice(builder,
- match_end,
- match_start);
- }
- match_end = register_vector[1];
-
- {
- // Avoid accumulating new handles inside loop.
- HandleScope temp_scope(isolate);
- // Arguments array to replace function is match, captures, index and
- // subject, i.e., 3 + capture count in total.
- Handle<FixedArray> elements =
- isolate->factory()->NewFixedArray(3 + capture_count);
- Handle<String> match;
- if (!first) {
- match = isolate->factory()->NewProperSubString(subject,
- match_start,
- match_end);
- } else {
- match = isolate->factory()->NewSubString(subject,
- match_start,
- match_end);
+ int match_start = 0;
+ for (int match_index = 0; match_index < num_matches; match_index++) {
+ int32_t* current_match =
+ ®ister_vector[match_index * registers_per_match];
+ match_start = current_match[0];
+ builder->EnsureCapacity(kMaxBuilderEntriesPerRegExpMatch);
+ if (match_end < match_start) {
+ ReplacementStringBuilder::AddSubjectSlice(builder,
+ match_end,
+ match_start);
}
- elements->set(0, *match);
- for (int i = 1; i <= capture_count; i++) {
- int start = register_vector[i * 2];
- if (start >= 0) {
- int end = register_vector[i * 2 + 1];
- ASSERT(start <= end);
- Handle<String> substring;
- if (!first) {
- substring = isolate->factory()->NewProperSubString(subject,
- start,
- end);
+ match_end = current_match[1];
+
+ {
+ // Avoid accumulating new handles inside loop.
+ HandleScope temp_scope(isolate);
+ // Arguments array to replace function is match, captures, index and
+ // subject, i.e., 3 + capture count in total.
+ Handle<FixedArray> elements =
+ isolate->factory()->NewFixedArray(3 + capture_count);
+ Handle<String> match;
+ if (!first) {
+ match = isolate->factory()->NewProperSubString(subject,
+ match_start,
+ match_end);
+ } else {
+ match = isolate->factory()->NewSubString(subject,
+ match_start,
+ match_end);
+ first = false;
+ }
+ elements->set(0, *match);
+ for (int i = 1; i <= capture_count; i++) {
+ int start = current_match[i * 2];
+ if (start >= 0) {
+ int end = current_match[i * 2 + 1];
+ ASSERT(start <= end);
+ Handle<String> substring =
+ isolate->factory()->NewProperSubString(subject, start, end);
+ elements->set(i, *substring);
} else {
- substring = isolate->factory()->NewSubString(subject, start, end);
+ ASSERT(current_match[i * 2 + 1] < 0);
+ elements->set(i, isolate->heap()->undefined_value());
}
- elements->set(i, *substring);
- } else {
- ASSERT(register_vector[i * 2 + 1] < 0);
- elements->set(i, isolate->heap()->undefined_value());
}
+ elements->set(capture_count + 1, Smi::FromInt(match_start));
+ elements->set(capture_count + 2, *subject);
+ builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
}
- elements->set(capture_count + 1, Smi::FromInt(match_start));
- elements->set(capture_count + 2, *subject);
- builder->Add(*isolate->factory()->NewJSArrayWithElements(elements));
}
+ // If we did not get the maximum number of matches, we can stop here
+ // since there are no matches left.
+ if (num_matches < max_matches) break;
+
if (match_end > match_start) {
pos = match_end;
} else {
}
}
- result = RegExpImpl::IrregexpExecOnce(regexp,
- subject,
- pos,
- register_vector);
- first = false;
- } while (result == RegExpImpl::RE_SUCCESS);
+ num_matches = RegExpImpl::IrregexpExecRaw(regexp,
+ subject,
+ pos,
+ register_vector);
+ } while (num_matches > 0);
- if (result != RegExpImpl::RE_EXCEPTION) {
+ if (num_matches != RegExpImpl::RE_EXCEPTION) {
// Finished matching, with at least one match.
if (match_end < subject_length) {
ReplacementStringBuilder::AddSubjectSlice(builder,
}
}
// No matches at all, return failure or exception result directly.
- return result;
+ return num_matches;
}
ASSERT_EQ(regexp->TypeTag(), JSRegExp::IRREGEXP);
- RegExpImpl::IrregexpResult result;
+ int result;
if (regexp->CaptureCount() == 0) {
result = SearchRegExpNoCaptureMultiple(isolate,
subject,
__ IncrementCounter(counters->regexp_entry_native(), 1);
// Isolates: note we add an additional parameter here (isolate pointer).
- static const int kRegExpExecuteArguments = 8;
+ static const int kRegExpExecuteArguments = 9;
int argument_slots_on_stack =
masm->ArgumentStackSlotsForCFunctionCall(kRegExpExecuteArguments);
__ EnterApiExitFrame(argument_slots_on_stack);
- // Argument 8: Pass current isolate address.
+ // Argument 9: Pass current isolate address.
// __ movq(Operand(rsp, (argument_slots_on_stack - 1) * kPointerSize),
// Immediate(ExternalReference::isolate_address()));
__ LoadAddress(kScratchRegister, ExternalReference::isolate_address());
__ movq(Operand(rsp, (argument_slots_on_stack - 1) * kPointerSize),
kScratchRegister);
- // Argument 7: Indicate that this is a direct call from JavaScript.
+ // Argument 8: Indicate that this is a direct call from JavaScript.
__ movq(Operand(rsp, (argument_slots_on_stack - 2) * kPointerSize),
Immediate(1));
- // Argument 6: Start (high end) of backtracking stack memory area.
+ // Argument 7: Start (high end) of backtracking stack memory area.
__ movq(kScratchRegister, address_of_regexp_stack_memory_address);
__ movq(r9, Operand(kScratchRegister, 0));
__ movq(kScratchRegister, address_of_regexp_stack_memory_size);
__ addq(r9, Operand(kScratchRegister, 0));
- // Argument 6 passed in r9 on Linux and on the stack on Windows.
-#ifdef _WIN64
__ movq(Operand(rsp, (argument_slots_on_stack - 3) * kPointerSize), r9);
+
+ // Argument 6: Set the number of capture registers to zero to force global
+ // regexps to behave as non-global. This does not affect non-global regexps.
+ // Argument 6 is passed in r9 on Linux and on the stack on Windows.
+#ifdef _WIN64
+ __ movq(Operand(rsp, (argument_slots_on_stack - 4) * kPointerSize),
+ Immediate(0));
+#else
+ __ Set(r9, 0);
#endif
// Argument 5: static offsets vector buffer.
ExternalReference::address_of_static_offsets_vector(isolate));
// Argument 5 passed in r8 on Linux and on the stack on Windows.
#ifdef _WIN64
- __ movq(Operand(rsp, (argument_slots_on_stack - 4) * kPointerSize), r8);
+ __ movq(Operand(rsp, (argument_slots_on_stack - 5) * kPointerSize), r8);
#endif
// First four arguments are passed in registers on both Linux and Windows.
// Check the result.
Label success;
Label exception;
- __ cmpl(rax, Immediate(NativeRegExpMacroAssembler::SUCCESS));
+ __ cmpl(rax, Immediate(1));
+ // We expect exactly one result since we force the called regexp to behave
+ // as non-global.
__ j(equal, &success, Label::kNear);
__ cmpl(rax, Immediate(NativeRegExpMacroAssembler::EXCEPTION));
__ j(equal, &exception);
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
/*
* This assembler uses the following register assignment convention
- * - rdx : currently loaded character(s) as ASCII or UC16. Must be loaded using
- * LoadCurrentCharacter before using any of the dispatch methods.
- * - rdi : current position in input, as negative offset from end of string.
+ * - rdx : Currently loaded character(s) as ASCII or UC16. Must be loaded
+ * using LoadCurrentCharacter before using any of the dispatch methods.
+ * Temporarily stores the index of capture start after a matching pass
+ * for a global regexp.
+ * - rdi : Current position in input, as negative offset from end of string.
* Please notice that this is the byte offset, not the character
- * offset! Is always a 32-bit signed (negative) offset, but must be
+ * offset! Is always a 32-bit signed (negative) offset, but must be
* maintained sign-extended to 64 bits, since it is used as index.
- * - rsi : end of input (points to byte after last character in input),
+ * - rsi : End of input (points to byte after last character in input),
* so that rsi+rdi points to the current character.
- * - rbp : frame pointer. Used to access arguments, local variables and
+ * - rbp : Frame pointer. Used to access arguments, local variables and
* RegExp registers.
- * - rsp : points to tip of C stack.
- * - rcx : points to tip of backtrack stack. The backtrack stack contains
- * only 32-bit values. Most are offsets from some base (e.g., character
+ * - rsp : Points to tip of C stack.
+ * - rcx : Points to tip of backtrack stack. The backtrack stack contains
+ * only 32-bit values. Most are offsets from some base (e.g., character
* positions from end of string or code location from Code* pointer).
- * - r8 : code object pointer. Used to convert between absolute and
+ * - r8 : Code object pointer. Used to convert between absolute and
* code-object-relative addresses.
*
* The registers rax, rbx, r9 and r11 are free to use for computations.
*
* The stack will have the following content, in some order, indexable from the
* frame pointer (see, e.g., kStackHighEnd):
- * - Isolate* isolate (Address of the current isolate)
+ * - Isolate* isolate (address of the current isolate)
* - direct_call (if 1, direct call from JavaScript code, if 0 call
* through the runtime system)
- * - stack_area_base (High end of the memory area to use as
+ * - stack_area_base (high end of the memory area to use as
* backtracking stack)
+ * - capture array size (may fit multiple sets of matches)
* - int* capture_array (int[num_saved_registers_], for output).
- * - end of input (Address of end of string)
- * - start of input (Address of first character in string)
+ * - end of input (address of end of string)
+ * - start of input (address of first character in string)
* - start index (character index of start)
* - String* input_string (input string)
* - return address
* - backup of callee save registers (rbx, possibly rsi and rdi).
+ * - success counter (only useful for global regexp to count matches)
* - Offset of location before start of input (effectively character
- * position -1). Used to initialize capture registers to a non-position.
+ * position -1). Used to initialize capture registers to a non-position.
* - At start of string (if 1, we are starting at the start of the
* string, otherwise 0)
* - register 0 rbp[-n] (Only positions must be stored in the first
*
* The first num_saved_registers_ registers are initialized to point to
* "character -1" in the string (i.e., char_size() bytes before the first
- * character of the string). The remaining registers starts out uninitialized.
+ * character of the string). The remaining registers starts out uninitialized.
*
* The first seven values must be provided by the calling code by
* calling the code's entry address cast to a function pointer with the
void RegExpMacroAssemblerX64::Fail() {
- ASSERT(FAILURE == 0); // Return value for failure is zero.
- __ Set(rax, 0);
+ STATIC_ASSERT(FAILURE == 0); // Return value for failure is zero.
+ if (!global()) {
+ __ Set(rax, FAILURE);
+ }
__ jmp(&exit_label_);
}
Handle<HeapObject> RegExpMacroAssemblerX64::GetCode(Handle<String> source) {
+ Label return_rax;
// Finalize code - write the entry point code now we know how many
// registers we need.
// Entry code:
ASSERT_EQ(kInputStart, -3 * kPointerSize);
ASSERT_EQ(kInputEnd, -4 * kPointerSize);
ASSERT_EQ(kRegisterOutput, -5 * kPointerSize);
- ASSERT_EQ(kStackHighEnd, -6 * kPointerSize);
+ ASSERT_EQ(kNumOutputRegisters, -6 * kPointerSize);
__ push(rdi);
__ push(rsi);
__ push(rdx);
__ push(rbx); // Callee-save
#endif
+ __ push(Immediate(0)); // Number of successful matches in a global regexp.
__ push(Immediate(0)); // Make room for "at start" constant.
// Check if we have space on the stack for registers.
// Exit with OutOfMemory exception. There is not enough space on the stack
// for our working registers.
__ Set(rax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_rax);
__ bind(&stack_limit_hit);
__ Move(code_object_pointer(), masm_.CodeObject());
CallCheckStackGuardState(); // Preserves no registers beside rbp and rsp.
__ testq(rax, rax);
// If returned value is non-zero, we exit with the returned value as result.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_rax);
__ bind(&stack_ok);
// position registers.
__ movq(Operand(rbp, kInputStartMinusOne), rax);
+ // Initialize code object pointer.
+ __ Move(code_object_pointer(), masm_.CodeObject());
+
+ Label load_char_start_regexp, start_regexp;
+ // Load newline if index is at start, previous character otherwise.
+ __ cmpb(Operand(rbp, kStartIndex), Immediate(0));
+ __ j(not_equal, &load_char_start_regexp, Label::kNear);
+ __ Set(current_character(), '\n');
+ __ jmp(&start_regexp, Label::kNear);
+
+ // Global regexp restarts matching here.
+ __ bind(&load_char_start_regexp);
+ // Load previous char as initial value of current character register.
+ LoadCurrentCharacterUnchecked(-1, 1);
+ __ bind(&start_regexp);
+
+ // Initialize on-stack registers.
if (num_saved_registers_ > 0) {
// Fill saved registers with initial value = start offset - 1
// Fill in stack push order, to avoid accessing across an unwritten
// page (a problem on Windows).
- __ Set(rcx, kRegisterZero);
- Label init_loop;
- __ bind(&init_loop);
- __ movq(Operand(rbp, rcx, times_1, 0), rax);
- __ subq(rcx, Immediate(kPointerSize));
- __ cmpq(rcx,
- Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
- __ j(greater, &init_loop);
- }
- // Ensure that we have written to each stack page, in order. Skipping a page
- // on Windows can cause segmentation faults. Assuming page size is 4k.
- const int kPageSize = 4096;
- const int kRegistersPerPage = kPageSize / kPointerSize;
- for (int i = num_saved_registers_ + kRegistersPerPage - 1;
- i < num_registers_;
- i += kRegistersPerPage) {
- __ movq(register_location(i), rax); // One write every page.
+ if (num_saved_registers_ > 8) {
+ __ Set(rcx, kRegisterZero);
+ Label init_loop;
+ __ bind(&init_loop);
+ __ movq(Operand(rbp, rcx, times_1, 0), rax);
+ __ subq(rcx, Immediate(kPointerSize));
+ __ cmpq(rcx,
+ Immediate(kRegisterZero - num_saved_registers_ * kPointerSize));
+ __ j(greater, &init_loop);
+ } else { // Unroll the loop.
+ for (int i = 0; i < num_saved_registers_; i++) {
+ __ movq(register_location(i), rax);
+ }
+ }
}
// Initialize backtrack stack pointer.
__ movq(backtrack_stackpointer(), Operand(rbp, kStackHighEnd));
- // Initialize code object pointer.
- __ Move(code_object_pointer(), masm_.CodeObject());
- // Load previous char as initial value of current-character.
- Label at_start;
- __ cmpb(Operand(rbp, kStartIndex), Immediate(0));
- __ j(equal, &at_start);
- LoadCurrentCharacterUnchecked(-1, 1); // Load previous char.
- __ jmp(&start_label_);
- __ bind(&at_start);
- __ Set(current_character(), '\n');
- __ jmp(&start_label_);
+ __ jmp(&start_label_);
// Exit code:
if (success_label_.is_linked()) {
}
for (int i = 0; i < num_saved_registers_; i++) {
__ movq(rax, register_location(i));
+ if (i == 0 && global()) {
+ // Keep capture start in rdx for the zero-length check later.
+ __ movq(rdx, rax);
+ }
__ addq(rax, rcx); // Convert to index from start, not end.
if (mode_ == UC16) {
__ sar(rax, Immediate(1)); // Convert byte index to character index.
__ movl(Operand(rbx, i * kIntSize), rax);
}
}
- __ Set(rax, SUCCESS);
+
+ if (global()) {
+ // Restart matching if the regular expression is flagged as global.
+ // Increment success counter.
+ __ incq(Operand(rbp, kSuccessfulCaptures));
+ // Capture results have been stored, so the number of remaining global
+ // output registers is reduced by the number of stored captures.
+ __ movq(rcx, Operand(rbp, kNumOutputRegisters));
+ __ subq(rcx, Immediate(num_saved_registers_));
+ // Check whether we have enough room for another set of capture results.
+ __ cmpq(rcx, Immediate(num_saved_registers_));
+ __ j(less, &exit_label_);
+
+ __ movq(Operand(rbp, kNumOutputRegisters), rcx);
+ // Advance the location for output.
+ __ addq(Operand(rbp, kRegisterOutput),
+ Immediate(num_saved_registers_ * kIntSize));
+
+ // Prepare rax to initialize registers with its value in the next run.
+ __ movq(rax, Operand(rbp, kInputStartMinusOne));
+
+ // Special case for zero-length matches.
+ // rdx: capture start index
+ __ cmpq(rdi, rdx);
+ // Not a zero-length match, restart.
+ __ j(not_equal, &load_char_start_regexp);
+ // rdi (offset from the end) is zero if we already reached the end.
+ __ testq(rdi, rdi);
+ __ j(zero, &exit_label_, Label::kNear);
+ // Advance current position after a zero-length match.
+ if (mode_ == UC16) {
+ __ addq(rdi, Immediate(2));
+ } else {
+ __ incq(rdi);
+ }
+ __ jmp(&load_char_start_regexp);
+ } else {
+ __ movq(rax, Immediate(SUCCESS));
+ }
}
- // Exit and return rax
__ bind(&exit_label_);
+ if (global()) {
+ // Return the number of successful captures.
+ __ movq(rax, Operand(rbp, kSuccessfulCaptures));
+ }
+ __ bind(&return_rax);
#ifdef _WIN64
// Restore callee save registers.
__ lea(rsp, Operand(rbp, kLastCalleeSaveRegister));
__ testq(rax, rax);
// If returning non-zero, we should end execution with the given
// result as return value.
- __ j(not_zero, &exit_label_);
+ __ j(not_zero, &return_rax);
// Restore registers.
__ Move(code_object_pointer(), masm_.CodeObject());
__ bind(&exit_with_exception);
// Exit with Result EXCEPTION(-1) to signal thrown exception.
__ Set(rax, EXCEPTION);
- __ jmp(&exit_label_);
+ __ jmp(&return_rax);
}
FixupCodeRelativePositions();
}
-void RegExpMacroAssemblerX64::Succeed() {
+bool RegExpMacroAssemblerX64::Succeed() {
__ jmp(&success_label_);
+ return global();
}
-// Copyright 2010 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
virtual void ReadStackPointerFromRegister(int reg);
virtual void SetCurrentPositionFromEnd(int by);
virtual void SetRegister(int register_index, int to);
- virtual void Succeed();
+ virtual bool Succeed();
virtual void WriteCurrentPositionToRegister(int reg, int cp_offset);
virtual void ClearRegisters(int reg_from, int reg_to);
virtual void WriteStackPointerToRegister(int reg);
static const int kInputStart = kStartIndex + kPointerSize;
static const int kInputEnd = kInputStart + kPointerSize;
static const int kRegisterOutput = kInputEnd + kPointerSize;
- static const int kStackHighEnd = kRegisterOutput + kPointerSize;
+ // For the case of global regular expression, we have room to store at least
+ // one set of capture results. For the case of non-global regexp, we ignore
+ // this value.
+ static const int kNumOutputRegisters = kRegisterOutput + kPointerSize;
+ static const int kStackHighEnd = kNumOutputRegisters + kPointerSize;
// DirectCall is passed as 32 bit int (values 0 or 1).
static const int kDirectCall = kStackHighEnd + kPointerSize;
static const int kIsolate = kDirectCall + kPointerSize;
static const int kInputStart = kStartIndex - kPointerSize;
static const int kInputEnd = kInputStart - kPointerSize;
static const int kRegisterOutput = kInputEnd - kPointerSize;
- static const int kStackHighEnd = kRegisterOutput - kPointerSize;
- static const int kDirectCall = kFrameAlign;
+ // For the case of global regular expression, we have room to store at least
+ // one set of capture results. For the case of non-global regexp, we ignore
+ // this value.
+ static const int kNumOutputRegisters = kRegisterOutput - kPointerSize;
+ static const int kStackHighEnd = kFrameAlign;
+ static const int kDirectCall = kStackHighEnd + kPointerSize;
static const int kIsolate = kDirectCall + kPointerSize;
#endif
// AMD64 Calling Convention has only one callee-save register that
// we use. We push this after the frame pointer (and after the
// parameters).
- static const int kBackup_rbx = kStackHighEnd - kPointerSize;
+ static const int kBackup_rbx = kNumOutputRegisters - kPointerSize;
static const int kLastCalleeSaveRegister = kBackup_rbx;
#endif
+ static const int kSuccessfulCaptures = kBackup_rbx - kPointerSize;
// When adding local variables remember to push space for them in
// the frame in GetCode.
- static const int kInputStartMinusOne =
- kLastCalleeSaveRegister - kPointerSize;
+ static const int kInputStartMinusOne = kSuccessfulCaptures - kPointerSize;
// First register address. Following registers are below it on the stack.
static const int kRegisterZero = kInputStartMinusOne - kPointerSize;
-// Copyright 2011 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
(entry(p0, p1, p2, p3, p4))
typedef int (*regexp_matcher)(String*, int, const byte*,
- const byte*, int*, Address, int, Isolate*);
+ const byte*, int*, int, Address, int, Isolate*);
// Call the generated regexp code directly. The code at the entry address should
// expect eight int/pointer sized arguments and return an int.
-#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7) \
- (FUNCTION_CAST<regexp_matcher>(entry)(p0, p1, p2, p3, p4, p5, p6, p7))
+#define CALL_GENERATED_REGEXP_CODE(entry, p0, p1, p2, p3, p4, p5, p6, p7, p8) \
+ (FUNCTION_CAST<regexp_matcher>(entry)(p0, p1, p2, p3, p4, p5, p6, p7, p8))
#define TRY_CATCH_FROM_ADDRESS(try_catch_address) \
(reinterpret_cast<TryCatch*>(try_catch_address))
-// Copyright 2008 the V8 project authors. All rights reserved.
+// Copyright 2012 the V8 project authors. All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
NewStringFromUtf8(CStrVector(input));
Handle<String> sample_subject =
isolate->factory()->NewStringFromUtf8(CStrVector(""));
- RegExpEngine::Compile(
- &compile_data, false, multiline, pattern, sample_subject, is_ascii);
+ RegExpEngine::Compile(&compile_data,
+ false,
+ false,
+ multiline,
+ pattern,
+ sample_subject,
+ is_ascii);
return compile_data.node;
}
input_start,
input_end,
captures,
+ 0,
Isolate::Current());
}
int output[4];
NativeRegExpMacroAssembler::Result result =
Execute(*code,
- *input,
- 0,
- start_adr,
- start_adr + input->length() * 2,
- output);
+ *input,
+ 0,
+ start_adr,
+ start_adr + input->length() * 2,
+ output);
CHECK_EQ(NativeRegExpMacroAssembler::SUCCESS, result);
CHECK_EQ(0, output[0]);
--- /dev/null
+// Copyright 2012 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+// Test that an optional capture is cleared between two matches.
+var str = "ABX X";
+str = str.replace(/(\w)?X/g, function(match, capture) {
+ assertTrue(match.indexOf(capture) >= 0 ||
+ capture === undefined);
+ return capture ? capture.toLowerCase() : "-";
+ });
+assertEquals("Ab -", str);
+
+// Test zero-length matches.
+str = "Als Gregor Samsa eines Morgens";
+str = str.replace(/\b/g, function(match, capture) {
+ return "/";
+ });
+assertEquals("/Als/ /Gregor/ /Samsa/ /eines/ /Morgens/", str);
+
+// Test zero-length matches that have non-zero-length sub-captures.
+str = "It was a pleasure to burn.";
+str = str.replace(/(?=(\w+))\b/g, function(match, capture) {
+ return capture.length;
+ });
+assertEquals("2It 3was 1a 8pleasure 2to 4burn.", str);
+
+// Test multiple captures.
+str = "Try not. Do, or do not. There is no try.";
+str = str.replace(/(not?)|(do)|(try)/gi,
+ function(match, c1, c2, c3) {
+ assertTrue((c1 === undefined && c2 === undefined) ||
+ (c2 === undefined && c3 === undefined) ||
+ (c1 === undefined && c3 === undefined));
+ if (c1) return "-";
+ if (c2) return "+";
+ if (c3) return "="
+ });
+assertEquals("= -. +, or + -. There is - =.", str);
+
+// Test multiple alternate captures.
+str = "FOUR LEGS GOOD, TWO LEGS BAD!";
+str = str.replace(/(FOUR|TWO) LEGS (GOOD|BAD)/g,
+ function(match, num_legs, likeability) {
+ assertTrue(num_legs !== undefined);
+ assertTrue(likeability !== undefined);
+ if (num_legs == "FOUR") assertTrue(likeability == "GOOD");
+ if (num_legs == "TWO") assertTrue(likeability == "BAD");
+ return match.length - 10;
+ });
+assertEquals("4, 2!", str);
+
+
+// The same tests with UC16.
+
+//Test that an optional capture is cleared between two matches.
+str = "AB\u1234 \u1234";
+str = str.replace(/(\w)?\u1234/g,
+ function(match, capture) {
+ assertTrue(match.indexOf(capture) >= 0 ||
+ capture === undefined);
+ return capture ? capture.toLowerCase() : "-";
+ });
+assertEquals("Ab -", str);
+
+// Test zero-length matches.
+str = "Als \u2623\u2642 eines Morgens";
+str = str.replace(/\b/g, function(match, capture) {
+ return "/";
+ });
+assertEquals("/Als/ \u2623\u2642 /eines/ /Morgens/", str);
+
+// Test zero-length matches that have non-zero-length sub-captures.
+str = "It was a pleasure to \u70e7.";
+str = str.replace(/(?=(\w+))\b/g, function(match, capture) {
+ return capture.length;
+ });
+assertEquals("2It 3was 1a 8pleasure 2to \u70e7.", str);
+
+// Test multiple captures.
+str = "Try not. D\u26aa, or d\u26aa not. There is no try.";
+str = str.replace(/(not?)|(d\u26aa)|(try)/gi,
+ function(match, c1, c2, c3) {
+ assertTrue((c1 === undefined && c2 === undefined) ||
+ (c2 === undefined && c3 === undefined) ||
+ (c1 === undefined && c3 === undefined));
+ if (c1) return "-";
+ if (c2) return "+";
+ if (c3) return "="
+ });
+assertEquals("= -. +, or + -. There is - =.", str);
+
+// Test multiple alternate captures.
+str = "FOUR \u817f GOOD, TWO \u817f BAD!";
+str = str.replace(/(FOUR|TWO) \u817f (GOOD|BAD)/g,
+ function(match, num_legs, likeability) {
+ assertTrue(num_legs !== undefined);
+ assertTrue(likeability !== undefined);
+ if (num_legs == "FOUR") assertTrue(likeability == "GOOD");
+ if (num_legs == "TWO") assertTrue(likeability == "BAD");
+ return match.length - 7;
+ });
+assertEquals("4, 2!", str);