__ ldr(r0, MemOperand(frame_pointer(), kInputStart));
// Find negative length (offset of start relative to end).
__ sub(current_input_offset(), r0, end_of_input_address());
- // Set r0 to address of char before start of input
+ // Set r0 to address of char before start of the input string
// (effectively string position -1).
+ __ ldr(r1, MemOperand(frame_pointer(), kStartIndex));
__ sub(r0, current_input_offset(), Operand(char_size()));
+ __ sub(r0, r0, Operand(r1, LSL, (mode_ == UC16) ? 1 : 0));
// Store this value in a local variable, for use when clearing
// position registers.
__ str(r0, MemOperand(frame_pointer(), kInputStartMinusOne));
// Determine whether the start index is zero, that is at the start of the
// string, and store that value in a local variable.
- __ ldr(r1, MemOperand(frame_pointer(), kStartIndex));
__ tst(r1, Operand(r1));
__ mov(r1, Operand(1), LeaveCC, eq);
__ mov(r1, Operand(0), LeaveCC, ne);
// copy captures to output
__ ldr(r1, MemOperand(frame_pointer(), kInputStart));
__ ldr(r0, MemOperand(frame_pointer(), kRegisterOutput));
+ __ ldr(r2, MemOperand(frame_pointer(), kStartIndex));
__ sub(r1, end_of_input_address(), r1);
// r1 is length of input in bytes.
if (mode_ == UC16) {
__ mov(r1, Operand(r1, LSR, 1));
}
// r1 is length of input in characters.
+ __ add(r1, r1, Operand(r2));
+ // r1 is length of string in characters.
ASSERT_EQ(0, num_saved_registers_ % 2);
// Always an even number of capture registers. This allows us to
__ j(negative, &done);
// Read the value from the static offsets vector buffer.
__ mov(edi, Operand(ecx, edx, times_int_size, 0));
- // Perform explicit shift
- ASSERT_EQ(0, kSmiTag);
- __ shl(edi, kSmiTagSize);
- // Add previous index (from its stack slot) if value is not negative.
- Label capture_negative;
- // Carry flag set by shift above.
- __ j(negative, &capture_negative, not_taken);
- __ add(edi, Operand(eax)); // Add previous index (adding smi to smi).
- __ bind(&capture_negative);
+ __ SmiTag(edi);
// Store the smi value in the last match info.
__ mov(FieldOperand(ebx,
edx,
// Smi tagging support.
void SmiTag(Register reg) {
ASSERT(kSmiTag == 0);
- shl(reg, kSmiTagSize);
+ ASSERT(kSmiTagSize == 1);
+ add(reg, Operand(reg));  // Doubling equals a left shift by the one-bit tag size.
}
void SmiUntag(Register reg) {
sar(reg, kSmiTagSize);
__ j(not_zero, &exit_label_);
__ bind(&stack_ok);
+ // Load start index for later use.
+ __ mov(ebx, Operand(ebp, kStartIndex));
// Allocate space on stack for registers.
__ sub(Operand(esp), Immediate(num_registers_ * kPointerSize));
__ mov(edi, Operand(ebp, kInputStart));
// Set up edi to be negative offset from string end.
__ sub(edi, Operand(esi));
- // Set eax to address of char before start of input
+
+ // Set eax to address of char before start of the string.
// (effectively string position -1).
- __ lea(eax, Operand(edi, -char_size()));
+ __ neg(ebx);
+ if (mode_ == UC16) {
+ __ lea(eax, Operand(edi, ebx, times_2, -char_size()));
+ } else {
+ __ lea(eax, Operand(edi, ebx, times_1, -char_size()));
+ }
// Store this value in a local variable, for use when clearing
// position registers.
__ mov(Operand(ebp, kInputStartMinusOne), eax);
// Determine whether the start index is zero, that is at the start of the
// string, and store that value in a local variable.
- __ mov(ebx, Operand(ebp, kStartIndex));
__ xor_(Operand(ecx), ecx); // setcc only operates on cl (lower byte of ecx).
+ // Register ebx still holds -stringIndex.
__ test(ebx, Operand(ebx));
__ setcc(zero, ecx); // 1 if 0 (start of string), 0 if positive.
__ mov(Operand(ebp, kAtStart), ecx);
// copy captures to output
__ mov(ebx, Operand(ebp, kRegisterOutput));
__ mov(ecx, Operand(ebp, kInputEnd));
+ __ mov(edx, Operand(ebp, kStartIndex));
__ sub(ecx, Operand(ebp, kInputStart));
+ if (mode_ == UC16) {
+ __ lea(ecx, Operand(ecx, edx, times_2, 0));
+ } else {
+ __ add(ecx, Operand(edx));
+ }
for (int i = 0; i < num_saved_registers_; i++) {
__ mov(eax, register_location(i));
- __ add(eax, Operand(ecx)); // Convert to index from start, not end.
+ // Convert to index from start of string, not end.
+ __ add(eax, Operand(ecx));
if (mode_ == UC16) {
__ sar(eax, 1); // Convert byte index to character index.
}
Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
AtomCompile(re, pattern, flags, atom_string);
} else {
- IrregexpPrepare(re, pattern, flags, parse_result.capture_count);
+ IrregexpInitialize(re, pattern, flags, parse_result.capture_count);
}
ASSERT(re->data()->IsFixedArray());
// Compilation succeeded so the data is set on the regexp
}
-void RegExpImpl::IrregexpPrepare(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags,
- int capture_count) {
+void RegExpImpl::IrregexpInitialize(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ int capture_count) {
// Initialize compiled code entries to null.
Factory::SetRegExpIrregexpData(re,
JSRegExp::IRREGEXP,
}
+// Gets |regexp| and |subject| ready for one or more IrregexpExecOnce calls:
+// the subject is flattened and the regexp is checked to be compiled for the
+// subject's representation (ASCII or not).
+// Returns how many ints the caller has to provide as output space, or -1
+// when EnsureCompiledIrregexp reports failure.
+int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
+                                Handle<String> subject) {
+  if (!subject->IsFlat()) FlattenString(subject);
+
+  const bool subject_is_ascii = subject->IsAsciiRepresentation();
+  const bool compiled = EnsureCompiledIrregexp(regexp, subject_is_ascii);
+  if (!compiled) return -1;
+
+  // Read the data array only after the compile check above.
+  FixedArray* irregexp_data = FixedArray::cast(regexp->data());
+#ifdef V8_NATIVE_REGEXP
+  // Native code handles its registers internally, so the caller only needs
+  // room for the captures: one pair of ints per capture plus one extra pair.
+  const int capture_count = IrregexpNumberOfCaptures(irregexp_data);
+  return (capture_count + 1) * 2;
+#else  // !V8_NATIVE_REGEXP
+  // The byte-code implementation needs a slot for every one of its
+  // registers.
+  return IrregexpNumberOfRegisters(irregexp_data);
+#endif  // V8_NATIVE_REGEXP
+}
+
+
+// Executes the compiled regexp once on |subject|, starting at character
+// |index|, and leaves the capture positions in |output|.
+// IrregexpPrepare must have been called for this regexp/subject pair first:
+// the subject has to be flat and |output| has to hold at least the number
+// of ints that IrregexpPrepare returned.
+// Returns RE_SUCCESS on a match, RE_FAILURE on no match, and RE_EXCEPTION
+// (with an exception pending) when execution fails or when the regexp can
+// not be prepared again after the subject changed representation.
+RegExpImpl::IrregexpResult RegExpImpl::IrregexpExecOnce(Handle<JSRegExp> regexp,
+                                                        Handle<String> subject,
+                                                        int index,
+                                                        Vector<int> output) {
+  Handle<FixedArray> irregexp(FixedArray::cast(regexp->data()));
+
+  ASSERT(index >= 0);
+  ASSERT(index <= subject->length());
+  ASSERT(subject->IsFlat());
+
+#ifdef V8_NATIVE_REGEXP
+  ASSERT(output.length() >=
+      (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
+  do {
+    // Re-read the representation on every pass; it may differ after RETRY.
+    bool is_ascii = subject->IsAsciiRepresentation();
+    Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii));
+    NativeRegExpMacroAssembler::Result res =
+        NativeRegExpMacroAssembler::Match(code,
+                                          subject,
+                                          output.start(),
+                                          output.length(),
+                                          index);
+    if (res != NativeRegExpMacroAssembler::RETRY) {
+      ASSERT(res != NativeRegExpMacroAssembler::EXCEPTION ||
+             Top::has_pending_exception());
+      // The two result enums share their values, so a plain cast converts.
+      STATIC_ASSERT(
+          static_cast<int>(NativeRegExpMacroAssembler::SUCCESS) == RE_SUCCESS);
+      STATIC_ASSERT(
+          static_cast<int>(NativeRegExpMacroAssembler::FAILURE) == RE_FAILURE);
+      STATIC_ASSERT(static_cast<int>(NativeRegExpMacroAssembler::EXCEPTION)
+                    == RE_EXCEPTION);
+      return static_cast<IrregexpResult>(res);
+    }
+    // If result is RETRY, the string has changed representation, and we
+    // must restart from scratch.
+    // In this case, it means we must make sure we are prepared to handle
+    // the, potentially, different subject (the string can switch between
+    // being internal and external, and even between being ASCII and UC16,
+    // but the characters are always the same).
+    if (IrregexpPrepare(regexp, subject) < 0) {
+      // Preparing again failed. IrregexpPrepare signals this with a negative
+      // result and a pending exception, so stop instead of looping on and
+      // fetching native code that may not exist.
+      ASSERT(Top::has_pending_exception());
+      return RE_EXCEPTION;
+    }
+  } while (true);
+  UNREACHABLE();
+  return RE_EXCEPTION;
+#else  // ndef V8_NATIVE_REGEXP
+
+  ASSERT(output.length() >= IrregexpNumberOfRegisters(*irregexp));
+  bool is_ascii = subject->IsAsciiRepresentation();
+  // We must have done EnsureCompiledIrregexp, so we can get the number of
+  // registers.
+  int* register_vector = output.start();
+  int number_of_capture_registers =
+      (IrregexpNumberOfCaptures(*irregexp) + 1) * 2;
+  // Mark every capture register as unset (-1) before running the byte codes.
+  for (int i = number_of_capture_registers - 1; i >= 0; i--) {
+    register_vector[i] = -1;
+  }
+  Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii));
+
+  if (IrregexpInterpreter::Match(byte_codes,
+                                 subject,
+                                 register_vector,
+                                 index)) {
+    return RE_SUCCESS;
+  }
+  return RE_FAILURE;
+#endif  // ndef V8_NATIVE_REGEXP
+}
+
+
Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> jsregexp,
Handle<String> subject,
int previous_index,
ASSERT_EQ(jsregexp->TypeTag(), JSRegExp::IRREGEXP);
// Prepare space for the return values.
- int number_of_capture_registers =
- (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
-
#ifndef V8_NATIVE_REGEXP
#ifdef DEBUG
if (FLAG_trace_regexp_bytecodes) {
}
#endif
#endif
-
- if (!subject->IsFlat()) {
- FlattenString(subject);
- }
-
- last_match_info->EnsureSize(number_of_capture_registers + kLastMatchOverhead);
-
- Handle<FixedArray> array;
-
- // Dispatch to the correct RegExp implementation.
- Handle<FixedArray> regexp(FixedArray::cast(jsregexp->data()));
-
-#ifdef V8_NATIVE_REGEXP
-
- OffsetsVector captures(number_of_capture_registers);
- int* captures_vector = captures.vector();
- NativeRegExpMacroAssembler::Result res;
- do {
- bool is_ascii = subject->IsAsciiRepresentation();
- if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
- return Handle<Object>::null();
- }
- Handle<Code> code(RegExpImpl::IrregexpNativeCode(*regexp, is_ascii));
- res = NativeRegExpMacroAssembler::Match(code,
- subject,
- captures_vector,
- captures.length(),
- previous_index);
- // If result is RETRY, the string have changed representation, and we
- // must restart from scratch.
- } while (res == NativeRegExpMacroAssembler::RETRY);
- if (res == NativeRegExpMacroAssembler::EXCEPTION) {
+ int required_registers = RegExpImpl::IrregexpPrepare(jsregexp, subject);
+ if (required_registers < 0) {
+ // Compiling failed with an exception.
ASSERT(Top::has_pending_exception());
return Handle<Object>::null();
}
- ASSERT(res == NativeRegExpMacroAssembler::SUCCESS
- || res == NativeRegExpMacroAssembler::FAILURE);
- if (res != NativeRegExpMacroAssembler::SUCCESS) return Factory::null_value();
+ OffsetsVector registers(required_registers);
- array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements()));
- ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
- // The captures come in (start, end+1) pairs.
- for (int i = 0; i < number_of_capture_registers; i += 2) {
- // Capture values are relative to start_offset only.
- // Convert them to be relative to start of string.
- if (captures_vector[i] >= 0) {
- captures_vector[i] += previous_index;
- }
- if (captures_vector[i + 1] >= 0) {
- captures_vector[i + 1] += previous_index;
+ IrregexpResult res = IrregexpExecOnce(jsregexp,
+ subject,
+ previous_index,
+ Vector<int>(registers.vector(),
+ registers.length()));
+ if (res == RE_SUCCESS) {
+ int capture_register_count =
+ (IrregexpNumberOfCaptures(FixedArray::cast(jsregexp->data())) + 1) * 2;
+ last_match_info->EnsureSize(capture_register_count + kLastMatchOverhead);
+ AssertNoAllocation no_gc;
+ int* register_vector = registers.vector();
+ FixedArray* array = FixedArray::cast(last_match_info->elements());
+ for (int i = 0; i < capture_register_count; i += 2) {
+ SetCapture(array, i, register_vector[i]);
+ SetCapture(array, i + 1, register_vector[i + 1]);
}
- SetCapture(*array, i, captures_vector[i]);
- SetCapture(*array, i + 1, captures_vector[i + 1]);
+ SetLastCaptureCount(array, capture_register_count);
+ SetLastSubject(array, *subject);
+ SetLastInput(array, *subject);
+ return last_match_info;
}
-
-#else // ! V8_NATIVE_REGEXP
-
- bool is_ascii = subject->IsAsciiRepresentation();
- if (!EnsureCompiledIrregexp(jsregexp, is_ascii)) {
+ if (res == RE_EXCEPTION) {
+ ASSERT(Top::has_pending_exception());
return Handle<Object>::null();
}
- // Now that we have done EnsureCompiledIrregexp we can get the number of
- // registers.
- int number_of_registers =
- IrregexpNumberOfRegisters(FixedArray::cast(jsregexp->data()));
- OffsetsVector registers(number_of_registers);
- int* register_vector = registers.vector();
- for (int i = number_of_capture_registers - 1; i >= 0; i--) {
- register_vector[i] = -1;
- }
- Handle<ByteArray> byte_codes(IrregexpByteCode(*regexp, is_ascii));
-
- if (!IrregexpInterpreter::Match(byte_codes,
- subject,
- register_vector,
- previous_index)) {
- return Factory::null_value();
- }
-
- array = Handle<FixedArray>(FixedArray::cast(last_match_info->elements()));
- ASSERT(array->length() >= number_of_capture_registers + kLastMatchOverhead);
- // The captures come in (start, end+1) pairs.
- for (int i = 0; i < number_of_capture_registers; i += 2) {
- SetCapture(*array, i, register_vector[i]);
- SetCapture(*array, i + 1, register_vector[i + 1]);
- }
-
-#endif // V8_NATIVE_REGEXP
-
- SetLastCaptureCount(*array, number_of_capture_registers);
- SetLastSubject(*array, *subject);
- SetLastInput(*array, *subject);
-
- return last_match_info;
+ ASSERT(res == RE_FAILURE);
+ return Factory::null_value();
}
Handle<JSArray> lastMatchInfo);
// Prepares a JSRegExp object with Irregexp-specific data.
- static void IrregexpPrepare(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags,
- int capture_register_count);
+ static void IrregexpInitialize(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags,
+ int capture_register_count);
static void AtomCompile(Handle<JSRegExp> re,
int index,
Handle<JSArray> lastMatchInfo);
+ enum IrregexpResult { RE_FAILURE = 0, RE_SUCCESS = 1, RE_EXCEPTION = -1 };
+
+ // Prepare a RegExp for being executed one or more times (using
+ // IrregexpExecOnce) on the subject.
+ // This ensures that the regexp is compiled for the subject, and that
+ // the subject is flat.
+ // Returns the number of integer spaces required by IrregexpExecOnce
+ // as its "registers" argument. If the regexp cannot be compiled,
+ // an exception is set as pending, and this function returns negative.
+ static int IrregexpPrepare(Handle<JSRegExp> regexp,
+ Handle<String> subject);
+
+ // Execute a regular expression once on the subject, starting from
+ // character "index".
+ // If successful, returns RE_SUCCESS and sets the capture positions
+ // in the first registers.
+ // If matching fails, returns RE_FAILURE.
+ // If execution fails, sets a pending exception and returns RE_EXCEPTION.
+ static IrregexpResult IrregexpExecOnce(Handle<JSRegExp> regexp,
+ Handle<String> subject,
+ int index,
+ Vector<int32_t> registers);
+
// Execute an Irregexp bytecode pattern.
// On a successful match, the result is a JSArray containing
// captured positions. On a failure, the result is the null value.
// Read the value from the static offsets vector buffer and make it a smi.
__ movl(rdi, Operand(rcx, rdx, times_int_size, 0));
__ Integer32ToSmi(rdi, rdi, &runtime);
- // Add previous index (from its stack slot) if value is not negative.
- Label capture_negative;
- // Negative flag set by smi convertion above.
- __ j(negative, &capture_negative);
- __ SmiAdd(rdi, rdi, rax, &runtime); // Add previous index.
- __ bind(&capture_negative);
// Store the smi value in the last match info.
__ movq(FieldOperand(rbx,
rdx,
__ movq(rdi, Operand(rbp, kInputStart));
// Set up rdi to be negative offset from string end.
__ subq(rdi, rsi);
- // Set rax to address of char before start of input
+ // Set rax to address of char before start of the string
// (effectively string position -1).
- __ lea(rax, Operand(rdi, -char_size()));
+ __ movq(rbx, Operand(rbp, kStartIndex));
+ __ neg(rbx);
+ if (mode_ == UC16) {
+ __ lea(rax, Operand(rdi, rbx, times_2, -char_size()));
+ } else {
+ __ lea(rax, Operand(rdi, rbx, times_1, -char_size()));
+ }
// Store this value in a local variable, for use when clearing
// position registers.
__ movq(Operand(rbp, kInputStartMinusOne), rax);
__ bind(&success_label_);
if (num_saved_registers_ > 0) {
// copy captures to output
+ __ movq(rdx, Operand(rbp, kStartIndex));
__ movq(rbx, Operand(rbp, kRegisterOutput));
__ movq(rcx, Operand(rbp, kInputEnd));
__ subq(rcx, Operand(rbp, kInputStart));
+ if (mode_ == UC16) {
+ __ lea(rcx, Operand(rcx, rdx, times_2, 0));
+ } else {
+ __ addq(rcx, rdx);
+ }
for (int i = 0; i < num_saved_registers_; i++) {
__ movq(rax, register_location(i));
__ addq(rax, rcx); // Convert to index from start, not end.