Handle<String> flags) {
// Call the construct code with 2 arguments.
Handle<Object> argv[] = { pattern, flags };
- return Execution::New(constructor, ARRAY_SIZE(argv), argv);
+ return Execution::New(constructor, arraysize(argv), argv);
}
case 'm':
flags |= JSRegExp::MULTILINE;
break;
+ case 'y':
+ if (FLAG_harmony_regexps) flags |= JSRegExp::STICKY;
+ break;
}
}
return JSRegExp::Flags(flags);
elements->set(0, *pattern);
elements->set(1, *error_text);
Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
- Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
- return isolate->Throw<Object>(regexp_err);
+ Handle<Object> regexp_err;
+ THROW_NEW_ERROR(isolate, NewSyntaxError(message, array), Object);
}
if (parse_result.simple &&
!flags.is_ignore_case() &&
+ !flags.is_sticky() &&
!HasFewDifferentCharacters(pattern)) {
// Parse-tree is a single atom that is equal to the pattern.
AtomCompile(re, pattern, flags, pattern);
has_been_compiled = true;
} else if (parse_result.tree->IsAtom() &&
!flags.is_ignore_case() &&
+ !flags.is_sticky() &&
parse_result.capture_count == 0) {
RegExpAtom* atom = parse_result.tree->AsAtom();
Vector<const uc16> atom_pattern = atom->data();
DCHECK(needle_content.IsFlat());
DCHECK(subject_content.IsFlat());
// dispatch on type of strings
- index = (needle_content.IsAscii()
- ? (subject_content.IsAscii()
- ? SearchString(isolate,
- subject_content.ToOneByteVector(),
- needle_content.ToOneByteVector(),
- index)
- : SearchString(isolate,
- subject_content.ToUC16Vector(),
- needle_content.ToOneByteVector(),
- index))
- : (subject_content.IsAscii()
- ? SearchString(isolate,
- subject_content.ToOneByteVector(),
- needle_content.ToUC16Vector(),
- index)
- : SearchString(isolate,
- subject_content.ToUC16Vector(),
- needle_content.ToUC16Vector(),
- index)));
+ index =
+ (needle_content.IsOneByte()
+ ? (subject_content.IsOneByte()
+ ? SearchString(isolate, subject_content.ToOneByteVector(),
+ needle_content.ToOneByteVector(), index)
+ : SearchString(isolate, subject_content.ToUC16Vector(),
+ needle_content.ToOneByteVector(), index))
+ : (subject_content.IsOneByte()
+ ? SearchString(isolate, subject_content.ToOneByteVector(),
+ needle_content.ToUC16Vector(), index)
+ : SearchString(isolate, subject_content.ToUC16Vector(),
+ needle_content.ToUC16Vector(), index)));
if (index == -1) {
return i / 2; // Return number of matches.
} else {
// Irregexp implementation.
// Ensures that the regexp object contains a compiled version of the
-// source for either ASCII or non-ASCII strings.
+// source for either one-byte or two-byte subject strings.
// If the compiled version doesn't already exist, it is compiled
// from the source pattern.
// If compilation fails, an exception is thrown and this function
// returns false.
-bool RegExpImpl::EnsureCompiledIrregexp(
- Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) {
- Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
+bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
+ Handle<String> sample_subject,
+ bool is_one_byte) {
+ Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));
#ifdef V8_INTERPRETED_REGEXP
if (compiled_code->IsByteArray()) return true;
#else // V8_INTERPRETED_REGEXP (RegExp native code)
#endif
// We could potentially have marked this as flushable, but have kept
// a saved version if we did not flush it yet.
- Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
+ Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
if (saved_code->IsCode()) {
// Reinstate the code in the original place.
- re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
+ re->SetDataAt(JSRegExp::code_index(is_one_byte), saved_code);
DCHECK(compiled_code->IsSmi());
return true;
}
- return CompileIrregexp(re, sample_subject, is_ascii);
+ return CompileIrregexp(re, sample_subject, is_one_byte);
}
-static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
- bool is_ascii,
+static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
Handle<String> error_message,
Isolate* isolate) {
Factory* factory = isolate->factory();
elements->set(0, re->Pattern());
elements->set(1, *error_message);
Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
- Handle<Object> regexp_err =
+ Handle<Object> error;
+ MaybeHandle<Object> maybe_error =
factory->NewSyntaxError("malformed_regexp", array);
- isolate->Throw(*regexp_err);
- return false;
+ if (maybe_error.ToHandle(&error)) isolate->Throw(*error);
}
bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
Handle<String> sample_subject,
- bool is_ascii) {
+ bool is_one_byte) {
// Compile the RegExp.
Isolate* isolate = re->GetIsolate();
Zone zone(isolate);
PostponeInterruptsScope postpone(isolate);
// If we had a compilation error the last time this is saved at the
// saved code index.
- Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
+ Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte));
// When arriving here entry can only be a smi, either representing an
// uncompiled regexp, a previous compilation error, or code that has
// been flushed.
// A previous compilation failed and threw an error which we store in
// the saved code index (we store the error message, not the actual
// error). Recreate the error object and throw it.
- Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
+ Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
DCHECK(error_string->IsString());
Handle<String> error_message(String::cast(error_string));
- CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
+ CreateRegExpErrorObjectAndThrow(re, error_message, isolate);
return false;
}
"malformed_regexp"));
return false;
}
- RegExpEngine::CompilationResult result =
- RegExpEngine::Compile(&compile_data,
- flags.is_ignore_case(),
- flags.is_global(),
- flags.is_multiline(),
- pattern,
- sample_subject,
- is_ascii,
- &zone);
+ RegExpEngine::CompilationResult result = RegExpEngine::Compile(
+ &compile_data, flags.is_ignore_case(), flags.is_global(),
+ flags.is_multiline(), flags.is_sticky(), pattern, sample_subject,
+ is_one_byte, &zone);
if (result.error_message != NULL) {
// Unable to compile regexp.
Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
CStrVector(result.error_message)).ToHandleChecked();
- CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
+ CreateRegExpErrorObjectAndThrow(re, error_message, isolate);
return false;
}
Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
- data->set(JSRegExp::code_index(is_ascii), result.code);
+ data->set(JSRegExp::code_index(is_one_byte), result.code);
int register_max = IrregexpMaxRegisterCount(*data);
if (result.num_registers > register_max) {
SetIrregexpMaxRegisterCount(*data, result.num_registers);
}
-ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
- return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
+ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_one_byte) {
+ return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte)));
}
-Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
- return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
+Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_one_byte) {
+ return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));
}
Handle<String> subject) {
subject = String::Flatten(subject);
- // Check the asciiness of the underlying storage.
- bool is_ascii = subject->IsOneByteRepresentationUnderneath();
- if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1;
+ // Check representation of the underlying storage.
+ bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
+ if (!EnsureCompiledIrregexp(regexp, subject, is_one_byte)) return -1;
#ifdef V8_INTERPRETED_REGEXP
// Byte-code regexp needs space allocated for all its registers.
DCHECK(index <= subject->length());
DCHECK(subject->IsFlat());
- bool is_ascii = subject->IsOneByteRepresentationUnderneath();
+ bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
#ifndef V8_INTERPRETED_REGEXP
DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
do {
- EnsureCompiledIrregexp(regexp, subject, is_ascii);
- Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
+ EnsureCompiledIrregexp(regexp, subject, is_one_byte);
+ Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
// The stack is used to allocate registers for the compiled regexp code.
// This means that in case of failure, the output registers array is left
// untouched and contains the capture results from the previous successful
// must restart from scratch.
// In this case, it means we must make sure we are prepared to handle
// the, potentially, different subject (the string can switch between
- // being internal and external, and even between being ASCII and UC16,
+ // being internal and external, and even between being Latin1 and UC16,
// but the characters are always the same).
IrregexpPrepare(regexp, subject);
- is_ascii = subject->IsOneByteRepresentationUnderneath();
+ is_one_byte = subject->IsOneByteRepresentationUnderneath();
} while (true);
UNREACHABLE();
return RE_EXCEPTION;
for (int i = number_of_capture_registers - 1; i >= 0; i--) {
raw_output[i] = -1;
}
- Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
+ Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
+ isolate);
IrregexpResult result = IrregexpInterpreter::Match(isolate,
byte_codes,
class RegExpCompiler {
public:
- RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii,
+ RegExpCompiler(int capture_count, bool ignore_case, bool is_one_byte,
Zone* zone);
int AllocateRegister() {
void SetRegExpTooBig() { reg_exp_too_big_ = true; }
inline bool ignore_case() { return ignore_case_; }
- inline bool ascii() { return ascii_; }
+ inline bool one_byte() { return one_byte_; }
FrequencyCollator* frequency_collator() { return &frequency_collator_; }
int current_expansion_factor() { return current_expansion_factor_; }
int recursion_depth_;
RegExpMacroAssembler* macro_assembler_;
bool ignore_case_;
- bool ascii_;
+ bool one_byte_;
bool reg_exp_too_big_;
int current_expansion_factor_;
FrequencyCollator frequency_collator_;
// Attempts to compile the regexp using an Irregexp code generator. Returns
// a fixed array or a null handle depending on whether it succeeded.
-RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii,
- Zone* zone)
+RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case,
+ bool one_byte, Zone* zone)
: next_register_(2 * (capture_count + 1)),
work_list_(NULL),
recursion_depth_(0),
ignore_case_(ignore_case),
- ascii_(ascii),
+ one_byte_(one_byte),
reg_exp_too_big_(false),
current_expansion_factor_(1),
frequency_collator_(),
// Returns the number of characters in the equivalence class, omitting those
// that cannot occur in the source string because it is ASCII.
-static int GetCaseIndependentLetters(Isolate* isolate,
- uc16 character,
- bool ascii_subject,
+static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
+ bool one_byte_subject,
unibrow::uchar* letters) {
int length =
isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
letters[0] = character;
length = 1;
}
- if (!ascii_subject || character <= String::kMaxOneByteCharCode) {
+ if (!one_byte_subject || character <= String::kMaxOneByteCharCode) {
return length;
}
+
// The standard requires that non-ASCII characters cannot have ASCII
// character codes in their equivalence class.
+ // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore,
+ // is it? For example, \u00C5 is equivalent to \u212B.
return 0;
}
bool check,
bool preloaded) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- bool ascii = compiler->ascii();
+ bool one_byte = compiler->one_byte();
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
+ int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
if (length < 1) {
- // This can't match. Must be an ASCII subject and a non-ASCII character.
- // We do not need to do anything since the ASCII pass already handled this.
+ // This can't match. Must be a one-byte subject and a non-one-byte
+ // character. We do not need to do anything since the one-byte pass
+ // already handled this.
return false; // Bounds not checked.
}
bool checked = false;
// We handle the length > 1 case in a later pass.
if (length == 1) {
- if (ascii && c > String::kMaxOneByteCharCodeU) {
+ if (one_byte && c > String::kMaxOneByteCharCodeU) {
// Can't match - see above.
return false; // Bounds not checked.
}
static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
- bool ascii,
- uc16 c1,
- uc16 c2,
+ bool one_byte, uc16 c1, uc16 c2,
Label* on_failure) {
uc16 char_mask;
- if (ascii) {
+ if (one_byte) {
char_mask = String::kMaxOneByteCharCode;
} else {
char_mask = String::kMaxUtf16CodeUnit;
bool check,
bool preloaded) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- bool ascii = compiler->ascii();
+ bool one_byte = compiler->one_byte();
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
+ int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
if (length <= 1) return false;
// We may not need to check against the end of the input string
// if this character lies before a character that matched.
DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
switch (length) {
case 2: {
- if (ShortCutEmitCharacterPair(macro_assembler,
- ascii,
- chars[0],
- chars[1],
- on_failure)) {
+ if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
+ chars[1], on_failure)) {
} else {
macro_assembler->CheckCharacter(chars[0], &ok);
macro_assembler->CheckNotCharacter(chars[1], on_failure);
// new_start_index is the index of the first edge that is beyond the
// current kSize space.
- // For very large search spaces we do a binary chop search of the non-ASCII
+ // For very large search spaces we do a binary chop search of the non-Latin1
// space instead of just going to the end of the current kSize space. The
// heuristics are complicated a little by the fact that any 128-character
// encoding space can be quickly tested with a table lookup, so we don't
// for example, we only want to match every second character (eg. the lower
// case characters on some Unicode pages).
int binary_chop_index = (end_index + start_index) / 2;
- // The first test ensures that we get to the code that handles the ASCII
+ // The first test ensures that we get to the code that handles the Latin1
// range with a single not-taken branch, speeding up this important
- // character range (even non-ASCII charset-based text has spaces and
+ // character range (even non-Latin1 charset-based text has spaces and
// punctuation).
- if (*border - 1 > String::kMaxOneByteCharCode && // ASCII case.
+ if (*border - 1 > String::kMaxOneByteCharCode && // Latin1 case.
end_index - start_index > (*new_start_index - start_index) * 2 &&
- last - first > kSize * 2 &&
- binary_chop_index > *new_start_index &&
+ last - first > kSize * 2 && binary_chop_index > *new_start_index &&
ranges->at(binary_chop_index) >= first + 2 * kSize) {
int scan_forward_for_section_border = binary_chop_index;;
int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
- RegExpCharacterClass* cc,
- bool ascii,
- Label* on_failure,
- int cp_offset,
- bool check_offset,
- bool preloaded,
- Zone* zone) {
+ RegExpCharacterClass* cc, bool one_byte,
+ Label* on_failure, int cp_offset, bool check_offset,
+ bool preloaded, Zone* zone) {
ZoneList<CharacterRange>* ranges = cc->ranges(zone);
if (!CharacterRange::IsCanonical(ranges)) {
CharacterRange::Canonicalize(ranges);
}
int max_char;
- if (ascii) {
+ if (one_byte) {
max_char = String::kMaxOneByteCharCode;
} else {
max_char = String::kMaxUtf16CodeUnit;
bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
+ Trace* bounds_check_trace,
Trace* trace,
bool preload_has_checked_bounds,
Label* on_possible_success,
GetQuickCheckDetails(
details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
if (details->cannot_match()) return false;
- if (!details->Rationalize(compiler->ascii())) return false;
+ if (!details->Rationalize(compiler->one_byte())) return false;
DCHECK(details->characters() == 1 ||
compiler->macro_assembler()->CanReadUnaligned());
uint32_t mask = details->mask();
RegExpMacroAssembler* assembler = compiler->macro_assembler();
if (trace->characters_preloaded() != details->characters()) {
+ DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset());
+ // We are attempting to preload the minimum number of characters
+ // any choice would eat, so if the bounds check fails, then none of the
+ // choices can succeed, so we can just immediately backtrack, rather
+ // than go to the next choice.
assembler->LoadCurrentCharacter(trace->cp_offset(),
- trace->backtrack(),
+ bounds_check_trace->backtrack(),
!preload_has_checked_bounds,
details->characters());
}
// If number of characters preloaded is 1 then we used a byte or 16 bit
// load so the value is already masked down.
uint32_t char_mask;
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
char_mask = String::kMaxOneByteCharCode;
} else {
char_mask = String::kMaxUtf16CodeUnit;
if ((mask & char_mask) == char_mask) need_mask = false;
mask &= char_mask;
} else {
- // For 2-character preloads in ASCII mode or 1-character preloads in
- // TWO_BYTE mode we also use a 16 bit load with zero extend.
- if (details->characters() == 2 && compiler->ascii()) {
+ // For 2-character preloads in one-byte mode or 1-character preloads in
+ // two-byte mode we also use a 16 bit load with zero extend.
+ if (details->characters() == 2 && compiler->one_byte()) {
if ((mask & 0xffff) == 0xffff) need_mask = false;
- } else if (details->characters() == 1 && !compiler->ascii()) {
+ } else if (details->characters() == 1 && !compiler->one_byte()) {
if ((mask & 0xffff) == 0xffff) need_mask = false;
} else {
if (mask == 0xffffffff) need_mask = false;
DCHECK(characters_filled_in < details->characters());
int characters = details->characters();
int char_mask;
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
char_mask = String::kMaxOneByteCharCode;
} else {
char_mask = String::kMaxUtf16CodeUnit;
details->positions(characters_filled_in);
uc16 c = quarks[i];
if (c > char_mask) {
- // If we expect a non-ASCII character from an ASCII string,
- // there is no way we can match. Not even case independent
- // matching can turn an ASCII character into non-ASCII or
+ // If we expect a non-Latin1 character from a one-byte string,
+ // there is no way we can match. Not even case-independent
+ // matching can turn a Latin1 character into non-Latin1 or
// vice versa.
+ // TODO(dcarney): issue 3550. Verify that this works as expected.
+ // For example, \u0178 is uppercase of \u00ff (y-umlaut).
details->set_cannot_match();
pos->determines_perfectly = false;
return;
}
if (compiler->ignore_case()) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
- int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
- chars);
+ int length = GetCaseIndependentLetters(isolate, c,
+ compiler->one_byte(), chars);
DCHECK(length != 0); // Can only happen if c > char_mask (see above).
if (length == 1) {
// This letter has no case equivalents, so it's nice and simple
}
-void QuickCheckDetails::Advance(int by, bool ascii) {
+void QuickCheckDetails::Advance(int by, bool one_byte) {
DCHECK(by >= 0);
if (by >= characters_) {
Clear();
};
-RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
- RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);
+ RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
if (next == NULL) return set_replacement(NULL);
on_success_ = next;
return set_replacement(this);
}
-RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
DCHECK(!info()->visited);
}
-RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
VisitMarker marker(info());
RegExpNode* continue_replacement =
- continue_node_->FilterASCII(depth - 1, ignore_case);
+ continue_node_->FilterOneByte(depth - 1, ignore_case);
// If we can't continue after the loop then there is no sense in doing the
// loop.
if (continue_replacement == NULL) return set_replacement(NULL);
}
- return ChoiceNode::FilterASCII(depth - 1, ignore_case);
+ return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
}
-RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
for (int i = 0; i < choice_count; i++) {
GuardedAlternative alternative = alternatives_->at(i);
RegExpNode* replacement =
- alternative.node()->FilterASCII(depth - 1, ignore_case);
+ alternative.node()->FilterOneByte(depth - 1, ignore_case);
DCHECK(replacement != this); // No missing EMPTY_MATCH_CHECK.
if (replacement != NULL) {
alternatives_->at(i).set_node(replacement);
new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
for (int i = 0; i < choice_count; i++) {
RegExpNode* replacement =
- alternatives_->at(i).node()->FilterASCII(depth - 1, ignore_case);
+ alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);
if (replacement != NULL) {
alternatives_->at(i).set_node(replacement);
new_alternatives->Add(alternatives_->at(i), zone());
}
-RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth,
- bool ignore_case) {
+RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth,
+ bool ignore_case) {
if (info()->replacement_calculated) return replacement();
if (depth < 0) return this;
if (info()->visited) return this;
// Alternative 0 is the negative lookahead, alternative 1 is what comes
// afterwards.
RegExpNode* node = alternatives_->at(1).node();
- RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case);
+ RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
if (replacement == NULL) return set_replacement(NULL);
alternatives_->at(1).set_node(replacement);
RegExpNode* neg_node = alternatives_->at(0).node();
- RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case);
+ RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
// If the negative lookahead is always going to fail then
// we don't need to check it.
if (neg_replacement == NULL) return set_replacement(replacement);
if (!assembler->CheckSpecialCharacterClass('n',
new_trace.backtrack())) {
// Newline means \n, \r, 0x2028 or 0x2029.
- if (!compiler->ascii()) {
+ if (!compiler->one_byte()) {
assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
}
assembler->CheckCharacter('\n', &ok);
int* checked_up_to) {
RegExpMacroAssembler* assembler = compiler->macro_assembler();
Isolate* isolate = assembler->zone()->isolate();
- bool ascii = compiler->ascii();
+ bool one_byte = compiler->one_byte();
Label* backtrack = trace->backtrack();
QuickCheckDetails* quick_check = trace->quick_check_performed();
int element_count = elms_->length();
if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
EmitCharacterFunction* emit_function = NULL;
switch (pass) {
- case NON_ASCII_MATCH:
- DCHECK(ascii);
+ case NON_LATIN1_MATCH:
+ DCHECK(one_byte);
if (quarks[j] > String::kMaxOneByteCharCode) {
assembler->GoTo(backtrack);
return;
if (first_element_checked && i == 0) continue;
if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
RegExpCharacterClass* cc = elm.char_class();
- EmitCharClass(assembler,
- cc,
- ascii,
- backtrack,
- cp_offset,
- *checked_up_to < cp_offset,
- preloaded,
- zone());
+ EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
+ *checked_up_to < cp_offset, preloaded, zone());
UpdateBoundsCheck(cp_offset, checked_up_to);
}
}
return;
}
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
int dummy = 0;
- TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
+ TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
}
bool first_elt_done = false;
// Adjust the offsets of the quick check performed information. This
// information is used to find out what we already determined about the
// characters by means of mask and compare.
- quick_check_performed_.Advance(by, compiler->ascii());
+ quick_check_performed_.Advance(by, compiler->one_byte());
cp_offset_ += by;
if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
compiler->SetRegExpTooBig();
}
-void TextNode::MakeCaseIndependent(bool is_ascii) {
+void TextNode::MakeCaseIndependent(bool is_one_byte) {
int element_count = elms_->length();
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
ZoneList<CharacterRange>* ranges = cc->ranges(zone());
int range_count = ranges->length();
for (int j = 0; j < range_count; j++) {
- ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());
+ ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone());
}
}
}
}
if (ranges->length() != 1) return NULL;
uint32_t max_char;
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
max_char = String::kMaxOneByteCharCode;
} else {
max_char = String::kMaxUtf16CodeUnit;
void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
if (trace->stop_node() == this) {
+ // Back edge of greedy optimized loop node graph.
int text_length =
GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
DCHECK(text_length != kNodeIsTooComplexForGreedyLoops);
int eats_at_least) {
int preload_characters = Min(4, eats_at_least);
if (compiler->macro_assembler()->CanReadUnaligned()) {
- bool ascii = compiler->ascii();
- if (ascii) {
+ bool one_byte = compiler->one_byte();
+ if (one_byte) {
if (preload_characters > 4) preload_characters = 4;
// We can't preload 3 characters because there is no machine instruction
// to do that. We can't just load 4 because we could be reading
0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
0xFEFF, 0xFF00, 0x10000 };
-static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
+static const int kSpaceRangeCount = arraysize(kSpaceRanges);
static const int kWordRanges[] = {
'0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
-static const int kWordRangeCount = ARRAY_SIZE(kWordRanges);
+static const int kWordRangeCount = arraysize(kWordRanges);
static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };
-static const int kDigitRangeCount = ARRAY_SIZE(kDigitRanges);
+static const int kDigitRangeCount = arraysize(kDigitRanges);
static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };
-static const int kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);
+static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);
static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
0x2028, 0x202A, 0x10000 };
-static const int kLineTerminatorRangeCount = ARRAY_SIZE(kLineTerminatorRanges);
+static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
void BoyerMoorePositionInfo::Set(int character) {
int length, RegExpCompiler* compiler, Zone* zone)
: length_(length),
compiler_(compiler) {
- if (compiler->ascii()) {
+ if (compiler->one_byte()) {
max_char_ = String::kMaxOneByteCharCode;
} else {
max_char_ = String::kMaxUtf16CodeUnit;
// dividing by 2 we switch off the skipping if the probability of skipping
// is less than 50%. This is because the multibyte mask-and-compare
// skipping in quickcheck is more likely to do well on this case.
- bool in_quickcheck_range = ((i - remembered_from < 4) ||
- (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));
+ bool in_quickcheck_range =
+ ((i - remembered_from < 4) ||
+ (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
// Called 'probability' but it is only a rough estimate and can actually
// be outside the 0-kSize range.
int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
// See comment above on the implementation of GetSkipTable.
-bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
+void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
const int kSize = RegExpMacroAssembler::kTableSize;
int min_lookahead = 0;
int max_lookahead = 0;
- if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;
+ if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
bool found_single_character = false;
int single_character = 0;
if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
// The mask-compare can probably handle this better.
- return false;
+ return;
}
if (found_single_character) {
masm->AdvanceCurrentPosition(lookahead_width);
masm->GoTo(&again);
masm->Bind(&cont);
- return true;
+ return;
}
Factory* factory = masm->zone()->isolate()->factory();
masm->AdvanceCurrentPosition(skip_distance);
masm->GoTo(&again);
masm->Bind(&cont);
-
- return true;
}
* \ F V
* \-----S4
*
- * For greedy loops we reverse our expectation and expect to match rather
- * than fail. Therefore we want the loop code to look like this (U is the
- * unwind code that steps back in the greedy loop). The following alternatives
- * look the same as above.
+ * For greedy loops we push the current position, then generate the code that
+ * eats the input specially in EmitGreedyLoop. The other choice (the
+ * continuation) is generated by the normal code in EmitChoices, and steps back
+ * in the input to the starting position when it fails to match. The loop code
+ * looks like this (U is the unwind code that steps back in the greedy loop).
+ *
* _____
* / \
* V |
* / |S |
* F/ \_____/
* /
- * |<-----------
- * | \
- * V \
- * Q2 ---> S2 \
- * | S / |
- * F| / |
- * | F/ |
- * | / |
- * | R |
- * | / |
- * F VL |
- * <------U |
- * back |S |
- * \______________/
+ * |<-----
+ * | \
+ * V |S
+ * Q2 ---> U----->backtrack
+ * | F /
+ * S| /
+ * V F /
+ * S2--/
*/
-void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
- RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- int choice_count = alternatives_->length();
+GreedyLoopState::GreedyLoopState(bool not_at_start) {
+ counter_backtrack_trace_.set_backtrack(&label_);
+ if (not_at_start) counter_backtrack_trace_.set_at_start(false);
+}
+
+
+void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
#ifdef DEBUG
+ int choice_count = alternatives_->length();
for (int i = 0; i < choice_count - 1; i++) {
GuardedAlternative alternative = alternatives_->at(i);
ZoneList<Guard*>* guards = alternative.guards();
}
}
#endif
+}
+
+
+void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
+ Trace* current_trace,
+ PreloadState* state) {
+ if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
+ // Save some time by looking at most one machine word ahead.
+ state->eats_at_least_ =
+ EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
+ current_trace->at_start() == Trace::FALSE_VALUE);
+ }
+ state->preload_characters_ =
+ CalculatePreloadCharacters(compiler, state->eats_at_least_);
+
+ state->preload_is_current_ =
+ (current_trace->characters_preloaded() == state->preload_characters_);
+ state->preload_has_checked_bounds_ = state->preload_is_current_;
+}
+
+
+void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+ int choice_count = alternatives_->length();
+
+ AssertGuardsMentionRegisters(trace);
LimitResult limit_result = LimitVersions(compiler, trace);
if (limit_result == DONE) return;
DCHECK(limit_result == CONTINUE);
- int new_flush_budget = trace->flush_budget() / choice_count;
+ // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
+ // other choice nodes we only flush if we are out of code size budget.
if (trace->flush_budget() == 0 && trace->actions() != NULL) {
trace->Flush(compiler, this);
return;
RecursionCheck rc(compiler);
- Trace* current_trace = trace;
+ PreloadState preload;
+ preload.init();
+ GreedyLoopState greedy_loop_state(not_at_start());
- int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
- bool greedy_loop = false;
- Label greedy_loop_label;
- Trace counter_backtrack_trace;
- counter_backtrack_trace.set_backtrack(&greedy_loop_label);
- if (not_at_start()) counter_backtrack_trace.set_at_start(false);
+ int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
+ AlternativeGenerationList alt_gens(choice_count, zone());
if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
- // Here we have special handling for greedy loops containing only text nodes
- // and other simple nodes. These are handled by pushing the current
- // position on the stack and then incrementing the current position each
- // time around the switch. On backtrack we decrement the current position
- // and check it against the pushed value. This avoids pushing backtrack
- // information for each iteration of the loop, which could take up a lot of
- // space.
- greedy_loop = true;
- DCHECK(trace->stop_node() == NULL);
- macro_assembler->PushCurrentPosition();
- current_trace = &counter_backtrack_trace;
- Label greedy_match_failed;
- Trace greedy_match_trace;
- if (not_at_start()) greedy_match_trace.set_at_start(false);
- greedy_match_trace.set_backtrack(&greedy_match_failed);
- Label loop_label;
- macro_assembler->Bind(&loop_label);
- greedy_match_trace.set_stop_node(this);
- greedy_match_trace.set_loop_label(&loop_label);
- alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
- macro_assembler->Bind(&greedy_match_failed);
+ trace = EmitGreedyLoop(compiler,
+ trace,
+ &alt_gens,
+ &preload,
+ &greedy_loop_state,
+ text_length);
+ } else {
+ // TODO(erikcorry): Delete this. We don't need this label, but it makes us
+ // match the traces produced pre-cleanup.
+ Label second_choice;
+ compiler->macro_assembler()->Bind(&second_choice);
+
+ preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
+
+ EmitChoices(compiler,
+ &alt_gens,
+ 0,
+ trace,
+ &preload);
+ }
+
+ // At this point we need to generate slow checks for the alternatives where
+ // the quick check was inlined. We can recognize these because the associated
+ // label was bound.
+ int new_flush_budget = trace->flush_budget() / choice_count;
+ for (int i = 0; i < choice_count; i++) {
+ AlternativeGeneration* alt_gen = alt_gens.at(i);
+ Trace new_trace(*trace);
+ // If there are actions to be flushed we have to limit how many times
+ // they are flushed. Take the budget of the parent trace and distribute
+ // it fairly amongst the children.
+ if (new_trace.actions() != NULL) {
+ new_trace.set_flush_budget(new_flush_budget);
+ }
+ bool next_expects_preload =
+ i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
+ EmitOutOfLineContinuation(compiler,
+ &new_trace,
+ alternatives_->at(i),
+ alt_gen,
+ preload.preload_characters_,
+ next_expects_preload);
}
+}
+
+
+Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
+ Trace* trace,
+ AlternativeGenerationList* alt_gens,
+ PreloadState* preload,
+ GreedyLoopState* greedy_loop_state,
+ int text_length) {
+ RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ // Here we have special handling for greedy loops containing only text nodes
+ // and other simple nodes. These are handled by pushing the current
+ // position on the stack and then incrementing the current position each
+ // time around the switch. On backtrack we decrement the current position
+ // and check it against the pushed value. This avoids pushing backtrack
+ // information for each iteration of the loop, which could take up a lot of
+ // space.
+ DCHECK(trace->stop_node() == NULL);
+ macro_assembler->PushCurrentPosition();
+ Label greedy_match_failed;
+ Trace greedy_match_trace;
+ if (not_at_start()) greedy_match_trace.set_at_start(false);
+ greedy_match_trace.set_backtrack(&greedy_match_failed);
+ Label loop_label;
+ macro_assembler->Bind(&loop_label);
+ greedy_match_trace.set_stop_node(this);
+ greedy_match_trace.set_loop_label(&loop_label);
+ alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
+ macro_assembler->Bind(&greedy_match_failed);
Label second_choice; // For use in greedy matches.
macro_assembler->Bind(&second_choice);
- int first_normal_choice = greedy_loop ? 1 : 0;
-
- bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;
- const int kEatsAtLeastNotYetInitialized = -1;
- int eats_at_least = kEatsAtLeastNotYetInitialized;
-
- bool skip_was_emitted = false;
-
- if (!greedy_loop && choice_count == 2) {
- GuardedAlternative alt1 = alternatives_->at(1);
- if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
- RegExpNode* eats_anything_node = alt1.node();
- if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==
- this) {
- // At this point we know that we are at a non-greedy loop that will eat
- // any character one at a time. Any non-anchored regexp has such a
- // loop prepended to it in order to find where it starts. We look for
- // a pattern of the form ...abc... where we can look 6 characters ahead
- // and step forwards 3 if the character is not one of abc. Abc need
- // not be atoms, they can be any reasonably limited character class or
- // small alternation.
- DCHECK(trace->is_trivial()); // This is the case on LoopChoiceNodes.
- BoyerMooreLookahead* lookahead = bm_info(not_at_start);
- if (lookahead == NULL) {
- eats_at_least = Min(kMaxLookaheadForBoyerMoore,
- EatsAtLeast(kMaxLookaheadForBoyerMoore,
- kRecursionBudget,
- not_at_start));
- if (eats_at_least >= 1) {
- BoyerMooreLookahead* bm =
- new(zone()) BoyerMooreLookahead(eats_at_least,
- compiler,
- zone());
- GuardedAlternative alt0 = alternatives_->at(0);
- alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
- skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
- }
- } else {
- skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
- }
- }
- }
+ Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
+
+ EmitChoices(compiler,
+ alt_gens,
+ 1,
+ new_trace,
+ preload);
+
+ macro_assembler->Bind(greedy_loop_state->label());
+ // If we have unwound to the bottom then backtrack.
+ macro_assembler->CheckGreedyLoop(trace->backtrack());
+ // Otherwise try the second priority at an earlier position.
+ macro_assembler->AdvanceCurrentPosition(-text_length);
+ macro_assembler->GoTo(&second_choice);
+ return new_trace;
+}
+
+int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
+ Trace* trace) {
+ int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
+ if (alternatives_->length() != 2) return eats_at_least;
+
+ GuardedAlternative alt1 = alternatives_->at(1);
+ if (alt1.guards() != NULL && alt1.guards()->length() != 0) {
+ return eats_at_least;
}
+ RegExpNode* eats_anything_node = alt1.node();
+ if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
+ return eats_at_least;
+ }
+
+ // Really we should be creating a new trace when we execute this function,
+ // but there is no need, because the code it generates cannot backtrack, and
+ // we always arrive here with a trivial trace (since it's the entry to a
+ // loop). That also implies that there are no preloaded characters, which is
+ // good, because it means we won't be violating any assumptions by
+ // overwriting those characters with new load instructions.
+ DCHECK(trace->is_trivial());
- if (eats_at_least == kEatsAtLeastNotYetInitialized) {
- // Save some time by looking at most one machine word ahead.
- eats_at_least =
- EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);
+ RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ // At this point we know that we are at a non-greedy loop that will eat
+ // any character one at a time. Any non-anchored regexp has such a
+ // loop prepended to it in order to find where it starts. We look for
+ // a pattern of the form ...abc... where we can look 6 characters ahead
+ // and step forwards 3 if the character is not one of abc. Abc need
+ // not be atoms, they can be any reasonably limited character class or
+ // small alternation.
+ BoyerMooreLookahead* bm = bm_info(false);
+ if (bm == NULL) {
+ eats_at_least = Min(kMaxLookaheadForBoyerMoore,
+ EatsAtLeast(kMaxLookaheadForBoyerMoore,
+ kRecursionBudget,
+ false));
+ if (eats_at_least >= 1) {
+ bm = new(zone()) BoyerMooreLookahead(eats_at_least,
+ compiler,
+ zone());
+ GuardedAlternative alt0 = alternatives_->at(0);
+ alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false);
+ }
+ }
+ if (bm != NULL) {
+ bm->EmitSkipInstructions(macro_assembler);
}
- int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
+ return eats_at_least;
+}
- bool preload_is_current = !skip_was_emitted &&
- (current_trace->characters_preloaded() == preload_characters);
- bool preload_has_checked_bounds = preload_is_current;
- AlternativeGenerationList alt_gens(choice_count, zone());
+void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
+ AlternativeGenerationList* alt_gens,
+ int first_choice,
+ Trace* trace,
+ PreloadState* preload) {
+ RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ SetUpPreLoad(compiler, trace, preload);
// For now we just call all choices one after the other. The idea ultimately
// is to use the Dispatch table to try only the relevant ones.
- for (int i = first_normal_choice; i < choice_count; i++) {
+ int choice_count = alternatives_->length();
+
+ int new_flush_budget = trace->flush_budget() / choice_count;
+
+ for (int i = first_choice; i < choice_count; i++) {
+ bool is_last = i == choice_count - 1;
+ bool fall_through_on_failure = !is_last;
GuardedAlternative alternative = alternatives_->at(i);
- AlternativeGeneration* alt_gen = alt_gens.at(i);
- alt_gen->quick_check_details.set_characters(preload_characters);
+ AlternativeGeneration* alt_gen = alt_gens->at(i);
+ alt_gen->quick_check_details.set_characters(preload->preload_characters_);
ZoneList<Guard*>* guards = alternative.guards();
int guard_count = (guards == NULL) ? 0 : guards->length();
- Trace new_trace(*current_trace);
- new_trace.set_characters_preloaded(preload_is_current ?
- preload_characters :
+ Trace new_trace(*trace);
+ new_trace.set_characters_preloaded(preload->preload_is_current_ ?
+ preload->preload_characters_ :
0);
- if (preload_has_checked_bounds) {
- new_trace.set_bound_checked_up_to(preload_characters);
+ if (preload->preload_has_checked_bounds_) {
+ new_trace.set_bound_checked_up_to(preload->preload_characters_);
}
new_trace.quick_check_performed()->Clear();
if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
- alt_gen->expects_preload = preload_is_current;
+ if (!is_last) {
+ new_trace.set_backtrack(&alt_gen->after);
+ }
+ alt_gen->expects_preload = preload->preload_is_current_;
bool generate_full_check_inline = false;
if (FLAG_regexp_optimization &&
- try_to_emit_quick_check_for_alternative(i) &&
+ try_to_emit_quick_check_for_alternative(i == 0) &&
alternative.node()->EmitQuickCheck(compiler,
+ trace,
&new_trace,
- preload_has_checked_bounds,
+ preload->preload_has_checked_bounds_,
&alt_gen->possible_success,
&alt_gen->quick_check_details,
- i < choice_count - 1)) {
+ fall_through_on_failure)) {
// Quick check was generated for this choice.
- preload_is_current = true;
- preload_has_checked_bounds = true;
- // On the last choice in the ChoiceNode we generated the quick
- // check to fall through on possible success. So now we need to
- // generate the full check inline.
- if (i == choice_count - 1) {
+ preload->preload_is_current_ = true;
+ preload->preload_has_checked_bounds_ = true;
+ // If we generated the quick check to fall through on possible success,
+ // we now need to generate the full check inline.
+ if (!fall_through_on_failure) {
macro_assembler->Bind(&alt_gen->possible_success);
new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
- new_trace.set_characters_preloaded(preload_characters);
- new_trace.set_bound_checked_up_to(preload_characters);
+ new_trace.set_characters_preloaded(preload->preload_characters_);
+ new_trace.set_bound_checked_up_to(preload->preload_characters_);
generate_full_check_inline = true;
}
} else if (alt_gen->quick_check_details.cannot_match()) {
- if (i == choice_count - 1 && !greedy_loop) {
+ if (!fall_through_on_failure) {
macro_assembler->GoTo(trace->backtrack());
}
continue;
// previous cases that go here when they fail. There's no reason to
// insist that they preload characters since the slow check we are about
// to generate probably can't use it.
- if (i != first_normal_choice) {
+ if (i != first_choice) {
alt_gen->expects_preload = false;
new_trace.InvalidateCurrentCharacter();
}
- if (i < choice_count - 1) {
- new_trace.set_backtrack(&alt_gen->after);
- }
generate_full_check_inline = true;
}
if (generate_full_check_inline) {
GenerateGuard(macro_assembler, guards->at(j), &new_trace);
}
alternative.node()->Emit(compiler, &new_trace);
- preload_is_current = false;
+ preload->preload_is_current_ = false;
}
macro_assembler->Bind(&alt_gen->after);
}
- if (greedy_loop) {
- macro_assembler->Bind(&greedy_loop_label);
- // If we have unwound to the bottom then backtrack.
- macro_assembler->CheckGreedyLoop(trace->backtrack());
- // Otherwise try the second priority at an earlier position.
- macro_assembler->AdvanceCurrentPosition(-text_length);
- macro_assembler->GoTo(&second_choice);
- }
-
- // At this point we need to generate slow checks for the alternatives where
- // the quick check was inlined. We can recognize these because the associated
- // label was bound.
- for (int i = first_normal_choice; i < choice_count - 1; i++) {
- AlternativeGeneration* alt_gen = alt_gens.at(i);
- Trace new_trace(*current_trace);
- // If there are actions to be flushed we have to limit how many times
- // they are flushed. Take the budget of the parent trace and distribute
- // it fairly amongst the children.
- if (new_trace.actions() != NULL) {
- new_trace.set_flush_budget(new_flush_budget);
- }
- EmitOutOfLineContinuation(compiler,
- &new_trace,
- alternatives_->at(i),
- alt_gen,
- preload_characters,
- alt_gens.at(i + 1)->expects_preload);
- }
}
void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
- bool is_ascii,
- Zone* zone) {
+ bool is_one_byte, Zone* zone) {
Isolate* isolate = zone->isolate();
uc16 bottom = from();
uc16 top = to();
- if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {
+ if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
if (bottom > String::kMaxOneByteCharCode) return;
if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
}
void Analysis::VisitText(TextNode* that) {
if (ignore_case_) {
- that->MakeCaseIndependent(is_ascii_);
+ that->MakeCaseIndependent(is_one_byte_);
}
EnsureAnalyzed(that->on_success());
if (!has_failed()) {
RegExpEngine::CompilationResult RegExpEngine::Compile(
- RegExpCompileData* data,
- bool ignore_case,
- bool is_global,
- bool is_multiline,
- Handle<String> pattern,
- Handle<String> sample_subject,
- bool is_ascii,
- Zone* zone) {
+ RegExpCompileData* data, bool ignore_case, bool is_global,
+ bool is_multiline, bool is_sticky, Handle<String> pattern,
+ Handle<String> sample_subject, bool is_one_byte, Zone* zone) {
if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
return IrregexpRegExpTooBig(zone->isolate());
}
- RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone);
+ RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
// Sample some characters from the middle of the string.
static const int kSampleSize = 128;
bool is_end_anchored = data->tree->IsAnchoredAtEnd();
bool is_start_anchored = data->tree->IsAnchoredAtStart();
int max_length = data->tree->max_match();
- if (!is_start_anchored) {
+ if (!is_start_anchored && !is_sticky) {
// Add a .*? at the beginning, outside the body capture, unless
- // this expression is anchored at the beginning.
+ // this expression is anchored at the beginning or sticky.
RegExpNode* loop_node =
RegExpQuantifier::ToNode(0,
RegExpTree::kInfinity,
node = loop_node;
}
}
- if (is_ascii) {
- node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
+ if (is_one_byte) {
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
// Do it again to propagate the new nodes to places where they were not
// put because they had not been calculated yet.
if (node != NULL) {
- node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
+ node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
}
}
if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
data->node = node;
- Analysis analysis(ignore_case, is_ascii);
+ Analysis analysis(ignore_case, is_one_byte);
analysis.EnsureAnalyzed(node);
if (analysis.has_failed()) {
const char* error_message = analysis.error_message();
// Native regexp implementation.
NativeRegExpMacroAssembler::Mode mode =
- is_ascii ? NativeRegExpMacroAssembler::ASCII
- : NativeRegExpMacroAssembler::UC16;
+ is_one_byte ? NativeRegExpMacroAssembler::LATIN1
+ : NativeRegExpMacroAssembler::UC16;
#if V8_TARGET_ARCH_IA32
RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,