Upstream version 10.39.225.0

[platform/framework/web/crosswalk.git] / src / v8 / src / jsregexp.cc
diff --git a/src/v8/src/jsregexp.cc b/src/v8/src/jsregexp.cc

index 27b8699..98aca72 100644 (file)
--- a/src/v8/src/jsregexp.cc
+++ b/src/v8/src/jsregexp.cc
@@ -53,7 +53,7 @@ MaybeHandle<Object> RegExpImpl::CreateRegExpLiteral(
      Handle<String> flags) {
    // Call the construct code with 2 arguments.
    Handle<Object> argv[] = { pattern, flags };
-  return Execution::New(constructor, ARRAY_SIZE(argv), argv);
+  return Execution::New(constructor, arraysize(argv), argv);
  }
  
  
@@ -70,6 +70,9 @@ static JSRegExp::Flags RegExpFlagsFromString(Handle<String> str) {
        case 'm':
          flags |= JSRegExp::MULTILINE;
          break;
+      case 'y':
+        if (FLAG_harmony_regexps) flags |= JSRegExp::STICKY;
+        break;
      }
    }
    return JSRegExp::Flags(flags);
@@ -88,8 +91,8 @@ static inline MaybeHandle<Object> ThrowRegExpException(
    elements->set(0, *pattern);
    elements->set(1, *error_text);
    Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
-  Handle<Object> regexp_err = factory->NewSyntaxError(message, array);
-  return isolate->Throw<Object>(regexp_err);
+  Handle<Object> regexp_err;
+  THROW_NEW_ERROR(isolate, NewSyntaxError(message, array), Object);
  }
  
  
@@ -185,12 +188,14 @@ MaybeHandle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
  
    if (parse_result.simple &&
        !flags.is_ignore_case() &&
+      !flags.is_sticky() &&
        !HasFewDifferentCharacters(pattern)) {
      // Parse-tree is a single atom that is equal to the pattern.
      AtomCompile(re, pattern, flags, pattern);
      has_been_compiled = true;
    } else if (parse_result.tree->IsAtom() &&
        !flags.is_ignore_case() &&
+      !flags.is_sticky() &&
        parse_result.capture_count == 0) {
      RegExpAtom* atom = parse_result.tree->AsAtom();
      Vector<const uc16> atom_pattern = atom->data();
@@ -290,25 +295,18 @@ int RegExpImpl::AtomExecRaw(Handle<JSRegExp> regexp,
      DCHECK(needle_content.IsFlat());
      DCHECK(subject_content.IsFlat());
      // dispatch on type of strings
-    index = (needle_content.IsAscii()
-             ? (subject_content.IsAscii()
-                ? SearchString(isolate,
-                               subject_content.ToOneByteVector(),
-                               needle_content.ToOneByteVector(),
-                               index)
-                : SearchString(isolate,
-                               subject_content.ToUC16Vector(),
-                               needle_content.ToOneByteVector(),
-                               index))
-             : (subject_content.IsAscii()
-                ? SearchString(isolate,
-                               subject_content.ToOneByteVector(),
-                               needle_content.ToUC16Vector(),
-                               index)
-                : SearchString(isolate,
-                               subject_content.ToUC16Vector(),
-                               needle_content.ToUC16Vector(),
-                               index)));
+    index =
+        (needle_content.IsOneByte()
+             ? (subject_content.IsOneByte()
+                    ? SearchString(isolate, subject_content.ToOneByteVector(),
+                                   needle_content.ToOneByteVector(), index)
+                    : SearchString(isolate, subject_content.ToUC16Vector(),
+                                   needle_content.ToOneByteVector(), index))
+             : (subject_content.IsOneByte()
+                    ? SearchString(isolate, subject_content.ToOneByteVector(),
+                                   needle_content.ToUC16Vector(), index)
+                    : SearchString(isolate, subject_content.ToUC16Vector(),
+                                   needle_content.ToUC16Vector(), index)));
      if (index == -1) {
        return i / 2;  // Return number of matches.
      } else {
@@ -346,14 +344,15 @@ Handle<Object> RegExpImpl::AtomExec(Handle<JSRegExp> re,
  // Irregexp implementation.
  
  // Ensures that the regexp object contains a compiled version of the
-// source for either ASCII or non-ASCII strings.
+// source for either one-byte or two-byte subject strings.
  // If the compiled version doesn't already exist, it is compiled
  // from the source pattern.
  // If compilation fails, an exception is thrown and this function
  // returns false.
-bool RegExpImpl::EnsureCompiledIrregexp(
-    Handle<JSRegExp> re, Handle<String> sample_subject, bool is_ascii) {
-  Object* compiled_code = re->DataAt(JSRegExp::code_index(is_ascii));
+bool RegExpImpl::EnsureCompiledIrregexp(Handle<JSRegExp> re,
+                                        Handle<String> sample_subject,
+                                        bool is_one_byte) {
+  Object* compiled_code = re->DataAt(JSRegExp::code_index(is_one_byte));
  #ifdef V8_INTERPRETED_REGEXP
    if (compiled_code->IsByteArray()) return true;
  #else  // V8_INTERPRETED_REGEXP (RegExp native code)
@@ -361,19 +360,18 @@ bool RegExpImpl::EnsureCompiledIrregexp(
  #endif
    // We could potentially have marked this as flushable, but have kept
    // a saved version if we did not flush it yet.
-  Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_ascii));
+  Object* saved_code = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
    if (saved_code->IsCode()) {
      // Reinstate the code in the original place.
-    re->SetDataAt(JSRegExp::code_index(is_ascii), saved_code);
+    re->SetDataAt(JSRegExp::code_index(is_one_byte), saved_code);
      DCHECK(compiled_code->IsSmi());
      return true;
    }
-  return CompileIrregexp(re, sample_subject, is_ascii);
+  return CompileIrregexp(re, sample_subject, is_one_byte);
  }
  
  
-static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
-                                            bool is_ascii,
+static void CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
                                              Handle<String> error_message,
                                              Isolate* isolate) {
    Factory* factory = isolate->factory();
@@ -381,23 +379,23 @@ static bool CreateRegExpErrorObjectAndThrow(Handle<JSRegExp> re,
    elements->set(0, re->Pattern());
    elements->set(1, *error_message);
    Handle<JSArray> array = factory->NewJSArrayWithElements(elements);
-  Handle<Object> regexp_err =
+  Handle<Object> error;
+  MaybeHandle<Object> maybe_error =
        factory->NewSyntaxError("malformed_regexp", array);
-  isolate->Throw(*regexp_err);
-  return false;
+  if (maybe_error.ToHandle(&error)) isolate->Throw(*error);
  }
  
  
  bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
                                   Handle<String> sample_subject,
-                                 bool is_ascii) {
+                                 bool is_one_byte) {
    // Compile the RegExp.
    Isolate* isolate = re->GetIsolate();
    Zone zone(isolate);
    PostponeInterruptsScope postpone(isolate);
    // If we had a compilation error the last time this is saved at the
    // saved code index.
-  Object* entry = re->DataAt(JSRegExp::code_index(is_ascii));
+  Object* entry = re->DataAt(JSRegExp::code_index(is_one_byte));
    // When arriving here entry can only be a smi, either representing an
    // uncompiled regexp, a previous compilation error, or code that has
    // been flushed.
@@ -411,10 +409,10 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
      // A previous compilation failed and threw an error which we store in
      // the saved code index (we store the error message, not the actual
      // error). Recreate the error object and throw it.
-    Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_ascii));
+    Object* error_string = re->DataAt(JSRegExp::saved_code_index(is_one_byte));
      DCHECK(error_string->IsString());
      Handle<String> error_message(String::cast(error_string));
-    CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
+    CreateRegExpErrorObjectAndThrow(re, error_message, isolate);
      return false;
    }
  
@@ -435,25 +433,20 @@ bool RegExpImpl::CompileIrregexp(Handle<JSRegExp> re,
                               "malformed_regexp"));
      return false;
    }
-  RegExpEngine::CompilationResult result =
-      RegExpEngine::Compile(&compile_data,
-                            flags.is_ignore_case(),
-                            flags.is_global(),
-                            flags.is_multiline(),
-                            pattern,
-                            sample_subject,
-                            is_ascii,
-                            &zone);
+  RegExpEngine::CompilationResult result = RegExpEngine::Compile(
+      &compile_data, flags.is_ignore_case(), flags.is_global(),
+      flags.is_multiline(), flags.is_sticky(), pattern, sample_subject,
+      is_one_byte, &zone);
    if (result.error_message != NULL) {
      // Unable to compile regexp.
      Handle<String> error_message = isolate->factory()->NewStringFromUtf8(
          CStrVector(result.error_message)).ToHandleChecked();
-    CreateRegExpErrorObjectAndThrow(re, is_ascii, error_message, isolate);
+    CreateRegExpErrorObjectAndThrow(re, error_message, isolate);
      return false;
    }
  
    Handle<FixedArray> data = Handle<FixedArray>(FixedArray::cast(re->data()));
-  data->set(JSRegExp::code_index(is_ascii), result.code);
+  data->set(JSRegExp::code_index(is_one_byte), result.code);
    int register_max = IrregexpMaxRegisterCount(*data);
    if (result.num_registers > register_max) {
      SetIrregexpMaxRegisterCount(*data, result.num_registers);
@@ -484,13 +477,13 @@ int RegExpImpl::IrregexpNumberOfRegisters(FixedArray* re) {
  }
  
  
-ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_ascii) {
-  return ByteArray::cast(re->get(JSRegExp::code_index(is_ascii)));
+ByteArray* RegExpImpl::IrregexpByteCode(FixedArray* re, bool is_one_byte) {
+  return ByteArray::cast(re->get(JSRegExp::code_index(is_one_byte)));
  }
  
  
-Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_ascii) {
-  return Code::cast(re->get(JSRegExp::code_index(is_ascii)));
+Code* RegExpImpl::IrregexpNativeCode(FixedArray* re, bool is_one_byte) {
+  return Code::cast(re->get(JSRegExp::code_index(is_one_byte)));
  }
  
  
@@ -511,9 +504,9 @@ int RegExpImpl::IrregexpPrepare(Handle<JSRegExp> regexp,
                                  Handle<String> subject) {
    subject = String::Flatten(subject);
  
-  // Check the asciiness of the underlying storage.
-  bool is_ascii = subject->IsOneByteRepresentationUnderneath();
-  if (!EnsureCompiledIrregexp(regexp, subject, is_ascii)) return -1;
+  // Check representation of the underlying storage.
+  bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
+  if (!EnsureCompiledIrregexp(regexp, subject, is_one_byte)) return -1;
  
  #ifdef V8_INTERPRETED_REGEXP
    // Byte-code regexp needs space allocated for all its registers.
@@ -543,13 +536,13 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
    DCHECK(index <= subject->length());
    DCHECK(subject->IsFlat());
  
-  bool is_ascii = subject->IsOneByteRepresentationUnderneath();
+  bool is_one_byte = subject->IsOneByteRepresentationUnderneath();
  
  #ifndef V8_INTERPRETED_REGEXP
    DCHECK(output_size >= (IrregexpNumberOfCaptures(*irregexp) + 1) * 2);
    do {
-    EnsureCompiledIrregexp(regexp, subject, is_ascii);
-    Handle<Code> code(IrregexpNativeCode(*irregexp, is_ascii), isolate);
+    EnsureCompiledIrregexp(regexp, subject, is_one_byte);
+    Handle<Code> code(IrregexpNativeCode(*irregexp, is_one_byte), isolate);
      // The stack is used to allocate registers for the compiled regexp code.
      // This means that in case of failure, the output registers array is left
      // untouched and contains the capture results from the previous successful
@@ -576,10 +569,10 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
      // must restart from scratch.
      // In this case, it means we must make sure we are prepared to handle
      // the, potentially, different subject (the string can switch between
-    // being internal and external, and even between being ASCII and UC16,
+    // being internal and external, and even between being Latin1 and UC16,
      // but the characters are always the same).
      IrregexpPrepare(regexp, subject);
-    is_ascii = subject->IsOneByteRepresentationUnderneath();
+    is_one_byte = subject->IsOneByteRepresentationUnderneath();
    } while (true);
    UNREACHABLE();
    return RE_EXCEPTION;
@@ -597,7 +590,8 @@ int RegExpImpl::IrregexpExecRaw(Handle<JSRegExp> regexp,
    for (int i = number_of_capture_registers - 1; i >= 0; i--) {
      raw_output[i] = -1;
    }
-  Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_ascii), isolate);
+  Handle<ByteArray> byte_codes(IrregexpByteCode(*irregexp, is_one_byte),
+                               isolate);
  
    IrregexpResult result = IrregexpInterpreter::Match(isolate,
                                                       byte_codes,
@@ -998,7 +992,7 @@ class FrequencyCollator {
  
  class RegExpCompiler {
   public:
-  RegExpCompiler(int capture_count, bool ignore_case, bool is_ascii,
+  RegExpCompiler(int capture_count, bool ignore_case, bool is_one_byte,
                   Zone* zone);
  
    int AllocateRegister() {
@@ -1031,7 +1025,7 @@ class RegExpCompiler {
    void SetRegExpTooBig() { reg_exp_too_big_ = true; }
  
    inline bool ignore_case() { return ignore_case_; }
-  inline bool ascii() { return ascii_; }
+  inline bool one_byte() { return one_byte_; }
    FrequencyCollator* frequency_collator() { return &frequency_collator_; }
  
    int current_expansion_factor() { return current_expansion_factor_; }
@@ -1050,7 +1044,7 @@ class RegExpCompiler {
    int recursion_depth_;
    RegExpMacroAssembler* macro_assembler_;
    bool ignore_case_;
-  bool ascii_;
+  bool one_byte_;
    bool reg_exp_too_big_;
    int current_expansion_factor_;
    FrequencyCollator frequency_collator_;
@@ -1076,13 +1070,13 @@ static RegExpEngine::CompilationResult IrregexpRegExpTooBig(Isolate* isolate) {
  
  // Attempts to compile the regexp using an Irregexp code generator.  Returns
  // a fixed array or a null handle depending on whether it succeeded.
-RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case, bool ascii,
-                               Zone* zone)
+RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case,
+                               bool one_byte, Zone* zone)
      : next_register_(2 * (capture_count + 1)),
        work_list_(NULL),
        recursion_depth_(0),
        ignore_case_(ignore_case),
-      ascii_(ascii),
+      one_byte_(one_byte),
        reg_exp_too_big_(false),
        current_expansion_factor_(1),
        frequency_collator_(),
@@ -1593,9 +1587,8 @@ void ChoiceNode::GenerateGuard(RegExpMacroAssembler* macro_assembler,
  
  // Returns the number of characters in the equivalence class, omitting those
  // that cannot occur in the source string because it is ASCII.
-static int GetCaseIndependentLetters(Isolate* isolate,
-                                     uc16 character,
-                                     bool ascii_subject,
+static int GetCaseIndependentLetters(Isolate* isolate, uc16 character,
+                                     bool one_byte_subject,
                                       unibrow::uchar* letters) {
    int length =
        isolate->jsregexp_uncanonicalize()->get(character, '\0', letters);
@@ -1605,11 +1598,14 @@ static int GetCaseIndependentLetters(Isolate* isolate,
      letters[0] = character;
      length = 1;
    }
-  if (!ascii_subject || character <= String::kMaxOneByteCharCode) {
+  if (!one_byte_subject || character <= String::kMaxOneByteCharCode) {
      return length;
    }
+
    // The standard requires that non-ASCII characters cannot have ASCII
    // character codes in their equivalence class.
+  // TODO(dcarney): issue 3550 this is not actually true for Latin1 anymore,
+  // is it?  For example, \u00C5 is equivalent to \u212B.
    return 0;
  }
  
@@ -1645,18 +1641,19 @@ static inline bool EmitAtomNonLetter(Isolate* isolate,
                                       bool check,
                                       bool preloaded) {
    RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
-  bool ascii = compiler->ascii();
+  bool one_byte = compiler->one_byte();
    unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-  int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
+  int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
    if (length < 1) {
-    // This can't match.  Must be an ASCII subject and a non-ASCII character.
-    // We do not need to do anything since the ASCII pass already handled this.
+    // This can't match.  Must be a one-byte subject and a non-one-byte
+    // character.  We do not need to do anything since the one-byte pass
+    // already handled this.
      return false;  // Bounds not checked.
    }
    bool checked = false;
    // We handle the length > 1 case in a later pass.
    if (length == 1) {
-    if (ascii && c > String::kMaxOneByteCharCodeU) {
+    if (one_byte && c > String::kMaxOneByteCharCodeU) {
        // Can't match - see above.
        return false;  // Bounds not checked.
      }
@@ -1671,12 +1668,10 @@ static inline bool EmitAtomNonLetter(Isolate* isolate,
  
  
  static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
-                                      bool ascii,
-                                      uc16 c1,
-                                      uc16 c2,
+                                      bool one_byte, uc16 c1, uc16 c2,
                                        Label* on_failure) {
    uc16 char_mask;
-  if (ascii) {
+  if (one_byte) {
      char_mask = String::kMaxOneByteCharCode;
    } else {
      char_mask = String::kMaxUtf16CodeUnit;
@@ -1727,9 +1722,9 @@ static inline bool EmitAtomLetter(Isolate* isolate,
                                    bool check,
                                    bool preloaded) {
    RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
-  bool ascii = compiler->ascii();
+  bool one_byte = compiler->one_byte();
    unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-  int length = GetCaseIndependentLetters(isolate, c, ascii, chars);
+  int length = GetCaseIndependentLetters(isolate, c, one_byte, chars);
    if (length <= 1) return false;
    // We may not need to check against the end of the input string
    // if this character lies before a character that matched.
@@ -1740,11 +1735,8 @@ static inline bool EmitAtomLetter(Isolate* isolate,
    DCHECK(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
    switch (length) {
      case 2: {
-      if (ShortCutEmitCharacterPair(macro_assembler,
-                                    ascii,
-                                    chars[0],
-                                    chars[1],
-                                    on_failure)) {
+      if (ShortCutEmitCharacterPair(macro_assembler, one_byte, chars[0],
+                                    chars[1], on_failure)) {
        } else {
          macro_assembler->CheckCharacter(chars[0], &ok);
          macro_assembler->CheckNotCharacter(chars[1], on_failure);
@@ -1919,7 +1911,7 @@ static void SplitSearchSpace(ZoneList<int>* ranges,
    // new_start_index is the index of the first edge that is beyond the
    // current kSize space.
  
-  // For very large search spaces we do a binary chop search of the non-ASCII
+  // For very large search spaces we do a binary chop search of the non-Latin1
    // space instead of just going to the end of the current kSize space.  The
    // heuristics are complicated a little by the fact that any 128-character
    // encoding space can be quickly tested with a table lookup, so we don't
@@ -1928,14 +1920,13 @@ static void SplitSearchSpace(ZoneList<int>* ranges,
    // for example, we only want to match every second character (eg. the lower
    // case characters on some Unicode pages).
    int binary_chop_index = (end_index + start_index) / 2;
-  // The first test ensures that we get to the code that handles the ASCII
+  // The first test ensures that we get to the code that handles the Latin1
    // range with a single not-taken branch, speeding up this important
-  // character range (even non-ASCII charset-based text has spaces and
+  // character range (even non-Latin1 charset-based text has spaces and
    // punctuation).
-  if (*border - 1 > String::kMaxOneByteCharCode &&  // ASCII case.
+  if (*border - 1 > String::kMaxOneByteCharCode &&  // Latin1 case.
        end_index - start_index > (*new_start_index - start_index) * 2 &&
-      last - first > kSize * 2 &&
-      binary_chop_index > *new_start_index &&
+      last - first > kSize * 2 && binary_chop_index > *new_start_index &&
        ranges->at(binary_chop_index) >= first + 2 * kSize) {
      int scan_forward_for_section_border = binary_chop_index;;
      int new_border = (ranges->at(binary_chop_index) | kMask) + 1;
@@ -2122,20 +2113,16 @@ static void GenerateBranches(RegExpMacroAssembler* masm,
  
  
  static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
-                          RegExpCharacterClass* cc,
-                          bool ascii,
-                          Label* on_failure,
-                          int cp_offset,
-                          bool check_offset,
-                          bool preloaded,
-                          Zone* zone) {
+                          RegExpCharacterClass* cc, bool one_byte,
+                          Label* on_failure, int cp_offset, bool check_offset,
+                          bool preloaded, Zone* zone) {
    ZoneList<CharacterRange>* ranges = cc->ranges(zone);
    if (!CharacterRange::IsCanonical(ranges)) {
      CharacterRange::Canonicalize(ranges);
    }
  
    int max_char;
-  if (ascii) {
+  if (one_byte) {
      max_char = String::kMaxOneByteCharCode;
    } else {
      max_char = String::kMaxUtf16CodeUnit;
@@ -2456,6 +2443,7 @@ bool QuickCheckDetails::Rationalize(bool asc) {
  
  
  bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
+                                Trace* bounds_check_trace,
                                  Trace* trace,
                                  bool preload_has_checked_bounds,
                                  Label* on_possible_success,
@@ -2465,7 +2453,7 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
    GetQuickCheckDetails(
        details, compiler, 0, trace->at_start() == Trace::FALSE_VALUE);
    if (details->cannot_match()) return false;
-  if (!details->Rationalize(compiler->ascii())) return false;
+  if (!details->Rationalize(compiler->one_byte())) return false;
    DCHECK(details->characters() == 1 ||
           compiler->macro_assembler()->CanReadUnaligned());
    uint32_t mask = details->mask();
@@ -2474,8 +2462,13 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
    RegExpMacroAssembler* assembler = compiler->macro_assembler();
  
    if (trace->characters_preloaded() != details->characters()) {
+    DCHECK(trace->cp_offset() == bounds_check_trace->cp_offset());
+    // We are attempting to preload the minimum number of characters
+    // any choice would eat, so if the bounds check fails, then none of the
+    // choices can succeed, so we can just immediately backtrack, rather
+    // than go to the next choice.
      assembler->LoadCurrentCharacter(trace->cp_offset(),
-                                    trace->backtrack(),
+                                    bounds_check_trace->backtrack(),
                                      !preload_has_checked_bounds,
                                      details->characters());
    }
@@ -2487,7 +2480,7 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
      // If number of characters preloaded is 1 then we used a byte or 16 bit
      // load so the value is already masked down.
      uint32_t char_mask;
-    if (compiler->ascii()) {
+    if (compiler->one_byte()) {
        char_mask = String::kMaxOneByteCharCode;
      } else {
        char_mask = String::kMaxUtf16CodeUnit;
@@ -2495,11 +2488,11 @@ bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
      if ((mask & char_mask) == char_mask) need_mask = false;
      mask &= char_mask;
    } else {
-    // For 2-character preloads in ASCII mode or 1-character preloads in
-    // TWO_BYTE mode we also use a 16 bit load with zero extend.
-    if (details->characters() == 2 && compiler->ascii()) {
+    // For 2-character preloads in one-byte mode or 1-character preloads in
+    // two-byte mode we also use a 16 bit load with zero extend.
+    if (details->characters() == 2 && compiler->one_byte()) {
        if ((mask & 0xffff) == 0xffff) need_mask = false;
-    } else if (details->characters() == 1 && !compiler->ascii()) {
+    } else if (details->characters() == 1 && !compiler->one_byte()) {
        if ((mask & 0xffff) == 0xffff) need_mask = false;
      } else {
        if (mask == 0xffffffff) need_mask = false;
@@ -2539,7 +2532,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
    DCHECK(characters_filled_in < details->characters());
    int characters = details->characters();
    int char_mask;
-  if (compiler->ascii()) {
+  if (compiler->one_byte()) {
      char_mask = String::kMaxOneByteCharCode;
    } else {
      char_mask = String::kMaxUtf16CodeUnit;
@@ -2553,18 +2546,20 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
              details->positions(characters_filled_in);
          uc16 c = quarks[i];
          if (c > char_mask) {
-          // If we expect a non-ASCII character from an ASCII string,
-          // there is no way we can match. Not even case independent
-          // matching can turn an ASCII character into non-ASCII or
+          // If we expect a non-Latin1 character from a one-byte string,
+          // there is no way we can match. Not even case-independent
+          // matching can turn a Latin1 character into non-Latin1 or
            // vice versa.
+          // TODO(dcarney): issue 3550.  Verify that this works as expected.
+          // For example, \u0178 is uppercase of \u00ff (y-umlaut).
            details->set_cannot_match();
            pos->determines_perfectly = false;
            return;
          }
          if (compiler->ignore_case()) {
            unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-          int length = GetCaseIndependentLetters(isolate, c, compiler->ascii(),
-                                                 chars);
+          int length = GetCaseIndependentLetters(isolate, c,
+                                                 compiler->one_byte(), chars);
            DCHECK(length != 0);  // Can only happen if c > char_mask (see above).
            if (length == 1) {
              // This letter has no case equivalents, so it's nice and simple
@@ -2693,7 +2688,7 @@ void QuickCheckDetails::Clear() {
  }
  
  
-void QuickCheckDetails::Advance(int by, bool ascii) {
+void QuickCheckDetails::Advance(int by, bool one_byte) {
    DCHECK(by >= 0);
    if (by >= characters_) {
      Clear();
@@ -2757,7 +2752,7 @@ class VisitMarker {
  };
  
  
-RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* SeqRegExpNode::FilterOneByte(int depth, bool ignore_case) {
    if (info()->replacement_calculated) return replacement();
    if (depth < 0) return this;
    DCHECK(!info()->visited);
@@ -2767,7 +2762,7 @@ RegExpNode* SeqRegExpNode::FilterASCII(int depth, bool ignore_case) {
  
  
  RegExpNode* SeqRegExpNode::FilterSuccessor(int depth, bool ignore_case) {
-  RegExpNode* next = on_success_->FilterASCII(depth - 1, ignore_case);
+  RegExpNode* next = on_success_->FilterOneByte(depth - 1, ignore_case);
    if (next == NULL) return set_replacement(NULL);
    on_success_ = next;
    return set_replacement(this);
@@ -2791,7 +2786,7 @@ static bool RangesContainLatin1Equivalents(ZoneList<CharacterRange>* ranges) {
  }
  
  
-RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* TextNode::FilterOneByte(int depth, bool ignore_case) {
    if (info()->replacement_calculated) return replacement();
    if (depth < 0) return this;
    DCHECK(!info()->visited);
@@ -2845,7 +2840,7 @@ RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
  }
  
  
-RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* LoopChoiceNode::FilterOneByte(int depth, bool ignore_case) {
    if (info()->replacement_calculated) return replacement();
    if (depth < 0) return this;
    if (info()->visited) return this;
@@ -2853,17 +2848,17 @@ RegExpNode* LoopChoiceNode::FilterASCII(int depth, bool ignore_case) {
      VisitMarker marker(info());
  
      RegExpNode* continue_replacement =
-        continue_node_->FilterASCII(depth - 1, ignore_case);
+        continue_node_->FilterOneByte(depth - 1, ignore_case);
      // If we can't continue after the loop then there is no sense in doing the
      // loop.
      if (continue_replacement == NULL) return set_replacement(NULL);
    }
  
-  return ChoiceNode::FilterASCII(depth - 1, ignore_case);
+  return ChoiceNode::FilterOneByte(depth - 1, ignore_case);
  }
  
  
-RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
+RegExpNode* ChoiceNode::FilterOneByte(int depth, bool ignore_case) {
    if (info()->replacement_calculated) return replacement();
    if (depth < 0) return this;
    if (info()->visited) return this;
@@ -2883,7 +2878,7 @@ RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
    for (int i = 0; i < choice_count; i++) {
      GuardedAlternative alternative = alternatives_->at(i);
      RegExpNode* replacement =
-        alternative.node()->FilterASCII(depth - 1, ignore_case);
+        alternative.node()->FilterOneByte(depth - 1, ignore_case);
      DCHECK(replacement != this);  // No missing EMPTY_MATCH_CHECK.
      if (replacement != NULL) {
        alternatives_->at(i).set_node(replacement);
@@ -2903,7 +2898,7 @@ RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
        new(zone()) ZoneList<GuardedAlternative>(surviving, zone());
    for (int i = 0; i < choice_count; i++) {
      RegExpNode* replacement =
-        alternatives_->at(i).node()->FilterASCII(depth - 1, ignore_case);
+        alternatives_->at(i).node()->FilterOneByte(depth - 1, ignore_case);
      if (replacement != NULL) {
        alternatives_->at(i).set_node(replacement);
        new_alternatives->Add(alternatives_->at(i), zone());
@@ -2914,8 +2909,8 @@ RegExpNode* ChoiceNode::FilterASCII(int depth, bool ignore_case) {
  }
  
  
-RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth,
-                                                     bool ignore_case) {
+RegExpNode* NegativeLookaheadChoiceNode::FilterOneByte(int depth,
+                                                       bool ignore_case) {
    if (info()->replacement_calculated) return replacement();
    if (depth < 0) return this;
    if (info()->visited) return this;
@@ -2923,12 +2918,12 @@ RegExpNode* NegativeLookaheadChoiceNode::FilterASCII(int depth,
    // Alternative 0 is the negative lookahead, alternative 1 is what comes
    // afterwards.
    RegExpNode* node = alternatives_->at(1).node();
-  RegExpNode* replacement = node->FilterASCII(depth - 1, ignore_case);
+  RegExpNode* replacement = node->FilterOneByte(depth - 1, ignore_case);
    if (replacement == NULL) return set_replacement(NULL);
    alternatives_->at(1).set_node(replacement);
  
    RegExpNode* neg_node = alternatives_->at(0).node();
-  RegExpNode* neg_replacement = neg_node->FilterASCII(depth - 1, ignore_case);
+  RegExpNode* neg_replacement = neg_node->FilterOneByte(depth - 1, ignore_case);
    // If the negative lookahead is always going to fail then
    // we don't need to check it.
    if (neg_replacement == NULL) return set_replacement(replacement);
@@ -3037,7 +3032,7 @@ static void EmitHat(RegExpCompiler* compiler,
    if (!assembler->CheckSpecialCharacterClass('n',
                                               new_trace.backtrack())) {
      // Newline means \n, \r, 0x2028 or 0x2029.
-    if (!compiler->ascii()) {
+    if (!compiler->one_byte()) {
        assembler->CheckCharacterAfterAnd(0x2028, 0xfffe, &ok);
      }
      assembler->CheckCharacter('\n', &ok);
@@ -3235,7 +3230,7 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
                              int* checked_up_to) {
    RegExpMacroAssembler* assembler = compiler->macro_assembler();
    Isolate* isolate = assembler->zone()->isolate();
-  bool ascii = compiler->ascii();
+  bool one_byte = compiler->one_byte();
    Label* backtrack = trace->backtrack();
    QuickCheckDetails* quick_check = trace->quick_check_performed();
    int element_count = elms_->length();
@@ -3249,8 +3244,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
          if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
          EmitCharacterFunction* emit_function = NULL;
          switch (pass) {
-          case NON_ASCII_MATCH:
-            DCHECK(ascii);
+          case NON_LATIN1_MATCH:
+            DCHECK(one_byte);
              if (quarks[j] > String::kMaxOneByteCharCode) {
                assembler->GoTo(backtrack);
                return;
@@ -3285,14 +3280,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
          if (first_element_checked && i == 0) continue;
          if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
          RegExpCharacterClass* cc = elm.char_class();
-        EmitCharClass(assembler,
-                      cc,
-                      ascii,
-                      backtrack,
-                      cp_offset,
-                      *checked_up_to < cp_offset,
-                      preloaded,
-                      zone());
+        EmitCharClass(assembler, cc, one_byte, backtrack, cp_offset,
+                      *checked_up_to < cp_offset, preloaded, zone());
          UpdateBoundsCheck(cp_offset, checked_up_to);
        }
      }
@@ -3333,9 +3322,9 @@ void TextNode::Emit(RegExpCompiler* compiler, Trace* trace) {
      return;
    }
  
-  if (compiler->ascii()) {
+  if (compiler->one_byte()) {
      int dummy = 0;
-    TextEmitPass(compiler, NON_ASCII_MATCH, false, trace, false, &dummy);
+    TextEmitPass(compiler, NON_LATIN1_MATCH, false, trace, false, &dummy);
    }
  
    bool first_elt_done = false;
@@ -3391,7 +3380,7 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
    // Adjust the offsets of the quick check performed information.  This
    // information is used to find out what we already determined about the
    // characters by means of mask and compare.
-  quick_check_performed_.Advance(by, compiler->ascii());
+  quick_check_performed_.Advance(by, compiler->one_byte());
    cp_offset_ += by;
    if (cp_offset_ > RegExpMacroAssembler::kMaxCPOffset) {
      compiler->SetRegExpTooBig();
@@ -3401,7 +3390,7 @@ void Trace::AdvanceCurrentPositionInTrace(int by, RegExpCompiler* compiler) {
  }
  
  
-void TextNode::MakeCaseIndependent(bool is_ascii) {
+void TextNode::MakeCaseIndependent(bool is_one_byte) {
    int element_count = elms_->length();
    for (int i = 0; i < element_count; i++) {
      TextElement elm = elms_->at(i);
@@ -3413,7 +3402,7 @@ void TextNode::MakeCaseIndependent(bool is_ascii) {
        ZoneList<CharacterRange>* ranges = cc->ranges(zone());
        int range_count = ranges->length();
        for (int j = 0; j < range_count; j++) {
-        ranges->at(j).AddCaseEquivalents(ranges, is_ascii, zone());
+        ranges->at(j).AddCaseEquivalents(ranges, is_one_byte, zone());
        }
      }
    }
@@ -3441,7 +3430,7 @@ RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
    }
    if (ranges->length() != 1) return NULL;
    uint32_t max_char;
-  if (compiler->ascii()) {
+  if (compiler->one_byte()) {
      max_char = String::kMaxOneByteCharCode;
    } else {
      max_char = String::kMaxUtf16CodeUnit;
@@ -3494,6 +3483,7 @@ void LoopChoiceNode::AddContinueAlternative(GuardedAlternative alt) {
  void LoopChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
    RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
    if (trace->stop_node() == this) {
+    // Back edge of greedy optimized loop node graph.
      int text_length =
          GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
      DCHECK(text_length != kNodeIsTooComplexForGreedyLoops);
@@ -3517,8 +3507,8 @@ int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler,
                                             int eats_at_least) {
    int preload_characters = Min(4, eats_at_least);
    if (compiler->macro_assembler()->CanReadUnaligned()) {
-    bool ascii = compiler->ascii();
-    if (ascii) {
+    bool one_byte = compiler->one_byte();
+    if (one_byte) {
        if (preload_characters > 4) preload_characters = 4;
        // We can't preload 3 characters because there is no machine instruction
        // to do that.  We can't just load 4 because we could be reading
@@ -3588,18 +3578,18 @@ static const int kSpaceRanges[] = { '\t', '\r' + 1, ' ', ' ' + 1,
      0x00A0, 0x00A1, 0x1680, 0x1681, 0x180E, 0x180F, 0x2000, 0x200B,
      0x2028, 0x202A, 0x202F, 0x2030, 0x205F, 0x2060, 0x3000, 0x3001,
      0xFEFF, 0xFF00, 0x10000 };
-static const int kSpaceRangeCount = ARRAY_SIZE(kSpaceRanges);
+static const int kSpaceRangeCount = arraysize(kSpaceRanges);
  
  static const int kWordRanges[] = {
      '0', '9' + 1, 'A', 'Z' + 1, '_', '_' + 1, 'a', 'z' + 1, 0x10000 };
-static const int kWordRangeCount = ARRAY_SIZE(kWordRanges);
+static const int kWordRangeCount = arraysize(kWordRanges);
  static const int kDigitRanges[] = { '0', '9' + 1, 0x10000 };
-static const int kDigitRangeCount = ARRAY_SIZE(kDigitRanges);
+static const int kDigitRangeCount = arraysize(kDigitRanges);
  static const int kSurrogateRanges[] = { 0xd800, 0xe000, 0x10000 };
-static const int kSurrogateRangeCount = ARRAY_SIZE(kSurrogateRanges);
+static const int kSurrogateRangeCount = arraysize(kSurrogateRanges);
  static const int kLineTerminatorRanges[] = { 0x000A, 0x000B, 0x000D, 0x000E,
      0x2028, 0x202A, 0x10000 };
-static const int kLineTerminatorRangeCount = ARRAY_SIZE(kLineTerminatorRanges);
+static const int kLineTerminatorRangeCount = arraysize(kLineTerminatorRanges);
  
  
  void BoyerMoorePositionInfo::Set(int character) {
@@ -3644,7 +3634,7 @@ BoyerMooreLookahead::BoyerMooreLookahead(
      int length, RegExpCompiler* compiler, Zone* zone)
      : length_(length),
        compiler_(compiler) {
-  if (compiler->ascii()) {
+  if (compiler->one_byte()) {
      max_char_ = String::kMaxOneByteCharCode;
    } else {
      max_char_ = String::kMaxUtf16CodeUnit;
@@ -3712,8 +3702,9 @@ int BoyerMooreLookahead::FindBestInterval(
      // dividing by 2 we switch off the skipping if the probability of skipping
      // is less than 50%.  This is because the multibyte mask-and-compare
      // skipping in quickcheck is more likely to do well on this case.
-    bool in_quickcheck_range = ((i - remembered_from < 4) ||
-        (compiler_->ascii() ? remembered_from <= 4 : remembered_from <= 2));
+    bool in_quickcheck_range =
+        ((i - remembered_from < 4) ||
+         (compiler_->one_byte() ? remembered_from <= 4 : remembered_from <= 2));
      // Called 'probability' but it is only a rough estimate and can actually
      // be outside the 0-kSize range.
      int probability = (in_quickcheck_range ? kSize / 2 : kSize) - frequency;
@@ -3760,13 +3751,13 @@ int BoyerMooreLookahead::GetSkipTable(int min_lookahead,
  
  
  // See comment above on the implementation of GetSkipTable.
-bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
+void BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
    const int kSize = RegExpMacroAssembler::kTableSize;
  
    int min_lookahead = 0;
    int max_lookahead = 0;
  
-  if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return false;
+  if (!FindWorthwhileInterval(&min_lookahead, &max_lookahead)) return;
  
    bool found_single_character = false;
    int single_character = 0;
@@ -3790,7 +3781,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
  
    if (found_single_character && lookahead_width == 1 && max_lookahead < 3) {
      // The mask-compare can probably handle this better.
-    return false;
+    return;
    }
  
    if (found_single_character) {
@@ -3807,7 +3798,7 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
      masm->AdvanceCurrentPosition(lookahead_width);
      masm->GoTo(&again);
      masm->Bind(&cont);
-    return true;
+    return;
    }
  
    Factory* factory = masm->zone()->isolate()->factory();
@@ -3823,8 +3814,6 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
    masm->AdvanceCurrentPosition(skip_distance);
    masm->GoTo(&again);
    masm->Bind(&cont);
-
-  return true;
  }
  
  
@@ -3878,10 +3867,12 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
   *     \   F   V
   *      \-----S4
   *
- * For greedy loops we reverse our expectation and expect to match rather
- * than fail. Therefore we want the loop code to look like this (U is the
- * unwind code that steps back in the greedy loop).  The following alternatives
- * look the same as above.
+ * For greedy loops we push the current position, then generate the code that
+ * eats the input specially in EmitGreedyLoop.  The other choice (the
+ * continuation) is generated by the normal code in EmitChoices, and steps back
+ * in the input to the starting position when it fails to match.  The loop code
+ * looks like this (U is the unwind code that steps back in the greedy loop).
+ *
   *              _____
   *             /     \
   *             V     |
@@ -3890,26 +3881,25 @@ bool BoyerMooreLookahead::EmitSkipInstructions(RegExpMacroAssembler* masm) {
   *           / |S    |
   *         F/  \_____/
   *         /
- *        |<-----------
- *        |            \
- *        V             \
- *        Q2 ---> S2     \
- *        |  S   /       |
- *       F|     /        |
- *        |   F/         |
- *        |   /          |
- *        |  R           |
- *        | /            |
- *   F    VL             |
- * <------U              |
- * back   |S             |
- *        \______________/
+ *        |<-----
+ *        |      \
+ *        V       |S
+ *        Q2 ---> U----->backtrack
+ *        |  F   /
+ *       S|     /
+ *        V  F /
+ *        S2--/
   */
  
-void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
-  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
-  int choice_count = alternatives_->length();
+GreedyLoopState::GreedyLoopState(bool not_at_start) {
+  counter_backtrack_trace_.set_backtrack(&label_);
+  if (not_at_start) counter_backtrack_trace_.set_at_start(false);
+}
+
+
+void ChoiceNode::AssertGuardsMentionRegisters(Trace* trace) {
  #ifdef DEBUG
+  int choice_count = alternatives_->length();
    for (int i = 0; i < choice_count - 1; i++) {
      GuardedAlternative alternative = alternatives_->at(i);
      ZoneList<Guard*>* guards = alternative.guards();
@@ -3919,12 +3909,38 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
      }
    }
  #endif
+}
+
+
+void ChoiceNode::SetUpPreLoad(RegExpCompiler* compiler,
+                              Trace* current_trace,
+                              PreloadState* state) {
+    if (state->eats_at_least_ == PreloadState::kEatsAtLeastNotYetInitialized) {
+      // Save some time by looking at most one machine word ahead.
+      state->eats_at_least_ =
+          EatsAtLeast(compiler->one_byte() ? 4 : 2, kRecursionBudget,
+                      current_trace->at_start() == Trace::FALSE_VALUE);
+    }
+    state->preload_characters_ =
+        CalculatePreloadCharacters(compiler, state->eats_at_least_);
+
+    state->preload_is_current_ =
+        (current_trace->characters_preloaded() == state->preload_characters_);
+    state->preload_has_checked_bounds_ = state->preload_is_current_;
+}
+
+
+void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
+  int choice_count = alternatives_->length();
+
+  AssertGuardsMentionRegisters(trace);
  
    LimitResult limit_result = LimitVersions(compiler, trace);
    if (limit_result == DONE) return;
    DCHECK(limit_result == CONTINUE);
  
-  int new_flush_budget = trace->flush_budget() / choice_count;
+  // For loop nodes we already flushed (see LoopChoiceNode::Emit), but for
+  // other choice nodes we only flush if we are out of code size budget.
    if (trace->flush_budget() == 0 && trace->actions() != NULL) {
      trace->Flush(compiler, this);
      return;
@@ -3932,141 +3948,217 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
  
    RecursionCheck rc(compiler);
  
-  Trace* current_trace = trace;
+  PreloadState preload;
+  preload.init();
+  GreedyLoopState greedy_loop_state(not_at_start());
  
-  int text_length = GreedyLoopTextLengthForAlternative(&(alternatives_->at(0)));
-  bool greedy_loop = false;
-  Label greedy_loop_label;
-  Trace counter_backtrack_trace;
-  counter_backtrack_trace.set_backtrack(&greedy_loop_label);
-  if (not_at_start()) counter_backtrack_trace.set_at_start(false);
+  int text_length = GreedyLoopTextLengthForAlternative(&alternatives_->at(0));
+  AlternativeGenerationList alt_gens(choice_count, zone());
  
    if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
-    // Here we have special handling for greedy loops containing only text nodes
-    // and other simple nodes.  These are handled by pushing the current
-    // position on the stack and then incrementing the current position each
-    // time around the switch.  On backtrack we decrement the current position
-    // and check it against the pushed value.  This avoids pushing backtrack
-    // information for each iteration of the loop, which could take up a lot of
-    // space.
-    greedy_loop = true;
-    DCHECK(trace->stop_node() == NULL);
-    macro_assembler->PushCurrentPosition();
-    current_trace = &counter_backtrack_trace;
-    Label greedy_match_failed;
-    Trace greedy_match_trace;
-    if (not_at_start()) greedy_match_trace.set_at_start(false);
-    greedy_match_trace.set_backtrack(&greedy_match_failed);
-    Label loop_label;
-    macro_assembler->Bind(&loop_label);
-    greedy_match_trace.set_stop_node(this);
-    greedy_match_trace.set_loop_label(&loop_label);
-    alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
-    macro_assembler->Bind(&greedy_match_failed);
+    trace = EmitGreedyLoop(compiler,
+                           trace,
+                           &alt_gens,
+                           &preload,
+                           &greedy_loop_state,
+                           text_length);
+  } else {
+    // TODO(erikcorry): Delete this.  We don't need this label, but it makes us
+    // match the traces produced pre-cleanup.
+    Label second_choice;
+    compiler->macro_assembler()->Bind(&second_choice);
+
+    preload.eats_at_least_ = EmitOptimizedUnanchoredSearch(compiler, trace);
+
+    EmitChoices(compiler,
+                &alt_gens,
+                0,
+                trace,
+                &preload);
+  }
+
+  // At this point we need to generate slow checks for the alternatives where
+  // the quick check was inlined.  We can recognize these because the associated
+  // label was bound.
+  int new_flush_budget = trace->flush_budget() / choice_count;
+  for (int i = 0; i < choice_count; i++) {
+    AlternativeGeneration* alt_gen = alt_gens.at(i);
+    Trace new_trace(*trace);
+    // If there are actions to be flushed we have to limit how many times
+    // they are flushed.  Take the budget of the parent trace and distribute
+    // it fairly amongst the children.
+    if (new_trace.actions() != NULL) {
+      new_trace.set_flush_budget(new_flush_budget);
+    }
+    bool next_expects_preload =
+        i == choice_count - 1 ? false : alt_gens.at(i + 1)->expects_preload;
+    EmitOutOfLineContinuation(compiler,
+                              &new_trace,
+                              alternatives_->at(i),
+                              alt_gen,
+                              preload.preload_characters_,
+                              next_expects_preload);
    }
+}
+
+
+Trace* ChoiceNode::EmitGreedyLoop(RegExpCompiler* compiler,
+                                  Trace* trace,
+                                  AlternativeGenerationList* alt_gens,
+                                  PreloadState* preload,
+                                  GreedyLoopState* greedy_loop_state,
+                                  int text_length) {
+  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+  // Here we have special handling for greedy loops containing only text nodes
+  // and other simple nodes.  These are handled by pushing the current
+  // position on the stack and then incrementing the current position each
+  // time around the switch.  On backtrack we decrement the current position
+  // and check it against the pushed value.  This avoids pushing backtrack
+  // information for each iteration of the loop, which could take up a lot of
+  // space.
+  DCHECK(trace->stop_node() == NULL);
+  macro_assembler->PushCurrentPosition();
+  Label greedy_match_failed;
+  Trace greedy_match_trace;
+  if (not_at_start()) greedy_match_trace.set_at_start(false);
+  greedy_match_trace.set_backtrack(&greedy_match_failed);
+  Label loop_label;
+  macro_assembler->Bind(&loop_label);
+  greedy_match_trace.set_stop_node(this);
+  greedy_match_trace.set_loop_label(&loop_label);
+  alternatives_->at(0).node()->Emit(compiler, &greedy_match_trace);
+  macro_assembler->Bind(&greedy_match_failed);
  
    Label second_choice;  // For use in greedy matches.
    macro_assembler->Bind(&second_choice);
  
-  int first_normal_choice = greedy_loop ? 1 : 0;
-
-  bool not_at_start = current_trace->at_start() == Trace::FALSE_VALUE;
-  const int kEatsAtLeastNotYetInitialized = -1;
-  int eats_at_least = kEatsAtLeastNotYetInitialized;
-
-  bool skip_was_emitted = false;
-
-  if (!greedy_loop && choice_count == 2) {
-    GuardedAlternative alt1 = alternatives_->at(1);
-    if (alt1.guards() == NULL || alt1.guards()->length() == 0) {
-      RegExpNode* eats_anything_node = alt1.node();
-      if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) ==
-          this) {
-        // At this point we know that we are at a non-greedy loop that will eat
-        // any character one at a time.  Any non-anchored regexp has such a
-        // loop prepended to it in order to find where it starts.  We look for
-        // a pattern of the form ...abc... where we can look 6 characters ahead
-        // and step forwards 3 if the character is not one of abc.  Abc need
-        // not be atoms, they can be any reasonably limited character class or
-        // small alternation.
-        DCHECK(trace->is_trivial());  // This is the case on LoopChoiceNodes.
-        BoyerMooreLookahead* lookahead = bm_info(not_at_start);
-        if (lookahead == NULL) {
-          eats_at_least = Min(kMaxLookaheadForBoyerMoore,
-                              EatsAtLeast(kMaxLookaheadForBoyerMoore,
-                                          kRecursionBudget,
-                                          not_at_start));
-          if (eats_at_least >= 1) {
-            BoyerMooreLookahead* bm =
-                new(zone()) BoyerMooreLookahead(eats_at_least,
-                                                compiler,
-                                                zone());
-            GuardedAlternative alt0 = alternatives_->at(0);
-            alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, not_at_start);
-            skip_was_emitted = bm->EmitSkipInstructions(macro_assembler);
-          }
-        } else {
-          skip_was_emitted = lookahead->EmitSkipInstructions(macro_assembler);
-        }
-      }
-    }
+  Trace* new_trace = greedy_loop_state->counter_backtrack_trace();
+
+  EmitChoices(compiler,
+              alt_gens,
+              1,
+              new_trace,
+              preload);
+
+  macro_assembler->Bind(greedy_loop_state->label());
+  // If we have unwound to the bottom then backtrack.
+  macro_assembler->CheckGreedyLoop(trace->backtrack());
+  // Otherwise try the second priority at an earlier position.
+  macro_assembler->AdvanceCurrentPosition(-text_length);
+  macro_assembler->GoTo(&second_choice);
+  return new_trace;
+}
+
+int ChoiceNode::EmitOptimizedUnanchoredSearch(RegExpCompiler* compiler,
+                                              Trace* trace) {
+  int eats_at_least = PreloadState::kEatsAtLeastNotYetInitialized;
+  if (alternatives_->length() != 2) return eats_at_least;
+
+  GuardedAlternative alt1 = alternatives_->at(1);
+  if (alt1.guards() != NULL && alt1.guards()->length() != 0) {
+    return eats_at_least;
    }
+  RegExpNode* eats_anything_node = alt1.node();
+  if (eats_anything_node->GetSuccessorOfOmnivorousTextNode(compiler) != this) {
+    return eats_at_least;
+  }
+
+  // Really we should be creating a new trace when we execute this function,
+  // but there is no need, because the code it generates cannot backtrack, and
+  // we always arrive here with a trivial trace (since it's the entry to a
+  // loop).  That also implies that there are no preloaded characters, which is
+  // good, because it means we won't be violating any assumptions by
+  // overwriting those characters with new load instructions.
+  DCHECK(trace->is_trivial());
  
-  if (eats_at_least == kEatsAtLeastNotYetInitialized) {
-    // Save some time by looking at most one machine word ahead.
-    eats_at_least =
-        EatsAtLeast(compiler->ascii() ? 4 : 2, kRecursionBudget, not_at_start);
+  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+  // At this point we know that we are at a non-greedy loop that will eat
+  // any character one at a time.  Any non-anchored regexp has such a
+  // loop prepended to it in order to find where it starts.  We look for
+  // a pattern of the form ...abc... where we can look 6 characters ahead
+  // and step forwards 3 if the character is not one of abc.  Abc need
+  // not be atoms, they can be any reasonably limited character class or
+  // small alternation.
+  BoyerMooreLookahead* bm = bm_info(false);
+  if (bm == NULL) {
+    eats_at_least = Min(kMaxLookaheadForBoyerMoore,
+                        EatsAtLeast(kMaxLookaheadForBoyerMoore,
+                                    kRecursionBudget,
+                                    false));
+    if (eats_at_least >= 1) {
+      bm = new(zone()) BoyerMooreLookahead(eats_at_least,
+                                           compiler,
+                                           zone());
+      GuardedAlternative alt0 = alternatives_->at(0);
+      alt0.node()->FillInBMInfo(0, kRecursionBudget, bm, false);
+    }
+  }
+  if (bm != NULL) {
+    bm->EmitSkipInstructions(macro_assembler);
    }
-  int preload_characters = CalculatePreloadCharacters(compiler, eats_at_least);
+  return eats_at_least;
+}
  
-  bool preload_is_current = !skip_was_emitted &&
-      (current_trace->characters_preloaded() == preload_characters);
-  bool preload_has_checked_bounds = preload_is_current;
  
-  AlternativeGenerationList alt_gens(choice_count, zone());
+void ChoiceNode::EmitChoices(RegExpCompiler* compiler,
+                             AlternativeGenerationList* alt_gens,
+                             int first_choice,
+                             Trace* trace,
+                             PreloadState* preload) {
+  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+  SetUpPreLoad(compiler, trace, preload);
  
    // For now we just call all choices one after the other.  The idea ultimately
    // is to use the Dispatch table to try only the relevant ones.
-  for (int i = first_normal_choice; i < choice_count; i++) {
+  int choice_count = alternatives_->length();
+
+  int new_flush_budget = trace->flush_budget() / choice_count;
+
+  for (int i = first_choice; i < choice_count; i++) {
+    bool is_last = i == choice_count - 1;
+    bool fall_through_on_failure = !is_last;
      GuardedAlternative alternative = alternatives_->at(i);
-    AlternativeGeneration* alt_gen = alt_gens.at(i);
-    alt_gen->quick_check_details.set_characters(preload_characters);
+    AlternativeGeneration* alt_gen = alt_gens->at(i);
+    alt_gen->quick_check_details.set_characters(preload->preload_characters_);
      ZoneList<Guard*>* guards = alternative.guards();
      int guard_count = (guards == NULL) ? 0 : guards->length();
-    Trace new_trace(*current_trace);
-    new_trace.set_characters_preloaded(preload_is_current ?
-                                         preload_characters :
+    Trace new_trace(*trace);
+    new_trace.set_characters_preloaded(preload->preload_is_current_ ?
+                                         preload->preload_characters_ :
                                           0);
-    if (preload_has_checked_bounds) {
-      new_trace.set_bound_checked_up_to(preload_characters);
+    if (preload->preload_has_checked_bounds_) {
+      new_trace.set_bound_checked_up_to(preload->preload_characters_);
      }
      new_trace.quick_check_performed()->Clear();
      if (not_at_start_) new_trace.set_at_start(Trace::FALSE_VALUE);
-    alt_gen->expects_preload = preload_is_current;
+    if (!is_last) {
+      new_trace.set_backtrack(&alt_gen->after);
+    }
+    alt_gen->expects_preload = preload->preload_is_current_;
      bool generate_full_check_inline = false;
      if (FLAG_regexp_optimization &&
-        try_to_emit_quick_check_for_alternative(i) &&
+        try_to_emit_quick_check_for_alternative(i == 0) &&
          alternative.node()->EmitQuickCheck(compiler,
+                                           trace,
                                             &new_trace,
-                                           preload_has_checked_bounds,
+                                           preload->preload_has_checked_bounds_,
                                             &alt_gen->possible_success,
                                             &alt_gen->quick_check_details,
-                                           i < choice_count - 1)) {
+                                           fall_through_on_failure)) {
        // Quick check was generated for this choice.
-      preload_is_current = true;
-      preload_has_checked_bounds = true;
-      // On the last choice in the ChoiceNode we generated the quick
-      // check to fall through on possible success.  So now we need to
-      // generate the full check inline.
-      if (i == choice_count - 1) {
+      preload->preload_is_current_ = true;
+      preload->preload_has_checked_bounds_ = true;
+      // If we generated the quick check to fall through on possible success,
+      // we now need to generate the full check inline.
+      if (!fall_through_on_failure) {
          macro_assembler->Bind(&alt_gen->possible_success);
          new_trace.set_quick_check_performed(&alt_gen->quick_check_details);
-        new_trace.set_characters_preloaded(preload_characters);
-        new_trace.set_bound_checked_up_to(preload_characters);
+        new_trace.set_characters_preloaded(preload->preload_characters_);
+        new_trace.set_bound_checked_up_to(preload->preload_characters_);
          generate_full_check_inline = true;
        }
      } else if (alt_gen->quick_check_details.cannot_match()) {
-      if (i == choice_count - 1 && !greedy_loop) {
+      if (!fall_through_on_failure) {
          macro_assembler->GoTo(trace->backtrack());
        }
        continue;
@@ -4076,13 +4168,10 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
        // previous cases that go here when they fail.  There's no reason to
        // insist that they preload characters since the slow check we are about
        // to generate probably can't use it.
-      if (i != first_normal_choice) {
+      if (i != first_choice) {
          alt_gen->expects_preload = false;
          new_trace.InvalidateCurrentCharacter();
        }
-      if (i < choice_count - 1) {
-        new_trace.set_backtrack(&alt_gen->after);
-      }
        generate_full_check_inline = true;
      }
      if (generate_full_check_inline) {
@@ -4093,38 +4182,10 @@ void ChoiceNode::Emit(RegExpCompiler* compiler, Trace* trace) {
          GenerateGuard(macro_assembler, guards->at(j), &new_trace);
        }
        alternative.node()->Emit(compiler, &new_trace);
-      preload_is_current = false;
+      preload->preload_is_current_ = false;
      }
      macro_assembler->Bind(&alt_gen->after);
    }
-  if (greedy_loop) {
-    macro_assembler->Bind(&greedy_loop_label);
-    // If we have unwound to the bottom then backtrack.
-    macro_assembler->CheckGreedyLoop(trace->backtrack());
-    // Otherwise try the second priority at an earlier position.
-    macro_assembler->AdvanceCurrentPosition(-text_length);
-    macro_assembler->GoTo(&second_choice);
-  }
-
-  // At this point we need to generate slow checks for the alternatives where
-  // the quick check was inlined.  We can recognize these because the associated
-  // label was bound.
-  for (int i = first_normal_choice; i < choice_count - 1; i++) {
-    AlternativeGeneration* alt_gen = alt_gens.at(i);
-    Trace new_trace(*current_trace);
-    // If there are actions to be flushed we have to limit how many times
-    // they are flushed.  Take the budget of the parent trace and distribute
-    // it fairly amongst the children.
-    if (new_trace.actions() != NULL) {
-      new_trace.set_flush_budget(new_flush_budget);
-    }
-    EmitOutOfLineContinuation(compiler,
-                              &new_trace,
-                              alternatives_->at(i),
-                              alt_gen,
-                              preload_characters,
-                              alt_gens.at(i + 1)->expects_preload);
-  }
  }
  
  
@@ -5272,12 +5333,11 @@ void CharacterRange::Split(ZoneList<CharacterRange>* base,
  
  
  void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges,
-                                        bool is_ascii,
-                                        Zone* zone) {
+                                        bool is_one_byte, Zone* zone) {
    Isolate* isolate = zone->isolate();
    uc16 bottom = from();
    uc16 top = to();
-  if (is_ascii && !RangeContainsLatin1Equivalents(*this)) {
+  if (is_one_byte && !RangeContainsLatin1Equivalents(*this)) {
      if (bottom > String::kMaxOneByteCharCode) return;
      if (top > String::kMaxOneByteCharCode) top = String::kMaxOneByteCharCode;
    }
@@ -5696,7 +5756,7 @@ void TextNode::CalculateOffsets() {
  
  void Analysis::VisitText(TextNode* that) {
    if (ignore_case_) {
-    that->MakeCaseIndependent(is_ascii_);
+    that->MakeCaseIndependent(is_one_byte_);
    }
    EnsureAnalyzed(that->on_success());
    if (!has_failed()) {
@@ -5972,18 +6032,13 @@ void DispatchTableConstructor::VisitAction(ActionNode* that) {
  
  
  RegExpEngine::CompilationResult RegExpEngine::Compile(
-    RegExpCompileData* data,
-    bool ignore_case,
-    bool is_global,
-    bool is_multiline,
-    Handle<String> pattern,
-    Handle<String> sample_subject,
-    bool is_ascii,
-    Zone* zone) {
+    RegExpCompileData* data, bool ignore_case, bool is_global,
+    bool is_multiline, bool is_sticky, Handle<String> pattern,
+    Handle<String> sample_subject, bool is_one_byte, Zone* zone) {
    if ((data->capture_count + 1) * 2 - 1 > RegExpMacroAssembler::kMaxRegister) {
      return IrregexpRegExpTooBig(zone->isolate());
    }
-  RegExpCompiler compiler(data->capture_count, ignore_case, is_ascii, zone);
+  RegExpCompiler compiler(data->capture_count, ignore_case, is_one_byte, zone);
  
    // Sample some characters from the middle of the string.
    static const int kSampleSize = 128;
@@ -6006,9 +6061,9 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
    bool is_end_anchored = data->tree->IsAnchoredAtEnd();
    bool is_start_anchored = data->tree->IsAnchoredAtStart();
    int max_length = data->tree->max_match();
-  if (!is_start_anchored) {
+  if (!is_start_anchored && !is_sticky) {
      // Add a .*? at the beginning, outside the body capture, unless
-    // this expression is anchored at the beginning.
+    // this expression is anchored at the beginning or sticky.
      RegExpNode* loop_node =
          RegExpQuantifier::ToNode(0,
                                   RegExpTree::kInfinity,
@@ -6030,18 +6085,18 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
        node = loop_node;
      }
    }
-  if (is_ascii) {
-    node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
+  if (is_one_byte) {
+    node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
      // Do it again to propagate the new nodes to places where they were not
      // put because they had not been calculated yet.
      if (node != NULL) {
-      node = node->FilterASCII(RegExpCompiler::kMaxRecursion, ignore_case);
+      node = node->FilterOneByte(RegExpCompiler::kMaxRecursion, ignore_case);
      }
    }
  
    if (node == NULL) node = new(zone) EndNode(EndNode::BACKTRACK, zone);
    data->node = node;
-  Analysis analysis(ignore_case, is_ascii);
+  Analysis analysis(ignore_case, is_one_byte);
    analysis.EnsureAnalyzed(node);
    if (analysis.has_failed()) {
      const char* error_message = analysis.error_message();
@@ -6053,8 +6108,8 @@ RegExpEngine::CompilationResult RegExpEngine::Compile(
    // Native regexp implementation.
  
    NativeRegExpMacroAssembler::Mode mode =
-      is_ascii ? NativeRegExpMacroAssembler::ASCII
-               : NativeRegExpMacroAssembler::UC16;
+      is_one_byte ? NativeRegExpMacroAssembler::LATIN1
+                  : NativeRegExpMacroAssembler::UC16;
  
  #if V8_TARGET_ARCH_IA32
    RegExpMacroAssemblerIA32 macro_assembler(mode, (data->capture_count + 1) * 2,