* Generate quick checks based on mask and compare for
author erik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 19 Dec 2008 12:02:34 +0000 (12:02 +0000)
committer erik.corry@gmail.com <erik.corry@gmail.com@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 19 Dec 2008 12:02:34 +0000 (12:02 +0000)
  the alternatives in a choice node.  The quick checks
  are conservative in the sense that they only detect
  failure with certainty.  Checks can do 2 or 4 characters
  at a time.
* Inline the quick checks to allow the alternatives to
  be checked without branching in the common case where
  they fail.
Review URL: http://codereview.chromium.org/14194

git-svn-id: http://v8.googlecode.com/svn/branches/bleeding_edge@1005 ce2b1a6d-e550-0410-aec6-3dcde31c8c00

12 files changed:
src/bytecodes-irregexp.h
src/interpreter-irregexp.cc
src/jsregexp.cc
src/jsregexp.h
src/regexp-macro-assembler-ia32.cc
src/regexp-macro-assembler-ia32.h
src/regexp-macro-assembler-irregexp.cc
src/regexp-macro-assembler-irregexp.h
src/regexp-macro-assembler-tracer.cc
src/regexp-macro-assembler-tracer.h
src/regexp-macro-assembler.h
test/mjsunit/regexp.js

index 64a65cb7dcea83f41500747ccfad6ebfe8dfbf1a..5ffc4b36acf1f287c63fba18784dfffce92b8cf9 100644 (file)
@@ -51,23 +51,28 @@ V(ADVANCE_CP,        15, 5) /* advance_cp offset32                          */ \
 V(GOTO,              16, 5) /* goto addr32                                  */ \
 V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32 addr32                         */ \
 V(LOAD_CURRENT_CHAR_UNCHECKED, 18, 5) /* load offset32                      */ \
-V(CHECK_CHAR,        19, 7) /* check_char uc16 addr32                       */ \
-V(CHECK_NOT_CHAR,    20, 7) /* check_not_char uc16 addr32                   */ \
-V(OR_CHECK_NOT_CHAR, 21, 9) /* or_check_not_char uc16 uc16 addr32           */ \
-V(MINUS_OR_CHECK_NOT_CHAR, 22, 9) /* minus_or_check_not_char uc16 uc16 ad...*/ \
-V(CHECK_LT,          23, 7) /* check_lt uc16 addr32                         */ \
-V(CHECK_GT,          24, 7) /* check_gr uc16 addr32                         */ \
-V(CHECK_NOT_BACK_REF, 25, 6) /* check_not_back_ref capture_idx addr32       */ \
-V(CHECK_NOT_BACK_REF_NO_CASE, 26, 6) /* check_not_back_ref_no_case captu... */ \
-V(CHECK_NOT_REGS_EQUAL, 27, 7) /* check_not_regs_equal reg1 reg2 addr32     */ \
-V(LOOKUP_MAP1,       28, 11) /* l_map1 start16 bit_map_addr32 addr32        */ \
-V(LOOKUP_MAP2,       29, 99) /* l_map2 start16 half_nibble_map_addr32*      */ \
-V(LOOKUP_MAP8,       30, 99) /* l_map8 start16 byte_map addr32*             */ \
-V(LOOKUP_HI_MAP8,    31, 99) /* l_himap8 start8 byte_map_addr32 addr32*     */ \
-V(CHECK_REGISTER_LT, 32, 8) /* check_reg_lt register_index value16 addr32   */ \
-V(CHECK_REGISTER_GE, 33, 8) /* check_reg_ge register_index value16 addr32   */ \
-V(CHECK_NOT_AT_START, 34, 5) /* check_not_at_start addr32                   */ \
-V(CHECK_GREEDY,      35, 5) /* check_greedy addr32                          */
+V(LOAD_2_CURRENT_CHARS, 19, 9) /* load offset32 addr32                      */ \
+V(LOAD_2_CURRENT_CHARS_UNCHECKED, 20, 5) /* load offset32                   */ \
+V(LOAD_4_CURRENT_CHARS, 21, 9) /* load offset32 addr32                      */ \
+V(LOAD_4_CURRENT_CHARS_UNCHECKED, 22, 5) /* load offset32                   */ \
+V(CHECK_CHAR,        23, 9) /* check_char uint32 addr32                     */ \
+V(CHECK_NOT_CHAR,    24, 9) /* check_not_char uint32 addr32                 */ \
+V(AND_CHECK_CHAR,    25, 13) /* and_check_char uint32 uint32 addr32         */ \
+V(AND_CHECK_NOT_CHAR, 26, 13) /* and_check_not_char uint32 uint32 addr32    */ \
+V(MINUS_AND_CHECK_NOT_CHAR, 27, 11) /* minus_and_check_not_char uc16 uc16...*/ \
+V(CHECK_LT,          28, 7) /* check_lt uc16 addr32                         */ \
+V(CHECK_GT,          29, 7) /* check_gr uc16 addr32                         */ \
+V(CHECK_NOT_BACK_REF, 30, 6) /* check_not_back_ref capture_idx addr32       */ \
+V(CHECK_NOT_BACK_REF_NO_CASE, 31, 6) /* check_not_back_ref_no_case captu... */ \
+V(CHECK_NOT_REGS_EQUAL, 32, 7) /* check_not_regs_equal reg1 reg2 addr32     */ \
+V(LOOKUP_MAP1,       33, 11) /* l_map1 start16 bit_map_addr32 addr32        */ \
+V(LOOKUP_MAP2,       34, 99) /* l_map2 start16 half_nibble_map_addr32*      */ \
+V(LOOKUP_MAP8,       35, 99) /* l_map8 start16 byte_map addr32*             */ \
+V(LOOKUP_HI_MAP8,    36, 99) /* l_himap8 start8 byte_map_addr32 addr32*     */ \
+V(CHECK_REGISTER_LT, 37, 8) /* check_reg_lt register_index value16 addr32   */ \
+V(CHECK_REGISTER_GE, 38, 8) /* check_reg_ge register_index value16 addr32   */ \
+V(CHECK_NOT_AT_START, 39, 5) /* check_not_at_start addr32                   */ \
+V(CHECK_GREEDY,      40, 5) /* check_greedy addr32                          */
 
 #define DECLARE_BYTECODES(name, code, length) \
   static const int BC_##name = code;
index 14184438ef81f80af9f723ef0def621e652fbb11..13ba619567c6a20dc76f40c9c22c142140902cfa 100644 (file)
@@ -81,17 +81,34 @@ static void TraceInterpreter(const byte* code_base,
                              const byte* pc,
                              int stack_depth,
                              int current_position,
+                             uint32_t current_char,
                              int bytecode_length,
                              const char* bytecode_name) {
   if (FLAG_trace_regexp_bytecodes) {
-    PrintF("pc = %02x, sp = %d, current = %d, bc = %s",
+    bool printable = (current_char < 127 && current_char >= 32);
+    const char* format =
+        printable ?
+        "pc = %02x, sp = %d, curpos = %d, curchar = %08x (%c), bc = %s" :
+        "pc = %02x, sp = %d, curpos = %d, curchar = %08x .%c., bc = %s";
+    PrintF(format,
            pc - code_base,
            stack_depth,
            current_position,
+           current_char,
+           printable ? current_char : '.',
            bytecode_name);
     for (int i = 1; i < bytecode_length; i++) {
       printf(", %02x", pc[i]);
     }
+    printf(" ");
+    for (int i = 1; i < bytecode_length; i++) {
+      unsigned char b = pc[i];
+      if (b < 127 && b >= 32) {
+        printf("%c", b);
+      } else {
+        printf(".");
+      }
+    }
     printf("\n");
   }
 }
@@ -103,6 +120,7 @@ static void TraceInterpreter(const byte* code_base,
                      pc,                                \
                      backtrack_sp - backtrack_stack,    \
                      current,                           \
+                     current_char,                      \
                      BC_##name##_LENGTH,                \
                      #name);
 #else
@@ -117,7 +135,7 @@ static bool RawMatch(const byte* code_base,
                      Vector<const Char> subject,
                      int* registers,
                      int current,
-                     int current_char) {
+                     uint32_t current_char) {
   const byte* pc = code_base;
   static const int kBacktrackStackSize = 10000;
   int backtrack_stack[kBacktrackStackSize];
@@ -233,45 +251,104 @@ static bool RawMatch(const byte* code_base,
         pc += BC_LOAD_CURRENT_CHAR_UNCHECKED_LENGTH;
         break;
       }
+      BYTECODE(LOAD_2_CURRENT_CHARS) {
+        int pos = current + Load32(pc + 1);
+        if (pos + 2 > subject.length()) {
+          pc = code_base + Load32(pc + 5);
+        } else {
+          Char next = subject[pos + 1];
+          current_char =
+              (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
+          pc += BC_LOAD_2_CURRENT_CHARS_LENGTH;
+        }
+        break;
+      }
+      BYTECODE(LOAD_2_CURRENT_CHARS_UNCHECKED) {
+        int pos = current + Load32(pc + 1);
+        Char next = subject[pos + 1];
+        current_char = (subject[pos] | (next << (kBitsPerByte * sizeof(Char))));
+        pc += BC_LOAD_2_CURRENT_CHARS_UNCHECKED_LENGTH;
+        break;
+      }
+      BYTECODE(LOAD_4_CURRENT_CHARS) {
+        ASSERT(sizeof(Char) == 1);
+        int pos = current + Load32(pc + 1);
+        if (pos + 4 > subject.length()) {
+          pc = code_base + Load32(pc + 5);
+        } else {
+          Char next1 = subject[pos + 1];
+          Char next2 = subject[pos + 2];
+          Char next3 = subject[pos + 3];
+          current_char = (subject[pos] |
+                          (next1 << 8) |
+                          (next2 << 16) |
+                          (next3 << 24));
+          pc += BC_LOAD_4_CURRENT_CHARS_LENGTH;
+        }
+        break;
+      }
+      BYTECODE(LOAD_4_CURRENT_CHARS_UNCHECKED) {
+        ASSERT(sizeof(Char) == 1);
+        int pos = current + Load32(pc + 1);
+        Char next1 = subject[pos + 1];
+        Char next2 = subject[pos + 2];
+        Char next3 = subject[pos + 3];
+        current_char = (subject[pos] |
+                        (next1 << 8) |
+                        (next2 << 16) |
+                        (next3 << 24));
+        pc += BC_LOAD_4_CURRENT_CHARS_UNCHECKED_LENGTH;
+        break;
+      }
       BYTECODE(CHECK_CHAR) {
-        int c = Load16(pc + 1);
+        uint32_t c = Load32(pc + 1);
         if (c == current_char) {
-          pc = code_base + Load32(pc + 3);
+          pc = code_base + Load32(pc + 5);
         } else {
           pc += BC_CHECK_CHAR_LENGTH;
         }
         break;
       }
       BYTECODE(CHECK_NOT_CHAR) {
-        int c = Load16(pc + 1);
+        uint32_t c = Load32(pc + 1);
         if (c != current_char) {
-          pc = code_base + Load32(pc + 3);
+          pc = code_base + Load32(pc + 5);
         } else {
           pc += BC_CHECK_NOT_CHAR_LENGTH;
         }
         break;
       }
-      BYTECODE(OR_CHECK_NOT_CHAR) {
-        int c = Load16(pc + 1);
-        if (c != (current_char | Load16(pc + 3))) {
-          pc = code_base + Load32(pc + 5);
+      BYTECODE(AND_CHECK_CHAR) {
+        uint32_t c = Load32(pc + 1);
+        if (c == (current_char & Load32(pc + 5))) {
+          pc = code_base + Load32(pc + 9);
         } else {
-          pc += BC_OR_CHECK_NOT_CHAR_LENGTH;
+          pc += BC_AND_CHECK_CHAR_LENGTH;
         }
         break;
       }
-      BYTECODE(MINUS_OR_CHECK_NOT_CHAR) {
-        int c = Load16(pc + 1);
-        int m = Load16(pc + 3);
-        if (c != ((current_char - m) | m)) {
-          pc = code_base + Load32(pc + 5);
+      BYTECODE(AND_CHECK_NOT_CHAR) {
+        uint32_t c = Load32(pc + 1);
+        if (c != (current_char & Load32(pc + 5))) {
+          pc = code_base + Load32(pc + 9);
+        } else {
+          pc += BC_AND_CHECK_NOT_CHAR_LENGTH;
+        }
+        break;
+      }
+      BYTECODE(MINUS_AND_CHECK_NOT_CHAR) {
+        uint32_t c = Load16(pc + 1);
+        uint32_t minus = Load16(pc + 3);
+        uint32_t mask = Load16(pc + 5);
+        if (c != ((current_char - minus) & mask)) {
+          pc = code_base + Load32(pc + 7);
         } else {
-          pc += BC_MINUS_OR_CHECK_NOT_CHAR_LENGTH;
+          pc += BC_MINUS_AND_CHECK_NOT_CHAR_LENGTH;
         }
         break;
       }
       BYTECODE(CHECK_LT) {
-        int limit = Load16(pc + 1);
+        uint32_t limit = Load16(pc + 1);
         if (current_char < limit) {
           pc = code_base + Load32(pc + 3);
         } else {
@@ -280,7 +357,7 @@ static bool RawMatch(const byte* code_base,
         break;
       }
       BYTECODE(CHECK_GT) {
-        int limit = Load16(pc + 1);
+        uint32_t limit = Load16(pc + 1);
         if (current_char > limit) {
           pc = code_base + Load32(pc + 3);
         } else {
index cf9831426ff2d4c99167e03dbf1dda1e9d528b01..a566b9b45200ac264fbef53cb269c07da5086f87 100644 (file)
@@ -214,24 +214,14 @@ class OffsetsVector {
       vector_ = static_offsets_vector_;
     }
   }
-
-
   inline ~OffsetsVector() {
     if (offsets_vector_length_ > kStaticOffsetsVectorSize) {
       DeleteArray(vector_);
       vector_ = NULL;
     }
   }
-
-
-  inline int* vector() {
-    return vector_;
-  }
-
-
-  inline int length() {
-    return offsets_vector_length_;
-  }
+  inline int* vector() { return vector_; }
+  inline int length() { return offsets_vector_length_; }
 
  private:
   int* vector_;
@@ -803,6 +793,11 @@ Handle<Object> RegExpImpl::IrregexpExec(Handle<JSRegExp> regexp,
   }
 #endif
   LOG(RegExpExecEvent(regexp, previous_index, subject));
+
+  if (!subject->IsFlat(StringShape(*subject))) {
+    FlattenString(subject);
+  }
+
   return IrregexpExecOnce(irregexp,
                           num_captures,
                           subject,
@@ -837,11 +832,12 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
     subject->Flatten(shape);
   }
 
-  do {
+  while (true) {
     if (previous_index > subject->length() || previous_index < 0) {
       // Per ECMA-262 15.10.6.2, if the previous index is greater than the
       // string length, there is no match.
       matches = Factory::null_value();
+      return result;
     } else {
 #ifdef DEBUG
       if (FLAG_trace_regexp_bytecodes) {
@@ -865,17 +861,12 @@ Handle<Object> RegExpImpl::IrregexpExecGlobal(Handle<JSRegExp> regexp,
         if (offsets.vector()[0] == offsets.vector()[1]) {
           previous_index++;
         }
+      } else if (matches->IsNull()) {
+        return result;
+      } else {
+        return matches;
       }
     }
-  } while (matches->IsJSArray());
-
-  // If we exited the loop with an exception, throw it.
-  if (matches->IsNull()) {
-    // Exited loop normally.
-    return result;
-  } else {
-    // Exited loop with the exception in matches.
-    return matches;
   }
 }
 
@@ -886,14 +877,11 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
                                             int previous_index,
                                             int* offsets_vector,
                                             int offsets_vector_length) {
+  ASSERT(subject->IsFlat(StringShape(*subject)));
   bool rc;
 
   int tag = Smi::cast(irregexp->get(kIrregexpImplementationIndex))->value();
 
-  if (!subject->IsFlat(StringShape(*subject))) {
-    FlattenString(subject);
-  }
-
   switch (tag) {
     case RegExpMacroAssembler::kIA32Implementation: {
 #ifndef ARM
@@ -997,9 +985,9 @@ Handle<Object> RegExpImpl::IrregexpExecOnce(Handle<FixedArray> irregexp,
 
   Handle<FixedArray> array = Factory::NewFixedArray(2 * (num_captures+1));
   // The captures come in (start, end+1) pairs.
-  for (int i = 0; i < 2 * (num_captures+1); i += 2) {
+  for (int i = 0; i < 2 * (num_captures + 1); i += 2) {
     array->set(i, Smi::FromInt(offsets_vector[i]));
-    array->set(i+1, Smi::FromInt(offsets_vector[i+1]));
+    array->set(i + 1, Smi::FromInt(offsets_vector[i + 1]));
   }
   return Factory::NewJSArrayWithElements(array);
 }
@@ -1344,25 +1332,26 @@ int GenerationVariant::FindAffectedRegisters(OutSet* affected_registers) {
 }
 
 
-void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* macro,
+void GenerationVariant::PushAffectedRegisters(RegExpMacroAssembler* assembler,
                                               int max_register,
                                               OutSet& affected_registers) {
   for (int reg = 0; reg <= max_register; reg++) {
-    if (affected_registers.Get(reg)) macro->PushRegister(reg);
+    if (affected_registers.Get(reg)) assembler->PushRegister(reg);
   }
 }
 
 
-void GenerationVariant::RestoreAffectedRegisters(RegExpMacroAssembler* macro,
-                                                 int max_register,
-                                                 OutSet& affected_registers) {
+void GenerationVariant::RestoreAffectedRegisters(
+    RegExpMacroAssembler* assembler,
+    int max_register,
+    OutSet& affected_registers) {
   for (int reg = max_register; reg >= 0; reg--) {
-    if (affected_registers.Get(reg)) macro->PopRegister(reg);
+    if (affected_registers.Get(reg)) assembler->PopRegister(reg);
   }
 }
 
 
-void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro,
+void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* assembler,
                                                int max_register,
                                                OutSet& affected_registers) {
   for (int reg = 0; reg <= max_register; reg++) {
@@ -1410,13 +1399,13 @@ void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro,
       }
     }
     if (store_position != -1) {
-      macro->WriteCurrentPositionToRegister(reg, store_position);
+      assembler->WriteCurrentPositionToRegister(reg, store_position);
     } else {
       if (absolute) {
-        macro->SetRegister(reg, value);
+        assembler->SetRegister(reg, value);
       } else {
         if (value != 0) {
-          macro->AdvanceRegister(reg, value);
+          assembler->AdvanceRegister(reg, value);
         }
       }
     }
@@ -1428,14 +1417,19 @@ void GenerationVariant::PerformDeferredActions(RegExpMacroAssembler* macro,
 // nodes.  It normalises the state of the code generator to ensure we can
 // generate generic code.
 bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
-  RegExpMacroAssembler* macro = compiler->macro_assembler();
+  RegExpMacroAssembler* assembler = compiler->macro_assembler();
 
-  ASSERT(actions_ != NULL || cp_offset_ != 0 || backtrack() != NULL);
+  ASSERT(actions_ != NULL ||
+         cp_offset_ != 0 ||
+         backtrack() != NULL ||
+         characters_preloaded_ != 0 ||
+         quick_check_performed_.characters() != 0);
 
   if (actions_ == NULL && backtrack() == NULL) {
     // Here we just have some deferred cp advances to fix and we are back to
-    // a normal situation.
-    macro->AdvanceCurrentPosition(cp_offset_);
+    // a normal situation.  We may also have to forget some information gained
+    // through a quick check that was already performed.
+    if (cp_offset_ != 0) assembler->AdvanceCurrentPosition(cp_offset_);
     // Create a new trivial state and generate the node with that.
     GenerationVariant new_state;
     return successor->Emit(compiler, &new_state);
@@ -1444,50 +1438,50 @@ bool GenerationVariant::Flush(RegExpCompiler* compiler, RegExpNode* successor) {
   // Generate deferred actions here along with code to undo them again.
   OutSet affected_registers;
   int max_register = FindAffectedRegisters(&affected_registers);
-  PushAffectedRegisters(macro, max_register, affected_registers);
-  PerformDeferredActions(macro, max_register, affected_registers);
+  PushAffectedRegisters(assembler, max_register, affected_registers);
+  PerformDeferredActions(assembler, max_register, affected_registers);
   if (backtrack() != NULL) {
     // Here we have a concrete backtrack location.  These are set up by choice
     // nodes and so they indicate that we have a deferred save of the current
     // position which we may need to emit here.
-    macro->PushCurrentPosition();
+    assembler->PushCurrentPosition();
   }
   if (cp_offset_ != 0) {
-    macro->AdvanceCurrentPosition(cp_offset_);
+    assembler->AdvanceCurrentPosition(cp_offset_);
   }
 
   // Create a new trivial state and generate the node with that.
   Label undo;
-  macro->PushBacktrack(&undo);
+  assembler->PushBacktrack(&undo);
   GenerationVariant new_state;
   bool ok = successor->Emit(compiler, &new_state);
 
   // On backtrack we need to restore state.
-  macro->Bind(&undo);
+  assembler->Bind(&undo);
   if (!ok) return false;
   if (backtrack() != NULL) {
-    macro->PopCurrentPosition();
+    assembler->PopCurrentPosition();
   }
-  RestoreAffectedRegisters(macro, max_register, affected_registers);
+  RestoreAffectedRegisters(assembler, max_register, affected_registers);
   if (backtrack() == NULL) {
-    macro->Backtrack();
+    assembler->Backtrack();
   } else {
-    macro->GoTo(backtrack());
+    assembler->GoTo(backtrack());
   }
 
   return true;
 }
 
 
-void EndNode::EmitInfoChecks(RegExpMacroAssembler* macro,
+void EndNode::EmitInfoChecks(RegExpMacroAssembler* assembler,
                              GenerationVariant* variant) {
   if (info()->at_end) {
     Label succeed;
     // LoadCurrentCharacter will go to the label if we are at the end of the
     // input string.
-    macro->LoadCurrentCharacter(0, &succeed);
-    macro->GoTo(variant->backtrack());
-    macro->Bind(&succeed);
+    assembler->LoadCurrentCharacter(0, &succeed);
+    assembler->GoTo(variant->backtrack());
+    assembler->Bind(&succeed);
   }
 }
 
@@ -1497,16 +1491,16 @@ bool NegativeSubmatchSuccess::Emit(RegExpCompiler* compiler,
   if (!variant->is_trivial()) {
     return variant->Flush(compiler, this);
   }
-  RegExpMacroAssembler* macro = compiler->macro_assembler();
+  RegExpMacroAssembler* assembler = compiler->macro_assembler();
   if (!label()->is_bound()) {
-    macro->Bind(label());
+    assembler->Bind(label());
   }
-  EmitInfoChecks(macro, variant);
-  macro->ReadCurrentPositionFromRegister(current_position_register_);
-  macro->ReadStackPointerFromRegister(stack_pointer_register_);
+  EmitInfoChecks(assembler, variant);
+  assembler->ReadCurrentPositionFromRegister(current_position_register_);
+  assembler->ReadStackPointerFromRegister(stack_pointer_register_);
   // Now that we have unwound the stack we find at the top of the stack the
   // backtrack that the BeginSubmatch node got.
-  macro->Backtrack();
+  assembler->Backtrack();
   return true;
 }
 
@@ -1515,18 +1509,18 @@ bool EndNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
   if (!variant->is_trivial()) {
     return variant->Flush(compiler, this);
   }
-  RegExpMacroAssembler* macro = compiler->macro_assembler();
+  RegExpMacroAssembler* assembler = compiler->macro_assembler();
   if (!label()->is_bound()) {
-    macro->Bind(label());
+    assembler->Bind(label());
   }
   switch (action_) {
     case ACCEPT:
-      EmitInfoChecks(macro, variant);
-      macro->Succeed();
+      EmitInfoChecks(assembler, variant);
+      assembler->Succeed();
       return true;
     case BACKTRACK:
       ASSERT(!info()->at_end);
-      macro->GoTo(variant->backtrack());
+      assembler->GoTo(variant->backtrack());
       return true;
     case NEGATIVE_SUBMATCH_SUCCESS:
       // This case is handled in a different virtual method.
@@ -1629,30 +1623,26 @@ static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
 static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
 
 
-static inline void EmitAtomNonLetters(
+// Only emits non-letters (things that don't have case).  Only used for case
+// independent matches.
+static inline bool EmitAtomNonLetter(
     RegExpMacroAssembler* macro_assembler,
-    TextElement elm,
-    Vector<const uc16> quarks,
+    uc16 c,
     Label* on_failure,
     int cp_offset,
-    bool check_offset) {
+    bool check,
+    bool preloaded) {
   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-  // It is vital that this loop is backwards due to the unchecked character
-  // load below.
-  for (int i = quarks.length() - 1; i >= 0; i--) {
-    uc16 c = quarks[i];
-    int length = uncanonicalize.get(c, '\0', chars);
-    if (length <= 1) {
-      if (check_offset && i == quarks.length() - 1) {
-        macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
-      } else {
-        // Here we don't need to check against the end of the input string
-        // since this character lies before a character that matched.
-        macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
-      }
-      macro_assembler->CheckNotCharacter(c, on_failure);
+  int length = uncanonicalize.get(c, '\0', chars);
+  bool checked = false;
+  if (length <= 1) {
+    if (!preloaded) {
+      macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
+      checked = check;
     }
+    macro_assembler->CheckNotCharacter(c, on_failure);
   }
+  return checked;
 }
 
 
@@ -1666,7 +1656,8 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
     // If c1 and c2 differ only by one bit.
     // Ecma262UnCanonicalize always gives the highest number last.
     ASSERT(c2 > c1);
-    macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);
+    uc16 mask = String::kMaxUC16CharCode ^ exor;
+    macro_assembler->CheckNotCharacterAfterAnd(c1, mask, on_failure);
     return true;
   }
   ASSERT(c2 > c1);
@@ -1676,65 +1667,63 @@ static bool ShortCutEmitCharacterPair(RegExpMacroAssembler* macro_assembler,
     // subtract the difference from the found character, then do the or
     // trick.  We avoid the theoretical case where negative numbers are
     // involved in order to simplify code generation.
-    macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
-                                                   diff,
-                                                   on_failure);
+    uc16 mask = String::kMaxUC16CharCode ^ diff;
+    macro_assembler->CheckNotCharacterAfterMinusAnd(c1 - diff,
+                                                    diff,
+                                                    mask,
+                                                    on_failure);
     return true;
   }
   return false;
 }
 
 
-static inline void EmitAtomLetters(
+// Only emits letters (things that have case).  Only used for case independent
+// matches.
+static inline bool EmitAtomLetter(
     RegExpMacroAssembler* macro_assembler,
-    TextElement elm,
-    Vector<const uc16> quarks,
+    uc16 c,
     Label* on_failure,
     int cp_offset,
-    bool check_offset) {
+    bool check,
+    bool preloaded) {
   unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
-  // It is vital that this loop is backwards due to the unchecked character
-  // load below.
-  for (int i = quarks.length() - 1; i >= 0; i--) {
-    uc16 c = quarks[i];
-    int length = uncanonicalize.get(c, '\0', chars);
-    if (length <= 1) continue;
-    if (check_offset && i == quarks.length() - 1) {
-      macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
-    } else {
-      // Here we don't need to check against the end of the input string
-      // since this character lies before a character that matched.
-      macro_assembler->LoadCurrentCharacterUnchecked(cp_offset + i);
-    }
-    Label ok;
-    ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
-    switch (length) {
-      case 2: {
-        if (ShortCutEmitCharacterPair(macro_assembler,
-                                      chars[0],
-                                      chars[1],
-                                      on_failure)) {
-        } else {
-          macro_assembler->CheckCharacter(chars[0], &ok);
-          macro_assembler->CheckNotCharacter(chars[1], on_failure);
-          macro_assembler->Bind(&ok);
-        }
-        break;
-      }
-      case 4:
-        macro_assembler->CheckCharacter(chars[3], &ok);
-        // Fall through!
-      case 3:
+  int length = uncanonicalize.get(c, '\0', chars);
+  if (length <= 1) return false;
+  // We may not need to check against the end of the input string
+  // if this character lies before a character that matched.
+  if (!preloaded) {
+    macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check);
+  }
+  Label ok;
+  ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
+  switch (length) {
+    case 2: {
+      if (ShortCutEmitCharacterPair(macro_assembler,
+                                    chars[0],
+                                    chars[1],
+                                    on_failure)) {
+      } else {
         macro_assembler->CheckCharacter(chars[0], &ok);
-        macro_assembler->CheckCharacter(chars[1], &ok);
-        macro_assembler->CheckNotCharacter(chars[2], on_failure);
+        macro_assembler->CheckNotCharacter(chars[1], on_failure);
         macro_assembler->Bind(&ok);
-        break;
-      default:
-        UNREACHABLE();
-        break;
+      }
+      break;
     }
+    case 4:
+      macro_assembler->CheckCharacter(chars[3], &ok);
+      // Fall through!
+    case 3:
+      macro_assembler->CheckCharacter(chars[0], &ok);
+      macro_assembler->CheckCharacter(chars[1], &ok);
+      macro_assembler->CheckNotCharacter(chars[2], on_failure);
+      macro_assembler->Bind(&ok);
+      break;
+    default:
+      UNREACHABLE();
+      break;
   }
+  return true;
 }
 
 
@@ -1743,7 +1732,8 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
                           int cp_offset,
                           Label* on_failure,
                           bool check_offset,
-                          bool ascii) {
+                          bool ascii,
+                          bool preloaded) {
   ZoneList<CharacterRange>* ranges = cc->ranges();
   int max_char;
   if (ascii) {
@@ -1789,15 +1779,11 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
     return;
   }
 
-  if (check_offset) {
-    macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
-  } else {
-    // Here we don't need to check against the end of the input string
-    // since this character lies before a character that matched.
-    macro_assembler->LoadCurrentCharacterUnchecked(cp_offset);
+  if (!preloaded) {
+    macro_assembler->LoadCurrentCharacter(cp_offset, on_failure, check_offset);
   }
 
-  for (int i = 0; i <= last_valid_range; i++) {
+  for (int i = 0; i < last_valid_range; i++) {
     CharacterRange& range = ranges->at(i);
     Label next_range;
     uc16 from = range.from();
@@ -1858,6 +1844,10 @@ static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
 }
 
 
+RegExpNode::~RegExpNode() {
+}
+
+
 RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
                                                   GenerationVariant* variant) {
   // TODO(erikcorry): Implement support.
@@ -1908,112 +1898,580 @@ RegExpNode::LimitResult RegExpNode::LimitVersions(RegExpCompiler* compiler,
 }
 
 
-// This generates the code to match a text node.  A text node can contain
-// straight character sequences (possibly to be matched in a case-independent
-// way) and character classes.  In order to be most efficient we test for the
-// simple things first and then move on to the more complicated things.  The
-// simplest thing is a non-letter or a letter if we are matching case.  The
-// next-most simple thing is a case-independent letter.  The least simple is
-// a character class.  Another optimization is that we test the last one first.
-// If that succeeds we don't need to test for the end of the string when we
-// load other characters.
-bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
-  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
-  Label *backtrack = variant->backtrack();
-  LimitResult limit_result = LimitVersions(compiler, variant);
-  if (limit_result == FAIL) return false;
-  if (limit_result == DONE) return true;
-  ASSERT(limit_result == CONTINUE);
+// Reports how many characters are eaten at least from this node onwards.
+// The count comes from the successor; 0 is returned once the recursion
+// budget is used up.
+int ActionNode::EatsAtLeast(int recursion_depth) {
+  const bool too_deep = recursion_depth > RegExpCompiler::kMaxRecursion;
+  // A successful positive submatch rewinds the input, so nothing can be
+  // counted as eaten past this point.
+  if (too_deep || type_ == POSITIVE_SUBMATCH_SUCCESS) {
+    return 0;
+  }
+  const int next_depth = recursion_depth + 1;
+  return on_success()->EatsAtLeast(next_depth);
+}
 
-  int element_count = elms_->length();
-  ASSERT(element_count != 0);
-  if (info()->at_end) {
-    macro_assembler->GoTo(backtrack);
-    return true;
+
+// Reports how many characters are eaten at least from this node onwards:
+// this node's own length plus, when worthwhile, the successor's count.
+int TextNode::EatsAtLeast(int recursion_depth) {
+  const int own_length = Length();
+  // Stop looking further once 4 characters are known or the recursion
+  // budget is used up.
+  const bool ask_successor =
+      own_length < 4 && recursion_depth <= RegExpCompiler::kMaxRecursion;
+  if (!ask_successor) return own_length;
+  return own_length + on_success()->EatsAtLeast(recursion_depth + 1);
+}
+
+
+// Returns the smallest EatsAtLeast() value over all alternatives, leaving out
+// any alternative whose node is ignore_this_node.  Returns 0 once the
+// recursion budget is used up.
+int ChoiceNode::EatsAtLeastHelper(int recursion_depth,
+                                  RegExpNode* ignore_this_node) {
+  if (recursion_depth > RegExpCompiler::kMaxRecursion) return 0;
+  int smallest = 100;  // Start value; also the result if nothing is examined.
+  const int next_depth = recursion_depth + 1;
+  for (int i = 0, n = alternatives_->length(); i < n; i++) {
+    RegExpNode* candidate = alternatives_->at(i).node();
+    if (candidate != ignore_this_node) {
+      int eats = candidate->EatsAtLeast(next_depth);
+      if (eats < smallest) smallest = eats;
+    }
+  }
+  return smallest;
+}
+
+
+// The minimum is taken over every alternative except loop_node_.
+int LoopChoiceNode::EatsAtLeast(int recursion_depth) {
+  RegExpNode* skipped = loop_node_;
+  return EatsAtLeastHelper(recursion_depth, skipped);
+}
+
+
+// The minimum is taken over all alternatives; none is skipped.
+int ChoiceNode::EatsAtLeast(int recursion_depth) {
+  RegExpNode* nothing_skipped = NULL;
+  return EatsAtLeastHelper(recursion_depth, nothing_skipped);
+}
+
+
+// Propagates the most significant set bit of v into every lower bit
+// position, e.g. 0x00000100 becomes 0x000001ff.  Zero stays zero.
+static inline uint32_t SmearBitsRight(uint32_t v) {
+  // Doubling the shift each round covers all 32 bit positions in 5 steps.
+  for (int shift = 1; shift <= 16; shift <<= 1) {
+    v |= v >> shift;
+  }
+  return v;
+}
+
+
+// Combines the per-position masks and values into the single mask_ and
+// value_ words, giving each position an 8 bit (ASCII) or 16 bit (UC16) lane.
+// Returns true if some position has a mask bit set inside
+// String::kMaxAsciiCharCode.
+bool QuickCheckDetails::Rationalize(bool asc) {
+  uint32_t char_mask = String::kMaxUC16CharCode;
+  int lane_width = 16;
+  if (asc) {
+    char_mask = String::kMaxAsciiCharCode;
+    lane_width = 8;
+  }
+  bool found_useful_op = false;
+  mask_ = 0;
+  value_ = 0;
+  for (int i = 0; i < characters_; i++) {
+    Position* pos = &positions_[i];
+    const int lane_shift = i * lane_width;
+    if ((pos->mask & String::kMaxAsciiCharCode) != 0) found_useful_op = true;
+    mask_ |= (pos->mask & char_mask) << lane_shift;
+    value_ |= (pos->value & char_mask) << lane_shift;
+  }
+  return found_useful_op;
+}
+
+
+// Tries to emit a mask-and-compare quick check for this node.
+// Returns false without emitting any code if `details` covers zero characters
+// or if Rationalize() reports that no position contributes a useful mask.
+// Otherwise it loads the characters (unless the variant already has exactly
+// that many preloaded) and emits a single compare: when
+// fall_through_on_failure is true a match jumps to on_possible_success,
+// otherwise a mismatch jumps to the variant's backtrack label.  Returns true
+// in that case.
+bool RegExpNode::EmitQuickCheck(RegExpCompiler* compiler,
+                                GenerationVariant* variant,
+                                bool preload_has_checked_bounds,
+                                Label* on_possible_success,
+                                QuickCheckDetails* details,
+                                bool fall_through_on_failure) {
+  if (details->characters() == 0) return false;
+  GetQuickCheckDetails(details, compiler, 0);
+  if (!details->Rationalize(compiler->ascii())) return false;
+  uint32_t mask = details->mask();
+  uint32_t value = details->value();
+
+  RegExpMacroAssembler* assembler = compiler->macro_assembler();
+
+  // Only reload when the number of preloaded characters differs from what
+  // this check needs; the bounds check is skipped if a preload already did it.
+  if (variant->characters_preloaded() != details->characters()) {
+    assembler->LoadCurrentCharacter(variant->cp_offset(),
+                                    variant->backtrack(),
+                                    !preload_has_checked_bounds,
+                                    details->characters());
+  }
+
+
+  // The AND can be dropped when the mask keeps every bit the load produced.
+  bool need_mask = true;
+
+  if (details->characters() == 1) {
+    // If number of characters preloaded is 1 then we used a byte or 16 bit
+    // load so the value is already masked down.
+    uint32_t char_mask;
+    if (compiler->ascii()) {
+      char_mask = String::kMaxAsciiCharCode;
+    } else {
+      char_mask = String::kMaxUC16CharCode;
+    }
+    if ((mask & char_mask) == char_mask) need_mask = false;
+  } else {
+    // For 2-character preloads in ASCII mode we also use a 16 bit load with
+    // zero extend.
+    if (details->characters() == 2 && compiler->ascii()) {
+      if ((mask & 0xffff) == 0xffff) need_mask = false;
+    } else {
+      if (mask == 0xffffffff) need_mask = false;
+    }
   }
-  // First check for non-ASCII text.
-  // TODO(plesner): We should do this at node level.
+
+  if (fall_through_on_failure) {
+    // Jump away on a possible match; a definite mismatch falls through.
+    if (need_mask) {
+      assembler->CheckCharacterAfterAnd(value, mask, on_possible_success);
+    } else {
+      assembler->CheckCharacter(value, on_possible_success);
+    }
+  } else {
+    // Backtrack on a definite mismatch; a possible match falls through.
+    if (need_mask) {
+      assembler->CheckNotCharacterAfterAnd(value, mask, variant->backtrack());
+    } else {
+      assembler->CheckNotCharacter(value, variant->backtrack());
+    }
+  }
+  return true;
+}
+
+
+// Here is the meat of GetQuickCheckDetails (see also the comment on the
+// super-class in the .h file).
+//
+// We iterate along the text object, building up for each character a
+// mask and value that can be used to test for a quick failure to match.
+// The masks and values for the positions will be combined into a single
+// machine word for the current character width in order to be used in
+// generating a quick check.
+//
+// details: receives one mask/value/determines_perfectly entry per position.
+// characters_filled_in: number of positions already filled in by earlier
+//     nodes; must be less than details->characters() on entry.
+// If this node does not fill all requested positions the remainder is
+// delegated to on_success().
+void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
+                                    RegExpCompiler* compiler,
+                                    int characters_filled_in) {
+  ASSERT(characters_filled_in < details->characters());
+  int characters = details->characters();
+  int char_mask;
+  int char_shift;
   if (compiler->ascii()) {
-    for (int i = element_count - 1; i >= 0; i--) {
-      TextElement elm = elms_->at(i);
-      if (elm.type == TextElement::ATOM) {
-        Vector<const uc16> quarks = elm.data.u_atom->data();
-        for (int j = quarks.length() - 1; j >= 0; j--) {
-          if (quarks[j] > String::kMaxAsciiCharCode) {
-            macro_assembler->GoTo(backtrack);
-            return true;
+    char_mask = String::kMaxAsciiCharCode;
+    char_shift = 8;
+  } else {
+    char_mask = String::kMaxUC16CharCode;
+    char_shift = 16;
+  }
+  for (int k = 0; k < elms_->length(); k++) {
+    TextElement elm = elms_->at(k);
+    if (elm.type == TextElement::ATOM) {
+      Vector<const uc16> quarks = elm.data.u_atom->data();
+      for (int i = 0; i < characters && i < quarks.length(); i++) {
+        QuickCheckDetails::Position* pos =
+            details->positions(characters_filled_in);
+        if (compiler->ignore_case()) {
+          unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+          uc16 c = quarks[i];
+          int length = uncanonicalize.get(c, '\0', chars);
+          if (length < 2) {
+            // This letter has no case equivalents, so it's nice and simple
+            // and the mask-compare will determine definitely whether we have
+            // a match at this character position.
+            pos->mask = char_mask;
+            pos->value = c;
+            pos->determines_perfectly = true;
+          } else {
+            uint32_t common_bits = char_mask;
+            uint32_t bits = chars[0];
+            for (int j = 1; j < length; j++) {
+              uint32_t differing_bits = ((chars[j] & common_bits) ^ bits);
+              common_bits ^= differing_bits;
+              bits &= common_bits;
+            }
+            // If length is 2 and common bits has only one zero in it then
+            // our mask and compare instruction will determine definitely
+            // whether we have a match at this character position.  Otherwise
+            // it can only be an approximate check.
+            uint32_t one_zero = (common_bits | ~char_mask);
+            if (length == 2 && ((~one_zero) & ((~one_zero) - 1)) == 0) {
+              pos->determines_perfectly = true;
+            }
+            pos->mask = common_bits;
+            pos->value = bits;
           }
+        } else {
+          // Don't ignore case.  Nice simple case where the mask-compare will
+          // determine definitely whether we have a match at this character
+          // position.
+          pos->mask = char_mask;
+          pos->value = quarks[i];
+          pos->determines_perfectly = true;
+        }
+        characters_filled_in++;
+        ASSERT(characters_filled_in <= details->characters());
+        if (characters_filled_in == details->characters()) {
+          return;
         }
+      }
+    } else {
+      QuickCheckDetails::Position* pos =
+          details->positions(characters_filled_in);
+      RegExpCharacterClass* tree = elm.data.u_char_class;
+      ZoneList<CharacterRange>* ranges = tree->ranges();
+      CharacterRange range = ranges->at(0);
+      if (tree->is_negated()) {
+        // A quick check uses multi-character mask and compare.  There is no
+        // useful way to incorporate a negative char class into this scheme
+        // so we just conservatively create a mask and value that will always
+        // succeed.
+        pos->mask = 0;
+        pos->value = 0;
       } else {
-        ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
+        uint32_t differing_bits = (range.from() ^ range.to());
+        // A mask and compare is only perfect if the differing bits form a
+        // number like 00011111 with one single block of trailing 1s and the
+        // range covers exactly the values that mask admits, i.e. it runs
+        // from xxx00000 to xxx11111.  A range like [a-b] (0x61-0x62) has
+        // differing bits 011, but the resulting mask also admits 0x60 and
+        // 0x63, so that check is only approximate.
+        if ((differing_bits & (differing_bits + 1)) == 0 &&
+            range.from() + differing_bits == range.to()) {
+          pos->determines_perfectly = true;
+        }
+        uint32_t common_bits = ~SmearBitsRight(differing_bits);
+        uint32_t bits = (range.from() & common_bits);
+        for (int i = 1; i < ranges->length(); i++) {
+          // Here we are combining more ranges into the mask and compare
+          // value.  With each new range the mask becomes more sparse and
+          // so the chances of a false positive rise.  A character class
+          // with multiple ranges is assumed never to be equivalent to a
+          // mask and compare operation.
+          pos->determines_perfectly = false;
+          CharacterRange range = ranges->at(i);
+          uint32_t new_common_bits = (range.from() ^ range.to());
+          new_common_bits = ~SmearBitsRight(new_common_bits);
+          common_bits &= new_common_bits;
+          bits &= new_common_bits;
+          uint32_t differing_bits = (range.from() & common_bits) ^ bits;
+          common_bits ^= differing_bits;
+          bits &= common_bits;
+        }
+        pos->mask = common_bits;
+        pos->value = bits;
+      }
+      characters_filled_in++;
+      ASSERT(characters_filled_in <= details->characters());
+      if (characters_filled_in == details->characters()) {
+        return;
       }
     }
   }
-  // Second, handle straight character matches.
-  int checked_up_to = -1;
-  for (int i = element_count - 1; i >= 0; i--) {
+  ASSERT(characters_filled_in != details->characters());
+  on_success()-> GetQuickCheckDetails(details, compiler, characters_filled_in);
+}
+
+
+// Wipes every position currently in use (mask, value and the
+// determines_perfectly flag) and sets the character count to zero.
+void QuickCheckDetails::Clear() {
+  int used = characters_;
+  characters_ = 0;
+  while (used-- > 0) {
+    Position* pos = &positions_[used];
+    pos->determines_perfectly = false;
+    pos->value = 0;
+    pos->mask = 0;
+  }
+}
+
+
+// Drops the first `by` positions and shifts the remaining ones down to index
+// 0.  Advancing by at least the current character count clears everything.
+void QuickCheckDetails::Advance(int by, bool ascii) {
+  ASSERT(by > 0);
+  if (by >= characters_) {
+    Clear();
+    return;
+  }
+  const int remaining = characters_ - by;
+  for (int i = 0; i < characters_; i++) {
+    if (i < remaining) {
+      // Reads a higher index than it writes, so nothing is overwritten
+      // before it has been copied.
+      positions_[i] = positions_[i + by];
+    } else {
+      positions_[i].mask = 0;
+      positions_[i].value = 0;
+      positions_[i].determines_perfectly = false;
+    }
+  }
+  characters_ = remaining;
+  // mask_ and value_ are deliberately left alone: we only advance after they
+  // have been used in a check, and they will not be used again because that
+  // would gain us nothing.
+}
+
+
+// Merges the details of another alternative into this one, starting at
+// position from_index.  Afterwards each merged position keeps only the mask
+// bits on which both sides agree.  Note that other's values are masked in
+// place as part of the merge.
+void QuickCheckDetails::Merge(QuickCheckDetails* other, int from_index) {
+  ASSERT(characters_ == other->characters_);
+  for (int i = from_index; i < characters_; i++) {
+    QuickCheckDetails::Position* mine = positions(i);
+    QuickCheckDetails::Position* theirs = other->positions(i);
+    // The mask-compare stays exact only if both sides of the alternation
+    // perform the very same operation and the other side is itself exact.
+    const bool same_operation =
+        mine->mask == theirs->mask && mine->value == theirs->value;
+    if (!same_operation || !theirs->determines_perfectly) {
+      mine->determines_perfectly = false;
+    }
+    mine->mask &= theirs->mask;
+    mine->value &= mine->mask;
+    theirs->value &= mine->mask;
+    // Bits where the two values still disagree cannot be tested at all.
+    uc16 differing_bits = (mine->value ^ theirs->value);
+    mine->mask &= ~differing_bits;
+    mine->value &= mine->mask;
+  }
+}
+
+
+// Contributes nothing when body_can_be_zero_length_ is set; otherwise it
+// behaves exactly like an ordinary choice node.
+void LoopChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
+                                          RegExpCompiler* compiler,
+                                          int characters_filled_in) {
+  if (!body_can_be_zero_length_) {
+    ChoiceNode::GetQuickCheckDetails(details, compiler, characters_filled_in);
+  }
+}
+
+
+// Fills `details` from the first alternative and then merges in the details
+// of each further alternative, from characters_filled_in onwards.
+void ChoiceNode::GetQuickCheckDetails(QuickCheckDetails* details,
+                                      RegExpCompiler* compiler,
+                                      int characters_filled_in) {
+  const int choice_count = alternatives_->length();
+  ASSERT(choice_count > 0);
+  RegExpNode* first = alternatives_->at(0).node();
+  first->GetQuickCheckDetails(details, compiler, characters_filled_in);
+  for (int alt = 1; alt < choice_count; alt++) {
+    // Each further branch gets a fresh set of details of the same width,
+    // which is then merged into the running result.
+    QuickCheckDetails branch_details(details->characters());
+    RegExpNode* branch = alternatives_->at(alt).node();
+    branch->GetQuickCheckDetails(&branch_details,
+                                 compiler,
+                                 characters_filled_in);
+    details->Merge(&branch_details, characters_filled_in);
+  }
+}
+
+
+// We call this repeatedly to generate code for each pass over the text node.
+// The passes are in increasing order of difficulty because we hope one
+// of the first passes will fail in which case we are saved the work of the
+// later passes.  For example for the case independent regexp /%[asdfghjkl]a/
+// we will check the '%' in the first pass, the case independent 'a' in the
+// second pass and the character class in the last pass.
+//
+// The passes are done from right to left, so for example to test for /bar/
+// we will first test for an 'r' with offset 2, then an 'a' with offset 1
+// and then a 'b' with offset 0.  This means we can avoid the end-of-input
+// bounds check most of the time.  In the example we only need to check for
+// end-of-input when loading the putative 'r'.
+//
+// A slight complication involves the fact that the first character may already
+// be fetched into a register by the previous node.  In this case we want to
+// do the test for that character first.  We do this in separate passes.  The
+// 'preloaded' argument indicates that we are doing such a 'pass'.  If such a
+// pass has been performed then subsequent passes will have true in
+// first_element_checked to indicate that that character does not need to be
+// checked again.
+//
+// In addition to all this we are passed a GenerationVariant, which can
+// contain an AlternativeGeneration object.  In this AlternativeGeneration
+// object we can see details of any quick check that was already passed in
+// order to get to the code we are now generating.  The quick check can involve
+// loading characters, which means we do not need to recheck the bounds
+// up to the limit the quick check already checked.  In addition the quick
+// check can have involved a mask and compare operation which may simplify
+// or obviate the need for further checks at some character positions.
+void TextNode::TextEmitPass(RegExpCompiler* compiler,
+                            TextEmitPassType pass,
+                            bool preloaded,
+                            GenerationVariant* variant,
+                            bool first_element_checked,
+                            int* checked_up_to) {
+  RegExpMacroAssembler* assembler = compiler->macro_assembler();
+  bool ascii = compiler->ascii();
+  Label* backtrack = variant->backtrack();
+  QuickCheckDetails* quick_check = variant->quick_check_performed();
+  int element_count = elms_->length();
+  // A preloaded pass starts (and therefore ends) at element 0; every other
+  // pass walks the elements from the last one down to the first.
+  for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
     TextElement elm = elms_->at(i);
-    ASSERT(elm.cp_offset >= 0);
     int cp_offset = variant->cp_offset() + elm.cp_offset;
     if (elm.type == TextElement::ATOM) {
-      Vector<const uc16> quarks = elm.data.u_atom->data();
-      int last_cp_offset = cp_offset + quarks.length();
-      if (compiler->ignore_case()) {
-        EmitAtomNonLetters(macro_assembler,
-                           elm,
-                           quarks,
-                           backtrack,
-                           cp_offset,
-                           checked_up_to < last_cp_offset);
-      } else {
-        macro_assembler->CheckCharacters(quarks,
-                                         cp_offset,
-                                         backtrack,
-                                         checked_up_to < last_cp_offset);
+      if (pass == NON_ASCII_MATCH ||
+          pass == CHARACTER_MATCH ||
+          pass == CASE_CHARACTER_MATCH) {
+        Vector<const uc16> quarks = elm.data.u_atom->data();
+        // Likewise only character 0 is visited when preloaded.
+        for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
+          bool bound_checked = true;  // Most ops will check their bounds.
+          // The very first character was already handled by a preloaded pass.
+          if (first_element_checked && i == 0 && j == 0) continue;
+          // The quick check already decided this position exactly.
+          if (quick_check != NULL &&
+              elm.cp_offset + j < quick_check->characters() &&
+              quick_check->positions(elm.cp_offset + j)->determines_perfectly) {
+            continue;
+          }
+          if (pass == NON_ASCII_MATCH) {
+            ASSERT(ascii);
+            // A non-ASCII character can never match in ASCII mode, so the
+            // whole node just backtracks.
+            if (quarks[j] > String::kMaxAsciiCharCode) {
+              assembler->GoTo(backtrack);
+              return;
+            }
+          } else if (pass == CHARACTER_MATCH) {
+            if (compiler->ignore_case()) {
+              bound_checked = EmitAtomNonLetter(assembler,
+                                                quarks[j],
+                                                backtrack,
+                                                cp_offset + j,
+                                                *checked_up_to < cp_offset + j,
+                                                preloaded);
+            } else {
+              if (!preloaded) {
+                assembler->LoadCurrentCharacter(cp_offset + j,
+                                                backtrack,
+                                                *checked_up_to < cp_offset + j);
+              }
+              assembler->CheckNotCharacter(quarks[j], backtrack);
+            }
+          } else {
+            ASSERT_EQ(pass, CASE_CHARACTER_MATCH);
+            ASSERT(compiler->ignore_case());
+            bound_checked = EmitAtomLetter(assembler,
+                                           quarks[j],
+                                           backtrack,
+                                           cp_offset + j,
+                                           *checked_up_to < cp_offset + j,
+                                           preloaded);
+          }
+          // Remember the highest offset whose bounds are now known to be
+          // checked so that later loads can skip the check.
+          if (pass != NON_ASCII_MATCH && bound_checked) {
+            if (cp_offset + j > *checked_up_to) {
+              *checked_up_to = cp_offset + j;
+            }
+          }
+        }
       }
-      if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
     } else {
       ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
-    }
-  }
-  // Third, handle case independent letter matches if any.
-  if (compiler->ignore_case()) {
-    for (int i = element_count - 1; i >= 0; i--) {
-      TextElement elm = elms_->at(i);
-      int cp_offset = variant->cp_offset() + elm.cp_offset;
-      if (elm.type == TextElement::ATOM) {
-        Vector<const uc16> quarks = elm.data.u_atom->data();
-        int last_cp_offset = cp_offset + quarks.length();
-        EmitAtomLetters(macro_assembler,
-                        elm,
-                        quarks,
-                        backtrack,
-                        cp_offset,
-                        checked_up_to < last_cp_offset);
-        if (last_cp_offset > checked_up_to) checked_up_to = last_cp_offset - 1;
+      // Same two skip rules as for atoms, applied to the single position a
+      // character class occupies.
+      if (first_element_checked && i == 0) continue;
+      if (quick_check != NULL &&
+          elm.cp_offset < quick_check->characters() &&
+          quick_check->positions(elm.cp_offset)->determines_perfectly) {
+        continue;
+      }
+      if (pass == CHARACTER_CLASS_MATCH) {
+        RegExpCharacterClass* cc = elm.data.u_char_class;
+        EmitCharClass(assembler,
+                      cc,
+                      cp_offset,
+                      backtrack,
+                      *checked_up_to < cp_offset,
+                      ascii,
+                      preloaded);
+        if (cp_offset > *checked_up_to) {
+          *checked_up_to = cp_offset;
+        }
       }
     }
   }
-  // If the fast character matches passed then do the character classes.
-  for (int i = element_count - 1; i >= 0; i--) {
-    TextElement elm = elms_->at(i);
-    int cp_offset = variant->cp_offset() + elm.cp_offset;
-    if (elm.type == TextElement::CHAR_CLASS) {
-      RegExpCharacterClass* cc = elm.data.u_char_class;
-      EmitCharClass(macro_assembler,
-                    cc,
-                    cp_offset,
-                    backtrack,
-                    checked_up_to < cp_offset,
-                    compiler->ascii());
-      if (cp_offset > checked_up_to) checked_up_to = cp_offset;
-    }
+}
+
+
+// Number of characters this text node spans: the offset of its last element
+// plus that element's width.
+int TextNode::Length() {
+  TextElement elm = elms_->last();
+  ASSERT(elm.cp_offset >= 0);
+  int last_width = 1;  // Any non-atom element counts as one character.
+  if (elm.type == TextElement::ATOM) {
+    last_width = elm.data.u_atom->data().length();
+  }
+  return elm.cp_offset + last_width;
+}
+
+
+// This generates the code to match a text node.  A text node can contain
+// straight character sequences (possibly to be matched in a case-independent
+// way) and character classes.  For efficiency we do not do this in a single
+// pass from left to right.  Instead we pass over the text node several times,
+// emitting code for some character positions every time.  See the comment on
+// TextEmitPass for details.
+bool TextNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
+  LimitResult limit_result = LimitVersions(compiler, variant);
+  if (limit_result == FAIL) return false;
+  if (limit_result == DONE) return true;
+  ASSERT(limit_result == CONTINUE);
+
+  // A text node carrying any of the follows_* interests is not handled here;
+  // return false, just as for a LimitVersions failure.
+  if (info()->follows_word_interest ||
+      info()->follows_newline_interest ||
+      info()->follows_start_interest) {
+    return false;
+  }
+
+  // When the node info says at_end, just emit a jump to the backtrack label.
+  if (info()->at_end) {
+    compiler->macro_assembler()->GoTo(variant->backtrack());
+    return true;
   }
 
-  GenerationVariant new_variant(*variant);
-  new_variant.set_cp_offset(checked_up_to + 1);
+  // In ASCII mode first look for atom characters outside the ASCII range.
+  // This pass does no bounds bookkeeping, hence the dummy.
+  if (compiler->ascii()) {
+    int dummy = 0;
+    TextEmitPass(compiler, NON_ASCII_MATCH, false, variant, false, &dummy);
+  }
+
+  bool first_elt_done = false;
+  // Offsets up to bound_checked_to need no further bounds check: they were
+  // covered by the quick check or by the preload, whichever reached further.
+  int bound_checked_to = variant->cp_offset() - 1;
+  QuickCheckDetails* quick_check = variant->quick_check_performed();
+  bound_checked_to += Max(quick_check->characters(),
+                          variant->characters_preloaded());
+
+  // If a character is preloaded into the current character register then
+  // check that now.
+  if (variant->characters_preloaded() == 1) {
+    TextEmitPass(compiler,
+                 CHARACTER_MATCH,
+                 true,
+                 variant,
+                 false,
+                 &bound_checked_to);
+    if (compiler->ignore_case()) {
+      TextEmitPass(compiler,
+                   CASE_CHARACTER_MATCH,
+                   true,
+                   variant,
+                   false,
+                   &bound_checked_to);
+    }
+    TextEmitPass(compiler,
+                 CHARACTER_CLASS_MATCH,
+                 true,
+                 variant,
+                 false,
+                 &bound_checked_to);
+    first_elt_done = true;
+  }
+
+  // Now the regular passes.  first_elt_done lets them skip the character
+  // that was handled by the preloaded passes above.
+  TextEmitPass(compiler,
+               CHARACTER_MATCH,
+               false,
+               variant,
+               first_elt_done,
+               &bound_checked_to);
+  if (compiler->ignore_case()) {
+    TextEmitPass(compiler,
+                 CASE_CHARACTER_MATCH,
+                 false,
+                 variant,
+                 first_elt_done,
+                 &bound_checked_to);
+  }
+  TextEmitPass(compiler,
+               CHARACTER_CLASS_MATCH,
+               false,
+               variant,
+               first_elt_done,
+               &bound_checked_to);
+
+  // Continue with the successor using a copy of the variant advanced by the
+  // length of this text.
+  GenerationVariant successor_variant(*variant);
+  successor_variant.AdvanceVariant(Length(), compiler->ascii());
   RecursionCheck rc(compiler);
-  return on_success()->Emit(compiler, &new_variant);
+  return on_success()->Emit(compiler, &successor_variant);
+}
+
+
+// Moves this variant `by` characters forward.
+void GenerationVariant::AdvanceVariant(int by, bool ascii) {
+  ASSERT(by > 0);
+  cp_offset_ += by;
+  // Shift the record of what the quick check established so that it lines up
+  // with the new offset.  That record tells us what mask and compare already
+  // determined about the characters.
+  quick_check_performed_.Advance(by, ascii);
+  // There is no instruction for shifting the current character register down
+  // or for using a shifted value, so simply forget that any characters were
+  // preloaded into it.
+  characters_preloaded_ = 0;
 }
 
 
@@ -2110,6 +2568,155 @@ bool LoopChoiceNode::Emit(RegExpCompiler* compiler,
 }
 
 
+// Decides how many characters to preload for the alternatives of this choice
+// node.  Starts from EatsAtLeast(0) and clamps it to what a single load can
+// fetch: 1, 2 or 4 characters in ASCII mode and 1 or 2 otherwise when
+// unaligned reads are available, and at most 1 when they are not.
+int ChoiceNode::CalculatePreloadCharacters(RegExpCompiler* compiler) {
+  int preload_characters = EatsAtLeast(0);
+#ifdef CAN_READ_UNALIGNED
+  // Only this configuration needs to know the character width; declaring
+  // `ascii` here avoids an unused local in the other one.
+  // NOTE(review): assumes compiler->ascii() is a side-effect-free getter.
+  bool ascii = compiler->ascii();
+  if (ascii) {
+    if (preload_characters > 4) preload_characters = 4;
+    // We can't preload 3 characters because there is no machine instruction
+    // to do that.  We can't just load 4 because we could be reading
+    // beyond the end of the string, which could cause a memory fault.
+    if (preload_characters == 3) preload_characters = 2;
+  } else {
+    if (preload_characters > 2) preload_characters = 2;
+  }
+#else
+  if (preload_characters > 1) preload_characters = 1;
+#endif
+  return preload_characters;
+}
+
+
+// This class is used when generating the alternatives in a choice node.  It
+// records the way the alternative is being code generated.
+class AlternativeGeneration: public Malloced {
+ public:
+  AlternativeGeneration()
+      : possible_success(),
+        expects_preload(false),
+        after(),
+        quick_check_details() { }
+  Label possible_success;  // Presumably the quick check's success target - confirm in ChoiceNode::Emit.
+  bool expects_preload;  // ChoiceNode::Emit stores preload_is_current here.
+  Label after;  // NOTE(review): use not visible here; see ChoiceNode::Emit.
+  QuickCheckDetails quick_check_details;  // Width is set to the preload count by ChoiceNode::Emit.
+};
+
+
+// Owns `count` AlternativeGeneration objects.  The first kAFew of them live
+// inside this object itself; any beyond that are allocated with new and
+// deleted again by the destructor.
+class AlternativeGenerationList {
+ public:
+  explicit AlternativeGenerationList(int count)
+      : alt_gens_(count) {
+    for (int i = 0; i < count; i++) {
+      AlternativeGeneration* gen =
+          (i < kAFew) ? a_few_alt_gens_ + i : new AlternativeGeneration();
+      alt_gens_.Add(gen);
+    }
+  }
+
+  ~AlternativeGenerationList() {
+    const int total = alt_gens_.length();
+    for (int i = 0; i < total; i++) {
+      AlternativeGeneration* gen = alt_gens_[i];
+      // Call Unuse() on both labels of every entry.
+      gen->possible_success.Unuse();
+      gen->after.Unuse();
+      // Only entries past the embedded ones were heap allocated.
+      if (i >= kAFew) {
+        delete gen;
+        alt_gens_[i] = NULL;
+      }
+    }
+  }
+
+  AlternativeGeneration* at(int i) {
+    return alt_gens_[i];
+  }
+
+ private:
+  static const int kAFew = 10;
+  ZoneList<AlternativeGeneration*> alt_gens_;
+  AlternativeGeneration a_few_alt_gens_[kAFew];
+};
+
+
+/* Code generation for choice nodes.
+ *
+ * We generate quick checks that do a mask and compare to eliminate a
+ * choice.  If the quick check succeeds then it jumps to the continuation to
+ * do slow checks and check subsequent nodes.  If it fails (the common case)
+ * it falls through to the next choice.
+ *
+ * Here is the desired flow graph.  Nodes directly below each other imply
+ * fallthrough.  Alternatives 1 and 2 have quick checks.  Alternative
+ * 3 doesn't have a quick check so we have to call the slow check.
+ * Nodes are marked Qn for quick checks and Sn for slow checks.  The entire
+ * regexp continuation is generated directly after the Sn node, up to the
+ * next GoTo if we decide to reuse some already generated code.  Some
+ * nodes expect preload_characters to be preloaded into the current
+ * character register.  R nodes do this preloading.  Vertices are marked
+ * F for failures and S for success (possible success in the case of quick
+ * nodes).  L, V, < and > are used as arrow heads.
+ *
+ * ----------> R
+ *             |
+ *             V
+ *            Q1 -----> S1
+ *             |   S   /
+ *            F|      /
+ *             |    F/
+ *             |    /
+ *             |   R
+ *             |  /
+ *             V L
+ *            Q2 -----> S2
+ *             |   S   /
+ *            F|      /
+ *             |    F/
+ *             |    /
+ *             |   R
+ *             |  /
+ *             V L
+ *            S3
+ *             |
+ *            F|
+ *             |
+ *             R
+ *             |
+ * backtrack   V
+ * <----------Q4
+ *   \    F    |
+ *    \        |S
+ *     \   F   V
+ *      \-----S4
+ *
+ * For greedy loops we reverse our expectation and expect to match rather
+ * than fail. Therefore we want the loop code to look like this (U is the
+ * unwind code that steps back in the greedy loop).  The following alternatives
+ * look the same as above.
+ *              _____
+ *             /     \
+ *             V     |
+ * ----------> S1    |
+ *            /|     |
+ *           / |S    |
+ *         F/  \_____/
+ *         /
+ *        |<-----------
+ *        |            \
+ *        V             \
+ *        Q2 ---> S2     \
+ *        |  S   /       |
+ *       F|     /        |
+ *        |   F/         |
+ *        |   /          |
+ *        |  R           |
+ *        | /            |
+ *   F    VL             |
+ * <------U              |
+ * back   |S             |
+ *        \______________/
+ */
+
+
 bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
   RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
   int choice_count = alternatives_->length();
@@ -2136,7 +2743,8 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
   int text_length = GreedyLoopTextLength(&(alternatives_->at(0)));
   bool greedy_loop = false;
   Label greedy_loop_label;
-  GenerationVariant counter_backtrack_variant(&greedy_loop_label);
+  GenerationVariant counter_backtrack_variant;
+  counter_backtrack_variant.set_backtrack(&greedy_loop_label);
   if (choice_count > 1 && text_length != kNodeIsTooComplexForGreedyLoops) {
     // Here we have special handling for greedy loops containing only text nodes
     // and other simple nodes.  These are handled by pushing the current
@@ -2150,7 +2758,8 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
     macro_assembler->PushCurrentPosition();
     current_variant = &counter_backtrack_variant;
     Label greedy_match_failed;
-    GenerationVariant greedy_match_variant(&greedy_match_failed);
+    GenerationVariant greedy_match_variant;
+    greedy_match_variant.set_backtrack(&greedy_match_failed);
     Label loop_label;
     macro_assembler->Bind(&loop_label);
     greedy_match_variant.set_stop_node(this);
@@ -2167,32 +2776,70 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
   Label second_choice;  // For use in greedy matches.
   macro_assembler->Bind(&second_choice);
 
+  int first_normal_choice = greedy_loop ? 1 : 0;
+
+  int preload_characters = CalculatePreloadCharacters(compiler);
+  bool preload_is_current = false;
+  bool preload_has_checked_bounds = false;
+
+  AlternativeGenerationList alt_gens(choice_count);
+
   // For now we just call all choices one after the other.  The idea ultimately
   // is to use the Dispatch table to try only the relevant ones.
-  for (int i = greedy_loop ? 1 : 0; i < choice_count - 1; i++) {
+  for (int i = first_normal_choice; i < choice_count; i++) {
     GuardedAlternative alternative = alternatives_->at(i);
-    Label after;
+    AlternativeGeneration* alt_gen(alt_gens.at(i));
+    alt_gen->quick_check_details.set_characters(preload_characters);
     ZoneList<Guard*>* guards = alternative.guards();
     int guard_count = (guards == NULL) ? 0 : guards->length();
+
     GenerationVariant new_variant(*current_variant);
-    new_variant.set_backtrack(&after);
-    for (int j = 0; j < guard_count; j++) {
-      GenerateGuard(macro_assembler, guards->at(j), &new_variant);
+    new_variant.set_characters_preloaded(preload_is_current ?
+                                         preload_characters :
+                                         0);
+    new_variant.quick_check_performed()->Clear();
+    alt_gen->expects_preload = preload_is_current;
+    bool generate_full_check_inline = false;
+    if (alternative.node()->EmitQuickCheck(compiler,
+                                           &new_variant,
+                                           preload_has_checked_bounds,
+                                           &alt_gen->possible_success,
+                                           &alt_gen->quick_check_details,
+                                           i < choice_count - 1)) {
+      // Quick check was generated for this choice.
+      preload_is_current = true;
+      preload_has_checked_bounds = true;
+      // On the last choice in the ChoiceNode we generated the quick
+      // check to fall through on possible success.  So now we need to
+      // generate the full check inline.
+      if (i == choice_count - 1) {
+        macro_assembler->Bind(&alt_gen->possible_success);
+        new_variant.set_quick_check_performed(&alt_gen->quick_check_details);
+        new_variant.set_characters_preloaded(preload_characters);
+        generate_full_check_inline = true;
+      }
+    } else {
+      // No quick check was generated.  Put the full code here.
+      if (i < choice_count - 1) {
+        new_variant.set_backtrack(&alt_gen->after);
+      }
+      generate_full_check_inline = true;
     }
-    if (!alternative.node()->Emit(compiler, &new_variant)) {
-      after.Unuse();
-      return false;
+    if (generate_full_check_inline) {
+      if (preload_is_current) {
+        new_variant.set_characters_preloaded(preload_characters);
+      }
+      for (int j = 0; j < guard_count; j++) {
+        GenerateGuard(macro_assembler, guards->at(j), &new_variant);
+      }
+      if (!alternative.node()->Emit(compiler, &new_variant)) {
+        greedy_loop_label.Unuse();
+        return false;
+      }
+      preload_is_current = false;
     }
-    macro_assembler->Bind(&after);
+    macro_assembler->Bind(&alt_gen->after);
   }
-  GuardedAlternative alternative = alternatives_->at(choice_count - 1);
-  ZoneList<Guard*>* guards = alternative.guards();
-  int guard_count = (guards == NULL) ? 0 : guards->length();
-  for (int j = 0; j < guard_count; j++) {
-    GenerateGuard(macro_assembler, guards->at(j), current_variant);
-  }
-  bool ok = alternative.node()->Emit(compiler, current_variant);
-  if (!ok) return false;
   if (greedy_loop) {
     macro_assembler->Bind(&greedy_loop_label);
     // If we have unwound to the bottom then backtrack.
@@ -2201,12 +2848,68 @@ bool ChoiceNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
     macro_assembler->AdvanceCurrentPosition(-text_length);
     macro_assembler->GoTo(&second_choice);
   }
+  // At this point we need to generate slow checks for the alternatives where
+  // the quick check was inlined.  We can recognize these because the associated
+  // possible_success label is linked.
+  for (int i = first_normal_choice; i < choice_count - 1; i++) {
+    AlternativeGeneration* alt_gen = alt_gens.at(i);
+    if (!EmitOutOfLineContinuation(compiler,
+                                   current_variant,
+                                   alternatives_->at(i),
+                                   alt_gen,
+                                   preload_characters,
+                                   alt_gens.at(i + 1)->expects_preload)) {
+      return false;
+    }
+  }
   return true;
 }
 
 
+// Emits the full (slow) check for one alternative whose quick check was
+// generated inline by ChoiceNode::Emit.  The code is bound at
+// alt_gen->possible_success, the label the quick check jumps to on possible
+// success.  If that label is not linked nothing is emitted and true is
+// returned; otherwise the result of emitting the alternative's node is
+// returned.
+//   variant:              copied as the basis for the out-of-line variant.
+//   alternative:          the guarded alternative to emit.
+//   alt_gen:              labels and quick check details of the alternative.
+//   preload_characters:   number of characters the quick check preloaded.
+//   next_expects_preload: whether the code following alt_gen->after expects
+//                         the current character(s) to be loaded already.
+bool ChoiceNode::EmitOutOfLineContinuation(RegExpCompiler* compiler,
+                                           GenerationVariant* variant,
+                                           GuardedAlternative alternative,
+                                           AlternativeGeneration* alt_gen,
+                                           int preload_characters,
+                                           bool next_expects_preload) {
+  // No jump to possible_success was emitted, so there is nothing to do.
+  if (!alt_gen->possible_success.is_linked()) return true;
+
+  RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+  macro_assembler->Bind(&alt_gen->possible_success);
+  // Record in the new variant what the quick check already did: the number
+  // of characters it preloaded and the details of the check it performed.
+  GenerationVariant out_of_line_variant(*variant);
+  out_of_line_variant.set_characters_preloaded(preload_characters);
+  out_of_line_variant.set_quick_check_performed(&alt_gen->quick_check_details);
+  ZoneList<Guard*>* guards = alternative.guards();
+  int guard_count = (guards == NULL) ? 0 : guards->length();
+  if (next_expects_preload) {
+    // Failure of this alternative has to pass through the reload code below
+    // before continuing at alt_gen->after.
+    Label reload_current_char;
+    out_of_line_variant.set_backtrack(&reload_current_char);
+    for (int j = 0; j < guard_count; j++) {
+      GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant);
+    }
+    // The reload code is emitted even if Emit fails; the failure is only
+    // reported through the return value afterwards.
+    bool ok = alternative.node()->Emit(compiler, &out_of_line_variant);
+    macro_assembler->Bind(&reload_current_char);
+    // Reload the current character, since the next quick check expects that.
+    // We don't need to check bounds here because we only get into this
+    // code through a quick check which already did the checked load.
+    macro_assembler->LoadCurrentCharacter(variant->cp_offset(),
+                                          NULL,
+                                          false,
+                                          preload_characters);
+    macro_assembler->GoTo(&(alt_gen->after));
+    return ok;
+  } else {
+    // Nothing needs reloading, so failure can go straight to alt_gen->after.
+    out_of_line_variant.set_backtrack(&(alt_gen->after));
+    for (int j = 0; j < guard_count; j++) {
+      GenerateGuard(macro_assembler, guards->at(j), &out_of_line_variant);
+    }
+    return alternative.node()->Emit(compiler, &out_of_line_variant);
+  }
+}
+
+
 bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
-  RegExpMacroAssembler* macro = compiler->macro_assembler();
+  RegExpMacroAssembler* assembler = compiler->macro_assembler();
   LimitResult limit_result = LimitVersions(compiler, variant);
   if (limit_result == DONE) return true;
   if (limit_result == FAIL) return false;
@@ -2238,9 +2941,9 @@ bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
     }
     case BEGIN_SUBMATCH:
       if (!variant->is_trivial()) return variant->Flush(compiler, this);
-      macro->WriteCurrentPositionToRegister(
+      assembler->WriteCurrentPositionToRegister(
           data_.u_submatch.current_position_register, 0);
-      macro->WriteStackPointerToRegister(
+      assembler->WriteStackPointerToRegister(
           data_.u_submatch.stack_pointer_register);
       return on_success()->Emit(compiler, variant);
     case POSITIVE_SUBMATCH_SUCCESS:
@@ -2255,13 +2958,13 @@ bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
         Label at_end;
         // Load current character jumps to the label if we are beyond the string
         // end.
-        macro->LoadCurrentCharacter(0, &at_end);
-        macro->GoTo(variant->backtrack());
-        macro->Bind(&at_end);
+        assembler->LoadCurrentCharacter(0, &at_end);
+        assembler->GoTo(variant->backtrack());
+        assembler->Bind(&at_end);
       }
-      macro->ReadCurrentPositionFromRegister(
+      assembler->ReadCurrentPositionFromRegister(
           data_.u_submatch.current_position_register);
-      macro->ReadStackPointerFromRegister(
+      assembler->ReadStackPointerFromRegister(
           data_.u_submatch.stack_pointer_register);
       return on_success()->Emit(compiler, variant);
     default:
@@ -2273,7 +2976,7 @@ bool ActionNode::Emit(RegExpCompiler* compiler, GenerationVariant* variant) {
 
 bool BackReferenceNode::Emit(RegExpCompiler* compiler,
                              GenerationVariant* variant) {
-  RegExpMacroAssembler* macro = compiler->macro_assembler();
+  RegExpMacroAssembler* assembler = compiler->macro_assembler();
   if (!variant->is_trivial()) {
     return variant->Flush(compiler, this);
   }
@@ -2289,12 +2992,15 @@ bool BackReferenceNode::Emit(RegExpCompiler* compiler,
   if (info()->at_end) {
     // If we are constrained to match at the end of the input then succeed
     // iff the back reference is empty.
-    macro->CheckNotRegistersEqual(start_reg_, end_reg_, variant->backtrack());
+    assembler->CheckNotRegistersEqual(start_reg_,
+                                      end_reg_,
+                                      variant->backtrack());
   } else {
     if (compiler->ignore_case()) {
-      macro->CheckNotBackReferenceIgnoreCase(start_reg_, variant->backtrack());
+      assembler->CheckNotBackReferenceIgnoreCase(start_reg_,
+                                                 variant->backtrack());
     } else {
-      macro->CheckNotBackReference(start_reg_, variant->backtrack());
+      assembler->CheckNotBackReference(start_reg_, variant->backtrack());
     }
   }
   return on_success()->Emit(compiler, variant);
@@ -2735,7 +3441,7 @@ RegExpNode* RegExpQuantifier::ToNode(int min,
   bool has_max = max < RegExpTree::kInfinity;
   bool needs_counter = has_min || has_max;
   int reg_ctr = needs_counter ? compiler->AllocateRegister() : -1;
-  LoopChoiceNode* center = new LoopChoiceNode();
+  LoopChoiceNode* center = new LoopChoiceNode(body->min_match() == 0);
   RegExpNode* loop_return = needs_counter
       ? static_cast<RegExpNode*>(ActionNode::IncrementRegister(reg_ctr, center))
       : static_cast<RegExpNode*>(center);
index 17ae6236d94f8fbeb287697236e55a4955f1b47f..513ad5843b79f918fd3364d829ebd8b533c18261 100644 (file)
@@ -536,15 +536,79 @@ class SiblingList {
 };
 
 
+// Details of a quick mask-compare check that can look ahead in the
+// input stream.  Information is kept per character position (at most four
+// positions) together with a combined 32-bit mask and value.
+class QuickCheckDetails {
+ public:
+  // Creates details covering zero characters with an empty mask and value.
+  QuickCheckDetails()
+      : characters_(0),
+        mask_(0),
+        value_(0) { }
+  // Creates details covering the given number of characters.
+  explicit QuickCheckDetails(int characters)
+      : characters_(characters),
+        mask_(0),
+        value_(0) { }
+  // Condenses the per-position information into mask() and value().
+  // NOTE(review): the meaning of the return value is not visible in this
+  // header - confirm against the definition.
+  bool Rationalize(bool ascii);
+  // Merge in the information from another branch of an alternation.
+  void Merge(QuickCheckDetails* other, int from_index);
+  // Advance the current position by some amount.
+  void Advance(int by, bool ascii);
+  void Clear();
+  // Mask and value for a single character position.
+  struct Position {
+    Position() : mask(0), value(0), determines_perfectly(false) { }
+    uc16 mask;
+    uc16 value;
+    // Presumably true when mask and value decide the match at this position
+    // exactly rather than conservatively - TODO confirm.
+    bool determines_perfectly;
+  };
+  int characters() { return characters_; }
+  // NOTE(review): no check here that characters fits in positions_.
+  void set_characters(int characters) { characters_ = characters; }
+  // Returns the details for one position; index must be in the range
+  // 0 <= index < characters().
+  Position* positions(int index) {
+    ASSERT(index >= 0);
+    ASSERT(index < characters_);
+    return positions_ + index;
+  }
+  uint32_t mask() { return mask_; }
+  uint32_t value() { return value_; }
+
+ private:
+  // The number of characters we have quick check information for.  This is
+  // the same for all branches of a choice node.
+  int characters_;
+  Position positions_[4];
+  // These values are the condensate of the above array after Rationalize().
+  uint32_t mask_;
+  uint32_t value_;
+};
+
+
 class RegExpNode: public ZoneObject {
  public:
   RegExpNode() : variants_generated_(0) { }
-  virtual ~RegExpNode() { }
+  virtual ~RegExpNode();
   virtual void Accept(NodeVisitor* visitor) = 0;
   // Generates a goto to this node or actually generates the code at this point.
   // Until the implementation is complete we will return true for success and
   // false for failure.
   virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant) = 0;
+  // How many characters must this node consume at a minimum in order to
+  // succeed.
+  virtual int EatsAtLeast(int recursion_depth) = 0;
+  // Emits some quick code that checks whether the preloaded characters match.
+  // Falls through on certain failure, jumps to the label on possible success;
+  // if fall_through_on_failure is false it falls through on possible success.
+  // If the node cannot make a quick check it does nothing and returns false.
+  bool EmitQuickCheck(RegExpCompiler* compiler,
+                      GenerationVariant* variant,
+                      bool preload_has_checked_bounds,
+                      Label* on_possible_success,
+                      QuickCheckDetails* details_return,
+                      bool fall_through_on_failure);
+  // For a given number of characters this returns a mask and a value.  The
+  // next n characters are anded with the mask and compared with the value.
+  // A comparison failure indicates the node cannot match the next n characters.
+  // A comparison success indicates the node may match.
+  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+                                    RegExpCompiler* compiler,
+                                    int characters_filled_in) = 0;
   static const int kNodeIsTooComplexForGreedyLoops = -1;
   virtual int GreedyLoopTextLength() { return kNodeIsTooComplexForGreedyLoops; }
   Label* label() { return &label_; }
@@ -633,6 +697,12 @@ class ActionNode: public SeqRegExpNode {
       RegExpNode* on_success);
   virtual void Accept(NodeVisitor* visitor);
   virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+  virtual int EatsAtLeast(int recursion_depth);
+  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+                                    RegExpCompiler* compiler,
+                                    int filled_in) {
+    return on_success()->GetQuickCheckDetails(details, compiler, filled_in);
+  }
   virtual RegExpNode* PropagateForward(NodeInfo* info);
   Type type() { return type_; }
   // TODO(erikcorry): We should allow some action nodes in greedy loops.
@@ -679,6 +749,10 @@ class TextNode: public SeqRegExpNode {
   virtual void Accept(NodeVisitor* visitor);
   virtual RegExpNode* PropagateForward(NodeInfo* info);
   virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+  virtual int EatsAtLeast(int recursion_depth);
+  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+                                    RegExpCompiler* compiler,
+                                    int characters_filled_in);
   ZoneList<TextElement>* elements() { return elms_; }
   void MakeCaseIndependent();
   virtual int GreedyLoopTextLength();
@@ -690,6 +764,19 @@ class TextNode: public SeqRegExpNode {
   void CalculateOffsets();
 
  private:
+  enum TextEmitPassType {
+    NON_ASCII_MATCH,
+    CHARACTER_MATCH,
+    CASE_CHARACTER_MATCH,
+    CHARACTER_CLASS_MATCH
+  };
+  void TextEmitPass(RegExpCompiler* compiler,
+                    TextEmitPassType pass,
+                    bool preloaded,
+                    GenerationVariant* variant,
+                    bool first_element_checked,
+                    int* checked_up_to);
+  int Length();
   ZoneList<TextElement>* elms_;
 };
 
@@ -706,6 +793,12 @@ class BackReferenceNode: public SeqRegExpNode {
   int start_register() { return start_reg_; }
   int end_register() { return end_reg_; }
   virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+  virtual int EatsAtLeast(int recursion_depth) { return 0; }
+  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+                                    RegExpCompiler* compiler,
+                                    int characters_filled_in) {
+    return;
+  }
   virtual RegExpNode* PropagateForward(NodeInfo* info);
   virtual BackReferenceNode* Clone() { return new BackReferenceNode(*this); }
 
@@ -721,6 +814,13 @@ class EndNode: public RegExpNode {
   explicit EndNode(Action action) : action_(action) { }
   virtual void Accept(NodeVisitor* visitor);
   virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+  virtual int EatsAtLeast(int recursion_depth) { return 0; }
+  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+                                    RegExpCompiler* compiler,
+                                    int characters_filled_in) {
+    // Returning 0 from EatsAtLeast should ensure we never get here.
+    UNREACHABLE();
+  }
   virtual RegExpNode* PropagateForward(NodeInfo* info);
   virtual EndNode* Clone() { return new EndNode(*this); }
 
@@ -778,6 +878,9 @@ class GuardedAlternative {
 };
 
 
+class AlternativeGeneration;
+
+
 class ChoiceNode: public RegExpNode {
  public:
   explicit ChoiceNode(int expected_size)
@@ -789,6 +892,11 @@ class ChoiceNode: public RegExpNode {
   ZoneList<GuardedAlternative>* alternatives() { return alternatives_; }
   DispatchTable* GetTable(bool ignore_case);
   virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+  virtual int EatsAtLeast(int recursion_depth);
+  int EatsAtLeastHelper(int recursion_depth, RegExpNode* ignore_this_node);
+  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+                                    RegExpCompiler* compiler,
+                                    int characters_filled_in);
   virtual RegExpNode* PropagateForward(NodeInfo* info);
   virtual ChoiceNode* Clone() { return new ChoiceNode(*this); }
 
@@ -805,6 +913,13 @@ class ChoiceNode: public RegExpNode {
   void GenerateGuard(RegExpMacroAssembler* macro_assembler,
                      Guard *guard,
                      GenerationVariant* variant);
+  int CalculatePreloadCharacters(RegExpCompiler* compiler);
+  bool EmitOutOfLineContinuation(RegExpCompiler* compiler,
+                                 GenerationVariant* variant,
+                                 GuardedAlternative alternative,
+                                 AlternativeGeneration* alt_gen,
+                                 int preload_characters,
+                                 bool next_expects_preload);
   DispatchTable* table_;
   bool being_calculated_;
 };
@@ -812,16 +927,22 @@ class ChoiceNode: public RegExpNode {
 
 class LoopChoiceNode: public ChoiceNode {
  public:
-  explicit LoopChoiceNode()
+  explicit LoopChoiceNode(bool body_can_be_zero_length)
       : ChoiceNode(2),
         loop_node_(NULL),
-        continue_node_(NULL) { }
+        continue_node_(NULL),
+        body_can_be_zero_length_(body_can_be_zero_length) { }
   void AddLoopAlternative(GuardedAlternative alt);
   void AddContinueAlternative(GuardedAlternative alt);
   virtual bool Emit(RegExpCompiler* compiler, GenerationVariant* variant);
+  virtual int EatsAtLeast(int recursion_depth);  // Returns 0.
+  virtual void GetQuickCheckDetails(QuickCheckDetails* details,
+                                    RegExpCompiler* compiler,
+                                    int characters_filled_in);
   virtual LoopChoiceNode* Clone() { return new LoopChoiceNode(*this); }
   RegExpNode* loop_node() { return loop_node_; }
   RegExpNode* continue_node() { return continue_node_; }
+  bool body_can_be_zero_length() { return body_can_be_zero_length_; }
   virtual void Accept(NodeVisitor* visitor);
 
  private:
@@ -834,6 +955,7 @@ class LoopChoiceNode: public ChoiceNode {
 
   RegExpNode* loop_node_;
   RegExpNode* continue_node_;
+  bool body_can_be_zero_length_;
 };
 
 
@@ -883,42 +1005,47 @@ class GenerationVariant {
         : DeferredAction(ActionNode::INCREMENT_REGISTER, reg) { }
   };
 
-  explicit GenerationVariant(Label* backtrack)
-      : cp_offset_(0),
-        actions_(NULL),
-        backtrack_(backtrack),
-        stop_node_(NULL),
-        loop_label_(NULL) { }
   GenerationVariant()
       : cp_offset_(0),
         actions_(NULL),
         backtrack_(NULL),
         stop_node_(NULL),
-        loop_label_(NULL) { }
+        loop_label_(NULL),
+        characters_preloaded_(0) { }
   bool Flush(RegExpCompiler* compiler, RegExpNode* successor);
   int cp_offset() { return cp_offset_; }
   DeferredAction* actions() { return actions_; }
   bool is_trivial() {
-    return backtrack_ == NULL && actions_ == NULL && cp_offset_ == 0;
+    return backtrack_ == NULL &&
+           actions_ == NULL &&
+           cp_offset_ == 0 &&
+           characters_preloaded_ == 0 &&
+           quick_check_performed_.characters() == 0;
   }
   Label* backtrack() { return backtrack_; }
   Label* loop_label() { return loop_label_; }
   RegExpNode* stop_node() { return stop_node_; }
-  // These set methods should be used only on new GenerationVariants - the
-  // intention is that GenerationVariants are immutable after creation.
+  int characters_preloaded() { return characters_preloaded_; }
+  QuickCheckDetails* quick_check_performed() { return &quick_check_performed_; }
+  bool mentions_reg(int reg);
+  // These set methods and AdvanceVariant should be used only on new
+  // GenerationVariants - the intention is that GenerationVariants are
+  // immutable after creation.
   void add_action(DeferredAction* new_action) {
     ASSERT(new_action->next_ == NULL);
     new_action->next_ = actions_;
     actions_ = new_action;
   }
-  void set_cp_offset(int new_cp_offset) {
-    ASSERT(new_cp_offset >= cp_offset_);
-    cp_offset_ = new_cp_offset;
-  }
   void set_backtrack(Label* backtrack) { backtrack_ = backtrack; }
   void set_stop_node(RegExpNode* node) { stop_node_ = node; }
   void set_loop_label(Label* label) { loop_label_ = label; }
-  bool mentions_reg(int reg);
+  void set_characters_preloaded(int cpre) { characters_preloaded_ = cpre; }
+  void set_quick_check_performed(QuickCheckDetails* d) {
+    quick_check_performed_ = *d;
+  }
+  void clear_quick_check_performed() {
+  }
+  void AdvanceVariant(int by, bool ascii);
  private:
   int FindAffectedRegisters(OutSet* affected_registers);
   void PerformDeferredActions(RegExpMacroAssembler* macro,
@@ -935,7 +1062,11 @@ class GenerationVariant {
   Label* backtrack_;
   RegExpNode* stop_node_;
   Label* loop_label_;
+  int characters_preloaded_;
+  QuickCheckDetails quick_check_performed_;
 };
+
+
 class NodeVisitor {
  public:
   virtual ~NodeVisitor() { }
index fe38075b4f010ac589051f6d030cc8ac992af986..2fa23bcf164c07ae567947fb93ef11b8e25a277d 100644 (file)
@@ -154,7 +154,7 @@ void RegExpMacroAssemblerIA32::CheckBitmap(uc16 start,
 }
 
 
-void RegExpMacroAssemblerIA32::CheckCharacter(uc16 c, Label* on_equal) {
+void RegExpMacroAssemblerIA32::CheckCharacter(uint32_t c, Label* on_equal) {
   __ cmp(current_character(), c);
   BranchOrBacktrack(equal, on_equal);
 }
@@ -365,28 +365,41 @@ void RegExpMacroAssemblerIA32::CheckNotRegistersEqual(int reg1,
 }
 
 
-void RegExpMacroAssemblerIA32::CheckNotCharacter(uc16 c, Label* on_not_equal) {
+void RegExpMacroAssemblerIA32::CheckNotCharacter(uint32_t c,
+                                                 Label* on_not_equal) {
   __ cmp(current_character(), c);
   BranchOrBacktrack(not_equal, on_not_equal);
 }
 
 
-void RegExpMacroAssemblerIA32::CheckNotCharacterAfterOr(uc16 c,
-                                                        uc16 mask,
-                                                        Label* on_not_equal) {
+void RegExpMacroAssemblerIA32::CheckCharacterAfterAnd(uint32_t c,
+                                                      uint32_t mask,
+                                                      Label* on_equal) {
   __ mov(eax, current_character());
-  __ or_(eax, mask);
+  __ and_(eax, mask);
+  __ cmp(eax, c);
+  BranchOrBacktrack(equal, on_equal);
+}
+
+
+void RegExpMacroAssemblerIA32::CheckNotCharacterAfterAnd(uint32_t c,
+                                                         uint32_t mask,
+                                                         Label* on_not_equal) {
+  __ mov(eax, current_character());
+  __ and_(eax, mask);
   __ cmp(eax, c);
   BranchOrBacktrack(not_equal, on_not_equal);
 }
 
 
-void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusOr(
+void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusAnd(
     uc16 c,
+    uc16 minus,
     uc16 mask,
     Label* on_not_equal) {
-  __ lea(eax, Operand(current_character(), -mask));
-  __ or_(eax, mask);
+  ASSERT(minus < String::kMaxUC16CharCode);
+  __ lea(eax, Operand(current_character(), -minus));
+  __ and_(eax, mask);
   __ cmp(eax, c);
   BranchOrBacktrack(not_equal, on_not_equal);
 }
@@ -516,7 +529,7 @@ Handle<Object> RegExpMacroAssemblerIA32::GetCode(Handle<String> source) {
   Label at_start;
   __ cmp(Operand(ebp, kAtStart), Immediate(0));
   __ j(not_equal, &at_start);
-  LoadCurrentCharacterUnchecked(-1);  // Load previous char.
+  LoadCurrentCharacterUnchecked(-1, 1);  // Load previous char.
   __ jmp(&start_label_);
   __ bind(&at_start);
   __ mov(current_character(), '\n');
@@ -631,12 +644,16 @@ RegExpMacroAssembler::IrregexpImplementation
 
 
 void RegExpMacroAssemblerIA32::LoadCurrentCharacter(int cp_offset,
-                                                    Label* on_end_of_input) {
+                                                    Label* on_end_of_input,
+                                                    bool check_bounds,
+                                                    int characters) {
   ASSERT(cp_offset >= 0);
   ASSERT(cp_offset < (1<<30));  // Be sane! (And ensure negation works)
-  __ cmp(edi, -cp_offset * char_size());
-  BranchOrBacktrack(greater_equal, on_end_of_input);
-  LoadCurrentCharacterUnchecked(cp_offset);
+  if (check_bounds) {
+    __ cmp(edi, -(cp_offset + characters) * char_size());
+    BranchOrBacktrack(greater, on_end_of_input);
+  }
+  LoadCurrentCharacterUnchecked(cp_offset, characters);
 }
 
 
@@ -871,13 +888,27 @@ void RegExpMacroAssemblerIA32::CallCFunction(Address function_address,
 }
 
 
-void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset) {
+void RegExpMacroAssemblerIA32::LoadCurrentCharacterUnchecked(int cp_offset,
+                                                             int characters) {
   if (mode_ == ASCII) {
-    __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
+    if (characters == 4) {
+      __ mov(current_character(), Operand(esi, edi, times_1, cp_offset));
+    } else if (characters == 2) {
+      __ movzx_w(current_character(), Operand(esi, edi, times_1, cp_offset));
+    } else {
+      ASSERT(characters == 1);
+      __ movzx_b(current_character(), Operand(esi, edi, times_1, cp_offset));
+    }
   } else {
     ASSERT(mode_ == UC16);
-    __ movzx_w(current_character(),
-               Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
+    if (characters == 2) {
+      __ mov(current_character(),
+             Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
+    } else {
+      ASSERT(characters == 1);
+      __ movzx_w(current_character(),
+                 Operand(esi, edi, times_1, cp_offset * sizeof(uc16)));
+    }
   }
 }
 
index 7c2d5728e469c5eae257f5c24551daf636484813..d639a2eef46235df34836f6f6c1f26da99694805 100644 (file)
@@ -43,7 +43,10 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
   virtual void Backtrack();
   virtual void Bind(Label* label);
   virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
-  virtual void CheckCharacter(uc16 c, Label* on_equal);
+  virtual void CheckCharacter(uint32_t c, Label* on_equal);
+  virtual void CheckCharacterAfterAnd(uint32_t c,
+                                      uint32_t mask,
+                                      Label* on_equal);
   virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
   virtual void CheckCharacterLT(uc16 limit, Label* on_less);
   virtual void CheckCharacters(Vector<const uc16> str,
@@ -56,11 +59,14 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
   virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
                                                Label* on_no_match);
   virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
-  virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
-  virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
-  virtual void CheckNotCharacterAfterMinusOr(uc16 c,
-                                             uc16 mask,
-                                             Label* on_not_equal);
+  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
+  virtual void CheckNotCharacterAfterAnd(uint32_t c,
+                                         uint32_t mask,
+                                         Label* on_not_equal);
+  virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
+                                              uc16 minus,
+                                              uc16 mask,
+                                              Label* on_not_equal);
   virtual void DispatchByteMap(uc16 start,
                                Label* byte_map,
                                const Vector<Label*>& destinations);
@@ -77,9 +83,10 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
   virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
   virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
   virtual IrregexpImplementation Implementation();
-  virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
-  virtual void LoadCurrentCharacterUnchecked(int cp_offset);
-
+  virtual void LoadCurrentCharacter(int cp_offset,
+                                    Label* on_end_of_input,
+                                    bool check_bounds = true,
+                                    int characters = 1);
   virtual void PopCurrentPosition();
   virtual void PopRegister(int register_index);
   virtual void PushBacktrack(Label* label);
@@ -135,6 +142,8 @@ class RegExpMacroAssemblerIA32: public RegExpMacroAssembler {
                                         int byte_offset2,
                                         size_t byte_length);
 
+  void LoadCurrentCharacterUnchecked(int cp_offset, int characters);
+
   // Called from RegExp if the stack-guard is triggered.
   // If the code object is relocated, the return address is fixed before
   // returning.
index 44fa33cf45ffd37160e03a471caa0165c4083009..cfcae325a6fe17c59da0dac8c9cfde8c7d2fa334 100644 (file)
@@ -44,6 +44,7 @@ RegExpMacroAssemblerIrregexp::RegExpMacroAssemblerIrregexp(Vector<byte> buffer)
 
 
 RegExpMacroAssemblerIrregexp::~RegExpMacroAssemblerIrregexp() {
+  if (backtrack_.is_linked()) backtrack_.Unuse();
 }
 
 
@@ -196,17 +197,32 @@ void RegExpMacroAssemblerIrregexp::CheckGreedyLoop(
 
 
 void RegExpMacroAssemblerIrregexp::LoadCurrentCharacter(int cp_offset,
-                                                        Label* on_failure) {
-  Emit(BC_LOAD_CURRENT_CHAR);
-  Emit32(cp_offset);
-  EmitOrLink(on_failure);
-}
-
-
-void RegExpMacroAssemblerIrregexp::LoadCurrentCharacterUnchecked(
-      int cp_offset) {
-  Emit(BC_LOAD_CURRENT_CHAR_UNCHECKED);
+                                                        Label* on_failure,
+                                                        bool check_bounds,
+                                                        int characters) {
+  int bytecode;
+  if (check_bounds) {
+    if (characters == 4) {
+      bytecode = BC_LOAD_4_CURRENT_CHARS;
+    } else if (characters == 2) {
+      bytecode = BC_LOAD_2_CURRENT_CHARS;
+    } else {
+      ASSERT(characters == 1);
+      bytecode = BC_LOAD_CURRENT_CHAR;
+    }
+  } else {
+    if (characters == 4) {
+      bytecode = BC_LOAD_4_CURRENT_CHARS_UNCHECKED;
+    } else if (characters == 2) {
+      bytecode = BC_LOAD_2_CURRENT_CHARS_UNCHECKED;
+    } else {
+      ASSERT(characters == 1);
+      bytecode = BC_LOAD_CURRENT_CHAR_UNCHECKED;
+    }
+  }
+  Emit(bytecode);
   Emit32(cp_offset);
+  if (check_bounds) EmitOrLink(on_failure);
 }
 
 
@@ -226,9 +242,9 @@ void RegExpMacroAssemblerIrregexp::CheckCharacterGT(uc16 limit,
 }
 
 
-void RegExpMacroAssemblerIrregexp::CheckCharacter(uc16 c, Label* on_equal) {
+void RegExpMacroAssemblerIrregexp::CheckCharacter(uint32_t c, Label* on_equal) {
   Emit(BC_CHECK_CHAR);
-  Emit16(c);
+  Emit32(c);
   EmitOrLink(on_equal);
 }
 
@@ -239,31 +255,44 @@ void RegExpMacroAssemblerIrregexp::CheckNotAtStart(Label* on_not_at_start) {
 }
 
 
-void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uc16 c,
+void RegExpMacroAssemblerIrregexp::CheckNotCharacter(uint32_t c,
                                                      Label* on_not_equal) {
   Emit(BC_CHECK_NOT_CHAR);
-  Emit16(c);
+  Emit32(c);
   EmitOrLink(on_not_equal);
 }
 
 
-void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterOr(
-    uc16 c,
-    uc16 mask,
+void RegExpMacroAssemblerIrregexp::CheckCharacterAfterAnd(
+    uint32_t c,
+    uint32_t mask,
+    Label* on_equal) {
+  Emit(BC_AND_CHECK_CHAR);
+  Emit32(c);
+  Emit32(mask);
+  EmitOrLink(on_equal);
+}
+
+
+void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterAnd(
+    uint32_t c,
+    uint32_t mask,
     Label* on_not_equal) {
-  Emit(BC_OR_CHECK_NOT_CHAR);
-  Emit16(c);
-  Emit16(mask);
+  Emit(BC_AND_CHECK_NOT_CHAR);
+  Emit32(c);
+  Emit32(mask);
   EmitOrLink(on_not_equal);
 }
 
 
-void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusOr(
+void RegExpMacroAssemblerIrregexp::CheckNotCharacterAfterMinusAnd(
     uc16 c,
+    uc16 minus,
     uc16 mask,
     Label* on_not_equal) {
-  Emit(BC_MINUS_OR_CHECK_NOT_CHAR);
+  Emit(BC_MINUS_AND_CHECK_NOT_CHAR);
   Emit16(c);
+  Emit16(minus);
   Emit16(mask);
   EmitOrLink(on_not_equal);
 }
@@ -344,7 +373,7 @@ void RegExpMacroAssemblerIrregexp::CheckCharacters(
       Emit32(cp_offset + i);
     }
     Emit(BC_CHECK_NOT_CHAR);
-    Emit16(str[i]);
+    Emit32(str[i]);
     EmitOrLink(on_failure);
   }
 }
index 722e7791d37e86d978d688b56e34cb234bf1f291..871e1e57d304fec74e2315cbaa0f280d819c4579 100644 (file)
@@ -66,18 +66,26 @@ class RegExpMacroAssemblerIrregexp: public RegExpMacroAssembler {
   virtual void ReadCurrentPositionFromRegister(int reg);
   virtual void WriteStackPointerToRegister(int reg);
   virtual void ReadStackPointerFromRegister(int reg);
-  virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
-  virtual void LoadCurrentCharacterUnchecked(int cp_offset);
-  virtual void CheckCharacterLT(uc16 limit, Label* on_less);
+  virtual void LoadCurrentCharacter(int cp_offset,
+                                    Label* on_end_of_input,
+                                    bool check_bounds = true,
+                                    int characters = 1);
+  virtual void CheckCharacter(uint32_t c, Label* on_equal);
+  virtual void CheckCharacterAfterAnd(uint32_t c,
+                                      uint32_t mask,
+                                      Label* on_equal);
   virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
-  virtual void CheckCharacter(uc16 c, Label* on_equal);
+  virtual void CheckCharacterLT(uc16 limit, Label* on_less);
   virtual void CheckGreedyLoop(Label* on_tos_equals_current_position);
   virtual void CheckNotAtStart(Label* on_not_at_start);
-  virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
-  virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
-  virtual void CheckNotCharacterAfterMinusOr(uc16 c,
-                                             uc16 mask,
-                                             Label* on_not_equal);
+  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
+  virtual void CheckNotCharacterAfterAnd(uint32_t c,
+                                         uint32_t mask,
+                                         Label* on_not_equal);
+  virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
+                                              uc16 minus,
+                                              uc16 mask,
+                                              Label* on_not_equal);
   virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
   virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
                                                Label* on_no_match);
index fc3629cb987fa5193ec5b5eaffe0a8d3401e9157..2a618cc73477b998629ab75a7371074b2bab6201 100644 (file)
@@ -164,18 +164,19 @@ void RegExpMacroAssemblerTracer::ReadStackPointerFromRegister(int reg) {
 
 
 void RegExpMacroAssemblerTracer::LoadCurrentCharacter(int cp_offset,
-                                                      Label* on_end_of_input) {
-  PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]);\n",
+                                                      Label* on_end_of_input,
+                                                      bool check_bounds,
+                                                      int characters) {
+  const char* check_msg = check_bounds ? "" : " (unchecked)";
+  PrintF(" LoadCurrentCharacter(cp_offset=%d, label[%08x]%s (%d chars));\n",
          cp_offset,
-         on_end_of_input);
-  assembler_->LoadCurrentCharacter(cp_offset, on_end_of_input);
-}
-
-
-void RegExpMacroAssemblerTracer::LoadCurrentCharacterUnchecked(int cp_offset) {
-  PrintF(" LoadCurrentCharacterUnchecked(cp_offset=%d);\n",
-         cp_offset);
-  assembler_->LoadCurrentCharacterUnchecked(cp_offset);
+         on_end_of_input,
+         check_msg,
+         characters);
+  assembler_->LoadCurrentCharacter(cp_offset,
+                                   on_end_of_input,
+                                   check_bounds,
+                                   characters);
 }
 
 
@@ -192,7 +193,7 @@ void RegExpMacroAssemblerTracer::CheckCharacterGT(uc16 limit,
 }
 
 
-void RegExpMacroAssemblerTracer::CheckCharacter(uc16 c, Label* on_equal) {
+void RegExpMacroAssemblerTracer::CheckCharacter(uint32_t c, Label* on_equal) {
   PrintF(" CheckCharacter(c='u%04x', label[%08x]);\n", c, on_equal);
   assembler_->CheckCharacter(c, on_equal);
 }
@@ -204,28 +205,49 @@ void RegExpMacroAssemblerTracer::CheckNotAtStart(Label* on_not_at_start) {
 }
 
 
-void RegExpMacroAssemblerTracer::CheckNotCharacter(uc16 c,
+void RegExpMacroAssemblerTracer::CheckNotCharacter(uint32_t c,
                                                    Label* on_not_equal) {
   PrintF(" CheckNotCharacter(c='u%04x', label[%08x]);\n", c, on_not_equal);
   assembler_->CheckNotCharacter(c, on_not_equal);
 }
 
 
-void RegExpMacroAssemblerTracer::CheckNotCharacterAfterOr(uc16 c, uc16 mask,
-                                                          Label* on_not_equal) {
-  PrintF(" CheckNotCharacterAfterOr(c='u%04x', mask=0x%04x, label[%08x]);\n", c,
-         mask, on_not_equal);
-  assembler_->CheckNotCharacterAfterOr(c, mask, on_not_equal);
+void RegExpMacroAssemblerTracer::CheckCharacterAfterAnd(
+    uint32_t c,
+    uint32_t mask,
+    Label* on_equal) {
+  PrintF(" CheckCharacterAfterAnd(c='u%04x', mask=0x%04x, label[%08x]);\n",
+         c,
+         mask,
+         on_equal);
+  assembler_->CheckCharacterAfterAnd(c, mask, on_equal);
+}
+
+
+void RegExpMacroAssemblerTracer::CheckNotCharacterAfterAnd(
+    uint32_t c,
+    uint32_t mask,
+    Label* on_not_equal) {
+  PrintF(" CheckNotCharacterAfterAnd(c='u%04x', mask=0x%04x, label[%08x]);\n",
+         c,
+         mask,
+         on_not_equal);
+  assembler_->CheckNotCharacterAfterAnd(c, mask, on_not_equal);
 }
 
 
-void RegExpMacroAssemblerTracer::CheckNotCharacterAfterMinusOr(
+void RegExpMacroAssemblerTracer::CheckNotCharacterAfterMinusAnd(
     uc16 c,
+    uc16 minus,
     uc16 mask,
     Label* on_not_equal) {
-  PrintF(" CheckNotCharacterAfterMinusOr(c='u%04x', mask=0x%04x, "
-    "label[%08x]);\n", c, mask, on_not_equal);
-  assembler_->CheckNotCharacterAfterMinusOr(c, mask, on_not_equal);
+  PrintF(" CheckNotCharacterAfterMinusAnd(c='u%04x', minus=%04x, mask=0x%04x, "
+             "label[%08x]);\n",
+         c,
+         minus,
+         mask,
+         on_not_equal);
+  assembler_->CheckNotCharacterAfterMinusAnd(c, minus, mask, on_not_equal);
 }
 
 
index 88d4cc1de946f580a26340dfc72b154327cbcef7..0b47b61035ef22e5336912a8e463c49a378d6b5f 100644 (file)
@@ -41,7 +41,10 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
   virtual void Backtrack();
   virtual void Bind(Label* label);
   virtual void CheckBitmap(uc16 start, Label* bitmap, Label* on_zero);
-  virtual void CheckCharacter(uc16 c, Label* on_equal);
+  virtual void CheckCharacter(uint32_t c, Label* on_equal);
+  virtual void CheckCharacterAfterAnd(uint32_t c,
+                                      uint32_t and_with,
+                                      Label* on_equal);
   virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
   virtual void CheckCharacterLT(uc16 limit, Label* on_less);
   virtual void CheckCharacters(
@@ -55,13 +58,14 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
   virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
                                                Label* on_no_match);
   virtual void CheckNotRegistersEqual(int reg1, int reg2, Label* on_not_equal);
-  virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
-  virtual void CheckNotCharacterAfterOr(uc16 c,
-                                        uc16 or_with,
-                                        Label* on_not_equal);
-  virtual void CheckNotCharacterAfterMinusOr(uc16 c,
-                                             uc16 minus_then_or_with,
-                                             Label* on_not_equal);
+  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal);
+  virtual void CheckNotCharacterAfterAnd(uint32_t c,
+                                         uint32_t and_with,
+                                         Label* on_not_equal);
+  virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
+                                              uc16 minus,
+                                              uc16 and_with,
+                                              Label* on_not_equal);
   virtual void DispatchByteMap(
       uc16 start,
       Label* byte_map,
@@ -81,8 +85,10 @@ class RegExpMacroAssemblerTracer: public RegExpMacroAssembler {
   virtual void IfRegisterGE(int reg, int comparand, Label* if_ge);
   virtual void IfRegisterLT(int reg, int comparand, Label* if_lt);
   virtual IrregexpImplementation Implementation();
-  virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input);
-  virtual void LoadCurrentCharacterUnchecked(int cp_offset);
+  virtual void LoadCurrentCharacter(int cp_offset,
+                                    Label* on_end_of_input,
+                                    bool check_bounds = true,
+                                    int characters = 1);
   virtual void PopCurrentPosition();
   virtual void PopRegister(int register_index);
   virtual void PushBacktrack(Label* label);
index 5ed1523b14c950bb27c14f0d357d262ae5eb25c7..b9fbacaed425bfd3105d43daeea3d2c5766b7685 100644 (file)
@@ -58,7 +58,12 @@ class RegExpMacroAssembler {
       Label* on_zero) = 0;  // Where to go if the bit is 0.  Fall through on 1.
   // Dispatch after looking the current character up in a 2-bits-per-entry
   // map.  The destinations vector has up to 4 labels.
-  virtual void CheckCharacter(uc16 c, Label* on_equal) = 0;
+  virtual void CheckCharacter(uint32_t c, Label* on_equal) = 0;
+  // Bitwise and the current character with the given constant and then
+  // check for a match with c.
+  virtual void CheckCharacterAfterAnd(uint32_t c,
+                                      uint32_t and_with,
+                                      Label* on_equal) = 0;
   virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
   virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
   // Check the current character for a match with a literal string.  If we
@@ -81,17 +86,16 @@ class RegExpMacroAssembler {
   // fail to match then goto the on_failure label.  End of input always
   // matches.  If the label is NULL then we should pop a backtrack address off
   // the stack and go to that.
-  virtual void CheckNotCharacter(uc16 c, Label* on_not_equal) = 0;
-  // Bitwise or the current character with the given constant and then
-  // check for a match with c.
-  virtual void CheckNotCharacterAfterOr(uc16 c,
-                                        uc16 or_with,
-                                        Label* on_not_equal) = 0;
+  virtual void CheckNotCharacter(uint32_t c, Label* on_not_equal) = 0;
+  virtual void CheckNotCharacterAfterAnd(uint32_t c,
+                                         uint32_t and_with,
+                                         Label* on_not_equal) = 0;
   // Subtract a constant from the current character, then bitwise and with
   // the given constant and then check for a match with c.
-  virtual void CheckNotCharacterAfterMinusOr(uc16 c,
-                                             uc16 minus_then_or_with,
-                                             Label* on_not_equal) = 0;
+  virtual void CheckNotCharacterAfterMinusAnd(uc16 c,
+                                              uc16 minus,
+                                              uc16 and_with,
+                                              Label* on_not_equal) = 0;
   virtual void CheckNotRegistersEqual(int reg1,
                                       int reg2,
                                       Label* on_not_equal) = 0;
@@ -122,8 +126,10 @@ class RegExpMacroAssembler {
   // Backtracks instead if the label is NULL.
   virtual void IfRegisterLT(int reg, int comparand, Label* if_lt) = 0;
   virtual IrregexpImplementation Implementation() = 0;
-  virtual void LoadCurrentCharacter(int cp_offset, Label* on_end_of_input) = 0;
-  virtual void LoadCurrentCharacterUnchecked(int cp_offset) = 0;
+  virtual void LoadCurrentCharacter(int cp_offset,
+                                    Label* on_end_of_input,
+                                    bool check_bounds = true,
+                                    int characters = 1) = 0;
   virtual void PopCurrentPosition() = 0;
   virtual void PopRegister(int register_index) = 0;
   virtual void PushBacktrack(Label* label) = 0;
index 46374ccb01a597f92c322381ba1065c407b17cd9..4422211f2ccdc7b619c88f52df9ce1ce8eb87a14 100644 (file)
@@ -306,3 +306,13 @@ assertFalse(/f[abc]/i.test('x'));
 assertFalse(/f[abc]/i.test('xa'));
 assertFalse(/<[abc]/i.test('x'));
 assertFalse(/<[abc]/i.test('xa'));
+
+// Test that merging of quick test masks gets it right.
+assertFalse(/x([0-7]%%x|[0-6]%%y)/.test('x7%%y'), 'qt');
+assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy7%%%y'), 'qt2');
+assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt3');
+assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt4');
+assertFalse(/()x\1(y([0-7]%%%x|[0-6]%%%y)|dkjasldkas)/.test('xy%%%y'), 'qt5');
+assertFalse(/()x\1y([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt6');
+assertFalse(/xy([0-7]%%%x|[0-6]%%%y)/.test('xy7%%%y'), 'qt7');
+assertFalse(/x([0-7]%%%x|[0-6]%%%y)/.test('x7%%%y'), 'qt8');