Fix crash due to a RegExpAtom method being called on a RegExpCharacterClass object.

author bmeurer@chromium.org <bmeurer@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Fri, 23 Aug 2013 11:06:16 +0000 (11:06 +0000)

committer bmeurer@chromium.org <bmeurer@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>

Fri, 23 Aug 2013 11:06:16 +0000 (11:06 +0000)
author bmeurer@chromium.org <bmeurer@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 23 Aug 2013 11:06:16 +0000 (11:06 +0000)
committer bmeurer@chromium.org <bmeurer@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
Fri, 23 Aug 2013 11:06:16 +0000 (11:06 +0000)
diff --git a/src/ast.cc b/src/ast.cc

index 8f69bd7..38c6ddd 100644 (file)
--- a/src/ast.cc
+++ b/src/ast.cc
@@ -962,12 +962,12 @@ void* RegExpUnparser::VisitAtom(RegExpAtom* that, void* data) {
  
  void* RegExpUnparser::VisitText(RegExpText* that, void* data) {
    if (that->elements()->length() == 1) {
-    that->elements()->at(0).data.u_atom->Accept(this, data);
+    that->elements()->at(0).tree()->Accept(this, data);
    } else {
      stream()->Add("(!");
      for (int i = 0; i < that->elements()->length(); i++) {
        stream()->Add(" ");
-      that->elements()->at(i).data.u_atom->Accept(this, data);
+      that->elements()->at(i).tree()->Accept(this, data);
      }
      stream()->Add(")");
    }
diff --git a/src/jsregexp.cc b/src/jsregexp.cc

index 666866e..0ce10b8 100644 (file)
--- a/src/jsregexp.cc
+++ b/src/jsregexp.cc
@@ -933,27 +933,25 @@ void RegExpText::AppendToText(RegExpText* text, Zone* zone) {
  
  
  TextElement TextElement::Atom(RegExpAtom* atom) {
-  TextElement result = TextElement(ATOM);
-  result.data.u_atom = atom;
-  return result;
+  return TextElement(ATOM, atom);
  }
  
  
-TextElement TextElement::CharClass(
-      RegExpCharacterClass* char_class) {
-  TextElement result = TextElement(CHAR_CLASS);
-  result.data.u_char_class = char_class;
-  return result;
+TextElement TextElement::CharClass(RegExpCharacterClass* char_class) {
+  return TextElement(CHAR_CLASS, char_class);
  }
  
  
-int TextElement::length() {
-  if (text_type == ATOM) {
-    return data.u_atom->length();
-  } else {
-    ASSERT(text_type == CHAR_CLASS);
-    return 1;
+int TextElement::length() const {
+  switch (text_type()) {
+    case ATOM:
+      return atom()->length();
+
+    case CHAR_CLASS:
+      return 1;
    }
+  UNREACHABLE();
+  return 0;
  }
  
  
@@ -2561,8 +2559,8 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
    }
    for (int k = 0; k < elms_->length(); k++) {
      TextElement elm = elms_->at(k);
-    if (elm.text_type == TextElement::ATOM) {
-      Vector<const uc16> quarks = elm.data.u_atom->data();
+    if (elm.text_type() == TextElement::ATOM) {
+      Vector<const uc16> quarks = elm.atom()->data();
        for (int i = 0; i < characters && i < quarks.length(); i++) {
          QuickCheckDetails::Position* pos =
              details->positions(characters_filled_in);
@@ -2624,7 +2622,7 @@ void TextNode::GetQuickCheckDetails(QuickCheckDetails* details,
      } else {
        QuickCheckDetails::Position* pos =
            details->positions(characters_filled_in);
-      RegExpCharacterClass* tree = elm.data.u_char_class;
+      RegExpCharacterClass* tree = elm.char_class();
        ZoneList<CharacterRange>* ranges = tree->ranges(zone());
        if (tree->is_negated()) {
          // A quick check uses multi-character mask and compare.  There is no
@@ -2814,8 +2812,8 @@ RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
    int element_count = elms_->length();
    for (int i = 0; i < element_count; i++) {
      TextElement elm = elms_->at(i);
-    if (elm.text_type == TextElement::ATOM) {
-      Vector<const uc16> quarks = elm.data.u_atom->data();
+    if (elm.text_type() == TextElement::ATOM) {
+      Vector<const uc16> quarks = elm.atom()->data();
        for (int j = 0; j < quarks.length(); j++) {
          uint16_t c = quarks[j];
          if (c <= String::kMaxOneByteCharCode) continue;
@@ -2830,8 +2828,8 @@ RegExpNode* TextNode::FilterASCII(int depth, bool ignore_case) {
          copy[j] = converted;
        }
      } else {
-      ASSERT(elm.text_type == TextElement::CHAR_CLASS);
-      RegExpCharacterClass* cc = elm.data.u_char_class;
+      ASSERT(elm.text_type() == TextElement::CHAR_CLASS);
+      RegExpCharacterClass* cc = elm.char_class();
        ZoneList<CharacterRange>* ranges = cc->ranges(zone());
        if (!CharacterRange::IsCanonical(ranges)) {
          CharacterRange::Canonicalize(ranges);
@@ -3256,12 +3254,12 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
    int element_count = elms_->length();
    for (int i = preloaded ? 0 : element_count - 1; i >= 0; i--) {
      TextElement elm = elms_->at(i);
-    int cp_offset = trace->cp_offset() + elm.cp_offset;
-    if (elm.text_type == TextElement::ATOM) {
-      Vector<const uc16> quarks = elm.data.u_atom->data();
+    int cp_offset = trace->cp_offset() + elm.cp_offset();
+    if (elm.text_type() == TextElement::ATOM) {
+      Vector<const uc16> quarks = elm.atom()->data();
        for (int j = preloaded ? 0 : quarks.length() - 1; j >= 0; j--) {
          if (first_element_checked && i == 0 && j == 0) continue;
-        if (DeterminedAlready(quick_check, elm.cp_offset + j)) continue;
+        if (DeterminedAlready(quick_check, elm.cp_offset() + j)) continue;
          EmitCharacterFunction* emit_function = NULL;
          switch (pass) {
            case NON_ASCII_MATCH:
@@ -3295,11 +3293,11 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
          }
        }
      } else {
-      ASSERT_EQ(elm.text_type, TextElement::CHAR_CLASS);
+      ASSERT_EQ(TextElement::CHAR_CLASS, elm.text_type());
        if (pass == CHARACTER_CLASS_MATCH) {
          if (first_element_checked && i == 0) continue;
-        if (DeterminedAlready(quick_check, elm.cp_offset)) continue;
-        RegExpCharacterClass* cc = elm.data.u_char_class;
+        if (DeterminedAlready(quick_check, elm.cp_offset())) continue;
+        RegExpCharacterClass* cc = elm.char_class();
          EmitCharClass(assembler,
                        cc,
                        ascii,
@@ -3317,12 +3315,8 @@ void TextNode::TextEmitPass(RegExpCompiler* compiler,
  
  int TextNode::Length() {
    TextElement elm = elms_->last();
-  ASSERT(elm.cp_offset >= 0);
-  if (elm.text_type == TextElement::ATOM) {
-    return elm.cp_offset + elm.data.u_atom->data().length();
-  } else {
-    return elm.cp_offset + 1;
-  }
+  ASSERT(elm.cp_offset() >= 0);
+  return elm.cp_offset() + elm.length();
  }
  
  
@@ -3424,8 +3418,8 @@ void TextNode::MakeCaseIndependent(bool is_ascii) {
    int element_count = elms_->length();
    for (int i = 0; i < element_count; i++) {
      TextElement elm = elms_->at(i);
-    if (elm.text_type == TextElement::CHAR_CLASS) {
-      RegExpCharacterClass* cc = elm.data.u_char_class;
+    if (elm.text_type() == TextElement::CHAR_CLASS) {
+      RegExpCharacterClass* cc = elm.char_class();
        // None of the standard character classes is different in the case
        // independent case and it slows us down if we don't know that.
        if (cc->is_standard(zone())) continue;
@@ -3441,11 +3435,7 @@ void TextNode::MakeCaseIndependent(bool is_ascii) {
  
  int TextNode::GreedyLoopTextLength() {
    TextElement elm = elms_->at(elms_->length() - 1);
-  if (elm.text_type == TextElement::CHAR_CLASS) {
-    return elm.cp_offset + 1;
-  } else {
-    return elm.cp_offset + elm.data.u_atom->data().length();
-  }
+  return elm.cp_offset() + elm.length();
  }
  
  
@@ -3453,8 +3443,8 @@ RegExpNode* TextNode::GetSuccessorOfOmnivorousTextNode(
      RegExpCompiler* compiler) {
    if (elms_->length() != 1) return NULL;
    TextElement elm = elms_->at(0);
-  if (elm.text_type != TextElement::CHAR_CLASS) return NULL;
-  RegExpCharacterClass* node = elm.data.u_char_class;
+  if (elm.text_type() != TextElement::CHAR_CLASS) return NULL;
+  RegExpCharacterClass* node = elm.char_class();
    ZoneList<CharacterRange>* ranges = node->ranges(zone());
    if (!CharacterRange::IsCanonical(ranges)) {
      CharacterRange::Canonicalize(ranges);
@@ -4528,13 +4518,13 @@ void DotPrinter::VisitText(TextNode* that) {
    for (int i = 0; i < that->elements()->length(); i++) {
      if (i > 0) stream()->Add(" ");
      TextElement elm = that->elements()->at(i);
-    switch (elm.text_type) {
+    switch (elm.text_type()) {
        case TextElement::ATOM: {
-        stream()->Add("'%w'", elm.data.u_atom->data());
+        stream()->Add("'%w'", elm.atom()->data());
          break;
        }
        case TextElement::CHAR_CLASS: {
-        RegExpCharacterClass* node = elm.data.u_char_class;
+        RegExpCharacterClass* node = elm.char_class();
          stream()->Add("[");
          if (node->is_negated())
            stream()->Add("^");
@@ -5716,12 +5706,8 @@ void TextNode::CalculateOffsets() {
    int cp_offset = 0;
    for (int i = 0; i < element_count; i++) {
      TextElement& elm = elements()->at(i);
-    elm.cp_offset = cp_offset;
-    if (elm.text_type == TextElement::ATOM) {
-      cp_offset += elm.data.u_atom->data().length();
-    } else {
-      cp_offset++;
-    }
+    elm.set_cp_offset(cp_offset);
+    cp_offset += elm.length();
    }
  }
  
@@ -5837,8 +5823,8 @@ void TextNode::FillInBMInfo(int initial_offset,
        return;
      }
      TextElement text = elements()->at(i);
-    if (text.text_type == TextElement::ATOM) {
-      RegExpAtom* atom = text.data.u_atom;
+    if (text.text_type() == TextElement::ATOM) {
+      RegExpAtom* atom = text.atom();
        for (int j = 0; j < atom->length(); j++, offset++) {
          if (offset >= bm->length()) {
            if (initial_offset == 0) set_bm_info(not_at_start, bm);
@@ -5860,8 +5846,8 @@ void TextNode::FillInBMInfo(int initial_offset,
          }
        }
      } else {
-      ASSERT(text.text_type == TextElement::CHAR_CLASS);
-      RegExpCharacterClass* char_class = text.data.u_char_class;
+      ASSERT_EQ(TextElement::CHAR_CLASS, text.text_type());
+      RegExpCharacterClass* char_class = text.char_class();
        ZoneList<CharacterRange>* ranges = char_class->ranges(zone());
        if (char_class->is_negated()) {
          bm->SetAll(offset);
@@ -5973,14 +5959,14 @@ void DispatchTableConstructor::AddInverse(ZoneList<CharacterRange>* ranges) {
  
  void DispatchTableConstructor::VisitText(TextNode* that) {
    TextElement elm = that->elements()->at(0);
-  switch (elm.text_type) {
+  switch (elm.text_type()) {
      case TextElement::ATOM: {
-      uc16 c = elm.data.u_atom->data()[0];
+      uc16 c = elm.atom()->data()[0];
        AddRange(CharacterRange(c, c));
        break;
      }
      case TextElement::CHAR_CLASS: {
-      RegExpCharacterClass* tree = elm.data.u_char_class;
+      RegExpCharacterClass* tree = elm.char_class();
        ZoneList<CharacterRange>* ranges = tree->ranges(that->zone());
        if (tree->is_negated()) {
          AddInverse(ranges);
diff --git a/src/jsregexp.h b/src/jsregexp.h

index 20c0ac4..bab8756 100644 (file)
--- a/src/jsregexp.h
+++ b/src/jsregexp.h
@@ -426,20 +426,41 @@ FOR_EACH_REG_EXP_TREE_TYPE(FORWARD_DECLARE)
  #undef FORWARD_DECLARE
  
  
-class TextElement {
+class TextElement V8_FINAL BASE_EMBEDDED {
   public:
-  enum TextType {UNINITIALIZED, ATOM, CHAR_CLASS};
-  TextElement() : text_type(UNINITIALIZED) { }
-  explicit TextElement(TextType t) : text_type(t), cp_offset(-1) { }
+  enum TextType {
+    ATOM,
+    CHAR_CLASS
+  };
+
    static TextElement Atom(RegExpAtom* atom);
    static TextElement CharClass(RegExpCharacterClass* char_class);
-  int length();
-  TextType text_type;
-  union {
-    RegExpAtom* u_atom;
-    RegExpCharacterClass* u_char_class;
-  } data;
-  int cp_offset;
+
+  int cp_offset() const { return cp_offset_; }
+  void set_cp_offset(int cp_offset) { cp_offset_ = cp_offset; }
+  int length() const;
+
+  TextType text_type() const { return text_type_; }
+
+  RegExpTree* tree() const { return tree_; }
+
+  RegExpAtom* atom() const {
+    ASSERT(text_type() == ATOM);
+    return reinterpret_cast<RegExpAtom*>(tree());
+  }
+
+  RegExpCharacterClass* char_class() const {
+    ASSERT(text_type() == CHAR_CLASS);
+    return reinterpret_cast<RegExpCharacterClass*>(tree());
+  }
+
+ private:
+  TextElement(TextType text_type, RegExpTree* tree)
+      : cp_offset_(-1), text_type_(text_type), tree_(tree) {}
+
+  int cp_offset_;
+  TextType text_type_;
+  RegExpTree* tree_;
  };
author	bmeurer@chromium.org <bmeurer@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Fri, 23 Aug 2013 11:06:16 +0000 (11:06 +0000)
committer	bmeurer@chromium.org <bmeurer@chromium.org@ce2b1a6d-e550-0410-aec6-3dcde31c8c00>
	Fri, 23 Aug 2013 11:06:16 +0000 (11:06 +0000)
src/ast.cc		patch \| blob \| history
src/jsregexp.cc		patch \| blob \| history
src/jsregexp.h		patch \| blob \| history