FlattenString(pattern);
RegExpParseResult parse_result;
FlatStringReader reader(pattern);
- if (!ParseRegExp(&reader, &parse_result)) {
+ if (!ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
// Throw an exception if we fail to parse the pattern.
ThrowRegExpException(re,
pattern,
Handle<FixedArray> irregexp_data =
RegExpEngine::Compile(&parse_result,
&node,
- flags.is_ignore_case());
+ flags.is_ignore_case(),
+ flags.is_multiline());
if (irregexp_data.is_null()) {
if (FLAG_disable_jscre) {
UNIMPLEMENTED();
// TODO(erikcorry): Implement support.
if (info_.follows_word_interest ||
info_.follows_newline_interest ||
- info_.follows_start_interest) {
+ info_.follows_start_interest ||
+ info_.at_end) {
return false;
}
if (label_.is_bound()) {
bool EndNode::GoTo(RegExpCompiler* compiler) {
if (info()->follows_word_interest ||
info()->follows_newline_interest ||
- info()->follows_start_interest) {
+ info()->follows_start_interest ||
+ info()->at_end) {
return false;
}
if (!label()->is_bound()) {
"fontcolor=lightgrey, margin=0.1, fontsize=10, label=\"{",
that);
NodeInfo* info = that->info();
- stream()->Add("{NI|%i}|{SI|%i}|{WI|%i}",
+ stream()->Add("{NI|%i}|{WI|%i}|{SI|%i}",
info->follows_newline_interest,
- info->follows_start_interest,
- info->follows_word_interest);
- stream()->Add("|{DN|%i}|{DS|%i}|{DW|%i}",
+ info->follows_word_interest,
+ info->follows_start_interest);
+ stream()->Add("|{DN|%i}|{DW|%i}|{DS|%i}|{AE|%i}",
info->determine_newline,
+ info->determine_word,
info->determine_start,
- info->determine_word);
+ info->at_end);
+ if (info->follows_newline != NodeInfo::UNKNOWN)
+ stream()->Add("|{FN|%i}", info->follows_newline);
+ if (info->follows_word != NodeInfo::UNKNOWN)
+ stream()->Add("|{FW|%i}", info->follows_word);
+ if (info->follows_start != NodeInfo::UNKNOWN)
+ stream()->Add("|{FS|%i}", info->follows_start);
Label* label = that->label();
if (label->is_bound())
stream()->Add("|{@|%x}", label->pos());
case BOUNDARY: case NON_BOUNDARY:
info.follows_word_interest = true;
break;
- case END_OF_LINE: case END_OF_INPUT:
+ case END_OF_INPUT:
+ info.at_end = true;
+ break;
+ case END_OF_LINE:
// This is wrong but has the effect of making the compiler abort.
- info.follows_start_interest = true;
+ info.at_end = true;
}
- return on_success->PropagateInterest(&info);
+ return on_success->PropagateForward(&info);
}
RegExpNode* RegExpNode::GetSibling(NodeInfo* info) {
for (int i = 0; i < siblings_.length(); i++) {
RegExpNode* sibling = siblings_.Get(i);
- if (sibling->info()->SameInterests(info))
+ if (sibling->info()->HasSameForwardInterests(info))
return sibling;
}
return NULL;
template <class C>
static RegExpNode* PropagateToEndpoint(C* node, NodeInfo* info) {
- RegExpNode* sibling = node->GetSibling(info);
+ NodeInfo full_info(*node->info());
+ full_info.AddFromPreceding(info);
+ RegExpNode* sibling = node->GetSibling(&full_info);
if (sibling != NULL) return sibling;
node->EnsureSiblings();
sibling = new C(*node);
- sibling->info()->AdoptInterests(info);
+ sibling->info()->AddFromPreceding(&full_info);
node->AddSibling(sibling);
return sibling;
}
-RegExpNode* ActionNode::PropagateInterest(NodeInfo* info) {
- RegExpNode* sibling = GetSibling(info);
+RegExpNode* ActionNode::PropagateForward(NodeInfo* info) {
+ NodeInfo full_info(*this->info());
+ full_info.AddFromPreceding(info);
+ RegExpNode* sibling = GetSibling(&full_info);
if (sibling != NULL) return sibling;
EnsureSiblings();
ActionNode* action = new ActionNode(*this);
- action->info()->AdoptInterests(info);
+ action->info()->AddFromPreceding(&full_info);
AddSibling(action);
- action->set_on_success(action->on_success()->PropagateInterest(info));
+ action->set_on_success(action->on_success()->PropagateForward(info));
return action;
}
-RegExpNode* ChoiceNode::PropagateInterest(NodeInfo* info) {
- RegExpNode* sibling = GetSibling(info);
+RegExpNode* ChoiceNode::PropagateForward(NodeInfo* info) {
+ NodeInfo full_info(*this->info());
+ full_info.AddFromPreceding(info);
+ RegExpNode* sibling = GetSibling(&full_info);
if (sibling != NULL) return sibling;
EnsureSiblings();
ChoiceNode* choice = new ChoiceNode(*this);
- choice->info()->AdoptInterests(info);
+ choice->info()->AddFromPreceding(&full_info);
AddSibling(choice);
ZoneList<GuardedAlternative>* old_alternatives = alternatives();
int count = old_alternatives->length();
choice->alternatives_ = new ZoneList<GuardedAlternative>(count);
for (int i = 0; i < count; i++) {
GuardedAlternative alternative = old_alternatives->at(i);
- alternative.set_node(alternative.node()->PropagateInterest(info));
+ alternative.set_node(alternative.node()->PropagateForward(info));
choice->alternatives()->Add(alternative);
}
return choice;
}
-RegExpNode* EndNode::PropagateInterest(NodeInfo* info) {
+RegExpNode* EndNode::PropagateForward(NodeInfo* info) {
return PropagateToEndpoint(this, info);
}
-RegExpNode* BackReferenceNode::PropagateInterest(NodeInfo* info) {
+RegExpNode* BackReferenceNode::PropagateForward(NodeInfo* info) {
return PropagateToEndpoint(this, info);
}
-RegExpNode* TextNode::PropagateInterest(NodeInfo* info) {
+RegExpNode* TextNode::PropagateForward(NodeInfo* info) {
return PropagateToEndpoint(this, info);
}
}
EnsureAnalyzed(that->on_success());
EnsureAnalyzed(that->on_failure());
+ NodeInfo* info = that->info();
+ NodeInfo* next_info = that->on_success()->info();
+ // If the following node is interested in what it follows then this
+ // node must determine it.
+ info->determine_newline = next_info->follows_newline_interest;
+ info->determine_word = next_info->follows_word_interest;
+ info->determine_start = next_info->follows_start_interest;
}
void Analysis::VisitAction(ActionNode* that) {
- RegExpNode* next = that->on_success();
- EnsureAnalyzed(next);
- that->info()->determine_newline = next->info()->prev_determine_newline();
- that->info()->determine_word = next->info()->prev_determine_word();
- that->info()->determine_start = next->info()->prev_determine_start();
+ EnsureAnalyzed(that->on_success());
+ // If the next node is interested in what it follows then this node
+ // has to be interested too so it can pass the information on.
+ that->info()->AddFromFollowing(that->on_success()->info());
}
for (int i = 0; i < that->alternatives()->length(); i++) {
RegExpNode* node = that->alternatives()->at(i).node();
EnsureAnalyzed(node);
- info->determine_newline |= node->info()->prev_determine_newline();
- info->determine_word |= node->info()->prev_determine_word();
- info->determine_start |= node->info()->prev_determine_start();
+ // Anything the following nodes need to know has to be known by
+ // this node also, so it can pass it on.
+ info->AddFromFollowing(node->info());
}
if (!that->table_calculated()) {
DispatchTableConstructor cons(that->table());
Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
RegExpNode** node_return,
- bool ignore_case) {
+ bool ignore_case,
+ bool is_multiline) {
RegExpCompiler compiler(input->capture_count, ignore_case);
// Wrap the body of the regexp in capture #0.
RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
struct NodeInfo {
+ enum PrecedingInfo {
+ UNKNOWN = -1, FALSE = 0, TRUE = 1
+ };
+
NodeInfo()
: being_analyzed(false),
been_analyzed(false),
determine_start(false),
follows_word_interest(false),
follows_newline_interest(false),
- follows_start_interest(false) { }
- bool SameInterests(NodeInfo* that) {
- return (follows_word_interest == that->follows_word_interest)
+ follows_start_interest(false),
+ at_end(false),
+ follows_word(UNKNOWN),
+ follows_newline(UNKNOWN),
+ follows_start(UNKNOWN) { }
+
+ bool HasSameForwardInterests(NodeInfo* that) {
+ return (at_end == that->at_end)
+ && (follows_word_interest == that->follows_word_interest)
&& (follows_newline_interest == that->follows_newline_interest)
&& (follows_start_interest == that->follows_start_interest);
}
- void AdoptInterests(NodeInfo* that) {
- follows_word_interest = that->follows_word_interest;
- follows_newline_interest = that->follows_newline_interest;
- follows_start_interest = that->follows_start_interest;
- }
- bool prev_determine_word() {
- return determine_word || follows_word_interest;
- }
- bool prev_determine_newline() {
- return determine_newline || follows_newline_interest;
+
+ // Updates the interests of this node given the interests of the
+ // node preceding it.
+ void AddFromPreceding(NodeInfo* that) {
+ at_end |= that->at_end;
+ follows_word_interest |= that->follows_word_interest;
+ follows_newline_interest |= that->follows_newline_interest;
+ follows_start_interest |= that->follows_start_interest;
}
- bool prev_determine_start() {
- return determine_start || follows_start_interest;
+
+ // Sets the interests of this node to include the interests of the
+ // following node.
+ void AddFromFollowing(NodeInfo* that) {
+ follows_word_interest |= that->follows_word_interest;
+ follows_newline_interest |= that->follows_newline_interest;
+ follows_start_interest |= that->follows_start_interest;
}
+
bool being_analyzed: 1;
bool been_analyzed: 1;
+
+ // These bits are set if this node must propagate forward information
+ // about the last character it consumed (or, in the case of 'start',
+ // if it is at the start of the input).
bool determine_word: 1;
bool determine_newline: 1;
bool determine_start: 1;
+
+ // These bits are set of this node has to know what the preceding
+ // character was.
bool follows_word_interest: 1;
bool follows_newline_interest: 1;
bool follows_start_interest: 1;
+
+ bool at_end: 1;
+
+ // These bits are set if the node can make assumptions about what
+ // the previous character was.
+ PrecedingInfo follows_word: 2;
+ PrecedingInfo follows_newline: 2;
+ PrecedingInfo follows_start: 2;
};
// Until the implementation is complete we will return true for success and
// false for failure.
virtual bool Emit(RegExpCompiler* compiler) = 0;
- virtual RegExpNode* PropagateInterest(NodeInfo* info) = 0;
+
+ // Propagates the given interest information forward. When seeing
+ // \bfoo for instance, the \b is implemented by propagating forward
+ // to the 'foo' string that it should only succeed if its first
+ // character is a letter xor the previous character was a letter.
+ virtual RegExpNode* PropagateForward(NodeInfo* info) = 0;
+
NodeInfo* info() { return &info_; }
virtual bool IsBacktrack() { return false; }
RegExpNode* GetSibling(NodeInfo* info);
static ActionNode* EscapeSubmatch(int reg, RegExpNode* on_success);
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
- virtual RegExpNode* PropagateInterest(NodeInfo* info);
+ virtual RegExpNode* PropagateForward(NodeInfo* info);
private:
union {
struct {
on_failure_(on_failure),
elms_(elms) { }
virtual void Accept(NodeVisitor* visitor);
- virtual RegExpNode* PropagateInterest(NodeInfo* info);
+ virtual RegExpNode* PropagateForward(NodeInfo* info);
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
ZoneList<TextElement>* elements() { return elms_; }
int start_register() { return start_reg_; }
int end_register() { return end_reg_; }
virtual bool Emit(RegExpCompiler* compiler);
- virtual RegExpNode* PropagateInterest(NodeInfo* info);
+ virtual RegExpNode* PropagateForward(NodeInfo* info);
private:
RegExpNode* on_failure_;
int start_reg_;
explicit EndNode(Action action) : action_(action) { }
virtual void Accept(NodeVisitor* visitor);
virtual bool Emit(RegExpCompiler* compiler);
- virtual RegExpNode* PropagateInterest(NodeInfo* info);
+ virtual RegExpNode* PropagateForward(NodeInfo* info);
virtual bool IsBacktrack() { return action_ == BACKTRACK; }
virtual bool GoTo(RegExpCompiler* compiler);
private:
DispatchTable* table() { return &table_; }
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
- virtual RegExpNode* PropagateInterest(NodeInfo* info);
+ virtual RegExpNode* PropagateForward(NodeInfo* info);
bool table_calculated() { return table_calculated_; }
void set_table_calculated(bool b) { table_calculated_ = b; }
bool being_calculated() { return being_calculated_; }
public:
static Handle<FixedArray> Compile(RegExpParseResult* input,
RegExpNode** node_return,
- bool ignore_case);
+ bool ignore_case,
+ bool multiline);
static void DotPrint(const char* label, RegExpNode* node);
};
bool CaptureAvailable(int index);
uc32 current_;
bool has_more_;
- bool multiline_mode_;
+ bool multiline_;
int next_pos_;
FlatStringReader* in_;
Handle<String>* error_;
RegExpParser::RegExpParser(FlatStringReader* in,
Handle<String>* error,
- bool multiline_mode)
+ bool multiline)
: current_(kEndMarker),
has_more_(true),
- multiline_mode_(multiline_mode),
+ multiline_(multiline),
next_pos_(0),
in_(in),
error_(error),
case '^': {
Advance();
RegExpAssertion::Type type =
- multiline_mode_ ? RegExpAssertion::START_OF_LINE :
- RegExpAssertion::START_OF_INPUT;
+ multiline_ ? RegExpAssertion::START_OF_LINE :
+ RegExpAssertion::START_OF_INPUT;
builder.AddAssertion(new RegExpAssertion(type));
continue;
}
case '$': {
Advance();
RegExpAssertion::Type type =
- multiline_mode_ ? RegExpAssertion::END_OF_LINE :
- RegExpAssertion::END_OF_INPUT;
+ multiline_ ? RegExpAssertion::END_OF_LINE :
+ RegExpAssertion::END_OF_INPUT;
builder.AddAssertion(new RegExpAssertion(type));
continue;
}
}
-bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result) {
+bool ParseRegExp(FlatStringReader* input,
+ bool multiline,
+ RegExpParseResult* result) {
ASSERT(result != NULL);
- // TODO(plesner): Get multiline flag somehow
- RegExpParser parser(input, &result->error, false);
+ RegExpParser parser(input, &result->error, multiline);
bool ok = true;
result->tree = parser.ParsePattern(&ok);
if (!ok) {
v8::Extension* extension);
-bool ParseRegExp(FlatStringReader* input, RegExpParseResult* result);
+bool ParseRegExp(FlatStringReader* input,
+ bool multiline,
+ RegExpParseResult* result);
// Support for doing lazy compilation. The script is the script containing full
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
- CHECK(v8::internal::ParseRegExp(&reader, &result));
+ CHECK(v8::internal::ParseRegExp(&reader, false, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
SmartPointer<const char> output = result.tree->ToString();
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
- CHECK(v8::internal::ParseRegExp(&reader, &result));
+ CHECK(v8::internal::ParseRegExp(&reader, false, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
return result.has_character_escapes;
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
- CHECK_EQ(false, v8::internal::ParseRegExp(&reader, &result));
+ CHECK_EQ(false, v8::internal::ParseRegExp(&reader, false, &result));
CHECK(result.tree == NULL);
CHECK(!result.error.is_null());
SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
}
-static RegExpNode* Compile(const char* input) {
+static RegExpNode* Compile(const char* input, bool multiline) {
FlatStringReader reader(CStrVector(input));
RegExpParseResult result;
- if (!v8::internal::ParseRegExp(&reader, &result))
+ if (!v8::internal::ParseRegExp(&reader, multiline, &result))
return NULL;
RegExpNode* node = NULL;
- RegExpEngine::Compile(&result, &node, false);
+ RegExpEngine::Compile(&result, &node, false, multiline);
return node;
}
static void Execute(const char* input,
- const char* str,
+ bool multiline,
bool dot_output = false) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
- RegExpNode* node = Compile(input);
+ RegExpNode* node = Compile(input, multiline);
USE(node);
#ifdef DEBUG
if (dot_output) {
}
-TEST(Execution) {
- V8::Initialize(NULL);
- Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbegh");
- Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefh");
- Execute(".*?(?:a[bc]d|e[fg]h)", "xxxabbefd");
-}
-
-
class TestConfig {
public:
typedef int Key;
TEST(SimplePropagation) {
v8::HandleScope scope;
ZoneScope zone_scope(DELETE_ON_EXIT);
- RegExpNode* node = Compile("(a|^b|c)");
- CHECK(node->info()->determine_start);
+ RegExpNode* node = Compile("(a|^b|c)", false);
+ CHECK(node->info()->follows_start_interest);
}
TEST(RangeCanonicalization) {
- ASSERT((CanonRange(0) & CharacterRange::kStartMarker) != 0);
+ CHECK((CanonRange(0) & CharacterRange::kStartMarker) != 0);
// Check that we arrive at the same result when using the basic
// range canonicalization primitives as when using immediate
// canonicalization.
TEST(Graph) {
V8::Initialize(NULL);
- Execute("\\w+", "", true);
+ Execute("(?:foo|bar$)", false, true);
}