break;
// Just because a particular node is atomic doesn't mean all its descendants are.
- // Process them as well.
+ // Process them as well. Lookarounds are implicitly atomic.
case RegexNodeKind.Atomic:
+ case RegexNodeKind.PositiveLookaround:
+ case RegexNodeKind.NegativeLookaround:
node = node.Child(0);
continue;
node.Child(i).EliminateEndingBacktracking();
}
- if (node.Kind != RegexNodeKind.ExpressionConditional) // ReduceTestgroup will have already applied ending backtracking removal
+ if (node.Kind != RegexNodeKind.ExpressionConditional) // ReduceExpressionConditional will have already applied ending backtracking removal
{
node = node.Child(0);
continue;
RegexNodeKind.Concatenate => ReduceConcatenation(),
RegexNodeKind.Group => ReduceGroup(),
RegexNodeKind.Loop or RegexNodeKind.Lazyloop => ReduceLoops(),
- RegexNodeKind.NegativeLookaround => ReducePrevent(),
- RegexNodeKind.PositiveLookaround => ReduceRequire(),
+ RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround => ReduceLookaround(),
RegexNodeKind.Set or RegexNodeKind.Setloop or RegexNodeKind.Setloopatomic or RegexNodeKind.Setlazy => ReduceSet(),
- RegexNodeKind.ExpressionConditional => ReduceTestgroup(),
- RegexNodeKind.BackreferenceConditional => ReduceTestref(),
+ RegexNodeKind.ExpressionConditional => ReduceExpressionConditional(),
+ RegexNodeKind.BackreferenceConditional => ReduceBackreferenceConditional(),
_ => this,
};
}
// can be made atomic. Then if we do end up backtracking into the alternation,
// we at least won't need to backtrack into that loop. The same is true for
// conditionals, though we don't want to process the condition expression
- // itself, as it's already considered atomic and handled as part of ReduceTestgroup.
+ // itself, as it's already considered atomic and handled as part of ReduceExpressionConditional.
{
int alternateBranches = node.ChildCount();
for (int b = node.Kind == RegexNodeKind.ExpressionConditional ? 1 : 0; b < alternateBranches; b++)
return null;
}
- /// <summary>Optimizations for positive lookaheads/behinds.</summary>
- private RegexNode ReduceRequire()
+ /// <summary>Optimizations for positive and negative lookaheads/behinds.</summary>
+ private RegexNode ReduceLookaround()
{
- Debug.Assert(Kind == RegexNodeKind.PositiveLookaround);
+ Debug.Assert(Kind is RegexNodeKind.PositiveLookaround or RegexNodeKind.NegativeLookaround);
Debug.Assert(ChildCount() == 1);
- // A positive lookaround is a zero-width atomic assertion.
+ // A lookaround is a zero-width atomic assertion.
// As it's atomic, nothing will backtrack into it, and we can
// eliminate any ending backtracking from it.
EliminateEndingBacktracking();
- // A positive lookaround wrapped around an empty is a nop, and can just
- // be made into an empty. A developer typically doesn't write this, but
- // rather it evolves due to optimizations resulting in empty.
- if (Child(0).Kind == RegexNodeKind.Empty)
- {
- Kind = RegexNodeKind.Empty;
- Children = null;
- }
-
- return this;
- }
-
- /// <summary>Optimizations for negative lookaheads/behinds.</summary>
- private RegexNode ReducePrevent()
- {
- Debug.Assert(Kind == RegexNodeKind.NegativeLookaround);
- Debug.Assert(ChildCount() == 1);
+ // A positive lookaround wrapped around an empty is a nop, and we can reduce it
+ // to simply Empty. A developer typically doesn't write this, but rather it evolves
+ // due to optimizations resulting in empty.
// A negative lookaround wrapped around an empty child, i.e. (?!), is
- // sometimes used as a way to insert a guaranteed no-match into the expression.
- // We can reduce it to simply Nothing.
+ // sometimes used as a way to insert a guaranteed no-match into the expression,
+ // often as part of a conditional. We can reduce it to simply Nothing.
+
if (Child(0).Kind == RegexNodeKind.Empty)
{
- Kind = RegexNodeKind.Nothing;
+ Kind = Kind == RegexNodeKind.PositiveLookaround ? RegexNodeKind.Empty : RegexNodeKind.Nothing;
Children = null;
}
}
/// <summary>Optimizations for backreference conditionals.</summary>
- private RegexNode ReduceTestref()
+ private RegexNode ReduceBackreferenceConditional()
{
Debug.Assert(Kind == RegexNodeKind.BackreferenceConditional);
Debug.Assert(ChildCount() is 1 or 2);
- // This isn't so much an optimization as it is changing the tree for consistency.
- // We want all engines to be able to trust that every Testref will have two children,
+ // This isn't so much an optimization as it is changing the tree for consistency. We want
+ // all engines to be able to trust that every backreference conditional will have two children,
// even though it's optional in the syntax. If it's missing a "not matched" branch,
// we add one that will match empty.
if (ChildCount() == 1)
}
/// <summary>Optimizations for expression conditionals.</summary>
- private RegexNode ReduceTestgroup()
+ private RegexNode ReduceExpressionConditional()
{
Debug.Assert(Kind == RegexNodeKind.ExpressionConditional);
Debug.Assert(ChildCount() is 2 or 3);
- // This isn't so much an optimization as it is changing the tree for consistency.
- // We want all engines to be able to trust that every Testgroup will have three children,
+ // This isn't so much an optimization as it is changing the tree for consistency. We want
+ // all engines to be able to trust that every expression conditional will have three children,
// even though it's optional in the syntax. If it's missing a "not matched" branch,
// we add one that will match empty.
if (ChildCount() == 2)