The greedy naming is really confusing (we should fix it in the framework documentation as well).
In general regular expression nomenclature, "greedy" is used as the opposite of "lazy", indicating how much a loop initially consumes, e.g. does `a*` first try consuming as many 'a's as possible or does it first try consuming as few 'a's as possible... it's "greedy" (denoted as `a*`) if it consumes as many as possible and "lazy" (denoted as `a*?`) if it consumes as few as possible.
How aggressively it consumes, however, is orthogonal to whether it backtracks. Whereas `a*` is greedy and backtracking and `a*?` is lazy and backtracking, `(?>a*)` is greedy and non-backtracking and `(?>a*?)` is lazy and non-backtracking.
Unfortunately, the nomenclature in the implementation and the documentation describes the `(?> ... )` as being a "greedy subexpression", which then conflates the meaning of "greedy".
The rest of the industry refers to these instead as "atomic", so I've changed it to that in the implementation.
// Manufactured primitive operations, derived from the tree that comes from the parser.
// These exist to reduce backtracking (both actually performing it and spitting code for it).
- public const int Oneloopgreedy = 43; // lef,back char,min,max (?> a {,n} )
- public const int Notoneloopgreedy = 44; // lef,back set,min,max (?> . {,n} )
- public const int Setloopgreedy = 45; // lef,back set,min,max (?> [\d]{,n} )
+ public const int Oneloopatomic = 43; // lef,back char,min,max (?> a {,n} )
+ public const int Notoneloopatomic = 44; // lef,back char,min,max (?> . {,n} )
+ public const int Setloopatomic = 45; // lef,back set,min,max (?> [\d]{,n} )
// Modifiers for alternate modes
public const int Mask = 63; // Mask to get unmodified ordinary operator
case Onerep:
case Notonerep:
case Oneloop:
- case Oneloopgreedy:
+ case Oneloopatomic:
case Notoneloop:
- case Notoneloopgreedy:
+ case Notoneloopatomic:
case Onelazy:
case Notonelazy:
case Setlazy:
case Setrep:
case Setloop:
- case Setloopgreedy:
+ case Setloopatomic:
return 3;
default:
"Setjump", "Backjump", "Forejump", "Testref", "Goto",
"Prune", "Stop",
"ECMABoundary", "NonECMABoundary",
- "Oneloopgreedy", "Notoneloopgreedy", "Setloopgreedy"
+ "Oneloopatomic", "Notoneloopatomic", "Setloopatomic"
};
private static string OperatorDescription(int Opcode)
case Onerep:
case Notonerep:
case Oneloop:
- case Oneloopgreedy:
+ case Oneloopatomic:
case Notoneloop:
- case Notoneloopgreedy:
+ case Notoneloopatomic:
case Onelazy:
case Notonelazy:
sb.Append("Ch = ");
case Set:
case Setrep:
case Setloop:
- case Setloopgreedy:
+ case Setloopatomic:
case Setlazy:
sb.Append("Set = ");
sb.Append(RegexCharClass.SetDescription(Strings[Codes[offset + 1]]));
case Onerep:
case Notonerep:
case Oneloop:
- case Oneloopgreedy:
+ case Oneloopatomic:
case Notoneloop:
- case Notoneloopgreedy:
+ case Notoneloopatomic:
case Onelazy:
case Notonelazy:
case Setrep:
case Setloop:
- case Setloopgreedy:
+ case Setloopatomic:
case Setlazy:
sb.Append(", Rep = ");
if (Codes[offset + 2] == int.MaxValue)
case RegexNode.Eol:
case RegexNode.End:
case RegexNode.EndZ:
- // {Set/One}loopgreedy are optimized nodes that represent non-backtracking variable-length loops.
+ // {Set/One/Notone}loopatomic are optimized nodes that represent non-backtracking variable-length loops.
// These consume their {Set/One/Notone} inputs as long as they match, and don't give up anything they
// matched, which means we can support them without backtracking.
- case RegexNode.Oneloopgreedy:
- case RegexNode.Notoneloopgreedy:
- case RegexNode.Setloopgreedy:
+ case RegexNode.Oneloopatomic:
+ case RegexNode.Notoneloopatomic:
+ case RegexNode.Setloopatomic:
// "Empty" is easy: nothing is emitted for it.
// "Nothing" is also easy: it doesn't match anything.
case RegexNode.Empty:
case RegexNode.Oneloop:
case RegexNode.Notoneloop:
case RegexNode.Setloop:
- Debug.Assert(node.Next == null || node.Next.Type != RegexNode.Greedy, "Loop should have been transformed into a greedy type.");
+ Debug.Assert(node.Next == null || node.Next.Type != RegexNode.Atomic, "Loop should have been transformed into an atomic type.");
goto case RegexNode.Onelazy;
case RegexNode.Onelazy:
case RegexNode.Notonelazy:
break;
// {Lazy}Loop repeaters are the same, except their child also needs to be supported.
- // We also support such loops being greedy.
+ // We also support such loops being atomic.
case RegexNode.Loop:
case RegexNode.Lazyloop:
supported =
- (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Greedy)) &&
+ (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic)) &&
NodeSupportsNonBacktrackingImplementation(node.Child(0), level + 1);
break;
- // We can handle greedy as long as we can handle making its child greedy, or
+ // We can handle atomic as long as we can handle making its child atomic, or
// its child doesn't have that concept.
- case RegexNode.Greedy:
+ case RegexNode.Atomic:
// Lookahead assertions also only require that the child node be supported.
// The RightToLeft check earlier is important to differentiate lookbehind,
// which is not supported.
supported = NodeSupportsNonBacktrackingImplementation(node.Child(0), level + 1);
break;
- // We can handle alternates as long as they're greedy (a root / global alternate is
- // effectively greedy, as nothing will try to backtrack into it as it's the last thing).
+ // We can handle alternates as long as they're atomic (a root / global alternate is
+ // effectively atomic, as nothing will try to backtrack into it as it's the last thing).
// Its children must all also be supported.
case RegexNode.Alternate:
if (node.Next != null &&
- (node.Next.Type == RegexNode.Greedy || // greedy alternate
+ (node.Next.Type == RegexNode.Atomic || // atomic alternate
(node.Next.Type == RegexNode.Capture && node.Next.Next is null))) // root alternate
{
goto case RegexNode.Concatenate;
}
}
- // Emits the code for a greedy alternate, one that once a branch successfully matches is non-backtracking into it.
+ // Emits the code for an atomic alternate, one that once a branch successfully matches is non-backtracking into it.
// This amounts to generating the code for each branch, with failures in a branch resetting state to what it was initially
// and then jumping to the next branch. We don't need to worry about uncapturing, because capturing is only allowed for the
// implicit capture that happens for the whole match at the end.
- void EmitGreedyAlternate(RegexNode node)
+ void EmitAtomicAlternate(RegexNode node)
{
// int startingTextSpanPos = textSpanPos;
// int startingRunTextPos = runtextpos;
EmitMultiChar(node);
break;
- case RegexNode.Oneloopgreedy:
- case RegexNode.Notoneloopgreedy:
- case RegexNode.Setloopgreedy:
+ case RegexNode.Oneloopatomic:
+ case RegexNode.Notoneloopatomic:
+ case RegexNode.Setloopatomic:
case RegexNode.Loop:
- EmitGreedyLoop(node);
+ EmitAtomicLoop(node);
break;
case RegexNode.Lazyloop:
- // A greedy lazy loop amounts to doing the minimum amount of work possible.
+ // An atomic lazy loop amounts to doing the minimum amount of work possible.
// That means iterating as little as is required, which means a repeater
// for the min, and if min is 0, doing nothing.
- Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Greedy));
+ Debug.Assert(node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic));
if (node.M > 0)
{
EmitRepeater(node, repeatChildNode: true, iterations: node.M);
}
break;
- case RegexNode.Greedy:
+ case RegexNode.Atomic:
EmitNode(node.Child(0));
break;
case RegexNode.Alternate:
- EmitGreedyAlternate(node);
+ EmitAtomicAlternate(node);
break;
case RegexNode.Oneloop:
case RegexNode.Set:
case RegexNode.Setlazy:
case RegexNode.Setloop:
- case RegexNode.Setloopgreedy:
+ case RegexNode.Setloopatomic:
LocalBuilder setScratchLocal = RentInt32Local();
EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal);
ReturnInt32Local(setScratchLocal);
case RegexNode.One:
case RegexNode.Onelazy:
case RegexNode.Oneloop:
- case RegexNode.Oneloopgreedy:
+ case RegexNode.Oneloopatomic:
if (IsCaseInsensitive(node)) CallToLower();
Ldc(node.Ch);
BneFar(doneLabel);
break;
default:
- Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopgreedy);
+ Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopatomic);
if (IsCaseInsensitive(node)) CallToLower();
Ldc(node.Ch);
BeqFar(doneLabel);
ReturnInt32Local(iterationLocal);
}
- // Emits the code to handle a non-backtracking, variable-length loop (Oneloopgreedy or Setloopgreedy).
- void EmitGreedyLoop(RegexNode node)
+ // Emits the code to handle a non-backtracking, variable-length loop (Oneloopatomic, Notoneloopatomic, or Setloopatomic).
+ void EmitAtomicLoop(RegexNode node)
{
Debug.Assert(
- node.Type == RegexNode.Oneloopgreedy ||
- node.Type == RegexNode.Notoneloopgreedy ||
- node.Type == RegexNode.Setloopgreedy ||
- (node.Type == RegexNode.Loop && (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Greedy))));
+ node.Type == RegexNode.Oneloopatomic ||
+ node.Type == RegexNode.Notoneloopatomic ||
+ node.Type == RegexNode.Setloopatomic ||
+ (node.Type == RegexNode.Loop && (node.M == node.N || (node.Next != null && node.Next.Type == RegexNode.Atomic))));
Debug.Assert(node.M < int.MaxValue);
// First generate the code to handle the required number of iterations.
Label originalDoneLabel = doneLabel;
doneLabel = DefineLabel();
- if (node.Type == RegexNode.Notoneloopgreedy && node.N == int.MaxValue && !IsCaseInsensitive(node))
+ if (node.Type == RegexNode.Notoneloopatomic && node.N == int.MaxValue && !IsCaseInsensitive(node))
{
- // For Notoneloopgreedy, we're looking for a specific character, as everything until we find
+ // For Notoneloopatomic, we're looking for a specific character, as everything until we find
// it is consumed by the loop. If we're unbounded, such as with ".*" and if we're case-sensitive,
// we can use the vectorized IndexOf to do the search, rather than open-coding it. (In the future,
// we could consider using IndexOf with StringComparison for case insensitivity.)
LdindU2();
switch (node.Type)
{
- case RegexNode.Oneloopgreedy:
+ case RegexNode.Oneloopatomic:
if (IsCaseInsensitive(node)) CallToLower();
Ldc(node.Ch);
BneFar(doneLabel);
break;
- case RegexNode.Notoneloopgreedy:
+ case RegexNode.Notoneloopatomic:
if (IsCaseInsensitive(node)) CallToLower();
Ldc(node.Ch);
BeqFar(doneLabel);
break;
- case RegexNode.Setloopgreedy:
+ case RegexNode.Setloopatomic:
LocalBuilder setScratchLocal = RentInt32Local();
EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal);
ReturnInt32Local(setScratchLocal);
case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl:
case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl:
case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Oneloopgreedy:
- case RegexCode.Notoneloopgreedy:
- case RegexCode.Setloopgreedy:
- case RegexCode.Oneloopgreedy | RegexCode.Rtl:
- case RegexCode.Notoneloopgreedy | RegexCode.Rtl:
- case RegexCode.Setloopgreedy | RegexCode.Rtl:
- case RegexCode.Oneloopgreedy | RegexCode.Ci:
- case RegexCode.Notoneloopgreedy | RegexCode.Ci:
- case RegexCode.Setloopgreedy | RegexCode.Ci:
- case RegexCode.Oneloopgreedy | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notoneloopgreedy | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Setloopgreedy | RegexCode.Ci | RegexCode.Rtl:
+ case RegexCode.Oneloopatomic:
+ case RegexCode.Notoneloopatomic:
+ case RegexCode.Setloopatomic:
+ case RegexCode.Oneloopatomic | RegexCode.Rtl:
+ case RegexCode.Notoneloopatomic | RegexCode.Rtl:
+ case RegexCode.Setloopatomic | RegexCode.Rtl:
+ case RegexCode.Oneloopatomic | RegexCode.Ci:
+ case RegexCode.Notoneloopatomic | RegexCode.Ci:
+ case RegexCode.Setloopatomic | RegexCode.Ci:
+ case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl:
+ case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl:
+ case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl:
//: int c = Operand(1);
//: if (c > Rightchars())
//: c = Rightchars();
Dup();
Stloc(cLocal);
Ldc(0);
- if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopgreedy)
+ if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
{
BleFar(l2);
}
Rightcharnext();
}
- if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopgreedy)
+ if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
{
EmitTimeoutCheck();
EmitCallCharInClass(_strings![Operand(0)], IsCaseInsensitive(), charInClassLocal);
}
Ldc(Operand(0));
- if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopgreedy)
+ if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic)
{
Beq(l1);
}
else
{
- Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopgreedy);
+ Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic);
Bne(l1);
}
}
MarkLabel(l2);
- if (Code() != RegexCode.Oneloopgreedy && Code() != RegexCode.Notoneloopgreedy && Code() != RegexCode.Setloopgreedy)
+ if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic)
{
Ldloc(lenLocal);
Ldloc(cLocal);
}
break;
- case RegexNode.Greedy:
+ case RegexNode.Atomic:
case RegexNode.Capture:
curNode = curNode.Child(0);
concatNode = null;
continue;
case RegexNode.Oneloop:
- case RegexNode.Oneloopgreedy:
+ case RegexNode.Oneloopatomic:
case RegexNode.Onelazy:
// In release, cutoff at a length to which we can still reasonably construct a string
}
break;
- case RegexNode.Greedy:
+ case RegexNode.Atomic:
case RegexNode.Capture:
curNode = curNode.Child(0);
concatNode = null;
case RegexNode.Group | AfterChild:
case RegexNode.Capture | BeforeChild:
case RegexNode.Capture | AfterChild:
- case RegexNode.Greedy | BeforeChild:
- case RegexNode.Greedy | AfterChild:
+ case RegexNode.Atomic | BeforeChild:
+ case RegexNode.Atomic | AfterChild:
break;
case RegexNode.Require | BeforeChild:
break;
case RegexNode.Oneloop:
- case RegexNode.Oneloopgreedy:
+ case RegexNode.Oneloopatomic:
case RegexNode.Onelazy:
PushFC(new RegexFC(node.Ch, false, node.M == 0, ci));
break;
case RegexNode.Notoneloop:
- case RegexNode.Notoneloopgreedy:
+ case RegexNode.Notoneloopatomic:
case RegexNode.Notonelazy:
PushFC(new RegexFC(node.Ch, true, node.M == 0, ci));
break;
break;
case RegexNode.Setloop:
- case RegexNode.Setloopgreedy:
+ case RegexNode.Setloopatomic:
case RegexNode.Setlazy:
PushFC(new RegexFC(node.Str!, node.M == 0, ci));
break;
}
case RegexCode.Oneloop:
- case RegexCode.Oneloopgreedy:
+ case RegexCode.Oneloopatomic:
{
int c = Operand(1);
}
case RegexCode.Notoneloop:
- case RegexCode.Notoneloopgreedy:
+ case RegexCode.Notoneloopatomic:
{
int c = Operand(1);
}
case RegexCode.Setloop:
- case RegexCode.Setloopgreedy:
+ case RegexCode.Setloopatomic:
{
int c = Operand(1);
public const int EndZ = RegexCode.EndZ; // \Z
public const int End = RegexCode.End; // \z
- public const int Oneloopgreedy = RegexCode.Oneloopgreedy; // c,n (?> a*)
- public const int Notoneloopgreedy = RegexCode.Notoneloopgreedy; // c,n (?> .*)
- public const int Setloopgreedy = RegexCode.Setloopgreedy; // set,n (?> \d*)
+ public const int Oneloopatomic = RegexCode.Oneloopatomic; // c,n (?> a*)
+ public const int Notoneloopatomic = RegexCode.Notoneloopatomic; // c,n (?> .*)
+ public const int Setloopatomic = RegexCode.Setloopatomic; // set,n (?> \d*)
// Interior nodes do not correspond to primitive operations, but
// control structures compositing other operations
public const int Group = 29; // (?:) - noncapturing group
public const int Require = 30; // (?=) (?<=) - lookahead and lookbehind assertions
public const int Prevent = 31; // (?!) (?<!) - negative lookahead and lookbehind assertions
- public const int Greedy = 32; // (?>) - greedy subexpression
+ public const int Atomic = 32; // (?>) - atomic subexpression
public const int Testref = 33; // (?(n) | ) - alternation, reference
public const int Testgroup = 34; // (?(...) | )- alternation, expression
switch (node.Type)
{
case Oneloop:
- node.Type = Oneloopgreedy;
+ node.Type = Oneloopatomic;
break;
case Notoneloop:
- node.Type = Notoneloopgreedy;
+ node.Type = Notoneloopatomic;
break;
case Setloop:
- node.Type = Setloopgreedy;
+ node.Type = Setloopatomic;
break;
case Capture:
case Alternate:
case Loop:
case Lazyloop:
- var greedy = new RegexNode(Greedy, Options);
- greedy.AddChild(existingChild);
- node.ReplaceChild(node.ChildCount() - 1, greedy);
+ var atomic = new RegexNode(Atomic, Options);
+ atomic.AddChild(existingChild);
+ node.ReplaceChild(node.ChildCount() - 1, atomic);
break;
}
continue;
- case Greedy:
+ case Atomic:
node = node.Child(0);
continue;
}
n = ReduceLoops();
break;
- case Greedy:
- n = ReduceGreedy();
+ case Atomic:
+ n = ReduceAtomic();
break;
case Group:
}
/// <summary>
- /// Simple optimization. If a greedy subexpression contains only a set loop
- /// or a one loop, change them to be a greedy set loop or greedy one loop,
- /// and remove the greedy node.
+ /// Simple optimization. If an atomic subexpression contains only a one/notone/set loop,
+ /// change it to be an atomic one/notone/set loop and remove the atomic node.
/// </summary>
- private RegexNode ReduceGreedy()
+ private RegexNode ReduceAtomic()
{
- Debug.Assert(Type == Greedy);
+ Debug.Assert(Type == Atomic);
Debug.Assert(ChildCount() == 1);
RegexNode child = Child(0);
switch (child.Type)
{
case Oneloop:
- child.Type = Oneloopgreedy;
+ child.Type = Oneloopatomic;
return child;
case Notoneloop:
- child.Type = Notoneloopgreedy;
+ child.Type = Notoneloopatomic;
return child;
case Setloop:
- child.Type = Setloopgreedy;
+ child.Type = Setloopatomic;
return child;
- case Oneloopgreedy:
- case Notoneloopgreedy:
- case Setloopgreedy:
+ case Oneloopatomic:
+ case Notoneloopatomic:
+ case Setloopatomic:
return child;
}
switch (child.Type)
{
case Oneloop:
- case Oneloopgreedy:
+ case Oneloopatomic:
case Notoneloop:
- case Notoneloopgreedy:
+ case Notoneloopatomic:
case Setloop:
- case Setloopgreedy:
+ case Setloopatomic:
valid = true;
break;
}
children.RemoveRange(j, i - j);
}
- // Now try to convert as many loops as possible to be greedy to avoid unnecessary backtracking.
+ // Now try to convert as many loops as possible to be atomic to avoid unnecessary backtracking.
if ((Options & RegexOptions.RightToLeft) == 0)
{
- ReduceConcatenateWithAutoGreedy();
+ ReduceConcatenateWithAutoAtomic();
}
// If the concatenation is now empty, return an empty node, or if it's got a single child, return that child.
/// <summary>
/// Finds oneloop, notoneloop, and setloop nodes in the concatenation that can be automatically upgraded
- /// to oneloopgreedy and setloopgreedy nodes. Such changes avoid potential useless backtracking.
+ /// to oneloopatomic, notoneloopatomic, and setloopatomic nodes. Such changes avoid potential useless backtracking.
/// This looks for cases like A*B, where A and B are known to not overlap: in such cases,
/// we can effectively convert this to (?>A*)B.
/// </summary>
- private void ReduceConcatenateWithAutoGreedy()
+ private void ReduceConcatenateWithAutoAtomic()
{
Debug.Assert(Type == Concatenate);
Debug.Assert((Options & RegexOptions.RightToLeft) == 0);
switch (subsequent.Type)
{
case Capture:
- case Greedy:
+ case Atomic:
case Require:
case Concatenate:
case Loop when subsequent.M > 0:
}
// If this node is a one/notone/setloop, see if it overlaps with its successor in the concatenation.
- // If it doesn't, then we can upgrade it to being a one/notone/setloopgreedy.
+ // If it doesn't, then we can upgrade it to being a one/notone/setloopatomic.
// Doing so avoids unnecessary backtracking.
switch (node.Type)
{
case One when node.Ch != subsequent.Ch:
case Onelazy when subsequent.M > 0 && node.Ch != subsequent.Ch:
case Oneloop when subsequent.M > 0 && node.Ch != subsequent.Ch:
- case Oneloopgreedy when subsequent.M > 0 && node.Ch != subsequent.Ch:
+ case Oneloopatomic when subsequent.M > 0 && node.Ch != subsequent.Ch:
case Notone when node.Ch == subsequent.Ch:
case Notonelazy when subsequent.M > 0 && node.Ch == subsequent.Ch:
case Notoneloop when subsequent.M > 0 && node.Ch == subsequent.Ch:
- case Notoneloopgreedy when subsequent.M > 0 && node.Ch == subsequent.Ch:
+ case Notoneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch:
case Multi when node.Ch != subsequent.Str![0]:
case Set when !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
case Setlazy when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
case Setloop when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
- case Setloopgreedy when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
+ case Setloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
case End:
case EndZ when node.Ch != '\n':
case Eol when node.Ch != '\n':
case Nonboundary when !RegexCharClass.IsWordChar(node.Ch):
case ECMABoundary when RegexCharClass.IsECMAWordChar(node.Ch):
case NonECMABoundary when !RegexCharClass.IsECMAWordChar(node.Ch):
- node.Type = Oneloopgreedy;
+ node.Type = Oneloopatomic;
break;
}
break;
case One when node.Ch == subsequent.Ch:
case Onelazy when subsequent.M > 0 && node.Ch == subsequent.Ch:
case Oneloop when subsequent.M > 0 && node.Ch == subsequent.Ch:
- case Oneloopgreedy when subsequent.M > 0 && node.Ch == subsequent.Ch:
+ case Oneloopatomic when subsequent.M > 0 && node.Ch == subsequent.Ch:
case Multi when node.Ch == subsequent.Str![0]:
case End:
- node.Type = Notoneloopgreedy;
+ node.Type = Notoneloopatomic;
break;
}
break;
case One when !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
case Onelazy when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
case Oneloop when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
- case Oneloopgreedy when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
+ case Oneloopatomic when subsequent.M > 0 && !RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
case Notone when RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
case Notonelazy when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
case Notoneloop when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
- case Notoneloopgreedy when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
+ case Notoneloopatomic when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
case Multi when !RegexCharClass.CharInClass(subsequent.Str![0], node.Str!):
case Set when !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
case Setlazy when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
case Setloop when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
- case Setloopgreedy when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
+ case Setloopatomic when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
case End:
case EndZ when !RegexCharClass.CharInClass('\n', node.Str!):
case Eol when !RegexCharClass.CharInClass('\n', node.Str!):
case Nonboundary when node.Str == RegexCharClass.NotWordClass || node.Str == RegexCharClass.NotDigitClass:
case ECMABoundary when node.Str == RegexCharClass.ECMAWordClass || node.Str == RegexCharClass.ECMADigitClass:
case NonECMABoundary when node.Str == RegexCharClass.NotECMAWordClass || node.Str == RegexCharClass.NotDigitClass:
- node.Type = Setloopgreedy;
+ node.Type = Setloopatomic;
break;
}
break;
"Nothing", "Empty",
"Alternate", "Concatenate",
"Loop", "Lazyloop",
- "Capture", "Group", "Require", "Prevent", "Greedy",
+ "Capture", "Group", "Require", "Prevent", "Atomic",
"Testref", "Testgroup",
"", "", "", "", "", "",
"ECMABoundary", "NonECMABoundary",
- "Oneloopgreedy", "Notoneloopgreedy", "Setloopgreedy",
+ "Oneloopatomic", "Notoneloopatomic", "Setloopatomic",
};
public string Description()
switch (Type)
{
case Oneloop:
- case Oneloopgreedy:
+ case Oneloopatomic:
case Notoneloop:
- case Notoneloopgreedy:
+ case Notoneloopatomic:
case Onelazy:
case Notonelazy:
case One:
break;
case Set:
case Setloop:
- case Setloopgreedy:
+ case Setloopatomic:
case Setlazy:
argSb.Append("(Set = " + RegexCharClass.SetDescription(Str!) + ")");
break;
switch (Type)
{
case Oneloop:
- case Oneloopgreedy:
+ case Oneloopatomic:
case Notoneloop:
- case Notoneloopgreedy:
+ case Notoneloopatomic:
case Onelazy:
case Notonelazy:
case Setloop:
- case Setloopgreedy:
+ case Setloopatomic:
case Setlazy:
case Loop:
case Lazyloop:
break;
case '>':
- // greedy subexpression
- nodeType = RegexNode.Greedy;
+ // atomic subexpression
+ nodeType = RegexNode.Atomic;
break;
case '\'':
Emit(RegexCode.Forejump);
break;
- case RegexNode.Greedy | BeforeChild:
+ case RegexNode.Atomic | BeforeChild:
Emit(RegexCode.Setjump);
break;
- case RegexNode.Greedy | AfterChild:
+ case RegexNode.Atomic | AfterChild:
Emit(RegexCode.Forejump);
break;
break;
case RegexNode.Notoneloop:
- case RegexNode.Notoneloopgreedy:
+ case RegexNode.Notoneloopatomic:
case RegexNode.Notonelazy:
case RegexNode.Oneloop:
- case RegexNode.Oneloopgreedy:
+ case RegexNode.Oneloopatomic:
case RegexNode.Onelazy:
if (node.M > 0)
{
- Emit(((node.Type == RegexNode.Oneloop || node.Type == RegexNode.Oneloopgreedy || node.Type == RegexNode.Onelazy) ?
+ Emit(((node.Type == RegexNode.Oneloop || node.Type == RegexNode.Oneloopatomic || node.Type == RegexNode.Onelazy) ?
RegexCode.Onerep : RegexCode.Notonerep) | bits, node.Ch, node.M);
}
if (node.N > node.M)
break;
case RegexNode.Setloop:
- case RegexNode.Setloopgreedy:
+ case RegexNode.Setloopatomic:
case RegexNode.Setlazy:
{
int stringCode = StringCode(node.Str!);
yield return new object[] { null, @"(cat){5,dog}?", "cat{5,dog}?", RegexOptions.None, new string[] { "cat{5,dog}", "cat" } };
yield return new object[] { null, @"(cat){cat,dog}?", "cat{cat,dog}?", RegexOptions.None, new string[] { "cat{cat,dog}", "cat" } };
- // Atomic ("greedy") subexpressions
- // Implicitly upgrading oneloop to be greedy
+ // Atomic subexpressions
+ // Implicitly upgrading oneloop to be atomic
yield return new object[] { null, @"a*", "aaa", RegexOptions.None, new string[] { "aaa" } };
yield return new object[] { null, @"a*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
yield return new object[] { null, @"a*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
yield return new object[] { null, @"a*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
yield return new object[] { null, @"@*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
yield return new object[] { null, @"@*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
- // Implicitly upgrading notoneloop to be greedy
+ // Implicitly upgrading notoneloop to be atomic
yield return new object[] { null, @"[^b]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
yield return new object[] { null, @"[^b]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
yield return new object[] { null, @"[^b]*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
yield return new object[] { null, @"[^b]*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
yield return new object[] { null, @"[^b]*bac", "aaabac", RegexOptions.None, new string[] { "aaabac" } };
yield return new object[] { null, @"[^b]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
- // Implicitly upgrading setloop to be greedy
+ // Implicitly upgrading setloop to be atomic
yield return new object[] { null, @"[ac]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
yield return new object[] { null, @"[ac]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
yield return new object[] { null, @"[ac]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
yield return new object[] { null, @"[ac]*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
- // Implicitly upgrading nested loops to be greedy
+ // Implicitly upgrading nested loops to be atomic
yield return new object[] { null, @"(?:a){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
yield return new object[] { null, @"(?:a){3}?", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
yield return new object[] { null, @"(?:a{2}){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaaaaa" } };