I initially (and mistakenly) thought this wouldn't be useful, but it is. For example, the expression ".*\n" can be made non-backtracking.
// These exist to reduce backtracking (both actually performing it and spitting code for it).
public const int Oneloopgreedy = 43; // lef,back char,min,max (?> a {,n} )
- public const int Setloopgreedy = 44; // lef,back set,min,max (?> [\d]{,n} )
+ public const int Notoneloopgreedy = 44; // lef,back char,min,max (?> . {,n} )
+ public const int Setloopgreedy = 45; // lef,back set,min,max (?> [\d]{,n} )
// Modifiers for alternate modes
public const int Mask = 63; // Mask to get unmodified ordinary operator
case Oneloop:
case Oneloopgreedy:
case Notoneloop:
+ case Notoneloopgreedy:
case Onelazy:
case Notonelazy:
case Setlazy:
"Setjump", "Backjump", "Forejump", "Testref", "Goto",
"Prune", "Stop",
"ECMABoundary", "NonECMABoundary",
- "Oneloopgreedy", "Setloopgreedy"
+ "Oneloopgreedy", "Notoneloopgreedy", "Setloopgreedy"
};
private static string OperatorDescription(int Opcode)
case Oneloop:
case Oneloopgreedy:
case Notoneloop:
+ case Notoneloopgreedy:
case Onelazy:
case Notonelazy:
sb.Append("Ch = ");
case Oneloop:
case Oneloopgreedy:
case Notoneloop:
+ case Notoneloopgreedy:
case Onelazy:
case Notonelazy:
case Setrep:
// {One/Notone/Set}loopgreedy are optimized nodes that represent non-backtracking variable-length loops.
// These consume their {One/Notone/Set} inputs as long as they match, and don't give up anything they
// matched, which means we can support them without backtracking.
- case RegexNode.Setloopgreedy:
case RegexNode.Oneloopgreedy:
+ case RegexNode.Notoneloopgreedy:
+ case RegexNode.Setloopgreedy:
// TODO: Add support for greedy {Lazy}Loop around supported elements, namely Concatenate.
// Nested loops will require multiple iteration variables to be defined.
supported = true;
break;
case RegexNode.Oneloopgreedy:
+ case RegexNode.Notoneloopgreedy:
case RegexNode.Setloopgreedy:
EmitGreedyLoop(node);
break;
break;
default:
- Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop);
+ Debug.Assert(node.Type == RegexNode.Notone || node.Type == RegexNode.Notonelazy || node.Type == RegexNode.Notoneloop || node.Type == RegexNode.Notoneloopgreedy);
if (IsCaseInsensitive(node)) CallToLower();
Ldc(node.Ch);
BeqFar(doneLabel);
// Emits the code to handle a non-backtracking, variable-length loop (Oneloopgreedy, Notoneloopgreedy, or Setloopgreedy).
void EmitGreedyLoop(RegexNode node)
{
- Debug.Assert(node.Type == RegexNode.Oneloopgreedy || node.Type == RegexNode.Setloopgreedy);
+ Debug.Assert(node.Type == RegexNode.Oneloopgreedy || node.Type == RegexNode.Notoneloopgreedy || node.Type == RegexNode.Setloopgreedy);
Debug.Assert(node.M < int.MaxValue);
// First generate the code to handle the required number of iterations.
Add();
Call(s_spanGetItemMethod);
LdindU2();
- if (node.Type == RegexNode.Oneloopgreedy)
- {
- if (IsCaseInsensitive(node)) CallToLower();
- Ldc(node.Ch);
- BneFar(doneLabel);
- }
- else // Setloopgreedy
+ switch (node.Type)
{
- EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal);
- BrfalseFar(doneLabel);
+ case RegexNode.Oneloopgreedy:
+ if (IsCaseInsensitive(node)) CallToLower();
+ Ldc(node.Ch);
+ BneFar(doneLabel);
+ break;
+ case RegexNode.Notoneloopgreedy:
+ if (IsCaseInsensitive(node)) CallToLower();
+ Ldc(node.Ch);
+ BeqFar(doneLabel);
+ break;
+ case RegexNode.Setloopgreedy:
+ EmitCallCharInClass(node.Str!, IsCaseInsensitive(node), setScratchLocal);
+ BrfalseFar(doneLabel);
+ break;
}
// i++;
case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl:
case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl:
case RegexCode.Oneloopgreedy:
+ case RegexCode.Notoneloopgreedy:
case RegexCode.Setloopgreedy:
case RegexCode.Oneloopgreedy | RegexCode.Rtl:
+ case RegexCode.Notoneloopgreedy | RegexCode.Rtl:
case RegexCode.Setloopgreedy | RegexCode.Rtl:
case RegexCode.Oneloopgreedy | RegexCode.Ci:
+ case RegexCode.Notoneloopgreedy | RegexCode.Ci:
case RegexCode.Setloopgreedy | RegexCode.Ci:
case RegexCode.Oneloopgreedy | RegexCode.Ci | RegexCode.Rtl:
+ case RegexCode.Notoneloopgreedy | RegexCode.Ci | RegexCode.Rtl:
case RegexCode.Setloopgreedy | RegexCode.Ci | RegexCode.Rtl:
//: int c = Operand(1);
//: if (c > Rightchars())
}
else
{
+ Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopgreedy);
Bne(l1);
}
}
MarkLabel(l2);
- if (Code() != RegexCode.Oneloopgreedy && Code() != RegexCode.Setloopgreedy)
+ if (Code() != RegexCode.Oneloopgreedy && Code() != RegexCode.Notoneloopgreedy && Code() != RegexCode.Setloopgreedy)
{
Ldloc(lenLocal);
Ldloc(cLocal);
break;
case RegexNode.Notoneloop:
+ case RegexNode.Notoneloopgreedy:
case RegexNode.Notonelazy:
PushFC(new RegexFC(node.Ch, true, node.M == 0, ci));
break;
}
case RegexCode.Notoneloop:
+ case RegexCode.Notoneloopgreedy:
{
int c = Operand(1);
}
}
- if (c > i)
+ if (c > i && Operator() == RegexCode.Notoneloop)
+ {
TrackPush(c - i - 1, Textpos() - Bump());
+ }
advance = 2;
continue;
public const int EndZ = RegexCode.EndZ; // \Z
public const int End = RegexCode.End; // \z
- public const int Oneloopgreedy = RegexCode.Oneloopgreedy; // c,n (?> a*)
- public const int Setloopgreedy = RegexCode.Setloopgreedy; // set,n (?> \d*)
+ public const int Oneloopgreedy = RegexCode.Oneloopgreedy; // c,n (?> a*)
+ public const int Notoneloopgreedy = RegexCode.Notoneloopgreedy; // c,n (?> .*)
+ public const int Setloopgreedy = RegexCode.Setloopgreedy; // set,n (?> \d*)
// Interior nodes do not correspond to primitive operations, but
// control structures compositing other operations
{
switch (node.Type)
{
- case Setloop:
- node.Type = Setloopgreedy;
- break;
-
case Oneloop:
node.Type = Oneloopgreedy;
break;
+ case Notoneloop:
+ node.Type = Notoneloopgreedy;
+ break;
+
+ case Setloop:
+ node.Type = Setloopgreedy;
+ break;
+
case Capture:
case Greedy:
Debug.Assert(node.ChildCount() == 1);
child.Type = Oneloopgreedy;
return child;
+ case Notoneloop:
+ child.Type = Notoneloopgreedy;
+ return child;
+
case Setloop:
child.Type = Setloopgreedy;
return child;
case Oneloopgreedy:
+ case Notoneloopgreedy:
case Setloopgreedy:
return child;
}
case Oneloop:
case Oneloopgreedy:
case Notoneloop:
+ case Notoneloopgreedy:
case Setloop:
case Setloopgreedy:
valid = true;
continue;
}
- // If this node is a oneloop or a setloop, see if it overlaps with its successor in the concatenation.
- // If it doesn't, then we can upgrade it to being a oneloopgreedy or setloopgreedy, respectively.
+ // If this node is a one/notone/setloop, see if it overlaps with its successor in the concatenation.
+ // If it doesn't, then we can upgrade it to being a one/notone/setloopgreedy.
// Doing so avoids unnecessary backtracking.
switch (node.Type)
{
case Notone when node.Ch == subsequent.Ch:
case Notonelazy when subsequent.M > 0 && node.Ch == subsequent.Ch:
case Notoneloop when subsequent.M > 0 && node.Ch == subsequent.Ch:
+ case Notoneloopgreedy when subsequent.M > 0 && node.Ch == subsequent.Ch:
case Multi when node.Ch != subsequent.Str![0]:
case Set when !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
case Setlazy when subsequent.M > 0 && !RegexCharClass.CharInClass(node.Ch, subsequent.Str!):
}
break;
+ case Notoneloop:
+ switch (subsequent.Type)
+ {
+ case One when node.Ch == subsequent.Ch:
+ case Onelazy when subsequent.M > 0 && node.Ch == subsequent.Ch:
+ case Oneloop when subsequent.M > 0 && node.Ch == subsequent.Ch:
+ case Oneloopgreedy when subsequent.M > 0 && node.Ch == subsequent.Ch:
+ case Multi when node.Ch == subsequent.Str![0]:
+ case End:
+ node.Type = Notoneloopgreedy;
+ break;
+ }
+ break;
+
case Setloop:
switch (subsequent.Type)
{
case Notone when RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
case Notonelazy when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
case Notoneloop when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
- case Multi when !string.IsNullOrEmpty(subsequent.Str) && !RegexCharClass.CharInClass(subsequent.Str[0], node.Str!):
+ case Notoneloopgreedy when subsequent.M > 0 && RegexCharClass.CharInClass(subsequent.Ch, node.Str!):
+ case Multi when !RegexCharClass.CharInClass(subsequent.Str![0], node.Str!):
case Set when !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
case Setlazy when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
case Setloop when subsequent.M > 0 && !RegexCharClass.MayOverlap(node.Str!, subsequent.Str!):
"Testref", "Testgroup",
"", "", "", "", "", "",
"ECMABoundary", "NonECMABoundary",
- "Oneloopgreedy", "Setloopgreedy",
+ "Oneloopgreedy", "Notoneloopgreedy", "Setloopgreedy",
};
public string Description()
case Oneloop:
case Oneloopgreedy:
case Notoneloop:
+ case Notoneloopgreedy:
case Onelazy:
case Notonelazy:
case One:
case Oneloop:
case Oneloopgreedy:
case Notoneloop:
+ case Notoneloopgreedy:
case Onelazy:
case Notonelazy:
case Setloop:
break;
case RegexNode.Notoneloop:
+ case RegexNode.Notoneloopgreedy:
case RegexNode.Notonelazy:
case RegexNode.Oneloop:
case RegexNode.Oneloopgreedy: