{
internal static class DiagnosticDescriptors
{
+ private const string Category = "RegexGenerator"; // Category string shared by the descriptors declared in this class.
+
public static DiagnosticDescriptor InvalidRegexGeneratorAttribute { get; } = new DiagnosticDescriptor(
id: "SYSLIB1040",
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
- category: "RegexGenerator",
+ category: Category,
DiagnosticSeverity.Error,
isEnabledByDefault: true,
customTags: WellKnownDiagnosticTags.NotConfigurable);
id: "SYSLIB1041",
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
messageFormat: new LocalizableResourceString(nameof(SR.MultipleRegexGeneratorAttributesMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
- category: "RegexGenerator",
+ category: Category,
DiagnosticSeverity.Error,
isEnabledByDefault: true,
customTags: WellKnownDiagnosticTags.NotConfigurable);
id: "SYSLIB1042",
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
messageFormat: new LocalizableResourceString(nameof(SR.InvalidRegexArgumentsMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
- category: "RegexGenerator",
+ category: Category,
DiagnosticSeverity.Error,
isEnabledByDefault: true,
customTags: WellKnownDiagnosticTags.NotConfigurable);
id: "SYSLIB1043",
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
messageFormat: new LocalizableResourceString(nameof(SR.RegexMethodMustHaveValidSignatureMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
- category: "RegexGenerator",
+ category: Category,
DiagnosticSeverity.Error,
isEnabledByDefault: true,
customTags: WellKnownDiagnosticTags.NotConfigurable);
id: "SYSLIB1044",
title: new LocalizableResourceString(nameof(SR.InvalidRegexGeneratorAttributeTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
messageFormat: new LocalizableResourceString(nameof(SR.InvalidLangVersionMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
- category: "RegexGenerator",
+ category: Category,
DiagnosticSeverity.Error,
isEnabledByDefault: true,
customTags: WellKnownDiagnosticTags.NotConfigurable);
+
+ public static DiagnosticDescriptor LimitedSourceGeneration { get; } = new DiagnosticDescriptor(
+ id: "SYSLIB1045",
+ title: new LocalizableResourceString(nameof(SR.LimitedSourceGenerationTitle), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
+ messageFormat: new LocalizableResourceString(nameof(SR.LimitedSourceGenerationMessage), SR.ResourceManager, typeof(FxResources.System.Text.RegularExpressions.Generator.SR)),
+ category: Category,
+ DiagnosticSeverity.Info, // Informational only, unlike the Error-severity descriptors (SYSLIB1040-1044) in this class.
+ isEnabledByDefault: true); // No customTags argument is passed here, so WellKnownDiagnosticTags.NotConfigurable is not applied.
}
}
using System.CodeDom.Compiler;
using System.Collections;
using System.Collections.Generic;
+using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.IO;
using System.Security.Cryptography;
using System.Text;
using System.Threading;
+using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
// NOTE: The logic in this file is largely a copy of logic in RegexCompiler, emitting C# instead of MSIL.
{
public partial class RegexGenerator
{
- /// <summary>
- /// Value added to the written code to enable subsequent replacement with any variable declarations
- /// dynamically discovered during code generation.
- /// </summary>
- private const string AdditionalDeclarationsPlaceholder = "<>PLACEHOLDER_FOR_ADDITIONAL_DECLARATIONS";
-
/// <summary>Code for a [GeneratedCode] attribute to put on the top-level generated members.</summary>
private static readonly string s_generatedCodeAttribute = $"[global::System.CodeDom.Compiler.GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")]";
/// <summary>Header comments and usings to include at the top of every generated file.</summary>
};
/// <summary>Generates the code for one regular expression class.</summary>
- private static string EmitRegexType(RegexType regexClass)
+ private static (string, ImmutableArray<Diagnostic>) EmitRegexType(RegexType regexClass)
{
var sb = new StringBuilder(1024);
var writer = new IndentedTextWriter(new StringWriter(sb));
generatedName += ComputeStringHash(generatedName).ToString("X");
// Generate the regex type
- EmitRegexMethod(writer, regexClass.Method, generatedName);
+ ImmutableArray<Diagnostic> diagnostics = EmitRegexMethod(writer, regexClass.Method, generatedName);
while (writer.Indent != 0)
{
}
writer.Flush();
- return sb.ToString();
+ return (sb.ToString(), diagnostics);
// FNV-1a hash function. The actual algorithm used doesn't matter; just something simple
- // to create a pseudo-random value based on input text.
+ // to create a deterministic, pseudo-random value that's based on input text.
static uint ComputeStringHash(string s)
{
uint hashCode = 2166136261;
}
/// <summary>Gets whether a given regular expression method is supported by the code generator.</summary>
- private static bool SupportsCustomCodeGeneration(RegexMethod rm) =>
- // The generator doesn't currently know how to emit code for NonBacktracking.
- (rm.Options & RegexOptions.NonBacktracking) == 0;
+ private static bool SupportsCodeGeneration(RegexMethod rm)
+ {
+ RegexNode root = rm.Code.Tree.Root;
+
+ if (!root.SupportsCompilation())
+ {
+ return false;
+ }
+
+ if (ExceedsMaxDepthForSimpleCodeGeneration(root, allowedDepth: 40))
+ {
+ // Deep RegexNode trees can result in emitting C# code that exceeds C# compiler
+ // limitations, leading to "CS8078: An expression is too long or complex to compile".
+ // Place an artificial limit on max tree depth in order to mitigate such issues.
+ // The allowed depth can be tweaked as needed; it's exceedingly rare to find
+ // expressions with such deep trees.
+ return false;
+ }
+
+ return true;
+
+ static bool ExceedsMaxDepthForSimpleCodeGeneration(RegexNode node, int allowedDepth)
+ {
+ if (allowedDepth <= 0) // The depth budget ran out on this path, so the tree is deeper than allowed.
+ {
+ return true;
+ }
+
+ int childCount = node.ChildCount();
+ for (int i = 0; i < childCount; i++)
+ {
+ if (ExceedsMaxDepthForSimpleCodeGeneration(node.Child(i), allowedDepth - 1)) // Each level of descent consumes one unit of the budget.
+ {
+ return true;
+ }
+ }
+
+ return false;
+ }
+ }
/// <summary>Generates the code for a regular expression method.</summary>
- private static void EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id)
+ private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id)
{
string patternExpression = Literal(rm.Pattern);
string optionsExpression = $"(global::System.Text.RegularExpressions.RegexOptions)({(int)rm.Options})";
writer.Write(" public static global::System.Text.RegularExpressions.Regex Instance { get; } = ");
// If we can't support custom generation for this regex, spit out a Regex constructor call.
- if (!SupportsCustomCodeGeneration(rm))
+ if (!SupportsCodeGeneration(rm))
{
writer.WriteLine($"new global::System.Text.RegularExpressions.Regex({patternExpression}, {optionsExpression}, {timeoutExpression});");
writer.WriteLine("}");
- return;
+ return ImmutableArray.Create(Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, rm.MethodSyntax.GetLocation()));
}
writer.WriteLine($"new {id}();");
writer.WriteLine($" }}");
writer.WriteLine($" }}");
writer.WriteLine("}");
+ return ImmutableArray<Diagnostic>.Empty;
static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
{
{
RegexOptions options = (RegexOptions)rm.Options;
RegexCode code = rm.Code;
- bool rtl = code.RightToLeft;
bool hasTextInfo = false;
// In some cases, we need to emit declarations at the beginning of the method, but we only discover we need them later.
// Emit locals initialization
writer.WriteLine("global::System.ReadOnlySpan<char> runtextSpan = base.runtext;");
writer.WriteLine("int runtextpos = base.runtextpos;");
- if (rtl)
- {
- writer.WriteLine("int runtextbeg = base.runtextbeg;");
- }
- writer.WriteLine($"int runtextend = base.runtextend;{AdditionalDeclarationsPlaceholder}"); // placeholder at the end of a line so the generated indents line up
+ writer.Write($"int runtextend = base.runtextend;");
+ writer.Flush();
+ int additionalDeclarationsPosition = ((StringWriter)writer.InnerWriter).GetStringBuilder().Length;
+ int additionalDeclarationsIndent = writer.Indent;
writer.WriteLine();
// Generate length check. If the input isn't long enough to possibly match, fail quickly.
// especially since we want the "return false" code regardless.
int minRequiredLength = rm.Code.Tree.MinRequiredLength;
Debug.Assert(minRequiredLength >= 0);
- string clause = !rtl ?
- minRequiredLength switch
- {
- 0 => "if (runtextpos <= runtextend)",
- 1 => "if (runtextpos < runtextend)",
- _ => $"if (runtextpos < runtextend - {minRequiredLength - 1})"
- } :
- minRequiredLength switch
- {
- 0 => "if (runtextpos >= runtextbeg)",
- 1 => "if (runtextpos > runtextbeg)",
- _ => $"if (runtextpos - {minRequiredLength - 1} > runtextbeg)"
- };
+ string clause = minRequiredLength switch
+ {
+ 0 => "if (runtextpos <= runtextend)",
+ 1 => "if (runtextpos < runtextend)",
+ _ => $"if (runtextpos < runtextend - {minRequiredLength - 1})"
+ };
using (EmitBlock(writer, clause))
{
// Emit any anchors.
EmitIndexOf_LeftToRight(code.FindOptimizations.LeadingCaseSensitivePrefix);
break;
- case FindNextStartingPositionMode.LeadingPrefix_RightToLeft_CaseSensitive:
- Debug.Assert(!string.IsNullOrEmpty(code.FindOptimizations.LeadingCaseSensitivePrefix));
- EmitIndexOf_RightToLeft(code.FindOptimizations.LeadingCaseSensitivePrefix);
- break;
-
case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive:
case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive:
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive:
EmitFixedSet_LeftToRight();
break;
- case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive:
- case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive:
- Debug.Assert(code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
- EmitFixedSet_RightToLeft();
- break;
-
default:
Debug.Fail($"Unexpected mode: {code.FindOptimizations.FindMode}");
goto case FindNextStartingPositionMode.NoSearch;
writer.WriteLine("// No match");
writer.WriteLine("ReturnFalse:");
- writer.WriteLine(!rm.Code.RightToLeft ? "base.runtextpos = runtextend;" : "base.runtextpos = runtextbeg;");
+ writer.WriteLine("base.runtextpos = runtextend;");
writer.WriteLine("return false;");
// We're done. Patch up any additional declarations.
- ReplaceAdditionalDeclarations(additionalDeclarations, writer);
+ ReplaceAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent);
return;
// Emits any anchors. Returns true if the anchor roots any match to a specific location and thus no further
// Generate anchor checks.
if ((code.FindOptimizations.LeadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End | RegexPrefixAnalyzer.Bol)) != 0)
{
- // TODO: Interpreted and Compiled differ in various places as to whether they update positions, as do LTR vs RTL. Determine why.
switch (code.FindOptimizations.LeadingAnchor)
{
case RegexPrefixAnalyzer.Beginning:
writer.WriteLine("// Beginning \\A anchor");
- if (!rtl)
- {
- using (EmitBlock(writer, "if (runtextpos > runtextbeg)"))
- {
- writer.WriteLine("goto ReturnFalse;");
- }
- }
- else
+ using (EmitBlock(writer, "if (runtextpos > runtextbeg)"))
{
- // TODO: RegexOptions.Compiled doesn't ever return false here. Instead it updates the position. Why?
- using (EmitBlock(writer, "if (runtextpos > runtextbeg)"))
- {
- writer.WriteLine("base.runtextpos = runtextbeg;");
- }
+ writer.WriteLine("goto ReturnFalse;");
}
writer.WriteLine("return true;");
return true;
case RegexPrefixAnalyzer.Start:
writer.WriteLine("// Start \\G anchor");
- if (!rtl)
- {
- using (EmitBlock(writer, "if (runtextpos > runtextstart)"))
- {
- writer.WriteLine("goto ReturnFalse;");
- }
- }
- else
+ using (EmitBlock(writer, "if (runtextpos > runtextstart)"))
{
- // TODO: RegexOptions.Compiled doesn't ever return false here. Instead it updates the position. Why?
- using (EmitBlock(writer, "if (runtextpos < runtextstart)"))
- {
- writer.WriteLine("goto ReturnFalse;");
- }
+ writer.WriteLine("goto ReturnFalse;");
}
writer.WriteLine("return true;");
return true;
case RegexPrefixAnalyzer.EndZ:
- // TODO: Why are the LTR and RTL cases inconsistent here with RegexOptions.Compiled?
writer.WriteLine("// End \\Z anchor");
- if (!rtl)
- {
- using (EmitBlock(writer, "if (runtextpos < runtextend - 1)"))
- {
- writer.WriteLine("base.runtextpos = runtextend - 1;");
- }
- }
- else
+ using (EmitBlock(writer, "if (runtextpos < runtextend - 1)"))
{
- // TODO: This differs subtly between interpreted and compiled. Why?
- using (EmitBlock(writer, "if (runtextpos < runtextend - 1 || (runtextpos == runtextend - 1 && runtextSpan[runtextpos] != '\\n'))"))
- {
- writer.WriteLine("goto ReturnFalse;");
- }
+ writer.WriteLine("base.runtextpos = runtextend - 1;");
}
writer.WriteLine("return true;");
return true;
case RegexPrefixAnalyzer.End:
writer.WriteLine("// End \\z anchor");
- if (!rtl)
- {
- using (EmitBlock(writer, "if (runtextpos < runtextend)"))
- {
- writer.WriteLine("base.runtextpos = runtextend;");
- }
- }
- else
+ using (EmitBlock(writer, "if (runtextpos < runtextend)"))
{
- using (EmitBlock(writer, "if (runtextpos < runtextend)"))
- {
- writer.WriteLine("goto ReturnFalse;");
- }
+ writer.WriteLine("base.runtextpos = runtextend;");
}
writer.WriteLine("return true;");
return true;
// other anchors like Beginning, there are potentially multiple places a BOL can match. So unlike
// the other anchors, which all skip all subsequent processing if found, with BOL we just use it
// to boost our position to the next line, and then continue normally with any searches.
- Debug.Assert(!rtl, "RightToLeft isn't implemented and should have been filtered out previously");
writer.WriteLine("// Beginning-of-line anchor");
using (EmitBlock(writer, "if (runtextpos > runtextbeg && runtextSpan[runtextpos - 1] != '\\n')"))
{
writer.WriteLine("}");
}
- // Emits a case-sensitive right-to-left prefix search for a string at the beginning of the pattern.
- void EmitIndexOf_RightToLeft(string prefix)
- {
- writer.WriteLine($"int i = global::System.MemoryExtensions.LastIndexOf(runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg), {Literal(prefix)});");
- writer.WriteLine("if (i >= 0)");
- writer.WriteLine("{");
- writer.WriteLine($" base.runtextpos = runtextbeg + i + {prefix.Length};");
- writer.WriteLine(" return true;");
- writer.WriteLine("}");
- }
-
- // Emits a right-to-left search for a set at a fixed position from the start of the pattern.
- // (Currently that position will always be a distance of 0, meaning the start of the pattern itself.)
- void EmitFixedSet_RightToLeft()
- {
- (char[]? Chars, string Set, int Distance, bool CaseInsensitive) set = code.FindOptimizations.FixedDistanceSets![0];
- Debug.Assert(set.Distance == 0);
-
- if (set.Chars is { Length: 1 } && !set.CaseInsensitive)
- {
- writer.WriteLine($"int i = global::System.MemoryExtensions.LastIndexOf(runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg), {Literal(set.Chars[0])});");
- writer.WriteLine("if (i >= 0)");
- writer.WriteLine("{");
- writer.WriteLine(" base.runtextpos = runtextbeg + i + 1;");
- writer.WriteLine(" return true;");
- writer.WriteLine("}");
- }
- else
- {
- using (EmitBlock(writer, "for (int i = runtextpos - 1; i >= runtextbeg; i--)"))
- {
- using (EmitBlock(writer, $"if ({MatchCharacterClass(hasTextInfo, options, "runtextSpan[i]", set.Set, set.CaseInsensitive, additionalDeclarations)})"))
- {
- writer.WriteLine("base.runtextpos = i + 1;");
- writer.WriteLine("return true;");
- }
- }
- }
- }
-
// Emits a left-to-right search for a set at a fixed position from the start of the pattern,
// and potentially other sets at other fixed positions in the pattern.
void EmitFixedSet_LeftToRight()
bool needsCulture = rm.Code.FindOptimizations.FindMode switch
{
FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive or
- FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive or
FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
- FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or
- FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive => true,
+ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive => true,
_ when rm.Code.FindOptimizations.FixedDistanceSets is List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive),
/// <summary>Emits the body of the Go override.</summary>
private static void EmitGo(IndentedTextWriter writer, RegexMethod rm, string id)
{
- Debug.Assert(rm.Code.Tree.Root.Type == RegexNode.Capture);
-
- if ((rm.Options & RegexOptions.NonBacktracking) != 0)
- {
- EmitNonBacktrackingGo(writer, rm, id);
- return;
- }
- RegexNode root = rm.Code.Tree.Root;
- if (!ExceedsMaxDepthForSimpleCodeGeneration(root) &&
- root.Child(0).SupportsSimplifiedCodeGenerationImplementation() &&
- (((RegexOptions)root.Options) & RegexOptions.RightToLeft) == 0)
- {
- EmitSimplifiedGo(writer, rm, id);
- return;
- }
-
- EmitCompleteGo(writer, rm, id);
-
- // Deep RegexNode trees used with the simplified code generator can result in
- // emitting C# code that exceeds C# compiler limitations, leading to "CS8078: An
- // expression is too long or complex to compile". Place an artificial limit on
- // max tree depth in order to mitigate such issues.
- static bool ExceedsMaxDepthForSimpleCodeGeneration(RegexNode node, int maxDepth = 30)
- {
- if (maxDepth <= 0)
- {
- return true;
- }
-
- int childCount = node.ChildCount();
- for (int i = 0; i < childCount; i++)
- {
- if (ExceedsMaxDepthForSimpleCodeGeneration(node.Child(i), maxDepth - 1))
- {
- return true;
- }
- }
-
- return false;
- }
- }
-
- /// <summary>Emits the body of a Go method supporting RegexOptions.NonBacktracking.</summary>
- private static void EmitNonBacktrackingGo(IndentedTextWriter writer, RegexMethod rm, string id)
- {
- // TODO: Implement this and remove SupportsCustomCodeGeneration.
- }
+ // In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled
+ // version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via
+ // RegexWriter; the interpreter would then sit in a loop processing those opcodes, and the RegexCompiler iterated through the
+ // opcodes generating code for each equivalent to what the interpreter would do albeit with some decisions made at compile-time
+ // rather than at run-time. This approach, however, led to complicated code that wasn't pay-for-play (e.g. a big backtracking
+ // jump table that all compilations went through even if there was no backtracking), that didn't factor in the shape of the
+ // tree (e.g. it's difficult to add optimizations based on interactions between nodes in the graph), and that didn't read well
+ // when decompiled from IL to C# or when directly emitted as C# as part of a source generator.
+ //
+ // This implementation is instead based on directly walking the RegexNode tree and outputting code for each node in the graph.
+ // A dedicated function for each kind of RegexNode emits the code necessary to handle that node's processing, including recursively
+ // calling the relevant function for any of its children nodes. Backtracking is handled not via a giant jump table, but instead
+ // by emitting direct jumps to each backtracking construct. This is achieved by having all match failures jump to a "done"
+ // label that can be changed by a previous emitter, e.g. before EmitLoop returns, it ensures that "doneLabel" is set to the
+ // label that code should jump back to when backtracking. That way, a subsequent EmitXx function doesn't need to know exactly
+ // where to jump: it simply always jumps to "doneLabel" on match failure, and "doneLabel" is always configured to point to
+ // the right location. In an expression without backtracking, or before any backtracking constructs have been encountered,
+ // "doneLabel" is simply the final return location from the Go method that will undo any captures and exit, signaling to
+ // the calling scan loop that nothing was matched.
- /// <summary>Emits the body of a simplified Go implementation that's possible when there's minimal backtracking required by the expression.</summary>
- private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, string id)
- {
// Arbitrary limit for unrolling vs creating a loop. We want to balance size in the generated
// code with other costs, like the (small) overhead of slicing to create the temp span to iterate.
const int MaxUnrollSize = 16;
RegexOptions options = (RegexOptions)rm.Options;
RegexCode code = rm.Code;
- bool rtl = code.RightToLeft;
bool hasTimeout = false;
// Helper to define names. Names start unadorned, but as soon as there's repetition,
writer.WriteLine("string runtext = base.runtext!;");
writer.WriteLine("int runtextpos = base.runtextpos;");
writer.WriteLine("int runtextend = base.runtextend;");
- writer.WriteLine($"int original_runtextpos = runtextpos;{AdditionalDeclarationsPlaceholder}"); // placeholder at the end of a line so the generated indents line up
- writer.WriteLine("int runstackpos = 0;");
+ writer.WriteLine($"int original_runtextpos = runtextpos;");
hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm);
+ writer.Write("int runstackpos = 0;");
+ writer.Flush();
+ int additionalDeclarationsPosition = ((StringWriter)writer.InnerWriter).GetStringBuilder().Length;
+ int additionalDeclarationsIndent = writer.Indent;
+ writer.WriteLine();
// TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo; // only if the whole expression or any subportion is ignoring case, and we're not using invariant
bool hasTextInfo = EmitInitializeCultureForGoIfNecessary(writer, rm);
}
// We're done. Patch up any additional declarations.
- ReplaceAdditionalDeclarations(additionalDeclarations, writer);
+ ReplaceAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent);
return;
static bool IsCaseInsensitive(RegexNode node) => (node.Options & RegexOptions.IgnoreCase) != 0;
// construct is responsible for unwinding back to its starting crawl position. If
// it eventually ends up failing, that failure will result in jumping to the next branch
// of the alternation, which will again dutifully unwind the remaining captures until
- // what they were at the start of the alternation.
+ // what they were at the start of the alternation. Of course, if there are no captures
+ // anywhere in the regex, we don't have to do any of that.
string? startingCrawlPos = null;
- if ((node.Options & RegexNode.HasCapturesFlag) != 0 || !isAtomic)
+ if (expressionHasCaptures && ((node.Options & RegexNode.HasCapturesFlag) != 0 || !isAtomic))
{
startingCrawlPos = ReserveName("alternation_starting_crawlpos");
additionalDeclarations.Add($"int {startingCrawlPos} = 0;");
{
EmitRunstackResizeIfNeeded(2);
writer.WriteLine($"{RunstackPush()} = {i};");
- writer.WriteLine($"{RunstackPush()} = {startingCrawlPos};");
+ if (startingCrawlPos is not null)
+ {
+ writer.WriteLine($"{RunstackPush()} = {startingCrawlPos};");
+ }
writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};");
}
labelMap[i] = doneLabel;
// "doneLabel" to the label for this section. Thus, we only need to emit it if
// something can backtrack to us, which can't happen if we're inside of an atomic
// node. Thus, emit the backtracking section only if we're non-atomic.
- if (!isAtomic)
+ if (isAtomic)
+ {
+ doneLabel = originalDoneLabel;
+ }
+ else
{
doneLabel = backtrackLabel;
MarkLabel(backtrackLabel, emitSemicolon: false);
writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};");
- writer.WriteLine($"{startingCrawlPos} = {RunstackPop()};");
+ if (startingCrawlPos is not null)
+ {
+ writer.WriteLine($"{startingCrawlPos} = {RunstackPop()};");
+ }
using (EmitBlock(writer, $"switch ({RunstackPop()})"))
{
for (int i = 0; i < labelMap.Length; i++)
// Emits the code for an if(backreference)-then-else conditional.
void EmitBackreferenceConditional(RegexNode node)
{
+ bool isAtomic = node.IsAtomicByParent();
+
// We're branching in a complicated fashion. Make sure textSpanPos is 0.
TransferTextSpanPosToRunTextPos();
}
}
- // If either the yes branch or the no branch contained backtracking, subsequent expressions
- // might try to backtrack to here, so output a backtracking map based on resumeAt.
- if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel)
+ if (isAtomic)
{
- // Skip the backtracking section.
- writer.WriteLine($"goto {endRef};");
- writer.WriteLine();
+ doneLabel = originalDoneLabel;
+ }
+ else
+ {
+ // If either the yes branch or the no branch contained backtracking, subsequent expressions
+ // might try to backtrack to here, so output a backtracking map based on resumeAt.
+ if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel)
+ {
+ // Skip the backtracking section.
+ writer.WriteLine($"goto {endRef};");
+ writer.WriteLine();
- string backtrack = ReserveName("ConditionalBackreferenceBacktrack");
- doneLabel = backtrack;
- MarkLabel(backtrack);
+ string backtrack = ReserveName("ConditionalBackreferenceBacktrack");
+ doneLabel = backtrack;
+ MarkLabel(backtrack);
- writer.WriteLine($"{resumeAt} = {RunstackPop()};");
+ writer.WriteLine($"{resumeAt} = {RunstackPop()};");
- using (EmitBlock(writer, $"switch ({resumeAt})"))
- {
- if (postIfDoneLabel != originalDoneLabel)
+ using (EmitBlock(writer, $"switch ({resumeAt})"))
{
- writer.WriteLine($"case 0: goto {postIfDoneLabel};");
- }
+ if (postIfDoneLabel != originalDoneLabel)
+ {
+ writer.WriteLine($"case 0: goto {postIfDoneLabel};");
+ }
- if (postElseDoneLabel != originalDoneLabel)
- {
- writer.WriteLine($"case 1: goto {postElseDoneLabel};");
- }
+ if (postElseDoneLabel != originalDoneLabel)
+ {
+ writer.WriteLine($"case 1: goto {postElseDoneLabel};");
+ }
- writer.WriteLine($"default: goto {originalDoneLabel};");
+ writer.WriteLine($"default: goto {originalDoneLabel};");
+ }
}
}
if (postIfDoneLabel != originalDoneLabel || hasNo)
{
MarkLabel(endRef);
- if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel)
+ if (!isAtomic)
{
- EmitRunstackResizeIfNeeded(1);
- writer.WriteLine($"{RunstackPush()} = {resumeAt};");
+ if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel)
+ {
+ EmitRunstackResizeIfNeeded(1);
+ writer.WriteLine($"{RunstackPush()} = {resumeAt};");
+ }
}
}
}
// Emits the code for an if(expression)-then-else conditional.
void EmitExpressionConditional(RegexNode node)
{
+ bool isAtomic = node.IsAtomicByParent();
+
// We're branching in a complicated fashion. Make sure textSpanPos is 0.
TransferTextSpanPosToRunTextPos();
string postConditionalDoneLabel = doneLabel;
string resumeAt = ReserveName("conditionalexpression_resumeAt");
- additionalDeclarations.Add($"int {resumeAt} = 0;");
+ if (!isAtomic)
+ {
+ additionalDeclarations.Add($"int {resumeAt} = 0;");
+ }
// If we get to this point of the code, the conditional successfully matched, so run the "yes" branch.
// Since the "yes" branch may have a different execution path than the "no" branch or the lack of
EmitNode(yesBranch);
TransferTextSpanPosToRunTextPos(); // ensure all subsequent code sees the same textSpanPos value by setting it to 0
string postYesDoneLabel = doneLabel;
- if (postYesDoneLabel != originalDoneLabel)
+ if (!isAtomic && postYesDoneLabel != originalDoneLabel)
{
writer.WriteLine($"{resumeAt} = 0;");
}
EmitNode(noBranch);
TransferTextSpanPosToRunTextPos(); // ensure all subsequent code sees the same textSpanPos value by setting it to 0
postNoDoneLabel = doneLabel;
- if (postNoDoneLabel != originalDoneLabel)
+ if (!isAtomic && postNoDoneLabel != originalDoneLabel)
{
writer.WriteLine($"{resumeAt} = 1;");
}
// There's only a yes branch. If it's going to cause us to output a backtracking
// label but code may not end up taking the yes branch path, we need to emit a resumeAt
// that will cause the backtracking to immediately pass through this node.
- if (postYesDoneLabel != originalDoneLabel)
+ if (!isAtomic && postYesDoneLabel != originalDoneLabel)
{
writer.WriteLine($"{resumeAt} = 2;");
}
}
- if (postYesDoneLabel != postConditionalDoneLabel || postNoDoneLabel != postConditionalDoneLabel)
+ if (isAtomic)
{
- // Skip the backtracking section.
- writer.WriteLine($"goto {end};");
- writer.WriteLine();
+ doneLabel = originalDoneLabel;
+ }
+ else
+ {
+ if (postYesDoneLabel != postConditionalDoneLabel || postNoDoneLabel != postConditionalDoneLabel)
+ {
+ // Skip the backtracking section.
+ writer.WriteLine($"goto {end};");
+ writer.WriteLine();
- string backtrack = ReserveName("ConditionalExpressionBacktrack");
- doneLabel = backtrack;
- MarkLabel(backtrack);
+ string backtrack = ReserveName("ConditionalExpressionBacktrack");
+ doneLabel = backtrack;
+ MarkLabel(backtrack);
- using (EmitBlock(writer, $"switch ({RunstackPop()})"))
- {
- if (postYesDoneLabel != postConditionalDoneLabel)
+ using (EmitBlock(writer, $"switch ({RunstackPop()})"))
{
- writer.WriteLine($"case 0: goto {postYesDoneLabel};");
- }
+ if (postYesDoneLabel != postConditionalDoneLabel)
+ {
+ writer.WriteLine($"case 0: goto {postYesDoneLabel};");
+ }
- if (postNoDoneLabel != postConditionalDoneLabel && postNoDoneLabel != originalDoneLabel)
- {
- writer.WriteLine($"case 1: goto {postNoDoneLabel};");
- }
+ if (postNoDoneLabel != postConditionalDoneLabel && postNoDoneLabel != originalDoneLabel)
+ {
+ writer.WriteLine($"case 1: goto {postNoDoneLabel};");
+ }
- writer.WriteLine($"default: goto {postConditionalDoneLabel};");
+ writer.WriteLine($"default: goto {postConditionalDoneLabel};");
+ }
}
- }
- if (postYesDoneLabel != originalDoneLabel || postNoDoneLabel != originalDoneLabel)
- {
- EmitRunstackResizeIfNeeded(1);
- writer.WriteLine($"{RunstackPush()} = {resumeAt};");
+ if (postYesDoneLabel != originalDoneLabel || postNoDoneLabel != originalDoneLabel)
+ {
+ EmitRunstackResizeIfNeeded(1);
+ writer.WriteLine($"{RunstackPush()} = {resumeAt};");
+ }
}
MarkLabel(end);
Debug.Assert(node.Type == RegexNode.Capture);
int capnum = RegexParser.MapCaptureNumber(node.M, rm.Code.Caps);
int uncapnum = RegexParser.MapCaptureNumber(node.N, rm.Code.Caps);
+ bool isAtomic = node.IsAtomicByParent();
TransferTextSpanPosToRunTextPos();
string startingRunTextPos = ReserveName("capture_starting_runtextpos");
writer.WriteLine($"base.TransferCapture({capnum}, {uncapnum}, {startingRunTextPos}, runtextpos);");
}
- if (childBacktracks || node.IsInLoop())
+ if (!isAtomic && (childBacktracks || node.IsInLoop()))
{
writer.WriteLine();
doneLabel = backtrack;
MarkLabel(end);
}
+ else
+ {
+ doneLabel = originalDoneLabel;
+ }
}
// Emits code to unwind the capture stack until the crawl position specified in the provided local.
writer.WriteLine("base.runtextpos = runtextpos;");
}
+ // Emits code for a concatenation
void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChecksIfRequired)
{
+ // Emit the code for each child one after the other.
int childCount = node.ChildCount();
for (int i = 0; i < childCount; i++)
{
+ // If we can find a subsequence of fixed-length children, we can emit a length check once for that sequence
+ // and then skip the individual length checks for each. We also want to minimize the repetition of if blocks,
+ // and so we try to emit a series of clauses all part of the same if block rather than one if block per child.
if (emitLengthChecksIfRequired && node.TryGetJoinableLengthCheckChildRange(i, out int requiredLength, out int exclusiveEnd))
{
bool wroteClauses = true;
if (child.Type is RegexNode.One or RegexNode.Notone or RegexNode.Set)
{
WriteSingleCharChild(child);
- writer.Write($" /* {DescribeNode(child)} */");
}
else if (child.Type is RegexNode.Oneloop or RegexNode.Onelazy or RegexNode.Oneloopatomic or
RegexNode.Setloop or RegexNode.Setlazy or RegexNode.Setloopatomic or
for (int c = 0; c < child.M; c++)
{
WriteSingleCharChild(child);
- if (c == 0)
- {
- writer.Write($" /* {DescribeNode(child)} */");
- }
}
}
else
}
i--;
+ continue;
}
- else
- {
- EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: emitLengthChecksIfRequired);
- }
+
+ EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: emitLengthChecksIfRequired);
}
}
int minIterations = node.M;
int maxIterations = node.N;
string originalDoneLabel = doneLabel;
+ bool isAtomic = node.IsAtomicByParent();
// If this is actually an atomic lazy loop, we need to output just the minimum number of iterations,
// as nothing will backtrack into the lazy loop to get it to progress further.
- if (node.IsAtomicByParent())
+ if (isAtomic)
{
switch (minIterations)
{
MarkLabel(endLoop);
- // Store the capture's state and skip the backtracking section
- EmitRunstackResizeIfNeeded(3);
- writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};");
- writer.WriteLine($"{RunstackPush()} = {iterationCount};");
- writer.WriteLine($"{RunstackPush()} = {sawEmpty};");
- string skipBacktrack = ReserveName("SkipBacktrack");
- writer.WriteLine($"goto {skipBacktrack};");
- writer.WriteLine();
+ if (!isAtomic)
+ {
+ // Store the capture's state and skip the backtracking section
+ EmitRunstackResizeIfNeeded(3);
+ writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};");
+ writer.WriteLine($"{RunstackPush()} = {iterationCount};");
+ writer.WriteLine($"{RunstackPush()} = {sawEmpty};");
+ string skipBacktrack = ReserveName("SkipBacktrack");
+ writer.WriteLine($"goto {skipBacktrack};");
+ writer.WriteLine();
- // Emit a backtracking section that restores the capture's state and then jumps to the previous done label
- string backtrack = ReserveName($"LazyLoopBacktrack");
- MarkLabel(backtrack);
+ // Emit a backtracking section that restores the capture's state and then jumps to the previous done label
+ string backtrack = ReserveName($"LazyLoopBacktrack");
+ MarkLabel(backtrack);
- writer.WriteLine($"{sawEmpty} = {RunstackPop()};");
- writer.WriteLine($"{iterationCount} = {RunstackPop()};");
- writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};");
+ writer.WriteLine($"{sawEmpty} = {RunstackPop()};");
+ writer.WriteLine($"{iterationCount} = {RunstackPop()};");
+ writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};");
- if (maxIterations == int.MaxValue)
- {
- using (EmitBlock(writer, $"if ({sawEmpty} == 0)"))
+ if (maxIterations == int.MaxValue)
{
- writer.WriteLine($"goto {body};");
+ using (EmitBlock(writer, $"if ({sawEmpty} == 0)"))
+ {
+ writer.WriteLine($"goto {body};");
+ }
}
- }
- else
- {
- using (EmitBlock(writer, $"if ({iterationCount} < {maxIterations} && {sawEmpty} == 0)"))
+ else
{
- writer.WriteLine($"goto {body};");
+ using (EmitBlock(writer, $"if ({iterationCount} < {maxIterations} && {sawEmpty} == 0)"))
+ {
+ writer.WriteLine($"goto {body};");
+ }
}
- }
- writer.WriteLine($"goto {doneLabel};");
- writer.WriteLine();
+ writer.WriteLine($"goto {doneLabel};");
+ writer.WriteLine();
- doneLabel = backtrack;
- MarkLabel(skipBacktrack);
+ doneLabel = backtrack;
+ MarkLabel(skipBacktrack);
+ }
}
// Emits the code to handle a loop (repeater) with a fixed number of iterations.
Debug.Assert(node.N >= node.M, $"Unexpected M={node.M}, N={node.N}");
int minIterations = node.M;
int maxIterations = node.N;
+ bool isAtomic = node.IsAtomicByParent();
// We might loop any number of times. In order to ensure this loop and subsequent code sees textSpanPos
// the same regardless, we always need it to contain the same value, and the easiest such value is 0.
}
}
- if (childBacktracks)
+ if (isAtomic)
{
- writer.WriteLine($"goto {endLoop};");
- writer.WriteLine();
-
- string backtrack = ReserveName("LoopBacktrack");
- MarkLabel(backtrack);
- using (EmitBlock(writer, $"if ({iterationCount} == 0)"))
- {
- writer.WriteLine($"goto {originalDoneLabel};");
- }
- writer.WriteLine($"goto {doneLabel};");
- doneLabel = backtrack;
+ doneLabel = originalDoneLabel;
+ MarkLabel(endLoop);
}
-
- MarkLabel(endLoop);
-
-
-
- if (node.IsInLoop())
- {
- writer.WriteLine();
-
- // Store the capture's state
- EmitRunstackResizeIfNeeded(3);
- writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};");
- writer.WriteLine($"{RunstackPush()} = {iterationCount};");
-
- // Skip past the backtracking section
- string end = ReserveName("SkipBacktrack");
- writer.WriteLine($"goto {end};");
- writer.WriteLine();
-
- // Emit a backtracking section that restores the capture's state and then jumps to the previous done label
- string backtrack = ReserveName("LoopBacktrack");
- MarkLabel(backtrack);
- writer.WriteLine($"{iterationCount} = {RunstackPop()};");
- writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};");
-
- writer.WriteLine($"goto {doneLabel};");
- writer.WriteLine();
-
- doneLabel = backtrack;
- MarkLabel(end);
- }
- }
-
- void EmitRunstackResizeIfNeeded(int count)
- {
- string subCount = count > 1 ? $" - {count - 1}" : "";
- using (EmitBlock(writer, $"if (runstackpos >= base.runstack!.Length{subCount})"))
- {
- writer.WriteLine("global::System.Array.Resize(ref base.runstack, base.runstack.Length * 2);");
- }
- }
-
- string RunstackPush() => "base.runstack[runstackpos++]";
- string RunstackPop() => "base.runstack![--runstackpos]";
- }
-
- /// <summary>Emits the body of a complete Go implementation that fully supports backtracking.</summary>
- private static void EmitCompleteGo(IndentedTextWriter writer, RegexMethod rm, string id)
- {
- const int Stackpop = 0; // pop one
- const int Stackpop2 = 1; // pop two
- const int Capback = 3; // uncapture
- const int Capback2 = 4; // uncapture 2
- const int Branchmarkback2 = 5; // back2 part of branchmark
- const int Lazybranchmarkback2 = 6; // back2 part of lazybranchmark
- const int Branchcountback2 = 7; // back2 part of branchcount
- const int Lazybranchcountback2 = 8; // back2 part of lazybranchcount
- const int Forejumpback = 9; // back part of forejump
- const int Uniquecount = 10;
- const string Backtrack = "Backtrack"; // label for backtracking
-
- int[] codes = rm.Code.Codes;
- RegexOptions options = rm.Options;
-
- int labelCounter = 0;
- string DefineLabel(string prefix = "L") => $"{prefix}{labelCounter++}";
- void MarkLabel(string label) => writer.WriteLine($"{label}:");
-
- var labels = new string?[codes.Length]; // a label for every operation in _codes
- BacktrackNote[]? notes = null; // a list of the backtracking states to be generated
- int noteCount = 0; // true count of _notes (allocation grows exponentially)
-
- int currentOpcode = 0; // the current opcode being processed
- int currentCodePos = 0; // the current code being translated
- int currentBacktrackNote = 0; // the current backtrack-note being translated
-
- // special code fragments
- var uniqueNote = new int[Uniquecount]; // notes indices for code that should be emitted <= once
- var forwardJumpsThroughSwitch = new int[codes.Length]; // indices for forward-jumps-through-switch (for allocations)
-
- // Generates the forward logic corresponding directly to the regex codes.
- // In the absence of backtracking, this is all we would need.
- writer.WriteLine("string runtext = base.runtext!;");
- writer.WriteLine("int runtextbeg = base.runtextbeg;");
- writer.WriteLine("int runtextend = base.runtextend;");
- writer.WriteLine("int runtextpos = base.runtextpos;");
- writer.WriteLine("int[] runtrack = base.runtrack!;");
- writer.WriteLine("int runtrackpos = base.runtrackpos;");
- writer.WriteLine("int[] runstack = base.runstack!;");
- writer.WriteLine("int runstackpos = base.runstackpos;");
- writer.WriteLine("int tmp1, tmp2, ch;");
- bool hasTimeout = EmitLoopTimeoutCounterIfNeeded(writer, rm);
- bool hasTextInfo = EmitInitializeCultureForGoIfNecessary(writer, rm);
- writer.WriteLine();
-
- uniqueNote.AsSpan().Fill(-1);
- for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize(codes[codepos]))
- {
- forwardJumpsThroughSwitch[codepos] = -1;
- labels[codepos] = DefineLabel();
- }
-
- currentBacktrackNote = -1;
- for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize(codes[codepos]))
- {
- currentCodePos = codepos;
- currentOpcode = codes[codepos];
- EmitOneCode(labels[codepos]);
- writer.WriteLine();
- }
-
- // Generate the backtracking switch jump table that allows us to simulate a stack of addresses,
- // and contains the calls that expand the tracking and the grouping stack when they get too full.
- MarkLabel(Backtrack);
-
- // (Equivalent of EnsureStorage, but written to avoid unnecessary local spilling.)
- writer.WriteLine("int limit = base.runtrackcount * 4;");
- using (EmitBlock(writer, "if (runstackpos < limit)"))
- {
- writer.WriteLine("base.runstackpos = runstackpos;");
- writer.WriteLine("base.DoubleStack(); // might change runstackpos and runstack");
- writer.WriteLine("runstackpos = base.runstackpos;");
- writer.WriteLine("runstack = base.runstack!;");
- }
- using (EmitBlock(writer, "if (runtrackpos < limit)"))
- {
- writer.WriteLine("base.runtrackpos = runtrackpos;");
- writer.WriteLine("base.DoubleTrack(); // might change runtrackpos and runtrack");
- writer.WriteLine("runtrackpos = base.runtrackpos;");
- writer.WriteLine("runtrack = base.runtrack!;");
- }
- writer.WriteLine();
- using (EmitBlock(writer, "switch (runtrack[runtrackpos++])"))
- {
- for (int i = 0; i < noteCount; i++)
+ else
{
- using (EmitBlock(writer, $"case {i}:"))
+ if (childBacktracks)
{
- Debug.Assert(notes is not null);
- BacktrackNote n = notes[i];
- if (n.flags != 0)
- {
- currentCodePos = n.codepos;
- currentBacktrackNote = i;
- currentOpcode = codes[n.codepos] | n.flags;
- EmitOneCode(null); // should always end in a goto
- }
- else
- {
- writer.WriteLine($"goto {n.label};");
- }
- }
-
- writer.WriteLine();
- }
-
- using (EmitBlock(writer, "default:"))
- {
- writer.WriteLine("global::System.Diagnostics.Debug.Fail($\"Unexpected backtracking state {runtrack[runtrackpos - 1]}\");");
- writer.WriteLine("break;");
- }
- }
-
- return;
-
- /// <summary>
- /// The main translation function. It translates the logic for a single opcode at
- /// the current position. The structure of this function exactly mirrors
- /// the structure of the inner loop of RegexInterpreter.Go().
- /// </summary>
- /// <remarks>
- /// Note that since we're generating code, we can collapse many cases that are
- /// dealt with one-at-a-time in RegexInterpreter. We can also unroll loops that
- /// iterate over constant strings or sets.
- /// </remarks>
- void EmitOneCode(string? label)
- {
- writer.WriteLine($"// {SymbolDisplay.FormatLiteral(RegexCode.OpcodeDescription(currentCodePos, rm.Code.Codes, rm.Code.Strings), quote: false)}");
-
- if (label is not null)
- {
- MarkLabel(label);
- }
-
- // Before executing any Regex code in the unrolled loop,
- // we try checking for the match timeout:
- EmitTimeoutCheck(writer, hasTimeout);
-
- // Now generate the code for the Regex code saved in _regexopcode.
- switch (currentOpcode)
- {
- case RegexCode.Stop:
- writer.WriteLine("base.runtextpos = runtextpos;");
- writer.WriteLine("return;");
- break;
-
- case RegexCode.Nothing:
- writer.WriteLine($"goto {Backtrack};");
- break;
-
- case RegexCode.UpdateBumpalong:
- // UpdateBumpalong should only exist in the code stream at such a point where the root
- // of the backtracking stack contains the runtextpos from the start of this Go call. Replace
- // that tracking value with the current runtextpos value.
- writer.WriteLine("runtrack[^1] = runtextpos;");
- break;
-
- case RegexCode.Goto:
- Goto(Operand(0));
- break;
-
- case RegexCode.Testref:
- using (EmitBlock(writer, $"if (!base.IsMatched({Operand(0)}))"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Lazybranch:
- PushTrack("runtextpos");
- Track();
- break;
-
- case RegexCode.Lazybranch | RegexCode.Back:
- writer.WriteLine($"runtextpos = {PopTrack()};");
- Goto(Operand(0));
- break;
-
- case RegexCode.Nullmark:
- PushStack(-1);
- TrackUnique(Stackpop);
- break;
-
- case RegexCode.Setmark:
- PushStack("runtextpos");
- TrackUnique(Stackpop);
- break;
-
- case RegexCode.Nullmark | RegexCode.Back:
- case RegexCode.Setmark | RegexCode.Back:
- PopDiscardStack();
- writer.WriteLine($"goto {Backtrack};");
- break;
-
- case RegexCode.Getmark:
- writer.WriteLine($"runtextpos = {PopStack()};");
- PushTrack("runtextpos");
- Track();
- break;
-
- case RegexCode.Getmark | RegexCode.Back:
- PushStack(PopTrack());
- writer.WriteLine($"goto {Backtrack};");
- break;
-
- case RegexCode.Capturemark:
- {
- if (Operand(1) != -1)
- {
- using (EmitBlock(writer, $"if (!base.IsMatched({Operand(1)}))"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- }
-
- const string Stacked = "tmp1";
- writer.WriteLine($"{Stacked} = {PopStack()};");
- writer.WriteLine(Operand(1) != -1 ?
- $"base.TransferCapture({Operand(0)}, {Operand(1)}, {Stacked}, runtextpos);" :
- $"base.Capture({Operand(0)}, {Stacked}, runtextpos);");
- PushTrack(Stacked);
- TrackUnique(Operand(0) != -1 && Operand(1) != -1 ? Capback2 : Capback);
- }
- break;
-
- case RegexCode.Capturemark | RegexCode.Back:
- PushStack(PopTrack());
- writer.WriteLine("base.Uncapture();");
- if (Operand(0) != -1 && Operand(1) != -1)
- {
- writer.WriteLine("base.Uncapture();");
- }
- writer.WriteLine($"goto {Backtrack};");
- break;
-
- case RegexCode.Branchmark:
- {
- const string Mark = "tmp1";
- writer.WriteLine($"{Mark} = {PopStack()}; // mark");
- PushTrack(Mark);
- using (EmitBlock(writer, $"if (runtextpos != {Mark})"))
- {
- PushTrack("runtextpos");
- PushStack("runtextpos");
- Track();
- Goto(Operand(0));
- }
- using (EmitBlock(writer, "else"))
- {
- TrackUnique2(Branchmarkback2);
- }
- }
- break;
-
- case RegexCode.Branchmark | RegexCode.Back:
- writer.WriteLine($"runtextpos = {PopTrack()};");
- PopDiscardStack();
- TrackUnique2(Branchmarkback2); // track spot 0 is already in place
- Advance();
- break;
-
- case RegexCode.Branchmark | RegexCode.Back2:
- PushStack(PopTrack());
- writer.WriteLine($"goto {Backtrack};");
- break;
-
- case RegexCode.Lazybranchmark:
- {
- const string Mark = "tmp1";
- writer.WriteLine($"{Mark} = {PopStack()}; // mark");
- PushTrack($"{Mark} != -1 ? {Mark} : runtextpos");
- using (EmitBlock(writer, $"if (runtextpos != {Mark})"))
- {
- PushTrack("runtextpos");
- Track();
- Advance();
- }
- PushStack(Mark);
- TrackUnique2(Lazybranchmarkback2);
- }
- break;
-
- case RegexCode.Lazybranchmark | RegexCode.Back:
- writer.WriteLine($"runtextpos = {PopTrack()};");
- PushStack("runtextpos");
- TrackUnique2(Lazybranchmarkback2);
- Goto(Operand(0));
- break;
-
- case RegexCode.Lazybranchmark | RegexCode.Back2:
- writer.WriteLine($"{ReadyReplaceStack(0)} = {PopTrack()};");
- writer.WriteLine($"goto {Backtrack};");
- break;
-
- case RegexCode.Nullcount:
- PushStack(-1);
- PushStack(Operand(0));
- TrackUnique(Stackpop2);
- break;
-
- case RegexCode.Setcount:
- PushStack("runtextpos");
- PushStack(Operand(0));
- TrackUnique(Stackpop2);
- break;
-
- case RegexCode.Nullcount | RegexCode.Back:
- case RegexCode.Setcount | RegexCode.Back:
- PopDiscardStack(2);
- writer.WriteLine($"goto {Backtrack};");
- break;
-
- case RegexCode.Branchcount:
- {
- const string Count = "tmp1";
- const string Mark = "tmp2";
- writer.WriteLine($"{Count} = {PopStack()}; // count");
- writer.WriteLine($"{Mark} = {PopStack()}; // mark");
- PushTrack(Mark);
- using (EmitBlock(writer, $"if ({Count} < ({Mark} == runtextpos ? 0 : {Operand(1)}))"))
- {
- PushStack("runtextpos");
- PushStack($"{Count} + 1");
- Track();
- Goto(Operand(0));
- }
- PushTrack(Count);
- TrackUnique2(Branchcountback2);
- }
- break;
-
- case RegexCode.Branchcount | RegexCode.Back:
- {
- const string Count = "tmp1";
- writer.WriteLine($"{Count} = {PopStack()} - 1; // count");
- using (EmitBlock(writer, $"if ({Count} >= 0)"))
- {
- writer.WriteLine($"runtextpos = {PopStack()};");
- PushTrack(Count);
- TrackUnique2(Branchcountback2);
- Advance();
- }
- writer.WriteLine($"{ReadyReplaceStack(0)} = {PopTrack()};");
- PushStack(Count);
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Branchcount | RegexCode.Back2:
- {
- const string Mark = "tmp1";
- writer.WriteLine($"{Mark} = {PopTrack()}; // mark");
- PushStack(PopTrack());
- PushStack(Mark);
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Lazybranchcount:
- {
- const string Count = "tmp1";
- writer.WriteLine($"{Count} = {PopStack()}; // count");
- PushTrack(PopStack()); // mark
- using (EmitBlock(writer, $"if ({Count} < 0)"))
- {
- PushStack("runtextpos");
- PushStack($"{Count} + 1");
- TrackUnique2(Lazybranchcountback2);
- Goto(Operand(0));
- }
- PushTrack(Count);
- PushTrack("runtextpos");
- Track();
- }
- break;
-
- case RegexCode.Lazybranchcount | RegexCode.Back:
- {
- const string C = "tmp1";
- writer.WriteLine($"runtextpos = {PopTrack()};");
- writer.WriteLine($"{C} = {PopTrack()}; // c");
- using (EmitBlock(writer, $"if ({C} < {Operand(1)} && runtextpos != {TopTrack()})"))
- {
- PushStack("runtextpos");
- PushStack($"{C} + 1");
- TrackUnique2(Lazybranchcountback2);
- Goto(Operand(0));
- }
- PushStack(PopTrack());
- PushStack(C);
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Lazybranchcount | RegexCode.Back2:
- writer.WriteLine($"{ReadyReplaceStack(1)} = {PopTrack()};");
- writer.WriteLine($"{ReadyReplaceStack(0)} = {TopStack()} - 1;");
- ReadyReplaceStack(0);
- writer.WriteLine($"goto {Backtrack};");
- break;
-
- case RegexCode.Setjump:
- PushStack("runtrack.Length - runtrackpos");
- PushStack("base.Crawlpos()");
- TrackUnique(Stackpop2);
- break;
-
- case RegexCode.Setjump | RegexCode.Back:
- PopDiscardStack(2);
- writer.WriteLine($"goto {Backtrack};");
- break;
-
- case RegexCode.Backjump:
- {
- const string Stacked = "tmp1";
- writer.WriteLine($"{Stacked} = {PopStack()}; // stacked");
- writer.WriteLine($"runtrackpos = runtrack.Length - {PopStack()};");
- writer.WriteLine($"while (base.Crawlpos() != {Stacked}) base.Uncapture();");
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Forejump:
- {
- const string Stacked = "tmp1";
- writer.WriteLine($"{Stacked} = {PopStack()}; // stacked");
- writer.WriteLine($"runtrackpos = runtrack.Length - {PopStack()};");
- PushTrack(Stacked);
- TrackUnique(Forejumpback);
- }
- break;
-
- case RegexCode.Forejump | RegexCode.Back:
- {
- const string TrackedCrawlpos = "tmp1";
- writer.WriteLine($"{TrackedCrawlpos} = {PopTrack()}; // tracked crawlpos");
- writer.WriteLine($"while (base.Crawlpos() != {TrackedCrawlpos}) base.Uncapture();");
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Bol:
- using (EmitBlock(writer, $"if (runtextpos <= runtextbeg)"))
- {
- writer.WriteLine($"goto {labels[NextCodepos()]};");
- }
- using (EmitBlock(writer, $"if ({Leftchar()} != '\\n')"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Eol:
- using (EmitBlock(writer, $"if (runtextpos >= runtextend)"))
- {
- writer.WriteLine($"goto {labels[NextCodepos()]};");
- }
- using (EmitBlock(writer, $"if ({Rightchar()} != '\\n')"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Boundary:
- case RegexCode.NonBoundary:
- using (EmitBlock(writer, $"if ({(Code() == RegexCode.Boundary ? "!" : "")}base.IsBoundary(runtextpos, runtextbeg, runtextend))"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.ECMABoundary:
- case RegexCode.NonECMABoundary:
- using (EmitBlock(writer, $"if ({(Code() == RegexCode.ECMABoundary ? "!" : "")}base.IsECMABoundary(runtextpos, runtextbeg, runtextend))"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Beginning:
- using (EmitBlock(writer, $"if (runtextpos > runtextbeg)"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.Start:
- using (EmitBlock(writer, $"if (runtextpos != runtextstart)"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.EndZ:
- using (EmitBlock(writer, $"if (runtextpos < runtextend - 1)"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- using (EmitBlock(writer, $"if (runtextpos >= runtextend)"))
- {
- writer.WriteLine($"goto {labels[NextCodepos()]};");
- }
- using (EmitBlock(writer, $"if ({Rightchar()} != '\\n')"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.End:
- using (EmitBlock(writer, $"if (runtextpos < runtextend)"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- break;
-
- case RegexCode.One:
- case RegexCode.Notone:
- case RegexCode.Set:
- case RegexCode.One | RegexCode.Rtl:
- case RegexCode.Notone | RegexCode.Rtl:
- case RegexCode.Set | RegexCode.Rtl:
- case RegexCode.One | RegexCode.Ci:
- case RegexCode.Notone | RegexCode.Ci:
- case RegexCode.Set | RegexCode.Ci:
- case RegexCode.One | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notone | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Set | RegexCode.Ci | RegexCode.Rtl:
- {
- string clause;
- string expr;
- if (!IsRightToLeft())
- {
- clause = $"runtextpos >= runtextend || ";
- expr = Rightcharnext();
- }
- else
- {
- clause = $"runtextpos <= runtextbeg || ";
- expr = Leftcharnext();
- }
-
- clause += Code() == RegexCode.Set ?
- $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)}" :
- $"{ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive())} {(Code() == RegexCode.One ? "!=" : "==")} {Operand(0)}";
-
- using (EmitBlock(writer, $"if ({clause})"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- }
- break;
-
- case RegexCode.Multi:
- case RegexCode.Multi | RegexCode.Ci:
- {
- string str = rm.Code.Strings[Operand(0)];
- Debug.Assert(str.Length != 0);
- writer.WriteLine($"if (runtextend - runtextpos < {str.Length} ||");
- for (int i = 0; i < str.Length; i++)
- {
- writer.Write($" {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos{(i == 0 ? "" : $" + {i}")}]", IsCaseInsensitive())} != {Literal(str[i])}");
- writer.WriteLine(i < str.Length - 1 ? " ||" : ")");
- }
- using (EmitBlock(writer, null))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- EmitAdd(writer, "runtextpos", str.Length);
- break;
- }
-
- case RegexCode.Multi | RegexCode.Rtl:
- case RegexCode.Multi | RegexCode.Ci | RegexCode.Rtl:
- {
- string str = rm.Code.Strings[Operand(0)];
- Debug.Assert(str.Length != 0);
- writer.WriteLine($"if (runtextpos - runtextbeg < {str.Length} ||");
- for (int i = str.Length; i > 0;)
- {
- i--;
- writer.Write($" {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos - {str.Length - i}]", IsCaseInsensitive())} != {Literal(str[i])}");
- writer.WriteLine(i == 0 ? ")" : " ||");
- }
- using (EmitBlock(writer, null))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- writer.WriteLine($"runtextpos -= {str.Length};");
- break;
- }
-
- case RegexCode.Ref:
- case RegexCode.Ref | RegexCode.Ci:
- case RegexCode.Ref | RegexCode.Rtl:
- case RegexCode.Ref | RegexCode.Ci | RegexCode.Rtl:
- {
- const string Length = "tmp1";
- const string Index = "tmp2";
-
- using (EmitBlock(writer, $"if (!base.IsMatched({Operand(0)}))"))
- {
- writer.WriteLine($"goto {((options & RegexOptions.ECMAScript) != 0 ? AdvanceLabel() : Backtrack)};");
- }
-
- writer.WriteLine($"{Length} = base.MatchLength({Operand(0)}); // length");
-
- using (EmitBlock(writer, !IsRightToLeft() ? $"if (runtextend - runtextpos < {Length})" : $"if (runtextpos - runtextbeg < {Length})"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
-
- if (!IsRightToLeft())
- {
- writer.WriteLine($"{Index} = base.MatchIndex({Operand(0)}) + {Length}; // index");
- writer.WriteLine($"runtextpos += {Length};");
- }
- else
- {
- writer.WriteLine($"{Index} = base.MatchIndex({Operand(0)}); // index");
- writer.WriteLine($"runtextpos -= {Length};");
- }
-
- using (EmitBlock(writer, "while (true)"))
- {
- using (EmitBlock(writer, $"if ({Length} <= 0)"))
- {
- writer.WriteLine($"goto {AdvanceLabel()};");
- }
-
- using (EmitBlock(writer, !IsRightToLeft() ?
- $"if ({ToLowerIfNeeded(hasTextInfo, options, $"runtext[{Index} - {Length}]", IsCaseInsensitive())} != {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos - {Length}--]", IsCaseInsensitive())})" :
- $"if ({ToLowerIfNeeded(hasTextInfo, options, $"runtext[{Index} + --{Length}]", IsCaseInsensitive())} != {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos + {Length}]", IsCaseInsensitive())})"))
- {
- writer.WriteLine($"break;");
- }
- }
-
- writer.WriteLine($"goto {Backtrack};");
- break;
- }
-
- case RegexCode.Onerep:
- case RegexCode.Notonerep:
- case RegexCode.Setrep:
- case RegexCode.Onerep | RegexCode.Ci:
- case RegexCode.Notonerep | RegexCode.Ci:
- case RegexCode.Setrep | RegexCode.Ci:
- {
- int c = Operand(1);
- if (c != 0)
- {
- using (EmitBlock(writer, $"if (runtextend - runtextpos < {c})"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
-
- using (EmitBlock(writer, $"for (int i = 0; i < {c}; i++)"))
- {
- string expr = "runtext[runtextpos + i]";
- if (Code() == RegexCode.Setrep)
- {
- EmitTimeoutCheck(writer, hasTimeout);
- expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)}";
- }
- else
- {
- expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive());
- expr = $"{expr} {(Code() == RegexCode.Onerep ? "!=" : "==")} {Literal((char)Operand(0))}";
- }
-
- using (EmitBlock(writer, $"if ({expr})"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- }
- EmitAdd(writer, "runtextpos", c);
- }
- }
- break;
-
- case RegexCode.Onerep | RegexCode.Rtl:
- case RegexCode.Notonerep | RegexCode.Rtl:
- case RegexCode.Setrep | RegexCode.Rtl:
- case RegexCode.Onerep | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notonerep | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Setrep | RegexCode.Ci | RegexCode.Rtl:
- {
- int c = Operand(1);
- if (c != 0)
- {
- const string Length = "tmp1";
-
- using (EmitBlock(writer, $"if (runtextpos - runtextbeg < {c})"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- writer.WriteLine($"runtextpos -= {c};");
- writer.WriteLine($"{Length} = {c}; // length");
-
- string l1 = DefineLabel();
- MarkLabel(l1);
-
- string expr = $"runtext[runtextpos + --{Length}]";
- if (Code() == RegexCode.Setrep)
- {
- EmitTimeoutCheck(writer, hasTimeout);
- using (EmitBlock(writer, $"if (!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)})"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- }
- else
- {
- expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive());
- string op = Code() == RegexCode.Onerep ? "!=" : "==";
- using (EmitBlock(writer, $"if ({expr} {op} {Literal((char)Operand(0))})"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
- }
-
- using (EmitBlock(writer, $"if ({Length} > 0)"))
- {
- writer.WriteLine($"goto {l1};");
- }
- }
- break;
- }
-
- case RegexCode.Oneloop:
- case RegexCode.Notoneloop:
- case RegexCode.Setloop:
- case RegexCode.Oneloop | RegexCode.Rtl:
- case RegexCode.Notoneloop | RegexCode.Rtl:
- case RegexCode.Setloop | RegexCode.Rtl:
- case RegexCode.Oneloop | RegexCode.Ci:
- case RegexCode.Notoneloop | RegexCode.Ci:
- case RegexCode.Setloop | RegexCode.Ci:
- case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Oneloopatomic:
- case RegexCode.Notoneloopatomic:
- case RegexCode.Setloopatomic:
- case RegexCode.Oneloopatomic | RegexCode.Rtl:
- case RegexCode.Notoneloopatomic | RegexCode.Rtl:
- case RegexCode.Setloopatomic | RegexCode.Rtl:
- case RegexCode.Oneloopatomic | RegexCode.Ci:
- case RegexCode.Notoneloopatomic | RegexCode.Ci:
- case RegexCode.Setloopatomic | RegexCode.Ci:
- case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl:
- {
- int c = Operand(1);
- if (c != 0)
- {
- const string Len = "tmp1";
- const string I = "tmp2";
-
- if (c == int.MaxValue)
- {
- writer.WriteLine(!IsRightToLeft() ?
- $"{Len} = runtextend - runtextpos; // length" :
- $"{Len} = runtextpos - runtextbeg; // length");
- }
- else
- {
- writer.WriteLine(!IsRightToLeft() ?
- $"{Len} = global::System.Math.Min(runtextend - runtextpos, {c}); // length" :
- $"{Len} = global::System.Math.Min(runtextpos - runtextbeg, {c}); // length");
- }
-
- string? set = Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic ? rm.Code.Strings[Operand(0)] : null;
- Span<char> setChars = stackalloc char[5]; // max optimized by IndexOfAny today
- int numSetChars;
-
- // If this is a notoneloop{atomic} and we're left-to-right and case-sensitive,
- // we can use the vectorized IndexOf to search for the target character.
- if ((Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic) &&
- !IsRightToLeft() &&
- !IsCaseInsensitive())
- {
- writer.WriteLine($"{I} = global::System.MemoryExtensions.IndexOf(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal((char)Operand(0))}); // i");
- using (EmitBlock(writer, $"if ({I} == -1)"))
- {
- writer.WriteLine($"runtextpos += {Len};");
- writer.WriteLine($"{I} = 0;");
- }
- using (EmitBlock(writer, "else"))
- {
- writer.WriteLine($"runtextpos += {I};");
- writer.WriteLine($"{I} = {Len} - {I};");
- }
- }
- else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) &&
- !IsRightToLeft() &&
- !IsCaseInsensitive() &&
- (numSetChars = RegexCharClass.GetSetChars(set!, setChars)) != 0 &&
- RegexCharClass.IsNegated(set!))
- {
- // Similarly, if this is a setloop{atomic} and we're left-to-right and case-sensitive,
- // and if the set contains only a few negated chars, we can use the vectorized IndexOfAny
- // to search for those chars.
- Debug.Assert(numSetChars > 1);
- writer.WriteLine(numSetChars switch
- {
- 2 => $"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars[0])}, {Literal(setChars[1])}); // i",
- 3 => $"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])}); // i",
- _ => $"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars.Slice(0, numSetChars).ToString())}); // i",
- });
- using (EmitBlock(writer, $"if ({I} == -1)"))
- {
- writer.WriteLine($"runtextpos += {Len};");
- writer.WriteLine($"{I} = 0;");
- }
- using (EmitBlock(writer, "else"))
- {
- writer.WriteLine($"runtextpos += {I};");
- writer.WriteLine($"{I} = {Len} - {I};");
- }
- }
- else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) &&
- !IsRightToLeft() &&
- set == RegexCharClass.AnyClass)
- {
- // If someone uses .* along with RegexOptions.Singleline, that becomes [anycharacter]*, which means it'll
- // consume everything. As such, we can simply update our position to be the last allowed, without
- // actually checking anything.
- writer.WriteLine($"runtextpos += {Len};");
- writer.WriteLine($"{I} = 0;");
- }
- else
- {
- // Otherwise, we emit the open-coded loop.
- writer.WriteLine($"{I} = {Len} + 1;");
- using (EmitBlock(writer, $"while (--{I} > {0})"))
- {
- string expr = !IsRightToLeft() ?
- Rightcharnext() :
- Leftcharnext();
-
- if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
- {
- EmitTimeoutCheck(writer, hasTimeout);
- expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)}";
- }
- else
- {
- string op = Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic ? "!=" : "==";
- expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive());
- expr = $"{expr} {op} {Literal((char)Operand(0))}";
- }
-
- using (EmitBlock(writer, $"if ({expr})"))
- {
- writer.WriteLine(!IsRightToLeft() ?
- "runtextpos--;" :
- "runtextpos++;");
- writer.WriteLine("break;");
- }
- }
- }
-
- if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic)
- {
- using (EmitBlock(writer, $"if ({I} >= {Len})"))
- {
- writer.WriteLine($"goto {AdvanceLabel()};");
- }
- PushTrack($"{Len} - {I} - 1");
- PushTrack(!IsRightToLeft() ?
- "runtextpos - 1" :
- "runtextpos + 1");
- Track();
- }
- }
- break;
- }
-
- case RegexCode.Oneloop | RegexCode.Back:
- case RegexCode.Notoneloop | RegexCode.Back:
- case RegexCode.Setloop | RegexCode.Back:
- case RegexCode.Oneloop | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Notoneloop | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Setloop | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Setloop | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- {
- const string Position = "tmp1";
- writer.WriteLine($"runtextpos = {PopTrack()};");
- writer.WriteLine($"{Position} = {PopTrack()}; // position");
- using (EmitBlock(writer, $"if ({Position} > 0)"))
- {
- PushTrack($"{Position} - 1");
- PushTrack(!IsRightToLeft() ?
- "runtextpos - 1" :
- "runtextpos + 1");
- Trackagain();
- }
- Advance();
- }
- break;
-
- case RegexCode.Onelazy:
- case RegexCode.Notonelazy:
- case RegexCode.Setlazy:
- case RegexCode.Onelazy | RegexCode.Rtl:
- case RegexCode.Notonelazy | RegexCode.Rtl:
- case RegexCode.Setlazy | RegexCode.Rtl:
- case RegexCode.Onelazy | RegexCode.Ci:
- case RegexCode.Notonelazy | RegexCode.Ci:
- case RegexCode.Setlazy | RegexCode.Ci:
- case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl:
- {
- int count = Operand(1);
- if (count != 0)
- {
- const string C = "tmp1";
- if (count == int.MaxValue)
- {
- writer.WriteLine(!IsRightToLeft() ?
- $"{C} = runtextend - runtextpos; // count" :
- $"{C} = runtextpos - runtextbeg; // count");
- }
- else
- {
- writer.WriteLine(!IsRightToLeft() ?
- $"{C} = global::System.Math.Min(runtextend - runtextpos, {count}); // count" :
- $"{C} = global::System.Math.Min(runtextpos - runtextbeg, {count}); // count");
- }
-
- using (EmitBlock(writer, $"if ({C} <= 0)"))
- {
- writer.WriteLine($"goto {AdvanceLabel()};");
- }
-
- PushTrack($"{C} - 1");
- PushTrack("runtextpos");
- Track();
- }
- break;
- }
+ writer.WriteLine($"goto {endLoop};");
+ writer.WriteLine();
- case RegexCode.Onelazy | RegexCode.Back:
- case RegexCode.Notonelazy | RegexCode.Back:
- case RegexCode.Setlazy | RegexCode.Back:
- case RegexCode.Onelazy | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Notonelazy | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Setlazy | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
+ string backtrack = ReserveName("LoopBacktrack");
+ MarkLabel(backtrack);
+ using (EmitBlock(writer, $"if ({iterationCount} == 0)"))
{
- const string I = "tmp1";
-
- writer.WriteLine($"runtextpos = {PopTrack()};");
- writer.WriteLine($"{I} = {PopTrack()}; // i");
-
- string expr = !IsRightToLeft() ?
- Rightcharnext() :
- Leftcharnext();
-
- if (Code() == RegexCode.Setlazy)
- {
- EmitTimeoutCheck(writer, hasTimeout);
- expr = $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive(), null)}";
- }
- else
- {
- expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive());
- expr = $"{expr} {(Code() == RegexCode.Onelazy ? "!=" : "==")} {Literal((char)Operand(0))}";
- }
-
- using (EmitBlock(writer, $"if ({expr})"))
- {
- writer.WriteLine($"goto {Backtrack};");
- }
-
- using (EmitBlock(writer, $"if ({I} > 0)"))
- {
- PushTrack($"{I} - 1");
- PushTrack("runtextpos");
- Trackagain();
- }
-
- Advance();
+ writer.WriteLine($"goto {originalDoneLabel};");
}
- break;
-
- default:
- Debug.Fail($"Unimplemented state: {currentOpcode:X8}");
- break;
- }
- }
-
-
-
- /// <summary>
- /// Branch to the label corresponding to the regex code at i
- /// </summary>
- /// <remarks>
- /// A trick: since track and stack space is gobbled up unboundedly
- /// only as a result of branching backwards, this is where we check
- /// for sufficient space and trigger reallocations.
- ///
- /// If the "goto" is backwards, we generate code that checks
- /// available space against the amount of space that would be needed
- /// in the worst case by code that will only go forward; if there's
- /// not enough, we push the destination on the tracking stack, then
- /// we jump to the place where we invoke the allocator.
- ///
- /// Since forward gotos pose no threat, they just turn into a Br.
- /// </remarks>
- void Goto(int i)
- {
- // When going backwards, ensure enough space.
- if (i < currentCodePos)
- {
- using (EmitBlock(writer, $"if (runtrackpos <= {rm.Code.TrackCount * 4} || runstackpos <= {rm.Code.TrackCount * 3})"))
- {
- writer.WriteLine($"{ReadyPushTrack()} = {AddGoto(i)};");
- writer.WriteLine($"goto {Backtrack};");
+ writer.WriteLine($"goto {doneLabel};");
+ doneLabel = backtrack;
}
- }
-
- writer.WriteLine($"goto {labels[i]};");
- }
-
- string ReadyPushTrack() => "runtrack[--runtrackpos]";
-
- void Track() => PushTrack(AddTrack());
-
- /// <summary>
- /// Pushes the current switch index on the tracking stack so the backtracking
- /// logic will be repeated again next time we backtrack here.
- /// </summary>
- void Trackagain() => PushTrack(currentBacktrackNote);
-
- void PushTrack<T>(T expr) where T : notnull => writer.WriteLine($"{ReadyPushTrack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");
-
- /// <summary>Retrieves the top entry on the tracking stack without popping.</summary>
- string TopTrack() => "runtrack[runtrackpos]";
-
- int Operand(int i) => codes[currentCodePos + i + 1];
-
- /// <summary>True if the current operation is marked for the leftward direction.</summary>
- bool IsRightToLeft() => (currentOpcode & RegexCode.Rtl) != 0;
-
- /// <summary>True if the current operation is marked for case insensitive operation.</summary>
- bool IsCaseInsensitive() => (currentOpcode & RegexCode.Ci) != 0;
-
- /// <summary>Returns the raw regex opcode (masking out Back and Rtl).</summary>
- int Code() => currentOpcode & RegexCode.Mask;
- /// <summary>Saves the value of a local variable on the grouping stack.</summary>
- void PushStack<T>(T expr) where T : notnull => writer.WriteLine($"{ReadyPushStack()} = {(expr is IFormattable ? ((IFormattable)expr).ToString(null, CultureInfo.InvariantCulture) : expr.ToString())};");
+ MarkLabel(endLoop);
- string ReadyPushStack() => "runstack[--runstackpos]";
-
- /// <summary>Retrieves the top entry on the stack without popping.</summary>
- string TopStack() => "runstack[runstackpos]";
-
- void TrackUnique(int i) => PushTrack(AddUniqueTrack(i));
-
- void TrackUnique2(int i) => PushTrack(AddUniqueTrack(i, RegexCode.Back2));
-
- int AddUniqueTrack(int i, int flags = RegexCode.Back)
- {
- if (uniqueNote[i] == -1)
- {
- uniqueNote[i] = AddTrack(flags);
- }
-
- return uniqueNote[i];
- }
-
- /// <summary>
- /// Returns the position of the next operation in the regex code, taking
- /// into account the different numbers of arguments taken by operations
- /// </summary>
- int NextCodepos() => currentCodePos + RegexCode.OpcodeSize(codes[currentCodePos]);
-
- /// <summary>The label for the next (forward) operation.</summary>
- string AdvanceLabel() => labels[NextCodepos()]!;
-
- /// <summary>Goto the next (forward) operation.</summary>
- void Advance() => writer.WriteLine($"goto {AdvanceLabel()};");
+ if (node.IsInLoop())
+ {
+ writer.WriteLine();
- /// <summary>Loads the char to the left of the current position.</summary>
- string Leftchar() => "runtext[runtextpos - 1]";
+ // Store the capture's state
+ EmitRunstackResizeIfNeeded(3);
+ writer.WriteLine($"{RunstackPush()} = {startingRunTextPos};");
+ writer.WriteLine($"{RunstackPush()} = {iterationCount};");
- /// <summary>Loads the char to the left of the current position and advances (leftward).</summary>
- string Leftcharnext() => "runtext[--runtextpos]";
+ // Skip past the backtracking section
+ string end = ReserveName("SkipBacktrack");
+ writer.WriteLine($"goto {end};");
+ writer.WriteLine();
- /// <summary>Loads the char to the right of the current position.</summary>
- string Rightchar() => "runtext[runtextpos]";
+ // Emit a backtracking section that restores the capture's state and then jumps to the previous done label
+ string backtrack = ReserveName("LoopBacktrack");
+ MarkLabel(backtrack);
+ writer.WriteLine($"{iterationCount} = {RunstackPop()};");
+ writer.WriteLine($"{startingRunTextPos} = {RunstackPop()};");
- /// <summary>Loads the char to the right of the current position and advances the current position.</summary>
- string Rightcharnext() => "runtext[runtextpos++]";
+ writer.WriteLine($"goto {doneLabel};");
+ writer.WriteLine();
- /// <summary>
- /// Adds a backtrack note to the list of them, and returns the index of the new
- /// note (which is also the index for the jump used by the switch table)
- /// </summary>
- int AddBacktrackNote(int flags, string l, int codepos)
- {
- if (notes == null || noteCount >= notes.Length)
- {
- var newnotes = new BacktrackNote[notes == null ? 16 : notes.Length * 2];
- if (notes != null)
- {
- Array.Copy(notes, newnotes, noteCount);
+ doneLabel = backtrack;
+ MarkLabel(end);
}
- notes = newnotes;
}
-
- notes[noteCount] = new BacktrackNote(flags, l, codepos);
- return noteCount++;
}
- /// <summary>
- /// Adds a backtrack note for the current operation; creates a new label for
- /// where the code will be, and returns the switch index.
- /// </summary>
- int AddTrack(int flags = RegexCode.Back) => AddBacktrackNote(flags, DefineLabel(), currentCodePos);
-
- int AddGoto(int destpos)
// Emits a guard into the generated code that doubles base.runstack when fewer than
// `count` free slots remain at runstackpos; callers invoke it before emitting pushes.
+ void EmitRunstackResizeIfNeeded(int count)
{
- if (forwardJumpsThroughSwitch[destpos] == -1)
// For count > 1 the emitted comparison is against "Length - (count - 1)"; otherwise it is against Length itself.
+ string subCount = count > 1 ? $" - {count - 1}" : "";
+ using (EmitBlock(writer, $"if (runstackpos >= base.runstack!.Length{subCount})"))
{
- forwardJumpsThroughSwitch[destpos] = AddBacktrackNote(0, labels[destpos]!, destpos);
// Only one doubling is emitted (no loop) - NOTE(review): assumes doubling always frees at least `count` slots; confirm.
+ writer.WriteLine("global::System.Array.Resize(ref base.runstack, base.runstack.Length * 2);");
}
-
- return forwardJumpsThroughSwitch[destpos];
}
- /// <summary>Pops an element off the tracking stack.</summary>
- string PopTrack() => "runtrack[runtrackpos++]";
-
- /// <summary>Pops an element off the grouping stack (leave it on the operand stack).</summary>
- string PopStack() => "runstack[runstackpos++]";
-
- /// <summary>Pops i elements off the grouping stack and discards them.</summary>
- void PopDiscardStack(int i = 1) => EmitAdd(writer, "runstackpos", i);
-
- /// <summary>Prologue to code that will replace the ith element on the grouping stack.</summary>
- string ReadyReplaceStack(int i) => i == 0 ? "runstack[runstackpos]" : $"runstack[runstackpos + {i}]";
// Text of a generated-code expression that addresses the slot at runstackpos and then increments runstackpos;
// callers emit it on the left-hand side of an assignment to push a value.
+ string RunstackPush() => "base.runstack[runstackpos++]";
// Text of a generated-code expression that decrements runstackpos and then reads that slot (the inverse of RunstackPush).
+ string RunstackPop() => "base.runstack![--runstackpos]";
}
- /// <summary>
- /// Keeps track of an operation that needs to be referenced in the backtrack-jump
- /// switch table, and that needs backtracking code to be emitted (if flags != 0)
- /// </summary>
- private record BacktrackNote(int flags, string label, int codepos);
-
private static bool EmitLoopTimeoutCounterIfNeeded(IndentedTextWriter writer, RegexMethod rm)
{
if (rm.MatchTimeout != Timeout.Infinite)
/// Inserts into <paramref name="writer"/> all of the variable declarations
/// in <paramref name="declarations"/>, if there are any.
/// </summary>
- private static void ReplaceAdditionalDeclarations(HashSet<string> declarations, IndentedTextWriter writer)
+ /// <param name="writer">The writer around a StringWriter to have additional declarations inserted into.</param>
+ /// <param name="declarations">The additional declarations to insert.</param>
+ /// <param name="position">The position into the writer at which to insert the additional declarations.</param>
+ /// <param name="indent">The indentation to use for the additional declarations.</param>
+ private static void ReplaceAdditionalDeclarations(IndentedTextWriter writer, HashSet<string> declarations, int position, int indent)
{
- StringBuilder sb = ((StringWriter)writer.InnerWriter).GetStringBuilder();
- string replacement = "";
-
// With no declarations the writer's contents are left untouched.
if (declarations.Count != 0)
{
- var tmp = new StringBuilder().AppendLine();
- foreach (string decl in declarations)
// Copy the set into an array and sort it so the declarations are written in sorted order
// rather than in HashSet enumeration order (presumably to keep the generated output stable).
+ var arr = new string[declarations.Count];
+ declarations.CopyTo(arr);
+ Array.Sort(arr);
+
// The inserted text starts with a line break, followed by one line per declaration.
+ StringBuilder tmp = new StringBuilder().AppendLine();
+ foreach (string decl in arr)
{
- tmp.Append(' ', writer.Indent * 4).AppendLine(decl);
// Prefix the declaration with `indent` copies of the writer's default tab string.
+ for (int i = 0; i < indent; i++)
+ {
+ tmp.Append(IndentedTextWriter.DefaultTabString);
+ }
+
+ tmp.AppendLine(decl);
}
- replacement = tmp.ToString();
- }
- sb.Replace(AdditionalDeclarationsPlaceholder, replacement);
// Splice the text directly into the underlying buffer; the cast requires that the writer wraps a StringWriter.
+ ((StringWriter)writer.InnerWriter).GetStringBuilder().Insert(position, tmp.ToString());
+ }
}
private static string Literal(char c) => SymbolDisplay.FormatLiteral(c, quote: true);
SymbolDisplayFormat.FullyQualifiedFormat.WithGlobalNamespaceStyle(SymbolDisplayGlobalNamespaceStyle.Omitted));
var regexMethod = new RegexMethod(
+ methodSyntax,
regexMethodSymbol.Name,
methodSyntax.Modifiers.ToString(),
pattern,
}
/// <summary>A regex method.</summary>
- internal sealed record RegexMethod(string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexCode Code);
+ internal sealed record RegexMethod(MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexCode Code);
/// <summary>A type holding a regex method.</summary>
internal sealed record RegexType(RegexMethod? Method, string Keyword, string Namespace, string Name, string Constraints)
context.ReportDiagnostic(d);
break;
- case string s:
- code.Add(s);
+ case ValueTuple<string, ImmutableArray<Diagnostic>> t:
+ code.Add(t.Item1);
+ foreach (Diagnostic d in t.Item2)
+ {
+ context.ReportDiagnostic(d);
+ }
break;
}
}
<data name="InvalidLangVersionMessage" xml:space="preserve">
<value>C# LangVersion of 10 or greater is required</value>
</data>
+ <data name="LimitedSourceGenerationTitle" xml:space="preserve">
+ <value>RegexGenerator limitation reached.</value>
+ </data>
+ <data name="LimitedSourceGenerationMessage" xml:space="preserve">
+ <value>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</value>
+ </data>
<data name="Generic" xml:space="preserve">
<value>Regular expression parser error '{0}' at offset {1}.</value>
</data>
<target state="translated">Délka nemůže být menší než 0 nebo přesáhnout délku vstupu.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">Neplatný vzor {0} u posunu {1}. {2}</target>
<target state="translated">Die Länge darf nicht kleiner als 0 sein oder die Eingabelänge überschreiten.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">Ungültiges Muster "{0}" bei Offset {1}. {2}</target>
<target state="translated">La longitud no puede ser inferior a 0 ni superar la longitud de entrada.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">Patrón '{0}' no válido en el desplazamiento {1}. {2}</target>
<target state="translated">La longueur ne peut pas être inférieure à 0 ou supérieure à la longueur d'entrée.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">Modèle « {0} » non valide au niveau du décalage {1}. {2}</target>
<target state="translated">Length non può essere minore di zero o superare la lunghezza di input.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">Criterio '{0}' non valido alla posizione di offset {1}. {2}</target>
<target state="translated">長さを 0 未満に設定したり、入力の長さを超えることはできません。</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">オフセット {1} に無効なパターン '{0}' があります。{2}</target>
<target state="translated">길이는 0보다 작거나 입력 길이를 초과할 수 없습니다.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">오프셋 {1}에서 잘못된 패턴 '{0}'. {2}</target>
<target state="translated">Długość nie może być mniejsza od 0 ani przekraczać długości danych wejściowych.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">Nieprawidłowy wzorzec „{0}” przy przesunięciu {1}. {2}</target>
<target state="translated">Comprimento não pode ser menor que 0 ou exceder o comprimento de entrada.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">Padrão inválido '{0}' no deslocamento {1}. {2}</target>
<target state="translated">Длина не может быть меньше 0 или превышать длину ввода.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">Недопустимый шаблон "{0}" со смещением {1}. {2}</target>
<target state="translated">Uzunluk sıfırdan küçük olamaz ve giriş uzunluğunu aşamaz.</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">{1} ofsetinde geçersiz “{0}” deseni. {2}</target>
<target state="translated">长度不能小于 0 或超过输入长度。</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">偏移 {1} 处的模式“{0}”无效。{2}</target>
<target state="translated">長度不能小於零或超過輸入長度。</target>
<note />
</trans-unit>
+ <trans-unit id="LimitedSourceGenerationMessage">
+ <source>The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</source>
+ <target state="new">The RegexGenerator couldn't generate a complete source implementation for the specified regular expression, due to an unsupported option or too complex a regular expression. The implementation will interpret the regular expression at run-time.</target>
+ <note />
+ </trans-unit>
+ <trans-unit id="LimitedSourceGenerationTitle">
+ <source>RegexGenerator limitation reached.</source>
+ <target state="new">RegexGenerator limitation reached.</target>
+ <note />
+ </trans-unit>
<trans-unit id="MakeException">
<source>Invalid pattern '{0}' at offset {1}. {2}</source>
<target state="translated">位移 {1} 的模式 '{0}' 無效。{2}</target>
else if (RuntimeFeature.IsDynamicCodeCompiled && UseOptionC())
{
// If the compile option is set and compilation is supported, then compile the code.
+ // If the compiler can't compile this regex, it'll return null, and we'll fall back
+ // to the interpreter.
factory = Compile(pattern, _code, options, matchTimeout != InfiniteMatchTimeout);
- _code = null;
+ if (factory is not null)
+ {
+ _code = null;
+ }
}
}
/// instantiating a non-compiled regex.
/// </summary>
[MethodImpl(MethodImplOptions.NoInlining)]
- private static RegexRunnerFactory Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) =>
+ private static RegexRunnerFactory? Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) =>
RegexCompiler.Compile(pattern, code, options, hasTimeout);
[Obsolete(Obsoletions.RegexCompileToAssemblyMessage, DiagnosticId = Obsoletions.RegexCompileToAssemblyDiagId, UrlFormat = Obsoletions.SharedUrlFormat)]
using System.Collections.Generic;
using System.Diagnostics;
-using System.Diagnostics.CodeAnalysis;
using System.Globalization;
using System.Reflection;
using System.Reflection.Emit;
namespace System.Text.RegularExpressions
{
/// <summary>
- /// RegexCompiler translates a block of RegexCode to MSIL, and creates a
- /// subclass of the RegexRunner type.
+ /// RegexCompiler translates a block of RegexCode to MSIL, and creates a subclass of the RegexRunner type.
/// </summary>
internal abstract class RegexCompiler
{
private static readonly FieldInfo s_runtextstartField = RegexRunnerField("runtextstart");
private static readonly FieldInfo s_runtextposField = RegexRunnerField("runtextpos");
private static readonly FieldInfo s_runtextField = RegexRunnerField("runtext");
- private static readonly FieldInfo s_runtrackposField = RegexRunnerField("runtrackpos");
- private static readonly FieldInfo s_runtrackField = RegexRunnerField("runtrack");
- private static readonly FieldInfo s_runstackposField = RegexRunnerField("runstackpos");
private static readonly FieldInfo s_runstackField = RegexRunnerField("runstack");
- protected static readonly FieldInfo s_runtrackcountField = RegexRunnerField("runtrackcount");
- private static readonly MethodInfo s_doubleStackMethod = RegexRunnerMethod("DoubleStack");
- private static readonly MethodInfo s_doubleTrackMethod = RegexRunnerMethod("DoubleTrack");
private static readonly MethodInfo s_captureMethod = RegexRunnerMethod("Capture");
private static readonly MethodInfo s_transferCaptureMethod = RegexRunnerMethod("TransferCapture");
private static readonly MethodInfo s_uncaptureMethod = RegexRunnerMethod("Uncapture");
private static readonly MethodInfo s_crawlposMethod = RegexRunnerMethod("Crawlpos");
private static readonly MethodInfo s_charInClassMethod = RegexRunnerMethod("CharInClass");
private static readonly MethodInfo s_checkTimeoutMethod = RegexRunnerMethod("CheckTimeout");
-#if DEBUG
- private static readonly MethodInfo s_dumpStateM = RegexRunnerMethod("DumpState");
-#endif
private static readonly MethodInfo s_charIsDigitMethod = typeof(char).GetMethod("IsDigit", new Type[] { typeof(char) })!;
private static readonly MethodInfo s_charIsWhiteSpaceMethod = typeof(char).GetMethod("IsWhiteSpace", new Type[] { typeof(char) })!;
private static readonly MethodInfo s_charToLowerInvariantMethod = typeof(char).GetMethod("ToLowerInvariant", new Type[] { typeof(char) })!;
private static readonly MethodInfo s_cultureInfoGetCurrentCultureMethod = typeof(CultureInfo).GetMethod("get_CurrentCulture")!;
private static readonly MethodInfo s_cultureInfoGetTextInfoMethod = typeof(CultureInfo).GetMethod("get_TextInfo")!;
-#if DEBUG
- private static readonly MethodInfo s_debugWriteLine = typeof(Debug).GetMethod("WriteLine", new Type[] { typeof(string) })!;
-#endif
private static readonly MethodInfo s_spanGetItemMethod = typeof(ReadOnlySpan<char>).GetMethod("get_Item", new Type[] { typeof(int) })!;
private static readonly MethodInfo s_spanGetLengthMethod = typeof(ReadOnlySpan<char>).GetMethod("get_Length")!;
private static readonly MethodInfo s_memoryMarshalGetReference = typeof(MemoryMarshal).GetMethod("GetReference", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
private static readonly MethodInfo s_spanIndexOfAnyCharChar = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char));
private static readonly MethodInfo s_spanIndexOfAnyCharCharChar = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char));
private static readonly MethodInfo s_spanIndexOfAnySpan = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
- private static readonly MethodInfo s_spanLastIndexOfChar = typeof(MemoryExtensions).GetMethod("LastIndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char));
- private static readonly MethodInfo s_spanLastIndexOfSpan = typeof(MemoryExtensions).GetMethod("LastIndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
private static readonly MethodInfo s_spanSliceIntMethod = typeof(ReadOnlySpan<char>).GetMethod("Slice", new Type[] { typeof(int) })!;
private static readonly MethodInfo s_spanSliceIntIntMethod = typeof(ReadOnlySpan<char>).GetMethod("Slice", new Type[] { typeof(int), typeof(int) })!;
private static readonly MethodInfo s_spanStartsWith = typeof(MemoryExtensions).GetMethod("StartsWith", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
private static readonly MethodInfo s_textInfoToLowerMethod = typeof(TextInfo).GetMethod("ToLower", new Type[] { typeof(char) })!;
private static readonly MethodInfo s_arrayResize = typeof(Array).GetMethod("Resize")!.MakeGenericMethod(typeof(int));
+ /// <summary>The ILGenerator currently in use.</summary>
protected ILGenerator? _ilg;
-
- // tokens representing local variables
- private LocalBuilder? _runtextbegLocal;
- private LocalBuilder? _runtextendLocal;
- private LocalBuilder? _runtextposLocal;
- private LocalBuilder? _runtextLocal;
- private LocalBuilder? _runtextSpanLocal;
- private LocalBuilder? _runtrackposLocal;
- private LocalBuilder? _runtrackLocal;
- private LocalBuilder? _runstackposLocal;
- private LocalBuilder? _runstackLocal;
- private LocalBuilder? _textInfoLocal; // cached to avoid extraneous TLS hits from CurrentCulture and virtual calls to TextInfo
- private LocalBuilder? _loopTimeoutCounterLocal; // timeout counter for setrep and setloop
-
- protected RegexOptions _options; // options
- protected RegexCode? _code; // the RegexCode object
- protected int[]? _codes; // the RegexCodes being translated
- protected string[]? _strings; // the stringtable associated with the RegexCodes
- protected bool _hasTimeout; // whether the regex has a non-infinite timeout
-
- private Label[]? _labels; // a label for every operation in _codes
- private BacktrackNote[]? _notes; // a list of the backtracking states to be generated
- private int _notecount; // true count of _notes (allocation grows exponentially)
- protected int _trackcount; // count of backtracking states (used to reduce allocations)
- private Label _backtrack; // label for backtracking
- private Stack<LocalBuilder>? _int32LocalsPool; // pool of Int32 local variables
- private Stack<LocalBuilder>? _readOnlySpanCharLocalsPool; // pool of ReadOnlySpan<char> local variables
-
- private int _regexopcode; // the current opcode being processed
- private int _codepos; // the current code being translated
- private int _backpos; // the current backtrack-note being translated
-
- // special code fragments
- private int[]? _uniquenote; // _notes indices for code that should be emitted <= once
- private int[]? _goto; // indices for forward-jumps-through-switch (for allocations)
-
- // indices for unique code fragments
- private const int Stackpop = 0; // pop one
- private const int Stackpop2 = 1; // pop two
- private const int Capback = 3; // uncapture
- private const int Capback2 = 4; // uncapture 2
- private const int Branchmarkback2 = 5; // back2 part of branchmark
- private const int Lazybranchmarkback2 = 6; // back2 part of lazybranchmark
- private const int Branchcountback2 = 7; // back2 part of branchcount
- private const int Lazybranchcountback2 = 8; // back2 part of lazybranchcount
- private const int Forejumpback = 9; // back part of forejump
- private const int Uniquecount = 10;
- private const int LoopTimeoutCheckCount = 2048; // A conservative value to guarantee the correct timeout handling.
+ /// <summary>The options for the expression.</summary>
+ protected RegexOptions _options;
+ /// <summary>The code written for the expression.</summary>
+ protected RegexCode? _code;
+ /// <summary>Whether this expression has a non-infinite timeout.</summary>
+ protected bool _hasTimeout;
+
+ /// <summary>Pool of Int32 LocalBuilders.</summary>
+ private Stack<LocalBuilder>? _int32LocalsPool;
+ /// <summary>Pool of ReadOnlySpan of char locals.</summary>
+ private Stack<LocalBuilder>? _readOnlySpanCharLocalsPool;
+
+ /// <summary>Local representing a cached TextInfo for the culture to use for all case-insensitive operations.</summary>
+ private LocalBuilder? _textInfo;
+ /// <summary>Local representing a timeout counter for loops (set loops and node loops).</summary>
+ private LocalBuilder? _loopTimeoutCounter;
+ /// <summary>A frequency with which the timeout should be validated.</summary>
+ private const int LoopTimeoutCheckCount = 2048;
private static FieldInfo RegexRunnerField(string fieldname) => typeof(RegexRunner).GetField(fieldname, BindingFlags.NonPublic | BindingFlags.Public | BindingFlags.Instance | BindingFlags.Static)!;
/// Entry point to dynamically compile a regular expression. The expression is compiled to
/// an in-memory assembly.
/// </summary>
- internal static RegexRunnerFactory Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) =>
+ internal static RegexRunnerFactory? Compile(string pattern, RegexCode code, RegexOptions options, bool hasTimeout) =>
new RegexLWCGCompiler().FactoryInstanceFromCode(pattern, code, options, hasTimeout);
- /// <summary>
- /// Keeps track of an operation that needs to be referenced in the backtrack-jump
- /// switch table, and that needs backtracking code to be emitted (if flags != 0)
- /// </summary>
- private sealed class BacktrackNote
- {
- internal int _codepos;
- internal int _flags;
- internal Label _label;
-
- public BacktrackNote(int flags, Label label, int codepos)
- {
- _codepos = codepos;
- _flags = flags;
- _label = label;
- }
- }
-
- /// <summary>
- /// Adds a backtrack note to the list of them, and returns the index of the new
- /// note (which is also the index for the jump used by the switch table)
- /// </summary>
- private int AddBacktrackNote(int flags, Label l, int codepos)
- {
- if (_notes == null || _notecount >= _notes.Length)
- {
- var newnotes = new BacktrackNote[_notes == null ? 16 : _notes.Length * 2];
- if (_notes != null)
- {
- Array.Copy(_notes, newnotes, _notecount);
- }
- _notes = newnotes;
- }
-
- _notes[_notecount] = new BacktrackNote(flags, l, codepos);
-
- return _notecount++;
- }
-
- /// <summary>
- /// Adds a backtrack note for the current operation; creates a new label for
- /// where the code will be, and returns the switch index.
- /// </summary>
- private int AddTrack() => AddTrack(RegexCode.Back);
-
- /// <summary>
- /// Adds a backtrack note for the current operation; creates a new label for
- /// where the code will be, and returns the switch index.
- /// </summary>
- private int AddTrack(int flags) => AddBacktrackNote(flags, DefineLabel(), _codepos);
-
- /// <summary>
- /// Adds a switchtable entry for the specified position (for the forward
- /// logic; does not cause backtracking logic to be generated)
- /// </summary>
- private int AddGoto(int destpos)
- {
- if (_goto![destpos] == -1)
- {
- _goto[destpos] = AddBacktrackNote(0, _labels![destpos], destpos);
- }
-
- return _goto[destpos];
- }
-
- /// <summary>
- /// Adds a note for backtracking code that only needs to be generated once;
- /// if it's already marked to be generated, returns the switch index
- /// for the unique piece of code.
- /// </summary>
- private int AddUniqueTrack(int i) => AddUniqueTrack(i, RegexCode.Back);
-
- /// <summary>
- /// Adds a note for backtracking code that only needs to be generated once;
- /// if it's already marked to be generated, returns the switch index
- /// for the unique piece of code.
- /// </summary>
- private int AddUniqueTrack(int i, int flags)
- {
- if (_uniquenote![i] == -1)
- {
- _uniquenote[i] = AddTrack(flags);
- }
-
- return _uniquenote[i];
- }
-
/// <summary>A macro for _ilg.DefineLabel</summary>
private Label DefineLabel() => _ilg!.DefineLabel();
/// <summary>A macro for _ilg.MarkLabel</summary>
private void MarkLabel(Label l) => _ilg!.MarkLabel(l);
- /// <summary>Returns the ith operand of the current operation.</summary>
- private int Operand(int i) => _codes![_codepos + i + 1];
-
- /// <summary>True if the current operation is marked for the leftward direction.</summary>
- private bool IsRightToLeft() => (_regexopcode & RegexCode.Rtl) != 0;
-
- /// <summary>True if the current operation is marked for case insensitive operation.</summary>
- private bool IsCaseInsensitive() => (_regexopcode & RegexCode.Ci) != 0;
-
- /// <summary>Returns the raw regex opcode (masking out Back and Rtl).</summary>
- private int Code() => _regexopcode & RegexCode.Mask;
-
/// <summary>A macro for _ilg.Emit(OpCodes.Ldstr, str).</summary>
protected void Ldstr(string str) => _ilg!.Emit(OpCodes.Ldstr, str);
/// <summary>A macro for _ilg.Emit(OpCodes.Ret).</summary>
protected void Ret() => _ilg!.Emit(OpCodes.Ret);
- /// <summary>A macro for _ilg.Emit(OpCodes.Newobj, constructor).</summary>
- protected void Newobj(ConstructorInfo constructor) => _ilg!.Emit(OpCodes.Newobj, constructor);
-
/// <summary>A macro for _ilg.Emit(OpCodes.Dup).</summary>
protected void Dup() => _ilg!.Emit(OpCodes.Dup);
/// <summary>A macro for _ilg.Emit(OpCodes.Add).</summary>
private void Add() => _ilg!.Emit(OpCodes.Add);
- /// <summary>A macro for _ilg.Emit(OpCodes.Add); a true flag can turn it into a Sub.</summary>
- private void Add(bool negate) => _ilg!.Emit(negate ? OpCodes.Sub : OpCodes.Add);
-
/// <summary>A macro for _ilg.Emit(OpCodes.Sub).</summary>
private void Sub() => _ilg!.Emit(OpCodes.Sub);
- /// <summary>A macro for _ilg.Emit(OpCodes.Sub) or _ilg.Emit(OpCodes.Add).</summary>
- private void Sub(bool negate) => _ilg!.Emit(negate ? OpCodes.Add : OpCodes.Sub);
-
- /// <summary>A macro for _ilg.Emit(OpCodes.Neg).</summary>
- private void Neg() => _ilg!.Emit(OpCodes.Neg);
-
/// <summary>A macro for _ilg.Emit(OpCodes.Mul).</summary>
private void Mul() => _ilg!.Emit(OpCodes.Mul);
protected void Ldthisfld(FieldInfo ft)
{
Ldthis();
- Ldfld(ft);
+ _ilg!.Emit(OpCodes.Ldfld, ft);
}
/// <summary>A macro for Ldthis(); Ldfld(); Stloc();</summary>
Stloc(lt);
}
- /// <summary>A macro for Ldthis(); Ldloc(); Stfld();</summary>
- private void Mvlocfld(LocalBuilder lt, FieldInfo ft)
- {
- Ldthis();
- Ldloc(lt);
- Stfld(ft);
- }
-
- /// <summary>A macro for _ilg.Emit(OpCodes.Ldfld).</summary>
- private void Ldfld(FieldInfo ft) => _ilg!.Emit(OpCodes.Ldfld, ft);
-
/// <summary>A macro for _ilg.Emit(OpCodes.Stfld).</summary>
protected void Stfld(FieldInfo ft) => _ilg!.Emit(OpCodes.Stfld, ft);
/// <summary>A macro for _ilg.Emit(OpCodes.Bge_Un) (long form).</summary>
private void BgeUnFar(Label l) => _ilg!.Emit(OpCodes.Bge_Un, l);
- /// <summary>A macro for _ilg.Emit(OpCodes.Bgt) (long form).</summary>
- private void BgtFar(Label l) => _ilg!.Emit(OpCodes.Bgt, l);
-
/// <summary>A macro for _ilg.Emit(OpCodes.Bne_Un) (long form).</summary>
private void BneFar(Label l) => _ilg!.Emit(OpCodes.Bne_Un, l);
/// <summary>A macro for _ilg.Emit(OpCodes.Beq) (long form).</summary>
private void BeqFar(Label l) => _ilg!.Emit(OpCodes.Beq, l);
- /// <summary>A macro for _ilg.Emit(OpCodes.Brfalse_S) (short jump).</summary>
- private void Brfalse(Label l) => _ilg!.Emit(OpCodes.Brfalse_S, l);
-
/// <summary>A macro for _ilg.Emit(OpCodes.Brtrue_S) (short jump).</summary>
private void Brtrue(Label l) => _ilg!.Emit(OpCodes.Brtrue_S, l);
/// <summary>A macro for _ilg.Emit(OpCodes.Bgt_S) (short jump).</summary>
private void Bgt(Label l) => _ilg!.Emit(OpCodes.Bgt_S, l);
- /// <summary>A macro for _ilg.Emit(OpCodes.Bgt_Un_S) (short jump).</summary>
- private void BgtUn(Label l) => _ilg!.Emit(OpCodes.Bgt_Un_S, l);
-
/// <summary>A macro for _ilg.Emit(OpCodes.Bne_Un_S) (short jump).</summary>
private void Bne(Label l) => _ilg!.Emit(OpCodes.Bne_Un_S, l);
/// <summary>Declares a local TextInfo.</summary>
private LocalBuilder? DeclareTextInfo() => _ilg!.DeclareLocal(typeof(TextInfo));
- /// <summary>Declares a local int[].</summary>
- private LocalBuilder DeclareInt32Array() => _ilg!.DeclareLocal(typeof(int[]));
-
/// <summary>Declares a local string.</summary>
private LocalBuilder DeclareString() => _ilg!.DeclareLocal(typeof(string));
}
}
- /// <summary>Loads the char to the right of the current position.</summary>
- private void Rightchar()
- {
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- Call(s_stringGetCharsMethod);
- }
-
- /// <summary>Loads the char to the right of the current position and advances the current position.</summary>
- private void Rightcharnext()
- {
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- Call(s_stringGetCharsMethod);
- Ldloc(_runtextposLocal!);
- Ldc(1);
- Add();
- Stloc(_runtextposLocal!);
- }
-
- /// <summary>Loads the char to the left of the current position.</summary>
- private void Leftchar()
- {
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- Ldc(1);
- Sub();
- Call(s_stringGetCharsMethod);
- }
-
- /// <summary>Loads the char to the left of the current position and advances (leftward).</summary>
- private void Leftcharnext()
- {
- Ldloc(_runtextposLocal!);
- Ldc(1);
- Sub();
- Stloc(_runtextposLocal!);
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- Call(s_stringGetCharsMethod);
- }
-
- /// <summary>Creates a backtrack note and pushes the switch index it on the tracking stack.</summary>
- private void Track()
- {
- ReadyPushTrack();
- Ldc(AddTrack());
- DoPush();
- }
-
- /// <summary>
- /// Pushes the current switch index on the tracking stack so the backtracking
- /// logic will be repeated again next time we backtrack here.
- /// </summary>
- private void Trackagain()
- {
- ReadyPushTrack();
- Ldc(_backpos);
- DoPush();
- }
-
- /// <summary>Saves the value of a local variable on the tracking stack.</summary>
- private void PushTrack(LocalBuilder lt)
- {
- ReadyPushTrack();
- Ldloc(lt);
- DoPush();
- }
-
- /// <summary>
- /// Creates a backtrack note for a piece of code that should only be generated once,
- /// and emits code that pushes the switch index on the backtracking stack.
- /// </summary>
- private void TrackUnique(int i)
- {
- ReadyPushTrack();
- Ldc(AddUniqueTrack(i));
- DoPush();
- }
-
- /// <summary>
- /// Creates a second-backtrack note for a piece of code that should only be
- /// generated once, and emits code that pushes the switch index on the
- /// backtracking stack.
- /// </summary>
- private void TrackUnique2(int i)
- {
- ReadyPushTrack();
- Ldc(AddUniqueTrack(i, RegexCode.Back2));
- DoPush();
- }
-
- /// <summary>Prologue to code that will push an element on the tracking stack.</summary>
- private void ReadyPushTrack()
- {
- Ldloc(_runtrackposLocal!);
- Ldc(1);
- Sub();
- Stloc(_runtrackposLocal!);
- Ldloc(_runtrackLocal!);
- Ldloc(_runtrackposLocal!);
- }
-
- /// <summary>Pops an element off the tracking stack (leave it on the operand stack).</summary>
- private void PopTrack()
- {
- Ldloc(_runtrackLocal!);
- Ldloc(_runtrackposLocal!);
- LdelemI4();
- using RentedLocalBuilder tmp = RentInt32Local();
- Stloc(tmp);
- Ldloc(_runtrackposLocal!);
- Ldc(1);
- Add();
- Stloc(_runtrackposLocal!);
- Ldloc(tmp);
- }
-
- /// <summary>Retrieves the top entry on the tracking stack without popping.</summary>
- private void TopTrack()
- {
- Ldloc(_runtrackLocal!);
- Ldloc(_runtrackposLocal!);
- LdelemI4();
- }
-
- /// <summary>Saves the value of a local variable on the grouping stack.</summary>
- private void PushStack(LocalBuilder lt)
- {
- ReadyPushStack();
- Ldloc(lt);
- DoPush();
- }
-
- /// <summary>Prologue to code that will replace the ith element on the grouping stack.</summary>
- internal void ReadyReplaceStack(int i)
- {
- Ldloc(_runstackLocal!);
- Ldloc(_runstackposLocal!);
- if (i != 0)
- {
- Ldc(i);
- Add();
- }
- }
-
- /// <summary>Prologue to code that will push an element on the grouping stack.</summary>
- private void ReadyPushStack()
- {
- Ldloc(_runstackposLocal!);
- Ldc(1);
- Sub();
- Stloc(_runstackposLocal!);
- Ldloc(_runstackLocal!);
- Ldloc(_runstackposLocal!);
- }
-
- /// <summary>Retrieves the top entry on the stack without popping.</summary>
- private void TopStack()
- {
- Ldloc(_runstackLocal!);
- Ldloc(_runstackposLocal!);
- LdelemI4();
- }
-
- /// <summary>Pops an element off the grouping stack (leave it on the operand stack).</summary>
- private void PopStack()
- {
- using RentedLocalBuilder elementLocal = RentInt32Local();
- Ldloc(_runstackLocal!);
- Ldloc(_runstackposLocal!);
- LdelemI4();
- Stloc(elementLocal);
- Ldloc(_runstackposLocal!);
- Ldc(1);
- Add();
- Stloc(_runstackposLocal!);
- Ldloc(elementLocal);
- }
-
- /// <summary>Pops 1 element off the grouping stack and discards it.</summary>
- private void PopDiscardStack() => PopDiscardStack(1);
-
- /// <summary>Pops i elements off the grouping stack and discards them.</summary>
- private void PopDiscardStack(int i)
- {
- Ldloc(_runstackposLocal!);
- Ldc(i);
- Add();
- Stloc(_runstackposLocal!);
- }
-
- /// <summary>Epilogue to code that will replace an element on a stack (use Ld* in between).</summary>
- private void DoReplace() => StelemI4();
-
- /// <summary>Epilogue to code that will push an element on a stack (use Ld* in between).</summary>
- private void DoPush() => StelemI4();
-
- /// <summary>Jump to the backtracking switch.</summary>
- private void Back() => BrFar(_backtrack);
-
- /// <summary>
- /// Branch to the MSIL corresponding to the regex code at i
- /// </summary>
- /// <remarks>
- /// A trick: since track and stack space is gobbled up unboundedly
- /// only as a result of branching backwards, this is where we check
- /// for sufficient space and trigger reallocations.
- ///
- /// If the "goto" is backwards, we generate code that checks
- /// available space against the amount of space that would be needed
- /// in the worst case by code that will only go forward; if there's
- /// not enough, we push the destination on the tracking stack, then
- /// we jump to the place where we invoke the allocator.
- ///
- /// Since forward gotos pose no threat, they just turn into a Br.
- /// </remarks>
- private void Goto(int i)
- {
- if (i < _codepos)
- {
- Label l1 = DefineLabel();
-
- // When going backwards, ensure enough space.
- Ldloc(_runtrackposLocal!);
- Ldc(_trackcount * 4);
- Ble(l1);
- Ldloc(_runstackposLocal!);
- Ldc(_trackcount * 3);
- BgtFar(_labels![i]);
- MarkLabel(l1);
- ReadyPushTrack();
- Ldc(AddGoto(i));
- DoPush();
- BrFar(_backtrack);
- }
- else
- {
- BrFar(_labels![i]);
- }
- }
-
- /// <summary>
- /// Returns the position of the next operation in the regex code, taking
- /// into account the different numbers of arguments taken by operations
- /// </summary>
- private int NextCodepos() => _codepos + RegexCode.OpcodeSize(_codes![_codepos]);
-
- /// <summary>The label for the next (forward) operation.</summary>
- private Label AdvanceLabel() => _labels![NextCodepos()];
-
- /// <summary>Goto the next (forward) operation.</summary>
- private void Advance() => BrFar(AdvanceLabel());
-
/// <summary>Sets the cached TextInfo local to CultureInfo.CurrentCulture.TextInfo.</summary>
private void InitLocalCultureInfo()
{
- Debug.Assert(_textInfoLocal != null);
+ Debug.Assert(_textInfo != null);
Call(s_cultureInfoGetCurrentCultureMethod);
Callvirt(s_cultureInfoGetTextInfoMethod);
- Stloc(_textInfoLocal);
+ Stloc(_textInfo);
}
- /// <summary>Whether ToLower operations should be performed with the invariant culture as opposed to the one in <see cref="_textInfoLocal"/>.</summary>
- private bool UseToLowerInvariant => _textInfoLocal == null || (_options & RegexOptions.CultureInvariant) != 0;
+ /// <summary>Whether ToLower operations should be performed with the invariant culture as opposed to the one in <see cref="_textInfo"/>.</summary>
+ private bool UseToLowerInvariant => _textInfo == null || (_options & RegexOptions.CultureInvariant) != 0;
/// <summary>Invokes either char.ToLowerInvariant(c) or _textInfo.ToLower(c).</summary>
private void CallToLower()
{
using RentedLocalBuilder currentCharLocal = RentInt32Local();
Stloc(currentCharLocal);
- Ldloc(_textInfoLocal!);
+ Ldloc(_textInfo!);
Ldloc(currentCharLocal);
Callvirt(s_textInfoToLowerMethod);
}
}
- /// <summary>
- /// Generates the first section of the MSIL. This section contains all
- /// the forward logic, and corresponds directly to the regex codes.
- /// In the absence of backtracking, this is all we would need.
- /// </summary>
- private void GenerateForwardSection()
- {
- _uniquenote = new int[Uniquecount];
- _labels = new Label[_codes!.Length];
- _goto = new int[_codes.Length];
-
- // initialize
-
- Array.Fill(_uniquenote, -1);
- for (int codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos]))
- {
- _goto[codepos] = -1;
- _labels[codepos] = DefineLabel();
- }
-
- // emit variable initializers
-
- Mvfldloc(s_runtextField, _runtextLocal!);
- Mvfldloc(s_runtextbegField, _runtextbegLocal!);
- Mvfldloc(s_runtextendField, _runtextendLocal!);
- Mvfldloc(s_runtextposField, _runtextposLocal!);
- Mvfldloc(s_runtrackField, _runtrackLocal!);
- Mvfldloc(s_runtrackposField, _runtrackposLocal!);
- Mvfldloc(s_runstackField, _runstackLocal!);
- Mvfldloc(s_runstackposField, _runstackposLocal!);
-
- _backpos = -1;
-
- for (int codepos = 0; codepos < _codes.Length; codepos += RegexCode.OpcodeSize(_codes[codepos]))
- {
- MarkLabel(_labels[codepos]);
- _codepos = codepos;
- _regexopcode = _codes[codepos];
- GenerateOneCode();
- }
- }
-
- /// <summary>
- /// Generates the middle section of the MSIL. This section contains the
- /// big switch jump that allows us to simulate a stack of addresses,
- /// and it also contains the calls that expand the tracking and the
- /// grouping stack when they get too full.
- /// </summary>
- private void GenerateMiddleSection()
- {
- using RentedLocalBuilder limitLocal = RentInt32Local();
- Label afterDoubleStack = DefineLabel();
- Label afterDoubleTrack = DefineLabel();
-
- // Backtrack:
- MarkLabel(_backtrack);
-
- // (Equivalent of EnsureStorage, but written to avoid unnecessary local spilling.)
-
- // int limitLocal = runtrackcount * 4;
- Ldthisfld(s_runtrackcountField);
- Ldc(4);
- Mul();
- Stloc(limitLocal);
-
- // if (runstackpos < limit)
- // {
- // this.runstackpos = runstackpos;
- // DoubleStack(); // might change runstackpos and runstack
- // runstackpos = this.runstackpos;
- // runstack = this.runstack;
- // }
- Ldloc(_runstackposLocal!);
- Ldloc(limitLocal);
- Bge(afterDoubleStack);
- Mvlocfld(_runstackposLocal!, s_runstackposField);
- Ldthis();
- Call(s_doubleStackMethod);
- Mvfldloc(s_runstackposField, _runstackposLocal!);
- Mvfldloc(s_runstackField, _runstackLocal!);
- MarkLabel(afterDoubleStack);
-
- // if (runtrackpos < limit)
- // {
- // this.runtrackpos = runtrackpos;
- // DoubleTrack(); // might change runtrackpos and runtrack
- // runtrackpos = this.runtrackpos;
- // runtrack = this.runtrack;
- // }
- Ldloc(_runtrackposLocal!);
- Ldloc(limitLocal);
- Bge(afterDoubleTrack);
- Mvlocfld(_runtrackposLocal!, s_runtrackposField);
- Ldthis();
- Call(s_doubleTrackMethod);
- Mvfldloc(s_runtrackposField, _runtrackposLocal!);
- Mvfldloc(s_runtrackField, _runtrackLocal!);
- MarkLabel(afterDoubleTrack);
-
- // runtrack[runtrackpos++]
- PopTrack();
-
- // Backtracking jump table
- var table = new Label[_notecount];
- for (int i = 0; i < _notecount; i++)
- {
- table[i] = _notes![i]._label;
- }
- Switch(table);
- }
-
- /// <summary>
- /// Generates the last section of the MSIL. This section contains all of
- /// the backtracking logic.
- /// </summary>
- private void GenerateBacktrackSection()
- {
- for (int i = 0; i < _notecount; i++)
- {
- BacktrackNote n = _notes![i];
- if (n._flags != 0)
- {
- MarkLabel(n._label);
- _codepos = n._codepos;
- _backpos = i;
- _regexopcode = _codes![n._codepos] | n._flags;
- GenerateOneCode();
- }
- }
- }
-
- /// <summary>
- /// Generates FindFirstChar.
- /// </summary>
- protected void GenerateFindFirstChar()
+ /// <summary>Generates the implementation for FindFirstChar.</summary>
+ protected void EmitFindFirstChar()
{
Debug.Assert(_code != null);
_int32LocalsPool?.Clear();
_readOnlySpanCharLocalsPool?.Clear();
- _runtextposLocal = DeclareInt32();
- _runtextendLocal = DeclareInt32();
- if (_code.RightToLeft)
- {
- _runtextbegLocal = DeclareInt32();
- }
- _runtextSpanLocal = DeclareReadOnlySpanChar();
- _textInfoLocal = null;
+ LocalBuilder runtextSpan = DeclareReadOnlySpanChar();
+ LocalBuilder runtextpos = DeclareInt32();
+ LocalBuilder runtextend = DeclareInt32();
+
+ _textInfo = null;
if ((_options & RegexOptions.CultureInvariant) == 0)
{
bool needsCulture = _code.FindOptimizations.FindMode switch
{
FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive or
- FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive or
FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
- FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or
- FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive => true,
+ FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive => true,
_ when _code.FindOptimizations.FixedDistanceSets is List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive),
if (needsCulture)
{
- _textInfoLocal = DeclareTextInfo();
+ _textInfo = DeclareTextInfo();
InitLocalCultureInfo();
}
}
// int runtextpos = this.runtextpos;
// int runtextend = this.runtextend;
// ReadOnlySpan<char> runtextSpan = this.runtext.AsSpan();
- Mvfldloc(s_runtextposField, _runtextposLocal);
- Mvfldloc(s_runtextendField, _runtextendLocal);
+ Mvfldloc(s_runtextposField, runtextpos);
+ Mvfldloc(s_runtextendField, runtextend);
Ldthisfld(s_runtextField);
Call(s_stringAsSpanMethod);
- Stloc(_runtextSpanLocal);
- if (_code.RightToLeft)
- {
- Mvfldloc(s_runtextbegField, _runtextbegLocal!);
- }
+ Stloc(runtextSpan);
// Generate length check. If the input isn't long enough to possibly match, fail quickly.
// It's rare for min required length to be 0, so we don't bother special-casing the check,
Debug.Assert(minRequiredLength >= 0);
Label returnFalse = DefineLabel();
Label finishedLengthCheck = DefineLabel();
- if (!_code.RightToLeft)
- {
- // if (runtextpos > runtextend - _code.Tree.MinRequiredLength)
- // {
- // this.runtextpos = runtextend;
- // return false;
- // }
- Ldloc(_runtextposLocal);
- Ldloc(_runtextendLocal);
- if (minRequiredLength > 0)
- {
- Ldc(minRequiredLength);
- Sub();
- }
- Ble(finishedLengthCheck);
- MarkLabel(returnFalse);
- Ldthis();
- Ldloc(_runtextendLocal);
- }
- else
+ // if (runtextpos > runtextend - _code.Tree.MinRequiredLength)
+ // {
+ // this.runtextpos = runtextend;
+ // return false;
+ // }
+ Ldloc(runtextpos);
+ Ldloc(runtextend);
+ if (minRequiredLength > 0)
{
- // if (runtextpos - _code.Tree.MinRequiredLength < runtextbeg)
- // {
- // this.runtextpos = runtextbeg;
- // return false;
- // }
- Ldloc(_runtextposLocal);
- if (minRequiredLength > 0)
- {
- Ldc(minRequiredLength);
- Sub();
- }
- Ldloc(_runtextbegLocal!);
- Bge(finishedLengthCheck);
-
- MarkLabel(returnFalse);
- Ldthis();
- Ldloc(_runtextbegLocal!);
+ Ldc(minRequiredLength);
+ Sub();
}
+ Ble(finishedLengthCheck);
+
+ MarkLabel(returnFalse);
+ Ldthis();
+ Ldloc(runtextend);
+
Stfld(s_runtextposField);
Ldc(0);
Ret();
}
// Either anchors weren't specified, or they don't completely root all matches to a specific location.
-
switch (_code.FindOptimizations.FindMode)
{
case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive:
Debug.Assert(!string.IsNullOrEmpty(_code.FindOptimizations.LeadingCaseSensitivePrefix));
- GenerateIndexOf_LeftToRight(_code.FindOptimizations.LeadingCaseSensitivePrefix);
- break;
-
- case FindNextStartingPositionMode.LeadingPrefix_RightToLeft_CaseSensitive:
- Debug.Assert(!string.IsNullOrEmpty(_code.FindOptimizations.LeadingCaseSensitivePrefix));
- GenerateIndexOf_RightToLeft(_code.FindOptimizations.LeadingCaseSensitivePrefix);
+ EmitIndexOf_LeftToRight(_code.FindOptimizations.LeadingCaseSensitivePrefix);
break;
case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive:
case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive:
case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive:
Debug.Assert(_code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
- GenerateFixedSet_LeftToRight();
- break;
-
- case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive:
- case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive:
- Debug.Assert(_code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
- GenerateFixedSet_RightToLeft();
+ EmitFixedSet_LeftToRight();
break;
default:
case RegexPrefixAnalyzer.Beginning:
{
Label l1 = DefineLabel();
- Ldloc(_runtextposLocal);
- if (!_code.RightToLeft)
- {
- Ldthisfld(s_runtextbegField);
- Ble(l1);
- Br(returnFalse);
- }
- else
- {
- Ldloc(_runtextbegLocal!);
- Ble(l1);
- Ldthis();
- Ldloc(_runtextbegLocal!);
- Stfld(s_runtextposField);
- }
+ Ldloc(runtextpos);
+ Ldthisfld(s_runtextbegField);
+ Ble(l1);
+ Br(returnFalse);
MarkLabel(l1);
}
Ldc(1);
case RegexPrefixAnalyzer.Start:
{
Label l1 = DefineLabel();
- Ldloc(_runtextposLocal);
+ Ldloc(runtextpos);
Ldthisfld(s_runtextstartField);
- if (!_code.RightToLeft)
- {
- Ble(l1);
- }
- else
- {
- Bge(l1);
- }
+ Ble(l1);
Br(returnFalse);
MarkLabel(l1);
}
case RegexPrefixAnalyzer.EndZ:
{
Label l1 = DefineLabel();
- if (!_code.RightToLeft)
- {
- Ldloc(_runtextposLocal);
- Ldloc(_runtextendLocal);
- Ldc(1);
- Sub();
- Bge(l1);
- Ldthis();
- Ldloc(_runtextendLocal);
- Ldc(1);
- Sub();
- Stfld(s_runtextposField);
- MarkLabel(l1);
- }
- else
- {
- Label l2 = DefineLabel();
- Ldloc(_runtextposLocal);
- Ldloc(_runtextendLocal);
- Ldc(1);
- Sub();
- Blt(l1);
- Ldloc(_runtextposLocal);
- Ldloc(_runtextendLocal);
- Beq(l2);
- Ldloca(_runtextSpanLocal);
- Ldloc(_runtextposLocal);
- Call(s_spanGetItemMethod);
- LdindU2();
- Ldc('\n');
- Beq(l2);
- MarkLabel(l1);
- BrFar(returnFalse);
- MarkLabel(l2);
- }
+ Ldloc(runtextpos);
+ Ldloc(runtextend);
+ Ldc(1);
+ Sub();
+ Bge(l1);
+ Ldthis();
+ Ldloc(runtextend);
+ Ldc(1);
+ Sub();
+ Stfld(s_runtextposField);
+ MarkLabel(l1);
}
Ldc(1);
Ret();
case RegexPrefixAnalyzer.End:
{
Label l1 = DefineLabel();
- Ldloc(_runtextposLocal);
- Ldloc(_runtextendLocal);
- if (!_code.RightToLeft)
- {
- Bge(l1);
- Ldthis();
- Ldloc(_runtextendLocal);
- Stfld(s_runtextposField);
- }
- else
- {
- Bge(l1);
- Br(returnFalse);
- }
+ Ldloc(runtextpos);
+ Ldloc(runtextend);
+ Bge(l1);
+ Ldthis();
+ Ldloc(runtextend);
+ Stfld(s_runtextposField);
MarkLabel(l1);
}
Ldc(1);
// the other anchors, which all skip all subsequent processing if found, with BOL we just use it
// to boost our position to the next line, and then continue normally with any prefix or char class searches.
- Debug.Assert(!_code.RightToLeft, "RightToLeft isn't implemented and should have been filtered out previously");
Label atBeginningOfLine = DefineLabel();
// if (runtextpos > runtextbeg...
- Ldloc(_runtextposLocal!);
+ Ldloc(runtextpos!);
Ldthisfld(s_runtextbegField);
Ble(atBeginningOfLine);
// ... && runtextSpan[runtextpos - 1] != '\n') { ... }
- Ldloca(_runtextSpanLocal);
- Ldloc(_runtextposLocal);
+ Ldloca(runtextSpan);
+ Ldloc(runtextpos);
Ldc(1);
Sub();
Call(s_spanGetItemMethod);
Beq(atBeginningOfLine);
// int tmp = runtextSpan.Slice(runtextpos).IndexOf('\n');
- Ldloca(_runtextSpanLocal);
- Ldloc(_runtextposLocal);
+ Ldloca(runtextSpan);
+ Ldloc(runtextpos);
Call(s_spanSliceIntMethod);
Ldc('\n');
Call(s_spanIndexOfChar);
Ldc(-1);
Beq(returnFalse);
Ldloc(newlinePos);
- Ldloc(_runtextposLocal);
+ Ldloc(runtextpos);
Add();
Ldc(1);
Add();
- Ldloc(_runtextendLocal);
+ Ldloc(runtextend);
Bgt(returnFalse);
// runtextpos = newlinePos + runtextpos + 1;
Ldloc(newlinePos);
- Ldloc(_runtextposLocal);
+ Ldloc(runtextpos);
Add();
Ldc(1);
Add();
- Stloc(_runtextposLocal);
+ Stloc(runtextpos);
}
MarkLabel(atBeginningOfLine);
return false;
}
- void GenerateIndexOf_LeftToRight(string prefix)
+ void EmitIndexOf_LeftToRight(string prefix)
{
using RentedLocalBuilder i = RentInt32Local();
// int i = runtextSpan.Slice(runtextpos, runtextend - runtextpos).IndexOf(prefix);
- Ldloca(_runtextSpanLocal);
- Ldloc(_runtextposLocal);
- Ldloc(_runtextendLocal);
- Ldloc(_runtextposLocal);
+ Ldloca(runtextSpan);
+ Ldloc(runtextpos);
+ Ldloc(runtextend);
+ Ldloc(runtextpos);
Sub();
Call(s_spanSliceIntIntMethod);
Ldstr(prefix);
// base.runtextpos = runtextpos + i;
// return true;
Ldthis();
- Ldloc(_runtextposLocal);
- Ldloc(i);
- Add();
- Stfld(s_runtextposField);
- Ldc(1);
- Ret();
- }
-
- void GenerateIndexOf_RightToLeft(string prefix)
- {
- using RentedLocalBuilder i = RentInt32Local();
-
- // int i = runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg).LastIndexOf(prefix);
- Ldloca(_runtextSpanLocal);
- Ldloc(_runtextbegLocal!);
- Ldloc(_runtextposLocal);
- Ldloc(_runtextbegLocal!);
- Sub();
- Call(s_spanSliceIntIntMethod);
- Ldstr(prefix);
- Call(s_stringAsSpanMethod);
- Call(s_spanLastIndexOfSpan);
- Stloc(i);
-
- // if (i < 0) goto ReturnFalse;
- Ldloc(i);
- Ldc(0);
- BltFar(returnFalse);
-
- // base.runtextpos = runtextbeg + i + LeadingCaseSensitivePrefix.Length;
- // return true;
- Ldthis();
- Ldloc(_runtextbegLocal!);
+ Ldloc(runtextpos);
Ldloc(i);
Add();
- Ldc(prefix.Length);
- Add();
Stfld(s_runtextposField);
Ldc(1);
Ret();
}
- void GenerateFixedSet_RightToLeft()
- {
- (char[]? Chars, string Set, int Distance, bool CaseInsensitive) set = _code.FindOptimizations.FixedDistanceSets![0];
- Debug.Assert(set.Distance == 0);
-
- using RentedLocalBuilder i = RentInt32Local();
-
- if (set.Chars is { Length: 1 } && !set.CaseInsensitive)
- {
- // int i = runtextSpan.Slice(runtextbeg, runtextpos - runtextbeg).LastIndexOf(set.Chars[0]);
- Ldloca(_runtextSpanLocal);
- Ldloc(_runtextbegLocal!);
- Ldloc(_runtextposLocal);
- Ldloc(_runtextbegLocal!);
- Sub();
- Call(s_spanSliceIntIntMethod);
- Ldc(set.Chars[0]);
- Call(s_spanLastIndexOfChar);
- Stloc(i);
-
- // if (i < 0) goto ReturnFalse;
- Ldloc(i);
- Ldc(0);
- BltFar(returnFalse);
-
- // base.runtextpos = runtextbeg + i + 1;
- // return true;
- Ldthis();
- Ldloc(_runtextbegLocal!);
- Ldloc(i);
- Add();
- Ldc(1);
- Add();
- Stfld(s_runtextposField);
- Ldc(1);
- Ret();
- }
- else
- {
- Label condition = DefineLabel();
- Label increment = DefineLabel();
- Label body = DefineLabel();
-
- // for (int i = runtextpos - 1; ...
- Ldloc(_runtextposLocal);
- Ldc(1);
- Sub();
- Stloc(i);
- BrFar(condition);
-
- // if (MatchCharClass(runtextSpan[i], set))
- MarkLabel(body);
- Ldloca(_runtextSpanLocal);
- Ldloc(i);
- Call(s_spanGetItemMethod);
- LdindU2();
- EmitMatchCharacterClass(set.Set, set.CaseInsensitive);
- Brfalse(increment);
-
- // base.runtextpos = i + 1;
- // return true;
- Ldthis();
- Ldloc(i);
- Ldc(1);
- Add();
- Stfld(s_runtextposField);
- Ldc(1);
- Ret();
-
- // for (...; ...; i--)
- MarkLabel(increment);
- Ldloc(i);
- Ldc(1);
- Sub();
- Stloc(i);
-
- // for (...; i >= runtextbeg; ...)
- MarkLabel(condition);
- Ldloc(i);
- Ldloc(_runtextbegLocal!);
- BgeFar(body);
-
- BrFar(returnFalse);
- }
- }
-
- void GenerateFixedSet_LeftToRight()
+ void EmitFixedSet_LeftToRight()
{
List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? sets = _code.FindOptimizations.FixedDistanceSets;
(char[]? Chars, string Set, int Distance, bool CaseInsensitive) primarySet = sets![0];
using RentedLocalBuilder textSpanLocal = RentReadOnlySpanCharLocal();
// ReadOnlySpan<char> span = runtextSpan.Slice(runtextpos, runtextend - runtextpos);
- Ldloca(_runtextSpanLocal);
- Ldloc(_runtextposLocal);
- Ldloc(_runtextendLocal);
- Ldloc(_runtextposLocal);
+ Ldloca(runtextSpan);
+ Ldloc(runtextpos);
+ Ldloc(runtextend);
+ Ldloc(runtextpos);
Sub();
Call(s_spanSliceIntIntMethod);
Stloc(textSpanLocal);
// this.runtextpos = runtextpos + i;
// return true;
Ldthis();
- Ldloc(_runtextposLocal);
+ Ldloc(runtextpos);
Ldloc(iLocal);
Add();
Stfld(s_runtextposField);
}
}
- private bool TryGenerateSimplifiedGo(RegexNode node)
+ /// <summary>Generates the implementation for Go.</summary>
+ protected void EmitGo()
{
+ // In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled
+ // version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via
+ // RegexWriter; the interpreter would then sit in a loop processing those opcodes, and the RegexCompiler iterated through the
+ // opcodes generating code for each equivalent to what the interpreter would do albeit with some decisions made at compile-time
+ // rather than at run-time. This approach, however, led to complicated code that wasn't pay-for-play (e.g. a big backtracking
+ // jump table that all compilations went through even if there was no backtracking), that didn't factor in the shape of the
+ // tree (e.g. it's difficult to add optimizations based on interactions between nodes in the graph), and that didn't read well
+ // when decompiled from IL to C# or when directly emitted as C# as part of a source generator.
+ //
+ // This implementation is instead based on directly walking the RegexNode tree and outputting code for each node in the graph.
+ // A dedicated function for each kind of RegexNode emits the code necessary to handle that node's processing, including recursively
+ // calling the relevant function for any of its children nodes. Backtracking is handled not via a giant jump table, but instead
+ // by emitting direct jumps to each backtracking construct. This is achieved by having all match failures jump to a "done"
+ // label that can be changed by a previous emitter, e.g. before EmitLoop returns, it ensures that "doneLabel" is set to the
+ // label that code should jump back to when backtracking. That way, a subsequent EmitXx function doesn't need to know exactly
+ // where to jump: it simply always jumps to "doneLabel" on match failure, and "doneLabel" is always configured to point to
+ // the right location. In an expression without backtracking, or before any backtracking constructs have been encountered,
+ // "doneLabel" is simply the final return location from the Go method that will undo any captures and exit, signaling to
+ // the calling scan loop that nothing was matched.
+
+ Debug.Assert(_code != null);
+ _int32LocalsPool?.Clear();
+ _readOnlySpanCharLocalsPool?.Clear();
+
+ // Get the root Capture node of the tree.
+ RegexNode node = _code.Tree.Root;
Debug.Assert(node.Type == RegexNode.Capture, "Every generated tree should begin with a capture node");
Debug.Assert(node.ChildCount() == 1, "Capture nodes should have one child");
- // RightToLeft is rare and not worth adding a lot of custom code to handle in this path.
- if ((node.Options & RegexOptions.RightToLeft) != 0)
- {
- return false;
- }
-
// Skip the Capture node. We handle the implicit root capture specially.
node = node.Child(0);
- if (!node.SupportsSimplifiedCodeGenerationImplementation())
- {
- return false;
- }
-
- // We've determined that the RegexNode can be handled with this optimized path. Generate the code.
-#if DEBUG
- if ((_options & RegexOptions.Debug) != 0)
- {
- Debug.WriteLine("Using optimized non-backtracking code gen.");
- }
-#endif
// In some limited cases, FindFirstChar will only return true if it successfully matched the whole thing.
// This is the case, in particular, for strings. We can special case these to do essentially nothing
// in Go other than emit the capture.
- if (!IsCaseInsensitive(node)) // FindFirstChar may not be 100% accurate on casing in all cultures
+ if (!IsCaseInsensitive(node)) // FindFirstChar may yield false positives on these in some cultures when case-insensitive
{
switch (node.Type)
{
Add();
Stfld(s_runtextposField);
Ret();
- return true;
+ return;
}
}
- // Declare some locals.
+ // Initialize the main locals used throughout the implementation.
LocalBuilder runtextLocal = DeclareString();
LocalBuilder originalruntextposLocal = DeclareInt32();
LocalBuilder runtextposLocal = DeclareInt32();
Label originalDoneLabel = doneLabel;
if (_hasTimeout)
{
- _loopTimeoutCounterLocal = DeclareInt32();
+ _loopTimeoutCounter = DeclareInt32();
}
// CultureInfo culture = CultureInfo.CurrentCulture; // only if the whole expression or any subportion is ignoring case, and we're not using invariant
// return;
Ret();
- // Generated code successfully with non-backtracking implementation.
- return true;
+ // Generated code successfully.
+ return;
static bool IsCaseInsensitive(RegexNode node) => (node.Options & RegexOptions.IgnoreCase) != 0;
Label matchLabel = DefineLabel();
// Save off runtextpos. We'll need to reset this each time a branch fails.
+ // startingRunTextPos = runtextpos;
LocalBuilder startingRunTextPos = DeclareInt32();
Ldloc(runtextposLocal);
Stloc(startingRunTextPos);
// construct is responsible for unwinding back to its starting crawl position. If
// it eventually ends up failing, that failure will result in jumping to the next branch
// of the alternation, which will again dutifully unwind the remaining captures until
- // what they were at the start of the alternation.
+ // what they were at the start of the alternation. Of course, if there are no captures
+ // anywhere in the regex, we don't have to do any of that.
LocalBuilder? startingCrawlpos = null;
- if ((node.Options & RegexNode.HasCapturesFlag) != 0 || !isAtomic)
+ if (expressionHasCaptures && ((node.Options & RegexNode.HasCapturesFlag) != 0 || !isAtomic))
{
+ // startingCrawlpos = base.Crawlpos();
startingCrawlpos = DeclareInt32();
Ldthis();
Call(s_crawlposMethod);
// still points to the nextBranch, which similarly is where we'll want to jump to.
if (!isAtomic)
{
+ // if (runstackpos + 3 >= base.runstack.Length) Array.Resize(ref base.runstack, base.runstack.Length * 2);
+ // base.runstack[runstackpos++] = i;
+ // base.runstack[runstackpos++] = startingCrawlpos;
+ // base.runstack[runstackpos++] = startingRunTextPos;
EmitRunstackResizeIfNeeded(3);
EmitRunstackPush(() => Ldc(i));
- EmitRunstackPush(() => Ldloc(startingCrawlpos!));
+ if (startingCrawlpos is not null)
+ {
+ EmitRunstackPush(() => Ldloc(startingCrawlpos));
+ }
EmitRunstackPush(() => Ldloc(startingRunTextPos));
}
labelMap[i] = doneLabel;
// Before jumping to the end, we need to zero out textSpanPos, so that no
// matter what the value is after the branch, whatever follows the alternate
// will see the same textSpanPos.
+ // runtextpos += textSpanPos;
+ // textSpanPos = 0;
+ // goto matchLabel;
TransferTextSpanPosToRunTextPos();
BrFar(matchLabel);
// needs to be reset, uncapturing it.
if (!isLastBranch)
{
+ // NextBranch:
+ // runtextpos = startingRunTextPos;
+ // textSpan = runtext.AsSpan(runtextpos, runtextend - runtextpos);
+ // while (base.Crawlpos() > startingCrawlpos) base.Uncapture();
MarkLabel(nextBranch);
Ldloc(startingRunTextPos);
Stloc(runtextposLocal);
// "doneLabel" to the label for this section. Thus, we only need to emit it if
// something can backtrack to us, which can't happen if we're inside of an atomic
// node. Thus, emit the backtracking section only if we're non-atomic.
- if (!isAtomic)
+ if (isAtomic)
+ {
+ doneLabel = originalDoneLabel;
+ }
+ else
{
doneLabel = backtrackLabel;
MarkLabel(backtrackLabel);
// switch (base.runstack[--runstackpos]) { ... } // branch number
EmitRunstackPop();
Stloc(startingRunTextPos);
- EmitRunstackPop();
- Stloc(startingCrawlpos!);
+ if (startingCrawlpos is not null)
+ {
+ EmitRunstackPop();
+ Stloc(startingCrawlpos);
+ }
EmitRunstackPop();
Switch(labelMap);
}
// Emits the code for an if(backreference)-then-else conditional.
void EmitBackreferenceConditional(RegexNode node)
{
+ bool isAtomic = node.IsAtomicByParent();
+
// We're branching in a complicated fashion. Make sure textSpanPos is 0.
TransferTextSpanPosToRunTextPos();
Label postIfDoneLabel = doneLabel;
if (postIfDoneLabel != originalDoneLabel)
{
+ // resumeAt = 0;
Ldc(0);
Stloc(resumeAt);
}
if (postIfDoneLabel != originalDoneLabel || hasNo)
{
+ // goto endRef;
BrFar(endRef);
}
postElseDoneLabel = doneLabel;
if (postElseDoneLabel != originalDoneLabel)
{
+ // resumeAt = 1;
Ldc(1);
Stloc(resumeAt);
}
// that will cause the backtracking to immediately pass through this node.
if (postIfDoneLabel != originalDoneLabel)
{
+ // resumeAt = 2;
Ldc(2);
Stloc(resumeAt);
}
}
- // If either the yes branch or the no branch contained backtracking, subsequent expressions
- // might try to backtrack to here, so output a backtracking map based on resumeAt.
- if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel)
+ if (isAtomic)
+ {
+ doneLabel = originalDoneLabel;
+ }
+ else
{
- // Skip the backtracking section
- Br(endRef);
+ // If either the yes branch or the no branch contained backtracking, subsequent expressions
+ // might try to backtrack to here, so output a backtracking map based on resumeAt.
+ if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel)
+ {
+ // Skip the backtracking section
+ // goto endRef;
+ Br(endRef);
- Label backtrack = DefineLabel();
- doneLabel = backtrack;
- MarkLabel(backtrack);
+ Label backtrack = DefineLabel();
+ doneLabel = backtrack;
+ MarkLabel(backtrack);
- // resumeAt = base.runstack[--runstackpos];
- EmitRunstackPop();
- Stloc(resumeAt);
+ // resumeAt = base.runstack[--runstackpos];
+ EmitRunstackPop();
+ Stloc(resumeAt);
- if (postIfDoneLabel != originalDoneLabel)
- {
- // if (resumeAt == 0) goto postIfDoneLabel;
- Ldloc(resumeAt);
- Ldc(0);
- BeqFar(postIfDoneLabel);
- }
+ if (postIfDoneLabel != originalDoneLabel)
+ {
+ // if (resumeAt == 0) goto postIfDoneLabel;
+ Ldloc(resumeAt);
+ Ldc(0);
+ BeqFar(postIfDoneLabel);
+ }
- if (postElseDoneLabel != originalDoneLabel)
- {
- // if (resumeAt == 1) goto postElseDoneLabel;
- Ldloc(resumeAt);
- Ldc(1);
- BeqFar(postElseDoneLabel);
- }
+ if (postElseDoneLabel != originalDoneLabel)
+ {
+ // if (resumeAt == 1) goto postElseDoneLabel;
+ Ldloc(resumeAt);
+ Ldc(1);
+ BeqFar(postElseDoneLabel);
+ }
- // goto originalDoneLabel;
- BrFar(originalDoneLabel);
+ // goto originalDoneLabel;
+ BrFar(originalDoneLabel);
+ }
}
if (postIfDoneLabel != originalDoneLabel || hasNo)
{
MarkLabel(endRef);
- if (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel)
+ if (!isAtomic && (postIfDoneLabel != originalDoneLabel || postElseDoneLabel != originalDoneLabel))
{
+ // if (runstackpos + 1 >= base.runstack.Length) Array.Resize(ref base.runstack, base.runstack.Length * 2);
+ // base.runstack[runstackpos++] = resumeAt;
EmitRunstackResizeIfNeeded(1);
EmitRunstackPush(() => Ldloc(resumeAt));
}
// Emits the code for an if(expression)-then-else conditional.
void EmitExpressionConditional(RegexNode node)
{
+ bool isAtomic = node.IsAtomicByParent();
+
// We're branching in a complicated fashion. Make sure textSpanPos is 0.
TransferTextSpanPosToRunTextPos();
}
Label postConditionalDoneLabel = doneLabel;
- LocalBuilder resumeAt = DeclareInt32();
+ LocalBuilder? resumeAt = !isAtomic ? DeclareInt32() : null;
// If we get to this point of the code, the conditional successfully matched, so run the "yes" branch.
// Since the "yes" branch may have a different execution path than the "no" branch or the lack of
EmitNode(yesBranch);
TransferTextSpanPosToRunTextPos(); // ensure all subsequent code sees the same textSpanPos value by setting it to 0
Label postYesDoneLabel = doneLabel;
- if (postYesDoneLabel != originalDoneLabel)
+ if (resumeAt is not null && postYesDoneLabel != originalDoneLabel)
{
+ // resumeAt = 0;
Ldc(0);
Stloc(resumeAt);
}
if (postYesDoneLabel != originalDoneLabel || noBranch is not null)
{
+ // goto end;
BrFar(end);
}
MarkLabel(no);
if (startingCrawlPos is not null)
{
+ // while (base.Crawlpos() > startingCrawlPos) base.Uncapture();
EmitUncaptureUntil(startingCrawlPos);
}
postNoDoneLabel = doneLabel;
if (postNoDoneLabel != originalDoneLabel)
{
+ // goto end;
BrFar(end);
}
}
// There's only a yes branch. If it's going to cause us to output a backtracking
// label but code may not end up taking the yes branch path, we need to emit a resumeAt
// that will cause the backtracking to immediately pass through this node.
- if (postYesDoneLabel != originalDoneLabel)
+ if (resumeAt is not null && postYesDoneLabel != originalDoneLabel)
{
+ // resumeAt = 2;
Ldc(2);
Stloc(resumeAt);
}
}
- if (postYesDoneLabel != postConditionalDoneLabel || postNoDoneLabel != postConditionalDoneLabel)
+ if (isAtomic)
{
- // Skip the backtracking section.
- BrFar(end);
+ doneLabel = originalDoneLabel;
+ }
+ else
+ {
+ Debug.Assert(resumeAt is not null);
+ if (postYesDoneLabel != postConditionalDoneLabel || postNoDoneLabel != postConditionalDoneLabel)
+ {
+ // Skip the backtracking section.
+ BrFar(end);
- Label backtrack = DefineLabel();
- doneLabel = backtrack;
- MarkLabel(backtrack);
+ Label backtrack = DefineLabel();
+ doneLabel = backtrack;
+ MarkLabel(backtrack);
- if (postYesDoneLabel != postConditionalDoneLabel)
- {
- Ldloc(resumeAt);
- Ldc(0);
- BeqFar(postYesDoneLabel);
+ if (postYesDoneLabel != postConditionalDoneLabel)
+ {
+ // if (resumeAt == 0) goto postYesDoneLabel;
+ Ldloc(resumeAt);
+ Ldc(0);
+ BeqFar(postYesDoneLabel);
+ }
+
+ if (postNoDoneLabel != postConditionalDoneLabel && postNoDoneLabel != originalDoneLabel)
+ {
+ // if (resumeAt == 1) goto postNoDoneLabel;
+ Ldloc(resumeAt);
+ Ldc(1);
+ BeqFar(postNoDoneLabel);
+ }
+
+ // goto postConditionalDoneLabel;
+ BrFar(postConditionalDoneLabel);
}
- if (postNoDoneLabel != postConditionalDoneLabel && postNoDoneLabel != originalDoneLabel)
+ if (postYesDoneLabel != originalDoneLabel || postNoDoneLabel != originalDoneLabel)
{
- Ldloc(resumeAt);
- Ldc(1);
- BeqFar(postNoDoneLabel);
+ // if (runstackpos + 1 >= base.runstack.Length) Array.Resize(ref base.runstack, base.runstack.Length * 2);
+ // base.runstack[runstackpos++] = resumeAt;
+ EmitRunstackResizeIfNeeded(1);
+ EmitRunstackPush(() => Ldloc(resumeAt));
}
-
- BrFar(postConditionalDoneLabel);
- }
-
- if (postYesDoneLabel != originalDoneLabel || postNoDoneLabel != originalDoneLabel)
- {
- EmitRunstackResizeIfNeeded(1);
- EmitRunstackPush(() => Ldloc(resumeAt));
}
MarkLabel(end);
Debug.Assert(node.Type == RegexNode.Capture);
int capnum = RegexParser.MapCaptureNumber(node.M, _code!.Caps);
int uncapnum = RegexParser.MapCaptureNumber(node.N, _code.Caps);
+ bool isAtomic = node.IsAtomicByParent();
// runtextpos += textSpanPos;
// textSpan = textSpan.Slice(textSpanPos);
Call(s_transferCaptureMethod);
}
- if (childBacktracks || node.IsInLoop())
+ if (!isAtomic && (childBacktracks || node.IsInLoop()))
{
+ // if (runstackpos + 1 >= base.runstack.Length) Array.Resize(ref base.runstack, base.runstack.Length * 2);
+ // base.runstack[runstackpos++] = startingRunTextPos;
EmitRunstackResizeIfNeeded(1);
EmitRunstackPush(() => Ldloc(startingRunTextPos));
doneLabel = backtrack;
MarkLabel(end);
}
+ else
+ {
+ doneLabel = originalDoneLabel;
+ }
}
// Emits code to unwind the capture stack until the crawl position specified in the provided local.
Label originalDoneLabel = doneLabel;
// Save off runtextpos. We'll need to reset this upon successful completion of the lookahead.
+ // startingRunTextPos = runtextpos;
LocalBuilder startingRunTextPos = DeclareInt32();
Ldloc(runtextposLocal);
Stloc(startingRunTextPos);
// After the child completes successfully, reset the text positions.
// Do not reset captures, which persist beyond the lookahead.
+ // runtextpos = startingRunTextPos;
+ // textSpan = runtext.AsSpan(runtextpos, runtextend - runtextpos);
Ldloc(startingRunTextPos);
Stloc(runtextposLocal);
LoadTextSpanLocal();
Label originalDoneLabel = doneLabel;
// Save off runtextpos. We'll need to reset this upon successful completion of the lookahead.
+ // startingRunTextPos = runtextpos;
LocalBuilder startingRunTextPos = DeclareInt32();
Ldloc(runtextposLocal);
Stloc(startingRunTextPos);
// If the generated code ends up here, it matched the lookahead, which actually
// means failure for a _negative_ lookahead, so we need to jump to the original done.
+ // goto originalDoneLabel;
BrFar(originalDoneLabel);
// Failures (success for a negative lookahead) jump here.
}
// After the child completes in failure (success for negative lookahead), reset the text positions.
+ // runtextpos = startingRunTextPos;
Ldloc(startingRunTextPos);
Stloc(runtextposLocal);
LoadTextSpanLocal();
// Emits code for a concatenation
void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChecksIfRequired)
{
+ // Emit the code for each child one after the other.
int childCount = node.ChildCount();
for (int i = 0; i < childCount; i++)
{
+ // If we can find a subsequence of fixed-length children, we can emit a length check once for that sequence
+ // and then skip the individual length checks for each.
if (emitLengthChecksIfRequired && node.TryGetJoinableLengthCheckChildRange(i, out int requiredLength, out int exclusiveEnd))
{
EmitSpanLengthCheck(requiredLength);
// Track the current runtextpos. Each time we backtrack, we'll reset to the stored position, which
// is also incremented each time we match another character in the loop.
+ // int startingRunTextPos = runtextpos;
LocalBuilder startingRunTextPos = DeclareInt32();
Ldloc(runtextposLocal);
Stloc(startingRunTextPos);
// Skip the backtracking section for the initial subsequent matching. We've already matched the
// minimum number of iterations, which means we can successfully match with zero additional iterations.
+ // goto endLoopLabel;
Label endLoopLabel = DefineLabel();
BrFar(endLoopLabel);
// are before this node, in which case this is wasted effort, but still functionally correct.
if (crawlPos is not null)
{
+ // while (base.Crawlpos() > crawlPos) base.Uncapture();
EmitUncaptureUntil(crawlPos);
}
int minIterations = node.M;
int maxIterations = node.N;
Label originalDoneLabel = doneLabel;
+ bool isAtomic = node.IsAtomicByParent();
// If this is actually an atomic lazy loop, we need to output just the minimum number of iterations,
// as nothing will backtrack into the lazy loop to get it to progress further.
- if (node.IsAtomicByParent())
+ if (isAtomic)
{
switch (minIterations)
{
MarkLabel(endLoop);
- // Store the capture's state and skip the backtracking section
- EmitRunstackResizeIfNeeded(3);
- EmitRunstackPush(() => Ldloc(startingRunTextPos));
- EmitRunstackPush(() => Ldloc(iterationCount));
- EmitRunstackPush(() => Ldloc(sawEmpty));
- Label skipBacktrack = DefineLabel();
- BrFar(skipBacktrack);
+ if (!isAtomic)
+ {
+ // Store the capture's state and skip the backtracking section
+ EmitRunstackResizeIfNeeded(3);
+ EmitRunstackPush(() => Ldloc(startingRunTextPos));
+ EmitRunstackPush(() => Ldloc(iterationCount));
+ EmitRunstackPush(() => Ldloc(sawEmpty));
+ Label skipBacktrack = DefineLabel();
+ BrFar(skipBacktrack);
- // Emit a backtracking section that restores the capture's state and then jumps to the previous done label
- Label backtrack = DefineLabel();
- MarkLabel(backtrack);
+ // Emit a backtracking section that restores the capture's state and then jumps to the previous done label
+ Label backtrack = DefineLabel();
+ MarkLabel(backtrack);
- // sawEmpty = base.runstack[--runstackpos];
- // iterationCount = base.runstack[--runstackpos];
- // startingRunTextPos = base.runstack[--runstackpos];
- EmitRunstackPop();
- Stloc(sawEmpty);
- EmitRunstackPop();
- Stloc(iterationCount);
- EmitRunstackPop();
- Stloc(startingRunTextPos);
+ // sawEmpty = base.runstack[--runstackpos];
+ // iterationCount = base.runstack[--runstackpos];
+ // startingRunTextPos = base.runstack[--runstackpos];
+ EmitRunstackPop();
+ Stloc(sawEmpty);
+ EmitRunstackPop();
+ Stloc(iterationCount);
+ EmitRunstackPop();
+ Stloc(startingRunTextPos);
- if (maxIterations == int.MaxValue)
- {
- // if (sawEmpty != 0) goto doneLabel;
- Ldloc(sawEmpty);
- Ldc(0);
- BneFar(doneLabel);
- }
- else
- {
- // if (iterationCount >= maxIterations || sawEmpty != 0) goto doneLabel;
- Ldloc(iterationCount);
- Ldc(maxIterations);
- BgeFar(doneLabel);
- Ldloc(sawEmpty);
- Ldc(0);
- BneFar(doneLabel);
- }
+ if (maxIterations == int.MaxValue)
+ {
+ // if (sawEmpty != 0) goto doneLabel;
+ Ldloc(sawEmpty);
+ Ldc(0);
+ BneFar(doneLabel);
+ }
+ else
+ {
+ // if (iterationCount >= maxIterations || sawEmpty != 0) goto doneLabel;
+ Ldloc(iterationCount);
+ Ldc(maxIterations);
+ BgeFar(doneLabel);
+ Ldloc(sawEmpty);
+ Ldc(0);
+ BneFar(doneLabel);
+ }
- // goto body;
- BrFar(body);
+ // goto body;
+ BrFar(body);
- doneLabel = backtrack;
- MarkLabel(skipBacktrack);
+ doneLabel = backtrack;
+ MarkLabel(skipBacktrack);
+ }
}
// Emits the code to handle a loop (repeater) with a fixed number of iterations.
Debug.Assert(node.N >= node.M, $"Unexpected M={node.M}, N={node.N}");
int minIterations = node.M;
int maxIterations = node.N;
+ bool isAtomic = node.IsAtomicByParent();
// We might loop any number of times. In order to ensure this loop and subsequent code sees textSpanPos
// the same regardless, we always need it to contain the same value, and the easiest such value is 0.
// int poppedCrawlPos = base.runstack[--runstackpos];
// while (base.Crawlpos() > poppedCrawlPos) base.Uncapture();
using RentedLocalBuilder poppedCrawlPos = RentInt32Local();
- EmitRunstackPop();
- Stloc(poppedCrawlPos);
- EmitUncaptureUntil(poppedCrawlPos);
- }
- LoadTextSpanLocal();
-
- if (minIterations > 0)
- {
- // if (iterationCount == 0) goto originalDoneLabel;
- Ldloc(iterationCount);
- Ldc(0);
- BeqFar(originalDoneLabel);
-
- // if (iterationCount < minIterations) goto doneLabel/originalDoneLabel;
- Ldloc(iterationCount);
- Ldc(minIterations);
- BltFar(childBacktracks ? doneLabel : originalDoneLabel);
- }
-
- if (childBacktracks)
- {
- // goto endLoop;
- BrFar(endLoop);
-
- // Backtrack:
- Label backtrack = DefineLabel();
- MarkLabel(backtrack);
-
- // if (iterationCount == 0) goto originalDoneLabel;
- Ldloc(iterationCount);
- Ldc(0);
- BeqFar(originalDoneLabel);
-
- // goto doneLabel;
- BrFar(doneLabel);
-
- doneLabel = backtrack;
- }
-
- MarkLabel(endLoop);
-
- if (node.IsInLoop())
- {
- // Store the capture's state
- EmitRunstackResizeIfNeeded(3);
- EmitRunstackPush(() => Ldloc(startingRunTextPos));
- EmitRunstackPush(() => Ldloc(iterationCount));
-
- // Skip past the backtracking section
- // goto end;
- Label end = DefineLabel();
- BrFar(end);
-
- // Emit a backtracking section that restores the capture's state and then jumps to the previous done label
- Label backtrack = DefineLabel();
- MarkLabel(backtrack);
-
- // iterationCount = base.runstack[--runstack];
- // startingRunTextPos = base.runstack[--runstack];
- EmitRunstackPop();
- Stloc(iterationCount);
- EmitRunstackPop();
- Stloc(startingRunTextPos);
-
- // goto doneLabel;
- BrFar(doneLabel);
-
- doneLabel = backtrack;
- MarkLabel(end);
- }
- }
-
- void EmitRunstackResizeIfNeeded(int count) // Emits IL that doubles base.runstack when the next 'count' pushes starting at runstackpos would not fit.
- {
- Debug.Assert(count >= 1);
-
- // Emitted logic, expressed as C#:
- // if (runstackpos >= base.runstack!.Length - (count - 1))
- // {
- //     Array.Resize(ref base.runstack, base.runstack.Length * 2);
- // }
-
- Label skipResize = DefineLabel();
-
- Ldloc(runstackpos); // left operand of the comparison: runstackpos
- Ldthisfld(s_runstackField);
- Ldlen(); // right operand: base.runstack.Length
- if (count > 1) // for count == 1 the subtraction would be "- 0", so it is not emitted
- {
- Ldc(count - 1);
- Sub(); // right operand becomes Length - (count - 1)
- }
- Blt(skipResize); // runstackpos < limit: enough room, jump over the resize
-
- Ldthis();
- _ilg!.Emit(OpCodes.Ldflda, s_runstackField); // address of the runstack field, passed as the 'ref' argument
- Ldthisfld(s_runstackField);
- Ldlen();
- Ldc(2);
- Mul(); // requested new size: current length * 2
- Call(s_arrayResize);
-
- MarkLabel(skipResize);
- }
-
- void EmitRunstackPush(Action load) // Emits IL that stores the value produced by 'load' at runstack[runstackpos], then increments runstackpos.
- {
- // Emitted logic: base.runstack[runstackpos] = load();
- Ldthisfld(s_runstackField); // array reference
- Ldloc(runstackpos); // element index
- load(); // caller-supplied callback emits the IL that pushes the value to store
- StelemI4();
-
- // Emitted logic: runstackpos++;
- Ldloc(runstackpos);
- Ldc(1);
- Add();
- Stloc(runstackpos);
- }
-
- void EmitRunstackPop() // Emits IL that decrements runstackpos and leaves runstack[runstackpos] on the evaluation stack for the caller to consume.
- {
- // Emitted logic: ... = base.runstack[--runstackpos];
- Ldthisfld(s_runstackField); // array reference
- Ldloc(runstackpos);
- Ldc(1);
- Sub();
- Stloc(runstackpos); // write the decremented position back first...
- Ldloc(runstackpos); // ...then reload it as the element index
- LdelemI4(); // loaded element stays on the evaluation stack
- }
- }
-
- /// <summary>Generates the code for "RegexRunner.Go": the simplified form when TryGenerateSimplifiedGo succeeds, otherwise the forward, middle, and backtrack sections.</summary>
- protected void GenerateGo()
- {
- Debug.Assert(_code != null);
- _int32LocalsPool?.Clear(); // empty the local pools (when they exist) before emitting this method
- _readOnlySpanCharLocalsPool?.Clear();
-
- // Generate simpler code when we're dealing with simpler regexes.
- if (TryGenerateSimplifiedGo(_code.Tree.Root))
- {
- return; // the simplified generator handled the whole method; nothing below runs
- }
-
- // We're dealing with a regex more complicated than the fast-path non-backtracking
- // implementation can handle. Do the full-fledged thing.
-
- // Declare the locals used by the generated method.
-
- _runtextposLocal = DeclareInt32();
- _runtextLocal = DeclareString();
- _runtrackposLocal = DeclareInt32();
- _runtrackLocal = DeclareInt32Array();
- _runstackposLocal = DeclareInt32();
- _runstackLocal = DeclareInt32Array();
- if (_hasTimeout) // the timeout counter local is only declared when a timeout is in effect
- {
- _loopTimeoutCounterLocal = DeclareInt32();
- }
- _runtextbegLocal = DeclareInt32();
- _runtextendLocal = DeclareInt32();
-
- InitializeCultureForGoIfNecessary(); // sets _textInfoLocal (or leaves it null) based on options and opcodes
-
- // Reset the label/note tables before generating.
-
- _labels = null;
- _notes = null;
- _notecount = 0;
-
- // Labels shared by all of the generated sections.
-
- _backtrack = DefineLabel();
-
- // Emit the code, one section after another.
-
- GenerateForwardSection();
- GenerateMiddleSection();
- GenerateBacktrackSection();
- }
-
- private void InitializeCultureForGoIfNecessary() // Declares and initializes _textInfoLocal only when culture-sensitive, case-insensitive matching is in play; otherwise leaves it null.
- {
- _textInfoLocal = null; // default: no culture local
- if ((_options & RegexOptions.CultureInvariant) == 0) // nothing to set up when CultureInvariant is specified
- {
- bool needsCulture = (_options & RegexOptions.IgnoreCase) != 0;
- if (!needsCulture) // IgnoreCase is not set globally, so scan the opcodes for a case-insensitive one
- {
- for (int codepos = 0; codepos < _codes!.Length; codepos += RegexCode.OpcodeSize(_codes[codepos])) // step by each opcode's size to land on the next opcode
- {
- if ((_codes[codepos] & RegexCode.Ci) == RegexCode.Ci)
- {
- needsCulture = true; // one case-insensitive opcode is enough
- break;
- }
- }
- }
-
- if (needsCulture)
- {
- // Cache the culture data in a local variable, which saves excessive thread-local storage accesses.
- _textInfoLocal = DeclareTextInfo();
- InitLocalCultureInfo();
- }
- }
- }
-
- /// <summary>
- /// The main translation function. It translates the logic for a single opcode at
- /// the current position. The structure of this function exactly mirrors
- /// the structure of the inner loop of RegexInterpreter.Go().
- /// </summary>
- /// <remarks>
- /// The C# code from RegexInterpreter.Go() that corresponds to each case is
- /// included as a comment.
- ///
- /// Note that since we're generating code, we can collapse many cases that are
- /// dealt with one-at-a-time in RegexInterpreter. We can also unroll loops that
- /// iterate over constant strings or sets.
- /// </remarks>
- private void GenerateOneCode()
- {
-#if DEBUG
- if ((_options & RegexOptions.Debug) != 0)
- DumpBacktracking();
-#endif
-
- // Before executing any RegEx code in the unrolled loop,
- // we try checking for the match timeout:
-
- if (_hasTimeout)
- {
- Ldthis();
- Call(s_checkTimeoutMethod);
- }
-
- // Now generate the IL for the RegEx code saved in _regexopcode.
- // We unroll the loop done by the RegexCompiler, creating a very long method
- // that grows longer as the pattern grows longer:
-
- switch (_regexopcode)
- {
- case RegexCode.Stop:
- //: return;
- Mvlocfld(_runtextposLocal!, s_runtextposField); // update _textpos
- Ret();
- break;
-
- case RegexCode.Nothing:
- //: break Backward;
- Back();
- break;
-
- case RegexCode.UpdateBumpalong:
- // UpdateBumpalong should only exist in the code stream at such a point where the root
- // of the backtracking stack contains the runtextpos from the start of this Go call. Replace
- // that tracking value with the current runtextpos value.
- //: base.runtrack[base.runtrack.Length - 1] = runtextpos;
- Ldloc(_runtrackLocal!);
- Dup();
- Ldlen();
- Ldc(1);
- Sub();
- Ldloc(_runtextposLocal!);
- StelemI4();
- break;
-
- case RegexCode.Goto:
- //: Goto(Operand(0));
- Goto(Operand(0));
- break;
-
- case RegexCode.Testref:
- //: if (!_match.IsMatched(Operand(0)))
- //: break Backward;
- Ldthis();
- Ldc(Operand(0));
- Call(s_isMatchedMethod);
- BrfalseFar(_backtrack);
- break;
-
- case RegexCode.Lazybranch:
- //: Track(Textpos());
- PushTrack(_runtextposLocal!);
- Track();
- break;
-
- case RegexCode.Lazybranch | RegexCode.Back:
- //: Trackframe(1);
- //: Textto(Tracked(0));
- //: Goto(Operand(0));
- PopTrack();
- Stloc(_runtextposLocal!);
- Goto(Operand(0));
- break;
-
- case RegexCode.Nullmark:
- //: Stack(-1);
- //: Track();
- ReadyPushStack();
- Ldc(-1);
- DoPush();
- TrackUnique(Stackpop);
- break;
-
- case RegexCode.Setmark:
- //: Stack(Textpos());
- //: Track();
- PushStack(_runtextposLocal!);
- TrackUnique(Stackpop);
- break;
-
- case RegexCode.Nullmark | RegexCode.Back:
- case RegexCode.Setmark | RegexCode.Back:
- //: Stackframe(1);
- //: break Backward;
- PopDiscardStack();
- Back();
- break;
-
- case RegexCode.Getmark:
- //: Stackframe(1);
- //: Track(Stacked(0));
- //: Textto(Stacked(0));
- ReadyPushTrack();
- PopStack();
- Stloc(_runtextposLocal!);
- Ldloc(_runtextposLocal!);
- DoPush();
-
- Track();
- break;
-
- case RegexCode.Getmark | RegexCode.Back:
- //: Trackframe(1);
- //: Stack(Tracked(0));
- //: break Backward;
- ReadyPushStack();
- PopTrack();
- DoPush();
- Back();
- break;
-
- case RegexCode.Capturemark:
- //: if (!IsMatched(Operand(1)))
- //: break Backward;
- //: Stackframe(1);
- //: if (Operand(1) != -1)
- //: TransferCapture(Operand(0), Operand(1), Stacked(0), Textpos());
- //: else
- //: Capture(Operand(0), Stacked(0), Textpos());
- //: Track(Stacked(0));
-
- //: Stackframe(1);
- //: Capture(Operand(0), Stacked(0), Textpos());
- //: Track(Stacked(0));
-
- if (Operand(1) != -1)
- {
- Ldthis();
- Ldc(Operand(1));
- Call(s_isMatchedMethod);
- BrfalseFar(_backtrack);
- }
-
- using (RentedLocalBuilder stackedLocal = RentInt32Local())
- {
- PopStack();
- Stloc(stackedLocal);
-
- if (Operand(1) != -1)
- {
- Ldthis();
- Ldc(Operand(0));
- Ldc(Operand(1));
- Ldloc(stackedLocal);
- Ldloc(_runtextposLocal!);
- Call(s_transferCaptureMethod);
- }
- else
- {
- Ldthis();
- Ldc(Operand(0));
- Ldloc(stackedLocal);
- Ldloc(_runtextposLocal!);
- Call(s_captureMethod);
- }
-
- PushTrack(stackedLocal);
- }
-
- TrackUnique(Operand(0) != -1 && Operand(1) != -1 ? Capback2 : Capback);
- break;
-
-
- case RegexCode.Capturemark | RegexCode.Back:
- //: Trackframe(1);
- //: Stack(Tracked(0));
- //: Uncapture();
- //: if (Operand(0) != -1 && Operand(1) != -1)
- //: Uncapture();
- //: break Backward;
- ReadyPushStack();
- PopTrack();
- DoPush();
- Ldthis();
- Call(s_uncaptureMethod);
- if (Operand(0) != -1 && Operand(1) != -1)
- {
- Ldthis();
- Call(s_uncaptureMethod);
- }
- Back();
- break;
-
- case RegexCode.Branchmark:
- //: Stackframe(1);
- //:
- //: if (Textpos() != Stacked(0))
- //: { // Nonempty match -> loop now
- //: Track(Stacked(0), Textpos()); // Save old mark, textpos
- //: Stack(Textpos()); // Make new mark
- //: Goto(Operand(0)); // Loop
- //: }
- //: else
- //: { // Empty match -> straight now
- //: Track2(Stacked(0)); // Save old mark
- //: Advance(1); // Straight
- //: }
- //: continue Forward;
- {
- Label l1 = DefineLabel();
-
- PopStack();
- using (RentedLocalBuilder mark = RentInt32Local())
- {
- Stloc(mark); // Stacked(0) -> temp
- PushTrack(mark);
- Ldloc(mark);
- }
- Ldloc(_runtextposLocal!);
- Beq(l1); // mark == textpos -> branch
-
- // (matched != 0)
-
- PushTrack(_runtextposLocal!);
- PushStack(_runtextposLocal!);
- Track();
- Goto(Operand(0)); // Goto(Operand(0))
-
- // else
-
- MarkLabel(l1);
- TrackUnique2(Branchmarkback2);
- break;
- }
-
- case RegexCode.Branchmark | RegexCode.Back:
- //: Trackframe(2);
- //: Stackframe(1);
- //: Textto(Tracked(1)); // Recall position
- //: Track2(Tracked(0)); // Save old mark
- //: Advance(1);
- PopTrack();
- Stloc(_runtextposLocal!);
- PopStack();
- Pop();
- // track spot 0 is already in place
- TrackUnique2(Branchmarkback2);
- Advance();
- break;
-
- case RegexCode.Branchmark | RegexCode.Back2:
- //: Trackframe(1);
- //: Stack(Tracked(0)); // Recall old mark
- //: break Backward; // Backtrack
- ReadyPushStack();
- PopTrack();
- DoPush();
- Back();
- break;
-
- case RegexCode.Lazybranchmark:
- //: StackPop();
- //: int oldMarkPos = StackPeek();
- //:
- //: if (Textpos() != oldMarkPos) { // Nonempty match -> next loop
- //: { // Nonempty match -> next loop
- //: if (oldMarkPos != -1)
- //: Track(Stacked(0), Textpos()); // Save old mark, textpos
- //: else
- //: TrackPush(Textpos(), Textpos());
- //: }
- //: else
- //: { // Empty match -> no loop
- //: Track2(Stacked(0)); // Save old mark
- //: }
- //: Advance(1);
- //: continue Forward;
- {
- using (RentedLocalBuilder mark = RentInt32Local())
- {
- PopStack();
- Stloc(mark); // Stacked(0) -> temp
-
- // if (oldMarkPos != -1)
- Label l2 = DefineLabel();
- Label l3 = DefineLabel();
- Ldloc(mark);
- Ldc(-1);
- Beq(l2); // mark == -1 -> branch
- PushTrack(mark);
- Br(l3);
- // else
- MarkLabel(l2);
- PushTrack(_runtextposLocal!);
- MarkLabel(l3);
-
- // if (Textpos() != mark)
- Label l1 = DefineLabel();
- Ldloc(_runtextposLocal!);
- Ldloc(mark);
- Beq(l1); // mark == textpos -> branch
- PushTrack(_runtextposLocal!);
- Track();
- Br(AdvanceLabel()); // Advance (near)
- // else
- MarkLabel(l1);
- ReadyPushStack(); // push the current textPos on the stack.
- // May be ignored by 'back2' or used by a true empty match.
- Ldloc(mark);
- }
-
- DoPush();
- TrackUnique2(Lazybranchmarkback2);
-
- break;
- }
-
- case RegexCode.Lazybranchmark | RegexCode.Back:
- //: Trackframe(2);
- //: Track2(Tracked(0)); // Save old mark
- //: Stack(Textpos()); // Make new mark
- //: Textto(Tracked(1)); // Recall position
- //: Goto(Operand(0)); // Loop
-
- PopTrack();
- Stloc(_runtextposLocal!);
- PushStack(_runtextposLocal!);
- TrackUnique2(Lazybranchmarkback2);
- Goto(Operand(0));
- break;
-
- case RegexCode.Lazybranchmark | RegexCode.Back2:
- //: Stackframe(1);
- //: Trackframe(1);
- //: Stack(Tracked(0)); // Recall old mark
- //: break Backward;
- ReadyReplaceStack(0);
- PopTrack();
- DoReplace();
- Back();
- break;
-
- case RegexCode.Nullcount:
- //: Stack(-1, Operand(0));
- //: Track();
- ReadyPushStack();
- Ldc(-1);
- DoPush();
- ReadyPushStack();
- Ldc(Operand(0));
- DoPush();
- TrackUnique(Stackpop2);
- break;
-
- case RegexCode.Setcount:
- //: Stack(Textpos(), Operand(0));
- //: Track();
- PushStack(_runtextposLocal!);
- ReadyPushStack();
- Ldc(Operand(0));
- DoPush();
- TrackUnique(Stackpop2);
- break;
-
- case RegexCode.Nullcount | RegexCode.Back:
- case RegexCode.Setcount | RegexCode.Back:
- //: Stackframe(2);
- //: break Backward;
- PopDiscardStack(2);
- Back();
- break;
-
- case RegexCode.Branchcount:
- //: Stackframe(2);
- //: int mark = Stacked(0);
- //: int count = Stacked(1);
- //:
- //: if (count >= Operand(1) || Textpos() == mark && count >= 0)
- //: { // Max loops or empty match -> straight now
- //: Track2(mark, count); // Save old mark, count
- //: Advance(2); // Straight
- //: }
- //: else
- //: { // Nonempty match -> count+loop now
- //: Track(mark); // remember mark
- //: Stack(Textpos(), count + 1); // Make new mark, incr count
- //: Goto(Operand(0)); // Loop
- //: }
- //: continue Forward;
- {
- using (RentedLocalBuilder count = RentInt32Local())
- {
- PopStack();
- Stloc(count); // count -> temp
- PopStack();
- using (RentedLocalBuilder mark = RentInt32Local())
- {
- Stloc(mark); // mark -> temp2
- PushTrack(mark);
- Ldloc(mark);
- }
-
- Label l1 = DefineLabel();
- Label l2 = DefineLabel();
- Ldloc(_runtextposLocal!);
- Bne(l1); // mark != textpos -> l1
- Ldloc(count);
- Ldc(0);
- Bge(l2); // count >= 0 && mark == textpos -> l2
-
- MarkLabel(l1);
- Ldloc(count);
- Ldc(Operand(1));
- Bge(l2); // count >= Operand(1) -> l2
-
- // else
- PushStack(_runtextposLocal!);
- ReadyPushStack();
- Ldloc(count); // mark already on track
- Ldc(1);
- Add();
- DoPush();
- Track();
- Goto(Operand(0));
-
- // if (count >= Operand(1) || Textpos() == mark)
- MarkLabel(l2);
- PushTrack(count); // mark already on track
- }
- TrackUnique2(Branchcountback2);
- break;
- }
-
- case RegexCode.Branchcount | RegexCode.Back:
- //: Trackframe(1);
- //: Stackframe(2);
- //: if (Stacked(1) > 0) // Positive -> can go straight
- //: {
- //: Textto(Stacked(0)); // Zap to mark
- //: Track2(Tracked(0), Stacked(1) - 1); // Save old mark, old count
- //: Advance(2); // Straight
- //: continue Forward;
- //: }
- //: Stack(Tracked(0), Stacked(1) - 1); // recall old mark, old count
- //: break Backward;
- {
- using (RentedLocalBuilder count = RentInt32Local())
- {
- Label l1 = DefineLabel();
- PopStack();
- Ldc(1);
- Sub();
- Stloc(count);
- Ldloc(count);
- Ldc(0);
- Blt(l1);
-
- // if (count >= 0)
- PopStack();
- Stloc(_runtextposLocal!);
- PushTrack(count); // Tracked(0) is already on the track
- TrackUnique2(Branchcountback2);
- Advance();
-
- // else
- MarkLabel(l1);
- ReadyReplaceStack(0);
- PopTrack();
- DoReplace();
- PushStack(count);
- }
- Back();
- break;
- }
-
- case RegexCode.Branchcount | RegexCode.Back2:
- //: Trackframe(2);
- //: Stack(Tracked(0), Tracked(1)); // Recall old mark, old count
- //: break Backward; // Backtrack
-
- PopTrack();
- using (RentedLocalBuilder tmp = RentInt32Local())
- {
- Stloc(tmp);
- ReadyPushStack();
- PopTrack();
- DoPush();
- PushStack(tmp);
- }
- Back();
- break;
-
- case RegexCode.Lazybranchcount:
- //: Stackframe(2);
- //: int mark = Stacked(0);
- //: int count = Stacked(1);
- //:
- //: if (count < 0)
- //: { // Negative count -> loop now
- //: Track2(mark); // Save old mark
- //: Stack(Textpos(), count + 1); // Make new mark, incr count
- //: Goto(Operand(0)); // Loop
- //: }
- //: else
- //: { // Nonneg count or empty match -> straight now
- //: Track(mark, count, Textpos()); // Save mark, count, position
- //: }
- {
- PopStack();
- using (RentedLocalBuilder count = RentInt32Local())
- {
- Stloc(count); // count -> temp
- PopStack();
- using (RentedLocalBuilder mark = RentInt32Local())
- {
- Stloc(mark); // mark -> temp2
-
- Label l1 = DefineLabel();
- Ldloc(count);
- Ldc(0);
- Bge(l1); // count >= 0 -> l1
-
- // if (count < 0)
- PushTrack(mark);
- PushStack(_runtextposLocal!);
- ReadyPushStack();
- Ldloc(count);
- Ldc(1);
- Add();
- DoPush();
- TrackUnique2(Lazybranchcountback2);
- Goto(Operand(0));
-
- // else
- MarkLabel(l1);
- PushTrack(mark);
- }
- PushTrack(count);
- }
- PushTrack(_runtextposLocal!);
- Track();
- break;
- }
-
- case RegexCode.Lazybranchcount | RegexCode.Back:
- //: Trackframe(3);
- //: int mark = Tracked(0);
- //: int textpos = Tracked(2);
- //: if (Tracked(1) < Operand(1) && textpos != mark)
- //: { // Under limit and not empty match -> loop
- //: Textto(Tracked(2)); // Recall position
- //: Stack(Textpos(), Tracked(1) + 1); // Make new mark, incr count
- //: Track2(Tracked(0)); // Save old mark
- //: Goto(Operand(0)); // Loop
- //: continue Forward;
- //: }
- //: else
- //: {
- //: Stack(Tracked(0), Tracked(1)); // Recall old mark, count
- //: break Backward; // backtrack
- //: }
- {
- using (RentedLocalBuilder cLocal = RentInt32Local())
- {
- Label l1 = DefineLabel();
-
- PopTrack();
- Stloc(_runtextposLocal!);
- PopTrack();
- Stloc(cLocal);
- Ldloc(cLocal);
- Ldc(Operand(1));
- Bge(l1); // Tracked(1) >= Operand(1) -> l1
-
- Ldloc(_runtextposLocal!);
- TopTrack();
- Beq(l1); // textpos == mark -> l1
-
- PushStack(_runtextposLocal!);
- ReadyPushStack();
- Ldloc(cLocal);
- Ldc(1);
- Add();
- DoPush();
- TrackUnique2(Lazybranchcountback2);
- Goto(Operand(0));
-
- MarkLabel(l1);
- ReadyPushStack();
- PopTrack();
- DoPush();
- PushStack(cLocal);
- }
- Back();
- break;
- }
-
- case RegexCode.Lazybranchcount | RegexCode.Back2:
- // <
- ReadyReplaceStack(1);
- PopTrack();
- DoReplace();
- ReadyReplaceStack(0);
- TopStack();
- Ldc(1);
- Sub();
- DoReplace();
- Back();
- break;
-
- case RegexCode.Setjump:
- //: Stack(Trackpos(), Crawlpos());
- //: Track();
- ReadyPushStack();
- Ldthisfld(s_runtrackField);
- Ldlen();
- Ldloc(_runtrackposLocal!);
- Sub();
- DoPush();
- ReadyPushStack();
- Ldthis();
- Call(s_crawlposMethod);
- DoPush();
- TrackUnique(Stackpop2);
- break;
-
- case RegexCode.Setjump | RegexCode.Back:
- //: Stackframe(2);
- PopDiscardStack(2);
- Back();
- break;
-
- case RegexCode.Backjump:
- //: Stackframe(2);
- //: Trackto(Stacked(0));
- //: while (Crawlpos() != Stacked(1))
- //: Uncapture();
- //: break Backward;
- {
- Label l1 = DefineLabel();
- Label l2 = DefineLabel();
-
- using (RentedLocalBuilder stackedLocal = RentInt32Local())
- {
- PopStack();
- Stloc(stackedLocal);
- Ldthisfld(s_runtrackField);
- Ldlen();
- PopStack();
- Sub();
- Stloc(_runtrackposLocal!);
-
- MarkLabel(l1);
- Ldthis();
- Call(s_crawlposMethod);
- Ldloc(stackedLocal);
- Beq(l2);
- Ldthis();
- Call(s_uncaptureMethod);
- Br(l1);
- }
-
- MarkLabel(l2);
- Back();
- break;
- }
-
- case RegexCode.Forejump:
- //: Stackframe(2);
- //: Trackto(Stacked(0));
- //: Track(Stacked(1));
- PopStack();
- using (RentedLocalBuilder tmp = RentInt32Local())
- {
- Stloc(tmp);
- Ldthisfld(s_runtrackField);
- Ldlen();
- PopStack();
- Sub();
- Stloc(_runtrackposLocal!);
- PushTrack(tmp);
- }
- TrackUnique(Forejumpback);
- break;
-
- case RegexCode.Forejump | RegexCode.Back:
- //: Trackframe(1);
- //: while (Crawlpos() != Tracked(0))
- //: Uncapture();
- //: break Backward;
- {
- Label l1 = DefineLabel();
- Label l2 = DefineLabel();
-
- using (RentedLocalBuilder trackedLocal = RentInt32Local())
- {
- PopTrack();
- Stloc(trackedLocal);
-
- MarkLabel(l1);
- Ldthis();
- Call(s_crawlposMethod);
- Ldloc(trackedLocal);
- Beq(l2);
- Ldthis();
- Call(s_uncaptureMethod);
- Br(l1);
- }
-
- MarkLabel(l2);
- Back();
- break;
- }
-
- case RegexCode.Bol:
- //: if (Leftchars() > 0 && CharAt(Textpos() - 1) != '\n')
- //: break Backward;
- {
- Label l1 = _labels![NextCodepos()];
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextbegLocal!);
- Ble(l1);
- Leftchar();
- Ldc('\n');
- BneFar(_backtrack);
- break;
- }
-
- case RegexCode.Eol:
- //: if (Rightchars() > 0 && CharAt(Textpos()) != '\n')
- //: break Backward;
- {
- Label l1 = _labels![NextCodepos()];
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextendLocal!);
- Bge(l1);
- Rightchar();
- Ldc('\n');
- BneFar(_backtrack);
- break;
- }
-
- case RegexCode.Boundary:
- case RegexCode.NonBoundary:
- //: if (!IsBoundary(Textpos(), _textbeg, _textend))
- //: break Backward;
- Ldthis();
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextbegLocal!);
- Ldloc(_runtextendLocal!);
- Call(s_isBoundaryMethod);
- if (Code() == RegexCode.Boundary)
- {
- BrfalseFar(_backtrack);
- }
- else
- {
- BrtrueFar(_backtrack);
- }
- break;
-
- case RegexCode.ECMABoundary:
- case RegexCode.NonECMABoundary:
- //: if (!IsECMABoundary(Textpos(), _textbeg, _textend))
- //: break Backward;
- Ldthis();
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextbegLocal!);
- Ldloc(_runtextendLocal!);
- Call(s_isECMABoundaryMethod);
- if (Code() == RegexCode.ECMABoundary)
- {
- BrfalseFar(_backtrack);
- }
- else
- {
- BrtrueFar(_backtrack);
- }
- break;
-
- case RegexCode.Beginning:
- //: if (Leftchars() > 0)
- //: break Backward;
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextbegLocal!);
- BgtFar(_backtrack);
- break;
-
- case RegexCode.Start:
- //: if (Textpos() != Textstart())
- //: break Backward;
- Ldloc(_runtextposLocal!);
- Ldthisfld(s_runtextstartField);
- BneFar(_backtrack);
- break;
-
- case RegexCode.EndZ:
- //: if (Rightchars() > 1 || Rightchars() == 1 && CharAt(Textpos()) != '\n')
- //: break Backward;
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextendLocal!);
- Ldc(1);
- Sub();
- BltFar(_backtrack);
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextendLocal!);
- Bge(_labels![NextCodepos()]);
- Rightchar();
- Ldc('\n');
- BneFar(_backtrack);
- break;
-
- case RegexCode.End:
- //: if (Rightchars() > 0)
- //: break Backward;
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextendLocal!);
- BltFar(_backtrack);
- break;
-
- case RegexCode.One:
- case RegexCode.Notone:
- case RegexCode.Set:
- case RegexCode.One | RegexCode.Rtl:
- case RegexCode.Notone | RegexCode.Rtl:
- case RegexCode.Set | RegexCode.Rtl:
- case RegexCode.One | RegexCode.Ci:
- case RegexCode.Notone | RegexCode.Ci:
- case RegexCode.Set | RegexCode.Ci:
- case RegexCode.One | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notone | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Set | RegexCode.Ci | RegexCode.Rtl:
-
- //: if (Rightchars() < 1 || Rightcharnext() != (char)Operand(0))
- //: break Backward;
-
- Ldloc(_runtextposLocal!);
-
- if (!IsRightToLeft())
- {
- Ldloc(_runtextendLocal!);
- BgeFar(_backtrack);
- Rightcharnext();
- }
- else
- {
- Ldloc(_runtextbegLocal!);
- BleFar(_backtrack);
- Leftcharnext();
- }
-
- if (Code() == RegexCode.Set)
- {
- EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive());
- BrfalseFar(_backtrack);
- }
- else
- {
- if (IsCaseInsensitive())
- {
- CallToLower();
- }
-
- Ldc(Operand(0));
- if (Code() == RegexCode.One)
- {
- BneFar(_backtrack);
- }
- else
- {
- BeqFar(_backtrack);
- }
- }
- break;
-
- case RegexCode.Multi:
- case RegexCode.Multi | RegexCode.Ci:
- //: String Str = _strings[Operand(0)];
- //: int i, c;
- //: if (Rightchars() < (c = Str.Length))
- //: break Backward;
- //: for (i = 0; c > 0; i++, c--)
- //: if (Str[i] != Rightcharnext())
- //: break Backward;
- {
- string str = _strings![Operand(0)];
-
- Ldc(str.Length);
- Ldloc(_runtextendLocal!);
- Ldloc(_runtextposLocal!);
- Sub();
- BgtFar(_backtrack);
-
- // unroll the string
- for (int i = 0; i < str.Length; i++)
- {
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- if (i != 0)
- {
- Ldc(i);
- Add();
- }
- Call(s_stringGetCharsMethod);
- if (IsCaseInsensitive())
- {
- CallToLower();
- }
-
- Ldc(str[i]);
- BneFar(_backtrack);
- }
-
- Ldloc(_runtextposLocal!);
- Ldc(str.Length);
- Add();
- Stloc(_runtextposLocal!);
- break;
- }
-
- case RegexCode.Multi | RegexCode.Rtl:
- case RegexCode.Multi | RegexCode.Ci | RegexCode.Rtl:
- //: String Str = _strings[Operand(0)];
- //: int c;
- //: if (Leftchars() < (c = Str.Length))
- //: break Backward;
- //: while (c > 0)
- //: if (Str[--c] != Leftcharnext())
- //: break Backward;
- {
- string str = _strings![Operand(0)];
-
- Ldc(str.Length);
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextbegLocal!);
- Sub();
- BgtFar(_backtrack);
-
- // unroll the string
- for (int i = str.Length; i > 0;)
- {
- i--;
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- Ldc(str.Length - i);
- Sub();
- Call(s_stringGetCharsMethod);
- if (IsCaseInsensitive())
- {
- CallToLower();
- }
- Ldc(str[i]);
- BneFar(_backtrack);
- }
-
- Ldloc(_runtextposLocal!);
- Ldc(str.Length);
- Sub();
- Stloc(_runtextposLocal!);
-
- break;
- }
-
- case RegexCode.Ref:
- case RegexCode.Ref | RegexCode.Rtl:
- case RegexCode.Ref | RegexCode.Ci:
- case RegexCode.Ref | RegexCode.Ci | RegexCode.Rtl:
- //: int capnum = Operand(0);
- //: int j, c;
- //: if (!_match.IsMatched(capnum)) {
- //: if (!RegexOptions.ECMAScript)
- //: break Backward;
- //: } else {
- //: if (Rightchars() < (c = _match.MatchLength(capnum)))
- //: break Backward;
- //: for (j = _match.MatchIndex(capnum); c > 0; j++, c--)
- //: if (CharAt(j) != Rightcharnext())
- //: break Backward;
- //: }
- {
- using RentedLocalBuilder lenLocal = RentInt32Local();
- using RentedLocalBuilder indexLocal = RentInt32Local();
- Label l1 = DefineLabel();
-
- Ldthis();
- Ldc(Operand(0));
- Call(s_isMatchedMethod);
- if ((_options & RegexOptions.ECMAScript) != 0)
- {
- Brfalse(AdvanceLabel());
- }
- else
- {
- BrfalseFar(_backtrack); // !IsMatched() -> back
- }
-
- Ldthis();
- Ldc(Operand(0));
- Call(s_matchLengthMethod);
- Stloc(lenLocal);
- Ldloc(lenLocal);
- if (!IsRightToLeft())
- {
- Ldloc(_runtextendLocal!);
- Ldloc(_runtextposLocal!);
- }
- else
- {
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextbegLocal!);
- }
- Sub();
- BgtFar(_backtrack); // Matchlength() > Rightchars() -> back
-
- Ldthis();
- Ldc(Operand(0));
- Call(s_matchIndexMethod);
- if (!IsRightToLeft())
- {
- Ldloc(lenLocal);
- Add(IsRightToLeft());
- }
- Stloc(indexLocal); // index += len
-
- Ldloc(_runtextposLocal!);
- Ldloc(lenLocal);
- Add(IsRightToLeft());
- Stloc(_runtextposLocal!); // texpos += len
-
- MarkLabel(l1);
- Ldloc(lenLocal);
- Ldc(0);
- Ble(AdvanceLabel());
- Ldloc(_runtextLocal!);
- Ldloc(indexLocal);
- Ldloc(lenLocal);
- if (IsRightToLeft())
- {
- Ldc(1);
- Sub();
- Stloc(lenLocal);
- Ldloc(lenLocal);
- }
- Sub(IsRightToLeft());
- Call(s_stringGetCharsMethod);
- if (IsCaseInsensitive())
- {
- CallToLower();
- }
-
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- Ldloc(lenLocal);
- if (!IsRightToLeft())
- {
- Ldloc(lenLocal);
- Ldc(1);
- Sub();
- Stloc(lenLocal);
- }
- Sub(IsRightToLeft());
- Call(s_stringGetCharsMethod);
- if (IsCaseInsensitive())
- {
- CallToLower();
- }
-
- Beq(l1);
- Back();
- break;
- }
-
- case RegexCode.Onerep:
- case RegexCode.Notonerep:
- case RegexCode.Setrep:
- case RegexCode.Onerep | RegexCode.Rtl:
- case RegexCode.Notonerep | RegexCode.Rtl:
- case RegexCode.Setrep | RegexCode.Rtl:
- case RegexCode.Onerep | RegexCode.Ci:
- case RegexCode.Notonerep | RegexCode.Ci:
- case RegexCode.Setrep | RegexCode.Ci:
- case RegexCode.Onerep | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notonerep | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Setrep | RegexCode.Ci | RegexCode.Rtl:
- //: int c = Operand(1);
- //: if (Rightchars() < c)
- //: break Backward;
- //: char ch = (char)Operand(0);
- //: while (c-- > 0)
- //: if (Rightcharnext() != ch)
- //: break Backward;
- {
- int c = Operand(1);
- if (c == 0)
- break;
-
- Ldc(c);
- if (!IsRightToLeft())
- {
- Ldloc(_runtextendLocal!);
- Ldloc(_runtextposLocal!);
- }
- else
- {
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextbegLocal!);
- }
- Sub();
- BgtFar(_backtrack); // Matchlength() > Rightchars() -> back
-
- Ldloc(_runtextposLocal!);
- Ldc(c);
- Add(IsRightToLeft());
- Stloc(_runtextposLocal!); // texpos += len
-
- using RentedLocalBuilder lenLocal = RentInt32Local();
- Label l1 = DefineLabel();
- Ldc(c);
- Stloc(lenLocal);
-
- MarkLabel(l1);
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- Ldloc(lenLocal);
- if (IsRightToLeft())
- {
- Ldc(1);
- Sub();
- Stloc(lenLocal);
- Ldloc(lenLocal);
- Add();
- }
- else
- {
- Ldloc(lenLocal);
- Ldc(1);
- Sub();
- Stloc(lenLocal);
- Sub();
- }
- Call(s_stringGetCharsMethod);
-
- if (Code() == RegexCode.Setrep)
- {
- EmitTimeoutCheck();
- EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive());
- BrfalseFar(_backtrack);
- }
- else
- {
- if (IsCaseInsensitive())
- {
- CallToLower();
- }
-
- Ldc(Operand(0));
- if (Code() == RegexCode.Onerep)
- {
- BneFar(_backtrack);
- }
- else
- {
- BeqFar(_backtrack);
- }
- }
- Ldloc(lenLocal);
- Ldc(0);
- if (Code() == RegexCode.Setrep)
- {
- BgtFar(l1);
- }
- else
- {
- Bgt(l1);
- }
- break;
- }
-
- case RegexCode.Oneloop:
- case RegexCode.Notoneloop:
- case RegexCode.Setloop:
- case RegexCode.Oneloop | RegexCode.Rtl:
- case RegexCode.Notoneloop | RegexCode.Rtl:
- case RegexCode.Setloop | RegexCode.Rtl:
- case RegexCode.Oneloop | RegexCode.Ci:
- case RegexCode.Notoneloop | RegexCode.Ci:
- case RegexCode.Setloop | RegexCode.Ci:
- case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Oneloopatomic:
- case RegexCode.Notoneloopatomic:
- case RegexCode.Setloopatomic:
- case RegexCode.Oneloopatomic | RegexCode.Rtl:
- case RegexCode.Notoneloopatomic | RegexCode.Rtl:
- case RegexCode.Setloopatomic | RegexCode.Rtl:
- case RegexCode.Oneloopatomic | RegexCode.Ci:
- case RegexCode.Notoneloopatomic | RegexCode.Ci:
- case RegexCode.Setloopatomic | RegexCode.Ci:
- case RegexCode.Oneloopatomic | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notoneloopatomic | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Setloopatomic | RegexCode.Ci | RegexCode.Rtl:
- //: int len = Operand(1);
- //: if (len > Rightchars())
- //: len = Rightchars();
- //: char ch = (char)Operand(0);
- //: int i;
- //: for (i = len; i > 0; i--)
- //: {
- //: if (Rightcharnext() != ch)
- //: {
- //: Leftnext();
- //: break;
- //: }
- //: }
- //: if (len > i)
- //: Track(len - i - 1, Textpos() - 1);
- {
- int c = Operand(1);
- if (c == 0)
- {
- break;
- }
-
- using RentedLocalBuilder lenLocal = RentInt32Local();
- using RentedLocalBuilder iLocal = RentInt32Local();
+ EmitRunstackPop();
+ Stloc(poppedCrawlPos);
+ EmitUncaptureUntil(poppedCrawlPos);
+ }
+ LoadTextSpanLocal();
- if (!IsRightToLeft())
- {
- Ldloc(_runtextendLocal!);
- Ldloc(_runtextposLocal!);
- }
- else
- {
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextbegLocal!);
- }
- Sub();
- Stloc(lenLocal);
- if (c != int.MaxValue)
- {
- Label l4 = DefineLabel();
- Ldloc(lenLocal);
- Ldc(c);
- Blt(l4);
- Ldc(c);
- Stloc(lenLocal);
- MarkLabel(l4);
- }
+ if (minIterations > 0)
+ {
+ // if (iterationCount == 0) goto originalDoneLabel;
+ Ldloc(iterationCount);
+ Ldc(0);
+ BeqFar(originalDoneLabel);
- Label loopEnd = DefineLabel();
- string? set = Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic ? _strings![Operand(0)] : null;
- Span<char> setChars = stackalloc char[5]; // max optimized by IndexOfAny today
- int numSetChars;
+ // if (iterationCount < minIterations) goto doneLabel/originalDoneLabel;
+ Ldloc(iterationCount);
+ Ldc(minIterations);
+ BltFar(childBacktracks ? doneLabel : originalDoneLabel);
+ }
- // If this is a notoneloop{atomic} and we're left-to-right and case-sensitive,
- // we can use the vectorized IndexOf to search for the target character.
- if ((Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic) &&
- !IsRightToLeft() &&
- (!IsCaseInsensitive()))
- {
- // i = runtext.AsSpan(runtextpos, len).IndexOf(ch);
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- Ldloc(lenLocal);
- Call(s_stringAsSpanIntIntMethod);
- Ldc(Operand(0));
- Call(s_spanIndexOfChar);
- Stloc(iLocal);
+ if (isAtomic)
+ {
+ doneLabel = originalDoneLabel;
+ MarkLabel(endLoop);
+ }
+ else
+ {
+ if (childBacktracks)
+ {
+ // goto endLoop;
+ BrFar(endLoop);
- Label charFound = DefineLabel();
+ // Backtrack:
+ Label backtrack = DefineLabel();
+ MarkLabel(backtrack);
- // if (i != -1) goto charFound;
- Ldloc(iLocal);
- Ldc(-1);
- Bne(charFound);
-
- // runtextpos += len;
- // i = 0;
- // goto loopEnd;
- Ldloc(_runtextposLocal!);
- Ldloc(lenLocal);
- Add();
- Stloc(_runtextposLocal!);
- Ldc(0);
- Stloc(iLocal);
- BrFar(loopEnd);
-
- // charFound:
- // runtextpos += i;
- // i = len - i;
- // goto loopEnd;
- MarkLabel(charFound);
- Ldloc(_runtextposLocal!);
- Ldloc(iLocal);
- Add();
- Stloc(_runtextposLocal!);
- Ldloc(lenLocal);
- Ldloc(iLocal);
- Sub();
- Stloc(iLocal);
- BrFar(loopEnd);
- }
- else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) &&
- !IsRightToLeft() &&
- !IsCaseInsensitive() &&
- (numSetChars = RegexCharClass.GetSetChars(set!, setChars)) != 0 &&
- RegexCharClass.IsNegated(set!))
- {
- // Similarly, if this is a setloop{atomic} and we're left-to-right and case-sensitive,
- // and if the set contains only a few negated chars, we can use the vectorized IndexOfAny
- // to search for those chars.
- Debug.Assert(numSetChars > 1);
-
- // i = runtext.AsSpan(runtextpos, len).IndexOfAny(ch1, ch2{, ch3});
- Ldloc(_runtextLocal!);
- Ldloc(_runtextposLocal!);
- Ldloc(lenLocal);
- Call(s_stringAsSpanIntIntMethod);
- switch (numSetChars)
- {
- case 2:
- Ldc(setChars[0]);
- Ldc(setChars[1]);
- Call(s_spanIndexOfAnyCharChar);
- break;
-
- case 3:
- Ldc(setChars[0]);
- Ldc(setChars[1]);
- Ldc(setChars[2]);
- Call(s_spanIndexOfAnyCharCharChar);
- break;
-
- default:
- Ldstr(setChars.Slice(0, numSetChars).ToString());
- Call(s_stringAsSpanMethod);
- Call(s_spanIndexOfSpan);
- break;
- }
- Stloc(iLocal);
+ // if (iterationCount == 0) goto originalDoneLabel;
+ Ldloc(iterationCount);
+ Ldc(0);
+ BeqFar(originalDoneLabel);
- Label charFound = DefineLabel();
+ // goto doneLabel;
+ BrFar(doneLabel);
- // if (i != -1) goto charFound;
- Ldloc(iLocal);
- Ldc(-1);
- Bne(charFound);
-
- // runtextpos += len;
- // i = 0;
- // goto loopEnd;
- Ldloc(_runtextposLocal!);
- Ldloc(lenLocal);
- Add();
- Stloc(_runtextposLocal!);
- Ldc(0);
- Stloc(iLocal);
- BrFar(loopEnd);
-
- // charFound:
- // runtextpos += i;
- // i = len - i;
- // goto loopEnd;
- MarkLabel(charFound);
- Ldloc(_runtextposLocal!);
- Ldloc(iLocal);
- Add();
- Stloc(_runtextposLocal!);
- Ldloc(lenLocal);
- Ldloc(iLocal);
- Sub();
- Stloc(iLocal);
- BrFar(loopEnd);
- }
- else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) &&
- !IsRightToLeft() &&
- set == RegexCharClass.AnyClass)
- {
- // If someone uses .* along with RegexOptions.Singleline, that becomes [anycharacter]*, which means it'll
- // consume everything. As such, we can simply update our position to be the last allowed, without
- // actually checking anything.
-
- // runtextpos += len;
- // i = 0;
- // goto loopEnd;
- Ldloc(_runtextposLocal!);
- Ldloc(lenLocal);
- Add();
- Stloc(_runtextposLocal!);
- Ldc(0);
- Stloc(iLocal);
- BrFar(loopEnd);
- }
- else
- {
- // Otherwise, we emit the open-coded loop.
+ doneLabel = backtrack;
+ }
- Ldloc(lenLocal);
- Ldc(1);
- Add();
- Stloc(iLocal);
+ MarkLabel(endLoop);
- Label loopCondition = DefineLabel();
- MarkLabel(loopCondition);
- Ldloc(iLocal);
- Ldc(1);
- Sub();
- Stloc(iLocal);
- Ldloc(iLocal);
- Ldc(0);
- if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
- {
- BleFar(loopEnd);
- }
- else
- {
- Ble(loopEnd);
- }
+ if (node.IsInLoop())
+ {
+ // Store the capture's state
+ EmitRunstackResizeIfNeeded(3);
+ EmitRunstackPush(() => Ldloc(startingRunTextPos));
+ EmitRunstackPush(() => Ldloc(iterationCount));
- if (IsRightToLeft())
- {
- Leftcharnext();
- }
- else
- {
- Rightcharnext();
- }
+ // Skip past the backtracking section
+ // goto end;
+ Label end = DefineLabel();
+ BrFar(end);
- if (Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic)
- {
- EmitTimeoutCheck();
- EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive());
- BrtrueFar(loopCondition);
- }
- else
- {
- if (IsCaseInsensitive())
- {
- CallToLower();
- }
+ // Emit a backtracking section that restores the capture's state and then jumps to the previous done label
+ Label backtrack = DefineLabel();
+ MarkLabel(backtrack);
- Ldc(Operand(0));
- if (Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic)
- {
- Beq(loopCondition);
- }
- else
- {
- Debug.Assert(Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic);
- Bne(loopCondition);
- }
- }
+ // iterationCount = base.runstack[--runstack];
+ // startingRunTextPos = base.runstack[--runstack];
+ EmitRunstackPop();
+ Stloc(iterationCount);
+ EmitRunstackPop();
+ Stloc(startingRunTextPos);
- Ldloc(_runtextposLocal!);
- Ldc(1);
- Sub(IsRightToLeft());
- Stloc(_runtextposLocal!);
- }
+ // goto doneLabel;
+ BrFar(doneLabel);
- // loopEnd:
- MarkLabel(loopEnd);
- if (Code() != RegexCode.Oneloopatomic && Code() != RegexCode.Notoneloopatomic && Code() != RegexCode.Setloopatomic)
- {
- // if (len <= i) goto advance;
- Ldloc(lenLocal);
- Ldloc(iLocal);
- Ble(AdvanceLabel());
+ doneLabel = backtrack;
+ MarkLabel(end);
+ }
+ }
+ }
- // TrackPush(len - i - 1, runtextpos - Bump())
- ReadyPushTrack();
- Ldloc(lenLocal);
- Ldloc(iLocal);
- Sub();
- Ldc(1);
- Sub();
- DoPush();
+ void EmitRunstackResizeIfNeeded(int count)
+ {
+ Debug.Assert(count >= 1);
- ReadyPushTrack();
- Ldloc(_runtextposLocal!);
- Ldc(1);
- Sub(IsRightToLeft());
- DoPush();
+ // if (runstackpos >= base.runstack!.Length - (count - 1))
+ // {
+ // Array.Resize(ref base.runstack, base.runstack.Length * 2);
+ // }
- Track();
- }
- break;
- }
+ Label skipResize = DefineLabel();
- case RegexCode.Oneloop | RegexCode.Back:
- case RegexCode.Notoneloop | RegexCode.Back:
- case RegexCode.Setloop | RegexCode.Back:
- case RegexCode.Oneloop | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Notoneloop | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Setloop | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Setloop | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Oneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Notoneloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Setloop | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- //: Trackframe(2);
- //: int i = Tracked(0);
- //: int pos = Tracked(1);
- //: Textto(pos);
- //: if (i > 0)
- //: Track(i - 1, pos - 1);
- //: Advance(2);
- PopTrack();
- Stloc(_runtextposLocal!);
- PopTrack();
- using (RentedLocalBuilder posLocal = RentInt32Local())
- {
- Stloc(posLocal);
- Ldloc(posLocal);
- Ldc(0);
- BleFar(AdvanceLabel());
- ReadyPushTrack();
- Ldloc(posLocal);
- }
- Ldc(1);
+ Ldloc(runstackpos);
+ Ldthisfld(s_runstackField);
+ Ldlen();
+ if (count > 1)
+ {
+ Ldc(count - 1);
Sub();
- DoPush();
- ReadyPushTrack();
- Ldloc(_runtextposLocal!);
- Ldc(1);
- Sub(IsRightToLeft());
- DoPush();
- Trackagain();
- Advance();
- break;
+ }
+ Blt(skipResize);
- case RegexCode.Onelazy:
- case RegexCode.Notonelazy:
- case RegexCode.Setlazy:
- case RegexCode.Onelazy | RegexCode.Rtl:
- case RegexCode.Notonelazy | RegexCode.Rtl:
- case RegexCode.Setlazy | RegexCode.Rtl:
- case RegexCode.Onelazy | RegexCode.Ci:
- case RegexCode.Notonelazy | RegexCode.Ci:
- case RegexCode.Setlazy | RegexCode.Ci:
- case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl:
- case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl:
- //: int c = Operand(1);
- //: if (c > Rightchars())
- //: c = Rightchars();
- //: if (c > 0)
- //: Track(c - 1, Textpos());
- {
- int c = Operand(1);
- if (c == 0)
- {
- break;
- }
+ Ldthis();
+ _ilg!.Emit(OpCodes.Ldflda, s_runstackField);
+ Ldthisfld(s_runstackField);
+ Ldlen();
+ Ldc(2);
+ Mul();
+ Call(s_arrayResize);
- if (!IsRightToLeft())
- {
- Ldloc(_runtextendLocal!);
- Ldloc(_runtextposLocal!);
- }
- else
- {
- Ldloc(_runtextposLocal!);
- Ldloc(_runtextbegLocal!);
- }
- Sub();
- using (RentedLocalBuilder cLocal = RentInt32Local())
- {
- Stloc(cLocal);
- if (c != int.MaxValue)
- {
- Label l4 = DefineLabel();
- Ldloc(cLocal);
- Ldc(c);
- Blt(l4);
- Ldc(c);
- Stloc(cLocal);
- MarkLabel(l4);
- }
- Ldloc(cLocal);
- Ldc(0);
- Ble(AdvanceLabel());
- ReadyPushTrack();
- Ldloc(cLocal);
- }
- Ldc(1);
- Sub();
- DoPush();
- PushTrack(_runtextposLocal!);
- Track();
- break;
- }
+ MarkLabel(skipResize);
+ }
- case RegexCode.Onelazy | RegexCode.Back:
- case RegexCode.Notonelazy | RegexCode.Back:
- case RegexCode.Setlazy | RegexCode.Back:
- case RegexCode.Onelazy | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Notonelazy | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Setlazy | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Back:
- case RegexCode.Onelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Notonelazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- case RegexCode.Setlazy | RegexCode.Ci | RegexCode.Rtl | RegexCode.Back:
- //: Trackframe(2);
- //: int pos = Tracked(1);
- //: Textto(pos);
- //: if (Rightcharnext() != (char)Operand(0))
- //: break Backward;
- //: int i = Tracked(0);
- //: if (i > 0)
- //: Track(i - 1, pos + 1);
-
- PopTrack();
- Stloc(_runtextposLocal!);
- PopTrack();
- using (RentedLocalBuilder iLocal = RentInt32Local())
- {
- Stloc(iLocal);
+ void EmitRunstackPush(Action load) // Emits IL that stores one value at base.runstack[runstackpos] and then increments the runstackpos local.
+ { // No capacity check is emitted here; the visible caller emits EmitRunstackResizeIfNeeded before pushing.
+ // base.runstack[runstackpos] = load();
+ Ldthisfld(s_runstackField); // array reference for the element store
+ Ldloc(runstackpos); // element index
+ load(); // invoked at emit time; callers pass a delegate that emits the load of the value to store (e.g. () => Ldloc(iterationCount))
+ StelemI4(); // consumes the array reference, index and value emitted above
- if (!IsRightToLeft())
- {
- Rightcharnext();
- }
- else
- {
- Leftcharnext();
- }
+ // runstackpos++;
+ Ldloc(runstackpos);
+ Ldc(1);
+ Add();
+ Stloc(runstackpos); // write the incremented index back to the local
+ }
- if (Code() == RegexCode.Setlazy)
- {
- EmitMatchCharacterClass(_strings![Operand(0)], IsCaseInsensitive());
- BrfalseFar(_backtrack);
- }
- else
- {
- if (IsCaseInsensitive())
- {
- CallToLower();
- }
+ void EmitRunstackPop() // Emits IL that decrements the runstackpos local and loads base.runstack[runstackpos]; the loaded value is left for the caller to consume.
+ {
+ // ... = base.runstack[--runstackpos];
+ Ldthisfld(s_runstackField); // array reference for the element load at the end
+ Ldloc(runstackpos);
+ Ldc(1);
+ Sub();
+ Stloc(runstackpos); // runstackpos = runstackpos - 1
+ Ldloc(runstackpos); // reload the decremented value to use as the element index
+ LdelemI4(); // consumes the array reference and index; visible callers follow this with a Stloc of the result
+ }
+ }
- Ldc(Operand(0));
- if (Code() == RegexCode.Onelazy)
- {
- BneFar(_backtrack);
- }
- else
- {
- BeqFar(_backtrack);
- }
+ private void InitializeCultureForGoIfNecessary()
+ {
+ _textInfo = null;
+ if ((_options & RegexOptions.CultureInvariant) == 0)
+ {
+ bool needsCulture = (_options & RegexOptions.IgnoreCase) != 0;
+ if (!needsCulture)
+ {
+ int[] codes = _code!.Codes;
+ for (int codepos = 0; codepos < codes.Length; codepos += RegexCode.OpcodeSize(codes[codepos]))
+ {
+ if ((codes[codepos] & RegexCode.Ci) == RegexCode.Ci)
+ {
+ needsCulture = true;
+ break;
}
-
- Ldloc(iLocal);
- Ldc(0);
- BleFar(AdvanceLabel());
- ReadyPushTrack();
- Ldloc(iLocal);
}
- Ldc(1);
- Sub();
- DoPush();
- PushTrack(_runtextposLocal!);
- Trackagain();
- Advance();
- break;
+ }
- default:
- Debug.Fail($"Unimplemented state: {_regexopcode:X8}");
- break;
+ if (needsCulture)
+ {
+ // cache CultureInfo in local variable which saves excessive thread local storage accesses
+ _textInfo = DeclareTextInfo();
+ InitLocalCultureInfo();
+ }
}
}
return;
}
- Debug.Assert(_loopTimeoutCounterLocal != null);
+ Debug.Assert(_loopTimeoutCounter != null);
// Increment counter for each loop iteration.
- Ldloc(_loopTimeoutCounterLocal);
+ Ldloc(_loopTimeoutCounter);
Ldc(1);
Add();
- Stloc(_loopTimeoutCounterLocal);
+ Stloc(_loopTimeoutCounter);
// Emit code to check the timeout every 2048th iteration.
Label label = DefineLabel();
- Ldloc(_loopTimeoutCounterLocal);
+ Ldloc(_loopTimeoutCounter);
Ldc(LoopTimeoutCheckCount);
RemUn();
Brtrue(label);
Call(s_checkTimeoutMethod);
MarkLabel(label);
}
-
-#if DEBUG
- /// <summary>Emit code to print out the current state of the runner.</summary>
- [ExcludeFromCodeCoverage(Justification = "Debug only")]
- private void DumpBacktracking()
- {
- Mvlocfld(_runtextposLocal!, s_runtextposField);
- Mvlocfld(_runtrackposLocal!, s_runtrackposField);
- Mvlocfld(_runstackposLocal!, s_runstackposField);
- Ldthis();
- Call(s_dumpStateM);
-
- var sb = new StringBuilder();
- if (_backpos > 0)
- {
- sb.Append($"{_backpos:D6} ");
- }
- else
- {
- sb.Append(" ");
- }
- sb.Append(_code!.OpcodeDescription(_codepos));
-
- if ((_regexopcode & RegexCode.Back) != 0)
- {
- sb.Append(" Back");
- }
-
- if ((_regexopcode & RegexCode.Back2) != 0)
- {
- sb.Append(" Back2");
- }
-
- Ldstr(sb.ToString());
- Call(s_debugWriteLine!);
- }
-#endif
}
}
/// <summary>Id number to use for the next compiled regex.</summary>
private static int s_regexCount;
- public RegexLWCGCompiler()
- {
- }
-
/// <summary>The top-level driver. Initializes everything then calls the Generate* methods.</summary>
- public RegexRunnerFactory FactoryInstanceFromCode(string pattern, RegexCode code, RegexOptions options, bool hasTimeout)
+ public RegexRunnerFactory? FactoryInstanceFromCode(string pattern, RegexCode code, RegexOptions options, bool hasTimeout)
{
+ if (!code.Tree.Root.SupportsCompilation())
+ {
+ return null;
+ }
+
_code = code;
- _codes = code.Codes;
- _strings = code.Strings;
- _trackcount = code.TrackCount;
_options = options;
_hasTimeout = hasTimeout;
description = string.Concat("_", pattern.Length > DescriptionLimit ? pattern.AsSpan(0, DescriptionLimit) : pattern);
}
- DynamicMethod goMethod = DefineDynamicMethod($"Regex{regexNum}_Go{description}", null, typeof(CompiledRegexRunner));
- GenerateGo();
-
DynamicMethod findFirstCharMethod = DefineDynamicMethod($"Regex{regexNum}_FindFirstChar{description}", typeof(bool), typeof(CompiledRegexRunner));
- GenerateFindFirstChar();
+ EmitFindFirstChar();
+
+ DynamicMethod goMethod = DefineDynamicMethod($"Regex{regexNum}_Go{description}", null, typeof(CompiledRegexRunner));
+ EmitGo();
- return new CompiledRegexRunnerFactory(goMethod, findFirstCharMethod, _trackcount);
+ return new CompiledRegexRunnerFactory(goMethod, findFirstCharMethod, code.TrackCount);
}
/// <summary>Begins the definition of a new method (no args) with a specified return value.</summary>
public bool IsAtomicByParent()
{
// Walk up the parent hierarchy.
- for (RegexNode? parent = Next; parent is not null; parent = parent.Next)
+ RegexNode child = this;
+ for (RegexNode? parent = child.Next; parent is not null; child = parent, parent = child.Next)
{
switch (parent.Type)
{
// so any atomicity applied to the alternation also applies to
// each individual branch. This is true as well for conditional
// backreferences, where each of the yes/no branches are independent.
- case Testgroup when parent.Child(0) != this:
+ case Testgroup when parent.Child(0) != child:
// As with alternations, each yes/no branch of an expression conditional
// are independent from each other, but the conditional expression itself
// can be backtracked into from each of the branches, so we can't make
// it atomic just because the whole conditional is.
case Capture:
// Skip captures. They don't affect atomicity.
- case Concatenate when parent.Child(parent.ChildCount() - 1) == this:
+ case Concatenate when parent.Child(parent.ChildCount() - 1) == child:
// If the parent is a concatenation and this is the last node,
// any atomicity applying to the concatenation applies to this
// node, too.
return 1;
}
- // Determines whether the node supports an optimized code gen strategy based on walking the node tree.
- internal bool SupportsSimplifiedCodeGenerationImplementation()
+ // Determines whether the node supports a compilation / code generation strategy based on walking the node tree.
+ internal bool SupportsCompilation()
{
if (!StackHelper.TryEnsureSufficientExecutionStack())
{
- // If we can't recur further, simplified code generation isn't supported as the tree is too deep.
+ // If we can't recur further, code generation isn't supported as the tree is too deep.
return false;
}
- if ((Options & RegexOptions.RightToLeft) != 0)
+ if ((Options & (RegexOptions.RightToLeft | RegexOptions.NonBacktracking)) != 0)
{
- // RightToLeft isn't supported. That applies to both the top-level options as well as when used
- // to specify positive and negative lookbehinds.
+ // NonBacktracking isn't supported, nor RightToLeft. The latter applies to both the top-level
+ // options as well as when used to specify positive and negative lookbehinds.
return false;
}
- // TODO: This should be moved somewhere else, to a pass somewhere where we explicitly
- // annotate the tree, potentially as part of the final optimization pass. It doesn't
- // belong in this check.
- switch (Type)
- {
- case Capture:
- // If we've found a supported capture, mark all of the nodes in its parent
- // hierarchy as containing a capture.
- RegexNode? parent = this;
- while (parent != null && ((parent.Options & HasCapturesFlag) == 0))
- {
- parent.Options |= HasCapturesFlag;
- parent = parent.Next;
- }
- break;
- }
-
int childCount = ChildCount();
for (int i = 0; i < childCount; i++)
{
// The node isn't supported if any of its children aren't supported.
- if (!Child(i).SupportsSimplifiedCodeGenerationImplementation())
+ if (!Child(i).SupportsCompilation())
{
return false;
}
}
+ // TODO: This should be moved somewhere else, to a pass somewhere where we explicitly
+ // annotate the tree, potentially as part of the final optimization pass. It doesn't
+ // belong in this check.
+ if (Type == Capture)
+ {
+ // If we've found a supported capture, mark all of the nodes in its parent hierarchy as containing a capture.
+ for (RegexNode? parent = this; parent != null && (parent.Options & HasCapturesFlag) == 0; parent = parent.Next)
+ {
+ parent.Options |= HasCapturesFlag;
+ }
+ }
+
// Supported.
return true;
}
using System.Threading.Tasks;
using Xunit;
using System.Linq;
+using System.Runtime.CompilerServices;
namespace System.Text.RegularExpressions.Tests
{
};
}
}
+
+#if !NETFRAMEWORK // these tests currently fail on .NET Framework, and we need to check IsDynamicCodeCompiled but that doesn't exist on .NET Framework
+ if (engine != RegexEngine.Interpreter && // these tests currently fail with RegexInterpreter
+ RuntimeFeature.IsDynamicCodeCompiled) // if dynamic code isn't compiled, RegexOptions.Compiled falls back to the interpreter, for which these tests currently fail
+ {
+ // Fails on interpreter and .NET Framework: [ActiveIssue("https://github.com/dotnet/runtime/issues/62094")]
+ yield return new object[]
+ {
+ engine, "@(a*)+?", "@", RegexOptions.None, new[]
+ {
+ new CaptureData("@", 0, 1)
+ }
+ };
+
+ // Fails on interpreter and .NET Framework: [ActiveIssue("https://github.com/dotnet/runtime/issues/62094")]
+ yield return new object[]
+ {
+ engine, @"(?:){93}", "x", RegexOptions.None, new[]
+ {
+ new CaptureData("", 0, 0),
+ new CaptureData("", 1, 0)
+ }
+ };
+
+ if (!RegexHelpers.IsNonBacktracking(engine)) // atomic subexpressions aren't supported
+ {
+ // Fails on interpreter and .NET Framework: [ActiveIssue("https://github.com/dotnet/runtime/issues/62094")]
+ yield return new object[]
+ {
+ engine, @"()(?>\1+?).\b", "xxxx", RegexOptions.None, new[]
+ {
+ new CaptureData("x", 3, 1),
+ }
+ };
+ }
+ }
+#endif
}
}
Regex regexAdvanced = await RegexHelpers.GetRegexAsync(engine, pattern, options);
VerifyMatches(regexAdvanced.Matches(input), expected);
VerifyMatches(regexAdvanced.Match(input), expected);
-
- VerifyMatches(Regex.Matches(input, pattern, options), expected);
- VerifyMatches(Regex.Match(input, pattern, options), expected);
}
private static void VerifyMatches(Match match, CaptureData[] expected)
private static void VerifyMatch(Match match, CaptureData expected)
{
Assert.True(match.Success);
- RegexAssert.Equal(expected.Value, match);
Assert.Equal(expected.Index, match.Index);
Assert.Equal(expected.Length, match.Length);
+ RegexAssert.Equal(expected.Value, match);
- RegexAssert.Equal(expected.Value, match.Groups[0]);
Assert.Equal(expected.Index, match.Groups[0].Index);
Assert.Equal(expected.Length, match.Groups[0].Length);
+ RegexAssert.Equal(expected.Value, match.Groups[0]);
Assert.Equal(1, match.Captures.Count);
- RegexAssert.Equal(expected.Value, match.Captures[0]);
Assert.Equal(expected.Index, match.Captures[0].Index);
Assert.Equal(expected.Length, match.Captures[0].Length);
+ RegexAssert.Equal(expected.Value, match.Captures[0]);
}
[Fact]
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
-using System.Collections.Generic;
+using System.Collections.Immutable;
using System.Diagnostics;
using System.Globalization;
using System.IO;
// Run the generator
GeneratorDriverRunResult generatorResults = s_generatorDriver.RunGenerators(comp!, cancellationToken).GetRunResult();
- if (generatorResults.Diagnostics.Length != 0)
+ ImmutableArray<Diagnostic> generatorDiagnostics = generatorResults.Diagnostics.RemoveAll(d => d.Severity <= DiagnosticSeverity.Info);
+ if (generatorDiagnostics.Length != 0)
{
throw new ArgumentException(
string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => NumberLines(t.ToString()))) + Environment.NewLine +
- string.Join(Environment.NewLine, generatorResults.Diagnostics));
+ string.Join(Environment.NewLine, generatorDiagnostics));
}
// Compile the assembly to a stream
{
throw new ArgumentException(
string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => NumberLines(t.ToString()))) + Environment.NewLine +
- string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorResults.Diagnostics)));
+ string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorDiagnostics)));
}
dll.Position = 0;
}
[Fact]
+ public async Task Diagnostic_RightToLeft_LimitedSupport()
+ {
+ // Source under test: a generated regex that requests RegexOptions.RightToLeft.
+ string source = @"
+ using System.Text.RegularExpressions;
+ partial class C
+ {
+ [RegexGenerator(""ab"", RegexOptions.RightToLeft)]
+ private static partial Regex RightToLeftNotSupported();
+ }
+ ";
+ IReadOnlyList<Diagnostic> reported = await RunGenerator(source);
+
+ // Exactly one diagnostic is expected, and it must carry the SYSLIB1045 id.
+ Diagnostic onlyDiagnostic = Assert.Single(reported);
+ Assert.Equal("SYSLIB1045", onlyDiagnostic.Id);
+ }
+
+ [Fact]
+ public async Task Diagnostic_NonBacktracking_LimitedSupport()
+ {
+ // Source under test: a generated regex that requests RegexOptions.NonBacktracking.
+ // The partial method is named after this scenario so a failure dump is not confused with the RightToLeft test.
+ IReadOnlyList<Diagnostic> diagnostics = await RunGenerator(@"
+ using System.Text.RegularExpressions;
+ partial class C
+ {
+ [RegexGenerator(""ab"", RegexOptions.NonBacktracking)]
+ private static partial Regex NonBacktrackingNotSupported();
+ }
+ ");
+
+ // Exactly one diagnostic is expected, and it must carry the SYSLIB1045 id.
+ Assert.Equal("SYSLIB1045", Assert.Single(diagnostics).Id);
+ }
+
+ [Fact]
+ public async Task Diagnostic_PositiveLookbehind_LimitedSupport()
+ {
+ // Source under test: a generated regex whose pattern begins with a positive lookbehind, (?<=...).
+ string source = @"
+ using System.Text.RegularExpressions;
+ partial class C
+ {
+ [RegexGenerator(""(?<=\b20)\d{2}\b"")]
+ private static partial Regex PositiveLookbehindNotSupported();
+ }
+ ";
+ IReadOnlyList<Diagnostic> reported = await RunGenerator(source);
+
+ // Exactly one diagnostic is expected, and it must carry the SYSLIB1045 id.
+ Diagnostic onlyDiagnostic = Assert.Single(reported);
+ Assert.Equal("SYSLIB1045", onlyDiagnostic.Id);
+ }
+
+ [Fact]
+ public async Task Diagnostic_NegativeLookbehind_LimitedSupport()
+ {
+ // Source under test: a generated regex whose pattern begins with a negative lookbehind, (?<!...).
+ string source = @"
+ using System.Text.RegularExpressions;
+ partial class C
+ {
+ [RegexGenerator(""(?<!(Saturday|Sunday) )\b\w+ \d{1,2}, \d{4}\b"")]
+ private static partial Regex NegativeLookbehindNotSupported();
+ }
+ ";
+ IReadOnlyList<Diagnostic> reported = await RunGenerator(source);
+
+ // Exactly one diagnostic is expected, and it must carry the SYSLIB1045 id.
+ Diagnostic onlyDiagnostic = Assert.Single(reported);
+ Assert.Equal("SYSLIB1045", onlyDiagnostic.Id);
+ }
+
+ [Fact]
public async Task Valid_ClassWithoutNamespace()
{
Assert.Empty(await RunGenerator(@"