using System.Globalization;
using System.IO;
using System.Linq;
+using System.Net.Cache;
using System.Runtime.InteropServices;
using System.Threading;
using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.CSharp;
-// NOTE: The logic in this file is largely a copy of logic in RegexCompiler, emitting C# instead of MSIL.
+// NOTE: The logic in this file is largely a duplicate of logic in RegexCompiler, emitting C# instead of MSIL.
// Most changes made to this file should be kept in sync, so far as bug fixes and relevant optimizations
// are concerned.
{
public partial class RegexGenerator
{
- /// <summary>Code for a [GeneratedCode] attribute to put on the top-level generated members.</summary>
- private static readonly string s_generatedCodeAttribute = $"[global::System.CodeDom.Compiler.GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")]";
- /// <summary>Header comments and usings to include at the top of every generated file.</summary>
- private static readonly string[] s_headers = new string[]
+ /// <summary>Emits the definition of the partial method. This method just delegates to the property cache on the generated Regex-derived type.</summary>
+ private static void EmitRegexPartialMethod(RegexMethod regexMethod, IndentedTextWriter writer, string generatedClassName)
{
- "// <auto-generated/>",
- "#nullable enable",
- "#pragma warning disable CS0162 // Unreachable code",
- "#pragma warning disable CS0164 // Unreferenced label",
- "#pragma warning disable CS0219 // Variable assigned but never used",
- "",
- };
-
- /// <summary>Generates the code for one regular expression class.</summary>
- private static (string, ImmutableArray<Diagnostic>) EmitRegexType(RegexType regexClass, bool allowUnsafe)
- {
- var sb = new StringBuilder(1024);
- var writer = new IndentedTextWriter(new StringWriter(sb));
-
- // Emit the namespace
- if (!string.IsNullOrWhiteSpace(regexClass.Namespace))
+ // Emit the namespace.
+ RegexType? parent = regexMethod.DeclaringType;
+ if (!string.IsNullOrWhiteSpace(parent.Namespace))
{
- writer.WriteLine($"namespace {regexClass.Namespace}");
+ writer.WriteLine($"namespace {parent.Namespace}");
writer.WriteLine("{");
writer.Indent++;
}
- // Emit containing types
- RegexType? parent = regexClass.ParentClass;
+ // Emit containing types.
var parentClasses = new Stack<string>();
while (parent is not null)
{
parentClasses.Push($"partial {parent.Keyword} {parent.Name}");
- parent = parent.ParentClass;
+ parent = parent.Parent;
}
while (parentClasses.Count != 0)
{
writer.Indent++;
}
- // Emit the direct parent type
- writer.WriteLine($"partial {regexClass.Keyword} {regexClass.Name}");
- writer.WriteLine("{");
- writer.Indent++;
-
- // Generate a name to describe the regex instance. This includes the method name
- // the user provided and a non-randomized (for determinism) hash of it to try to make
- // the name that much harder to predict.
- Debug.Assert(regexClass.Method is not null);
- string generatedName = $"GeneratedRegex_{regexClass.Method.MethodName}_";
- generatedName += ComputeStringHash(generatedName).ToString("X");
-
- // Generate the regex type
- ImmutableArray<Diagnostic> diagnostics = EmitRegexMethod(writer, regexClass.Method, generatedName, allowUnsafe);
+ // Emit the partial method definition.
+ writer.WriteLine($"[global::System.CodeDom.Compiler.{s_generatedCodeAttribute}]");
+ writer.WriteLine($"{regexMethod.Modifiers} global::System.Text.RegularExpressions.Regex {regexMethod.MethodName}() => global::{GeneratedNamespace}.{generatedClassName}.{regexMethod.GeneratedName}.Instance;");
+ // Unwind all scopes
while (writer.Indent != 0)
{
writer.Indent--;
writer.WriteLine("}");
}
-
- writer.Flush();
- return (sb.ToString(), diagnostics);
-
- // FNV-1a hash function. The actual algorithm used doesn't matter; just something simple
- // to create a deterministic, pseudo-random value that's based on input text.
- static uint ComputeStringHash(string s)
- {
- uint hashCode = 2166136261;
- foreach (char c in s)
- {
- hashCode = (c ^ hashCode) * 16777619;
- }
- return hashCode;
- }
}
- /// <summary>Generates the code for a regular expression method.</summary>
- private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id, bool allowUnsafe)
+ /// <summary>Emits the Regex-derived type for a method where we're unable to generate custom code.</summary>
+ private static void EmitRegexLimitedBoilerplate(
+ IndentedTextWriter writer, RegexMethod rm, int id, string reason)
{
- string patternExpression = Literal(rm.Pattern);
- string optionsExpression = Literal(rm.Options);
- string timeoutExpression = rm.MatchTimeout == Timeout.Infinite ?
- "global::System.Threading.Timeout.InfiniteTimeSpan" :
- $"global::System.TimeSpan.FromMilliseconds({rm.MatchTimeout.ToString(CultureInfo.InvariantCulture)})";
-
- writer.WriteLine(s_generatedCodeAttribute);
- writer.WriteLine($"{rm.Modifiers} global::System.Text.RegularExpressions.Regex {rm.MethodName}() => {id}.Instance;");
- writer.WriteLine();
- writer.WriteLine(s_generatedCodeAttribute);
- writer.WriteLine("[global::System.ComponentModel.EditorBrowsable(global::System.ComponentModel.EditorBrowsableState.Never)]");
- writer.WriteLine($"{(writer.Indent != 0 ? "private" : "internal")} sealed class {id} : global::System.Text.RegularExpressions.Regex");
- writer.WriteLine("{");
- writer.Write(" public static global::System.Text.RegularExpressions.Regex Instance { get; } = ");
-
- // If we can't support custom generation for this regex, spit out a Regex constructor call.
- if (!rm.Tree.Root.SupportsCompilation(out string? reason))
- {
- writer.WriteLine();
- writer.WriteLine($" // Cannot generate Regex-derived implementation because {reason}.");
- writer.WriteLine($" new global::System.Text.RegularExpressions.Regex({patternExpression}, {optionsExpression}, {timeoutExpression});");
- writer.WriteLine("}");
- return ImmutableArray.Create(Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, rm.MethodSyntax.GetLocation()));
- }
-
- AnalysisResults analysis = RegexTreeAnalyzer.Analyze(rm.Tree);
+ writer.WriteLine($"/// <summary>Caches a <see cref=\"Regex\"/> instance for the {rm.MethodName} method.</summary>");
+ writer.WriteLine($"/// <remarks>A custom Regex-derived type could not be generated because {reason}.</remarks>");
+ writer.WriteLine($"internal sealed class {rm.GeneratedName} : Regex");
+ writer.WriteLine($"{{");
+ writer.WriteLine($" /// <summary>Cached, thread-safe singleton instance.</summary>");
+ writer.WriteLine($" internal static Regex Instance {{ get; }} = new({Literal(rm.Pattern)}, {Literal(rm.Options)}, {GetTimeoutExpression(rm.MatchTimeout)});");
+ writer.WriteLine($"}}");
+ }
- writer.WriteLine($"new {id}();");
- writer.WriteLine();
- writer.WriteLine($" private {id}()");
+ /// <summary>Emits the Regex-derived type for a method whose RunnerFactory implementation was generated into <paramref name="runnerFactoryImplementation"/>.</summary>
+ private static void EmitRegexDerivedImplementation(
+ IndentedTextWriter writer, RegexMethod rm, int id, string runnerFactoryImplementation)
+ {
+ writer.WriteLine($"/// <summary>Custom <see cref=\"Regex\"/>-derived type for the {rm.MethodName} method.</summary>");
+ writer.WriteLine($"internal sealed class {rm.GeneratedName} : Regex");
+ writer.WriteLine($"{{");
+ writer.WriteLine($" /// <summary>Cached, thread-safe singleton instance.</summary>");
+ writer.WriteLine($" internal static {rm.GeneratedName} Instance {{ get; }} = new();");
+ writer.WriteLine($"");
+ writer.WriteLine($" /// <summary>Initializes the instance.</summary>");
+ writer.WriteLine($" private {rm.GeneratedName}()");
writer.WriteLine($" {{");
- writer.WriteLine($" base.pattern = {patternExpression};");
- writer.WriteLine($" base.roptions = {optionsExpression};");
- writer.WriteLine($" base.internalMatchTimeout = {timeoutExpression};");
+ writer.WriteLine($" base.pattern = {Literal(rm.Pattern)};");
+ writer.WriteLine($" base.roptions = {Literal(rm.Options)};");
+ writer.WriteLine($" base.internalMatchTimeout = {GetTimeoutExpression(rm.MatchTimeout)};");
writer.WriteLine($" base.factory = new RunnerFactory();");
if (rm.Tree.CaptureNumberSparseMapping is not null)
{
- writer.Write(" base.Caps = new global::System.Collections.Hashtable {");
+ writer.Write(" base.Caps = new Hashtable {");
AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping);
- writer.WriteLine(" };");
+ writer.WriteLine($" }};");
}
if (rm.Tree.CaptureNameToNumberMapping is not null)
{
- writer.Write(" base.CapNames = new global::System.Collections.Hashtable {");
+ writer.Write(" base.CapNames = new Hashtable {");
AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping);
- writer.WriteLine(" };");
+ writer.WriteLine($" }};");
}
if (rm.Tree.CaptureNames is not null)
{
writer.Write(Literal(s));
separator = ", ";
}
- writer.WriteLine(" };");
+ writer.WriteLine($" }};");
}
writer.WriteLine($" base.capsize = {rm.Tree.CaptureCount};");
writer.WriteLine($" }}");
- writer.WriteLine(" ");
- writer.WriteLine($" private sealed class RunnerFactory : global::System.Text.RegularExpressions.RegexRunnerFactory");
- writer.WriteLine($" {{");
- writer.WriteLine($" protected override global::System.Text.RegularExpressions.RegexRunner CreateInstance() => new Runner();");
- writer.WriteLine();
- writer.WriteLine($" private sealed class Runner : global::System.Text.RegularExpressions.RegexRunner");
- writer.WriteLine($" {{");
-
- // Main implementation methods
- writer.WriteLine(" // Description:");
- DescribeExpression(writer, rm.Tree.Root.Child(0), " // ", analysis); // skip implicit root capture
- writer.WriteLine();
-
- writer.WriteLine($" protected override void Scan(global::System.ReadOnlySpan<char> text)");
- writer.WriteLine($" {{");
- writer.Indent += 4;
- EmitScan(writer, rm, id);
- writer.Indent -= 4;
- writer.WriteLine($" }}");
- writer.WriteLine();
-
- writer.WriteLine($" private bool TryFindNextPossibleStartingPosition(global::System.ReadOnlySpan<char> inputSpan)");
- writer.WriteLine($" {{");
- writer.Indent += 4;
- RequiredHelperFunctions requiredHelpers = EmitTryFindNextPossibleStartingPosition(writer, rm, id);
- writer.Indent -= 4;
- writer.WriteLine($" }}");
- writer.WriteLine();
- if (allowUnsafe)
- {
- writer.WriteLine($" [global::System.Runtime.CompilerServices.SkipLocalsInit]");
- }
- writer.WriteLine($" private bool TryMatchAtCurrentPosition(global::System.ReadOnlySpan<char> inputSpan)");
- writer.WriteLine($" {{");
- writer.Indent += 4;
- requiredHelpers |= EmitTryMatchAtCurrentPosition(writer, rm, id, analysis);
- writer.Indent -= 4;
- writer.WriteLine($" }}");
-
- if ((requiredHelpers & RequiredHelperFunctions.IsWordChar) != 0)
- {
- writer.WriteLine();
- writer.WriteLine($" /// <summary>Determines whether the character is part of the [\\w] set.</summary>");
- writer.WriteLine($" [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
- writer.WriteLine($" private static bool IsWordChar(char ch)");
- writer.WriteLine($" {{");
- writer.WriteLine($" global::System.ReadOnlySpan<byte> ascii = new byte[]");
- writer.WriteLine($" {{");
- writer.WriteLine($" 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,");
- writer.WriteLine($" 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07");
- writer.WriteLine($" }};");
- writer.WriteLine();
- writer.WriteLine($" int chDiv8 = ch >> 3;");
- writer.WriteLine($" return (uint)chDiv8 < (uint)ascii.Length ?");
- writer.WriteLine($" (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :");
- writer.WriteLine($" global::System.Globalization.CharUnicodeInfo.GetUnicodeCategory(ch) switch");
- writer.WriteLine($" {{");
- writer.WriteLine($" global::System.Globalization.UnicodeCategory.UppercaseLetter or");
- writer.WriteLine($" global::System.Globalization.UnicodeCategory.LowercaseLetter or");
- writer.WriteLine($" global::System.Globalization.UnicodeCategory.TitlecaseLetter or");
- writer.WriteLine($" global::System.Globalization.UnicodeCategory.ModifierLetter or");
- writer.WriteLine($" global::System.Globalization.UnicodeCategory.OtherLetter or");
- writer.WriteLine($" global::System.Globalization.UnicodeCategory.NonSpacingMark or");
- writer.WriteLine($" global::System.Globalization.UnicodeCategory.DecimalDigitNumber or");
- writer.WriteLine($" global::System.Globalization.UnicodeCategory.ConnectorPunctuation => true,");
- writer.WriteLine($" _ => false,");
- writer.WriteLine($" }};");
- writer.WriteLine($" }}");
- }
-
- if ((requiredHelpers & RequiredHelperFunctions.IsBoundary) != 0)
- {
- writer.WriteLine();
- writer.WriteLine($" /// <summary>Determines whether the character at the specified index is a boundary.</summary>");
- writer.WriteLine($" [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
- writer.WriteLine($" private static bool IsBoundary(global::System.ReadOnlySpan<char> inputSpan, int index)");
- writer.WriteLine($" {{");
- writer.WriteLine($" int indexM1 = index - 1;");
- writer.WriteLine($" return ((uint)indexM1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexM1])) !=");
- writer.WriteLine($" ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));");
- writer.WriteLine();
- writer.WriteLine($" static bool IsBoundaryWordChar(char ch) =>");
- writer.WriteLine($" IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');");
- writer.WriteLine($" }}");
- }
-
- if ((requiredHelpers & RequiredHelperFunctions.IsECMABoundary) != 0)
- {
- writer.WriteLine();
- writer.WriteLine($" /// <summary>Determines whether the character at the specified index is a boundary.</summary>");
- writer.WriteLine($" [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
- writer.WriteLine($" private static bool IsECMABoundary(global::System.ReadOnlySpan<char> inputSpan, int index)");
- writer.WriteLine($" {{");
- writer.WriteLine($" int indexM1 = index - 1;");
- writer.WriteLine($" return ((uint)indexM1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexM1])) !=");
- writer.WriteLine($" ((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));");
- writer.WriteLine();
- writer.WriteLine($" static bool IsECMAWordChar(char ch) =>");
- writer.WriteLine($" ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter");
- writer.WriteLine($" (((uint)ch - '0') < 10) || // digit");
- writer.WriteLine($" ch == '_' || // underscore");
- writer.WriteLine($" ch == '\\u0130'; // latin capital letter I with dot above");
- writer.WriteLine($" }}");
- }
-
- writer.WriteLine($" }}");
- writer.WriteLine($" }}");
- writer.WriteLine("}");
- return ImmutableArray<Diagnostic>.Empty;
+ writer.WriteLine(runnerFactoryImplementation);
+ writer.WriteLine($"}}");
static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
{
}
}
+ /// <summary>Emits the code for the RunnerFactory. This is the actual logic for the regular expression.</summary>
+ /// <param name="writer">The writer that receives the generated C# source text.</param>
+ /// <param name="rm">The regex method whose parsed tree (<c>rm.Tree</c>) is analyzed and described here, and which is forwarded to the method-body emitters.</param>
+ /// <param name="requiredHelpers">Collection forwarded to the TryFindNextPossibleStartingPosition and TryMatchAtCurrentPosition emitters; presumably they register the shared helper methods their output references (TODO confirm against those emitters).</param>
+ private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
+ {
+ // Analyze the tree once; the results feed both the description comment and EmitTryMatchAtCurrentPosition below.
+ AnalysisResults analysis = RegexTreeAnalyzer.Analyze(rm.Tree);
+
+ // Open the RunnerFactory class and, nested inside it, the Runner class.
+ writer.WriteLine($"/// <summary>Provides a factory for creating <see cref=\"RegexRunner\"/> instances to be used by methods on <see cref=\"Regex\"/>.</summary>");
+ writer.WriteLine($"private sealed class RunnerFactory : RegexRunnerFactory");
+ writer.WriteLine($"{{");
+ writer.WriteLine($" /// <summary>Creates an instance of a <see cref=\"RegexRunner\"/> used by methods on <see cref=\"Regex\"/>.</summary>");
+ writer.WriteLine($" protected override RegexRunner CreateInstance() => new Runner();");
+ writer.WriteLine();
+ writer.WriteLine($" /// <summary>Provides the runner that contains the custom logic implementing the specified regular expression.</summary>");
+ writer.WriteLine($" private sealed class Runner : RegexRunner");
+ writer.WriteLine($" {{");
+
+ // Main implementation methods
+ writer.WriteLine($" // Description:");
+ DescribeExpression(writer, rm.Tree.Root.Child(0), " // ", analysis); // skip implicit root capture
+ writer.WriteLine();
+
+ // Scan override: the signature and braces are written as literals; the body comes from EmitScan.
+ // The writer's indent is raised by three levels while the body is emitted and restored afterwards.
+ writer.WriteLine($" /// <summary>Scan the <paramref name=\"inputSpan\"/> starting from base.runtextstart for the next match.</summary>");
+ writer.WriteLine($" /// <param name=\"inputSpan\">The text being scanned by the regular expression.</param>");
+ writer.WriteLine($" protected override void Scan(ReadOnlySpan<char> inputSpan)");
+ writer.WriteLine($" {{");
+ writer.Indent += 3;
+ EmitScan(writer, rm);
+ writer.Indent -= 3;
+ writer.WriteLine($" }}");
+ writer.WriteLine();
+
+ // TryFindNextPossibleStartingPosition: same literal-wrapper / indented-body pattern as Scan.
+ writer.WriteLine($" /// <summary>Search <paramref name=\"inputSpan\"/> starting from base.runtextpos for the next location a match could possibly start.</summary>");
+ writer.WriteLine($" /// <param name=\"inputSpan\">The text being scanned by the regular expression.</param>");
+ writer.WriteLine($" /// <returns>true if a possible match was found; false if no more matches are possible.</returns>");
+ writer.WriteLine($" private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)");
+ writer.WriteLine($" {{");
+ writer.Indent += 3;
+ EmitTryFindNextPossibleStartingPosition(writer, rm, requiredHelpers);
+ writer.Indent -= 3;
+ writer.WriteLine($" }}");
+ writer.WriteLine();
+ // TryMatchAtCurrentPosition: additionally receives the analysis results computed at the top of this method.
+ writer.WriteLine($" /// <summary>Determine whether <paramref name=\"inputSpan\"/> at base.runtextpos is a match for the regular expression.</summary>");
+ writer.WriteLine($" /// <param name=\"inputSpan\">The text being scanned by the regular expression.</param>");
+ writer.WriteLine($" /// <returns>true if the regular expression matches at the current position; otherwise, false.</returns>");
+ writer.WriteLine($" private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)");
+ writer.WriteLine($" {{");
+ writer.Indent += 3;
+ EmitTryMatchAtCurrentPosition(writer, rm, analysis, requiredHelpers);
+ writer.Indent -= 3;
+ writer.WriteLine($" }}");
+ // Close the Runner class, then the RunnerFactory class.
+ writer.WriteLine($" }}");
+ writer.WriteLine($"}}");
+ }
+
+ /// <summary>Builds the C# source text of an expression that evaluates to the given match timeout.</summary>
+ /// <param name="matchTimeout">The timeout in milliseconds, or <see cref="Timeout.Infinite"/> for no timeout.</param>
+ /// <returns>
+ /// <c>Timeout.InfiniteTimeSpan</c> for an infinite timeout; otherwise a <c>TimeSpan.FromMilliseconds(...)</c>
+ /// call whose argument is the value formatted with the invariant culture.
+ /// </returns>
+ private static string GetTimeoutExpression(int matchTimeout)
+ {
+     // The infinite sentinel maps onto the dedicated framework constant.
+     if (matchTimeout == Timeout.Infinite)
+     {
+         return "Timeout.InfiniteTimeSpan";
+     }
+
+     // Format culture-invariantly so the emitted source does not depend on the build machine's locale.
+     string milliseconds = matchTimeout.ToString(CultureInfo.InvariantCulture);
+     return $"TimeSpan.FromMilliseconds({milliseconds})";
+ }
+
+ /// <summary>Registers the source lines of the <c>IsWordChar</c> helper, unless an entry with that name already exists.</summary>
+ /// <param name="requiredHelpers">Map from helper name to the lines of that helper's source; updated in place.</param>
+ private static void AddIsWordCharHelper(Dictionary<string, string[]> requiredHelpers)
+ {
+     const string HelperKey = "IsWordChar";
+
+     // Already registered: keep the existing entry untouched.
+     if (requiredHelpers.ContainsKey(HelperKey))
+     {
+         return;
+     }
+
+     string[] helperSource =
+     {
+         "/// <summary>Determines whether the character is part of the [\\w] set.</summary>",
+         "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+         "internal static bool IsWordChar(char ch)",
+         "{",
+         " // Bitmap for whether each character 0 through 127 is in [\\w]",
+         " ReadOnlySpan<byte> ascii = new byte[]",
+         " {",
+         " 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,",
+         " 0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07",
+         " };",
+         "",
+         " // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.",
+         " int chDiv8 = ch >> 3;",
+         " return (uint)chDiv8 < (uint)ascii.Length ?",
+         " (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :",
+         " CharUnicodeInfo.GetUnicodeCategory(ch) switch",
+         " {",
+         " UnicodeCategory.UppercaseLetter or",
+         " UnicodeCategory.LowercaseLetter or",
+         " UnicodeCategory.TitlecaseLetter or",
+         " UnicodeCategory.ModifierLetter or",
+         " UnicodeCategory.OtherLetter or",
+         " UnicodeCategory.NonSpacingMark or",
+         " UnicodeCategory.DecimalDigitNumber or",
+         " UnicodeCategory.ConnectorPunctuation => true,",
+         " _ => false,",
+         " };",
+         "}",
+     };
+
+     requiredHelpers.Add(HelperKey, helperSource);
+ }
+
+ /// <summary>
+ /// Registers the source lines of the <c>IsBoundary</c> helper, unless an entry with that name already exists,
+ /// and then makes sure the <c>IsWordChar</c> helper its source calls is registered too.
+ /// </summary>
+ /// <param name="requiredHelpers">Map from helper name to the lines of that helper's source; updated in place.</param>
+ private static void AddIsBoundaryHelper(Dictionary<string, string[]> requiredHelpers)
+ {
+     const string HelperKey = "IsBoundary";
+
+     // Already registered: nothing to add, and the IsWordChar dependency is not re-checked in that case.
+     if (requiredHelpers.ContainsKey(HelperKey))
+     {
+         return;
+     }
+
+     string[] helperSource =
+     {
+         "/// <summary>Determines whether the specified index is a boundary.</summary>",
+         "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+         "internal static bool IsBoundary(ReadOnlySpan<char> inputSpan, int index)",
+         "{",
+         " int indexMinus1 = index - 1;",
+         " return ((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=",
+         " ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));",
+         "",
+         " static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');",
+         "}",
+     };
+
+     requiredHelpers.Add(HelperKey, helperSource);
+
+     // The emitted IsBoundary source calls IsWordChar, so that helper has to be present as well.
+     AddIsWordCharHelper(requiredHelpers);
+ }
+
+ /// <summary>Registers the source lines of the <c>IsECMABoundary</c> helper, unless an entry with that name already exists.</summary>
+ /// <param name="requiredHelpers">Map from helper name to the lines of that helper's source; updated in place.</param>
+ private static void AddIsECMABoundaryHelper(Dictionary<string, string[]> requiredHelpers)
+ {
+     const string HelperKey = "IsECMABoundary";
+
+     // Already registered: keep the existing entry untouched.
+     if (requiredHelpers.ContainsKey(HelperKey))
+     {
+         return;
+     }
+
+     string[] helperSource =
+     {
+         "/// <summary>Determines whether the specified index is a boundary (ECMAScript).</summary>",
+         "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+         "internal static bool IsECMABoundary(ReadOnlySpan<char> inputSpan, int index)",
+         "{",
+         " int indexMinus1 = index - 1;",
+         " return ((uint)indexMinus1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexMinus1])) !=",
+         " ((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));",
+         "",
+         " static bool IsECMAWordChar(char ch) =>",
+         " ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter",
+         " (((uint)ch - '0') < 10) || // digit",
+         " ch == '_' || // underscore",
+         " ch == '\\u0130'; // latin capital letter I with dot above",
+         "}",
+     };
+
+     requiredHelpers.Add(HelperKey, helperSource);
+ }
+
/// <summary>Emits the body of the Scan method override.</summary>
- private static void EmitScan(IndentedTextWriter writer, RegexMethod rm, string id)
+ private static void EmitScan(IndentedTextWriter writer, RegexMethod rm)
{
bool rtl = (rm.Options & RegexOptions.RightToLeft) != 0;
- using (EmitBlock(writer, "while (TryFindNextPossibleStartingPosition(text))"))
+ using (EmitBlock(writer, "while (TryFindNextPossibleStartingPosition(inputSpan))"))
{
if (rm.MatchTimeout != Timeout.Infinite)
{
writer.WriteLine();
}
- writer.WriteLine("// If we find a match on the current position, or we have reached the end of the input, we are done.");
- using (EmitBlock(writer, $"if (TryMatchAtCurrentPosition(text) || base.runtextpos == {(!rtl ? "text.Length" : "0")})"))
+ writer.WriteLine("// If we find a match at the current position, or we have reached the end of the input, we are done.");
+ using (EmitBlock(writer, $"if (TryMatchAtCurrentPosition(inputSpan) || base.runtextpos == {(!rtl ? "inputSpan.Length" : "0")})"))
{
writer.WriteLine("return;");
}
}
/// <summary>Emits the body of the TryFindNextPossibleStartingPosition.</summary>
- private static RequiredHelperFunctions EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, string id)
+ private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
{
RegexOptions options = (RegexOptions)rm.Options;
RegexTree regexTree = rm.Tree;
bool hasTextInfo = false;
- RequiredHelperFunctions requiredHelpers = RequiredHelperFunctions.None;
bool rtl = (options & RegexOptions.RightToLeft) != 0;
// In some cases, we need to emit declarations at the beginning of the method, but we only discover we need them later.
// We're done. Patch up any additional declarations.
ReplaceAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent);
- return requiredHelpers;
+ return;
// Emit a goto for the specified label.
void Goto(string label) => writer.WriteLine($"goto {label};");
writer.WriteLine("// Beginning-of-line anchor");
using (EmitBlock(writer, "if (pos > 0 && inputSpan[pos - 1] != '\\n')"))
{
- writer.WriteLine("int newlinePos = global::System.MemoryExtensions.IndexOf(inputSpan.Slice(pos), '\\n');");
+ writer.WriteLine("int newlinePos = inputSpan.Slice(pos).IndexOf('\\n');");
using (EmitBlock(writer, "if ((uint)newlinePos > inputSpan.Length - pos - 1)"))
{
Goto(NoStartingPositionFound);
// Emits a case-sensitive prefix search for a string at the beginning of the pattern.
void EmitIndexOf_LeftToRight(string prefix)
{
- writer.WriteLine($"int i = global::System.MemoryExtensions.IndexOf(inputSpan.Slice(pos), {Literal(prefix)});");
+ writer.WriteLine($"int i = inputSpan.Slice(pos).IndexOf({Literal(prefix)});");
writer.WriteLine("if (i >= 0)");
writer.WriteLine("{");
writer.WriteLine(" base.runtextpos = pos + i;");
// Emits a case-sensitive right-to-left prefix search for a string at the beginning of the pattern.
void EmitIndexOf_RightToLeft(string prefix)
{
- writer.WriteLine($"pos = global::System.MemoryExtensions.LastIndexOf(inputSpan.Slice(0, pos), {Literal(prefix)});");
+ writer.WriteLine($"pos = inputSpan.Slice(0, pos).LastIndexOf({Literal(prefix)});");
writer.WriteLine("if (pos >= 0)");
writer.WriteLine("{");
writer.WriteLine($" base.runtextpos = pos + {prefix.Length};");
FinishEmitScope loopBlock = default;
if (needLoop)
{
- writer.WriteLine("global::System.ReadOnlySpan<char> span = inputSpan.Slice(pos);");
+ writer.WriteLine("ReadOnlySpan<char> span = inputSpan.Slice(pos);");
string upperBound = "span.Length" + (setsToUse > 1 || primarySet.Distance != 0 ? $" - {minRequiredLength - 1}" : "");
loopBlock = EmitBlock(writer, $"for (int i = 0; i < {upperBound}; i++)");
}
string indexOf = primarySet.Chars!.Length switch
{
- 1 => $"global::System.MemoryExtensions.IndexOf({span}, {Literal(primarySet.Chars[0])})",
- 2 => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])})",
- 3 => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])}, {Literal(primarySet.Chars[2])})",
- _ => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(new string(primarySet.Chars))})",
+ 1 => $"{span}.IndexOf({Literal(primarySet.Chars[0])})",
+ 2 => $"{span}.IndexOfAny({Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])})",
+ 3 => $"{span}.IndexOfAny({Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])}, {Literal(primarySet.Chars[2])})",
+ _ => $"{span}.IndexOfAny({Literal(new string(primarySet.Chars))})",
};
if (needLoop)
for (; setIndex < setsToUse; setIndex++)
{
string spanIndex = $"span[i{(sets[setIndex].Distance > 0 ? $" + {sets[setIndex].Distance}" : "")}]";
- string charInClassExpr = MatchCharacterClass(hasTextInfo, options, spanIndex, sets[setIndex].Set, sets[setIndex].CaseInsensitive, negate: false, additionalDeclarations, ref requiredHelpers);
+ string charInClassExpr = MatchCharacterClass(hasTextInfo, options, spanIndex, sets[setIndex].Set, sets[setIndex].CaseInsensitive, negate: false, additionalDeclarations, requiredHelpers);
if (setIndex == start)
{
if (set.Chars is { Length: 1 } && !set.CaseInsensitive)
{
- writer.WriteLine($"pos = global::System.MemoryExtensions.LastIndexOf(inputSpan.Slice(0, pos), {Literal(set.Chars[0])});");
+ writer.WriteLine($"pos = inputSpan.Slice(0, pos).LastIndexOf({Literal(set.Chars[0])});");
writer.WriteLine("if (pos >= 0)");
writer.WriteLine("{");
writer.WriteLine(" base.runtextpos = pos + 1;");
{
using (EmitBlock(writer, "while ((uint)--pos < (uint)inputSpan.Length)"))
{
- using (EmitBlock(writer, $"if ({MatchCharacterClass(hasTextInfo, options, "inputSpan[pos]", set.Set, set.CaseInsensitive, negate: false, additionalDeclarations, ref requiredHelpers)})"))
+ using (EmitBlock(writer, $"if ({MatchCharacterClass(hasTextInfo, options, "inputSpan[pos]", set.Set, set.CaseInsensitive, negate: false, additionalDeclarations, requiredHelpers)})"))
{
writer.WriteLine("base.runtextpos = pos + 1;");
writer.WriteLine("return true;");
using (EmitBlock(writer, "while (true)"))
{
- writer.WriteLine($"global::System.ReadOnlySpan<char> slice = inputSpan.Slice(pos);");
+ writer.WriteLine($"ReadOnlySpan<char> slice = inputSpan.Slice(pos);");
writer.WriteLine();
// Find the literal. If we can't find it, we're done searching.
- writer.Write("int i = global::System.MemoryExtensions.");
+ writer.Write("int i = slice.");
writer.WriteLine(
- target.Literal.String is string literalString ? $"IndexOf(slice, {Literal(literalString)});" :
- target.Literal.Chars is not char[] literalChars ? $"IndexOf(slice, {Literal(target.Literal.Char)});" :
+ target.Literal.String is string literalString ? $"IndexOf({Literal(literalString)});" :
+ target.Literal.Chars is not char[] literalChars ? $"IndexOf({Literal(target.Literal.Char)});" :
literalChars.Length switch
{
- 2 => $"IndexOfAny(slice, {Literal(literalChars[0])}, {Literal(literalChars[1])});",
- 3 => $"IndexOfAny(slice, {Literal(literalChars[0])}, {Literal(literalChars[1])}, {Literal(literalChars[2])});",
- _ => $"IndexOfAny(slice, {Literal(new string(literalChars))});",
+ 2 => $"IndexOfAny({Literal(literalChars[0])}, {Literal(literalChars[1])});",
+ 3 => $"IndexOfAny({Literal(literalChars[0])}, {Literal(literalChars[1])}, {Literal(literalChars[2])});",
+ _ => $"IndexOfAny({Literal(new string(literalChars))});",
});
using (EmitBlock(writer, $"if (i < 0)"))
{
// We found the literal. Walk backwards from it finding as many matches as we can against the loop.
writer.WriteLine("int prev = i;");
- writer.WriteLine($"while ((uint)--prev < (uint)slice.Length && {MatchCharacterClass(hasTextInfo, options, "slice[prev]", target.LoopNode.Str!, caseInsensitive: false, negate: false, additionalDeclarations, ref requiredHelpers)});");
+ writer.WriteLine($"while ((uint)--prev < (uint)slice.Length && {MatchCharacterClass(hasTextInfo, options, "slice[prev]", target.LoopNode.Str!, caseInsensitive: false, negate: false, additionalDeclarations, requiredHelpers)});");
if (target.LoopNode.M > 0)
{
if (needsCulture)
{
hasTextInfo = true;
- writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;");
+ writer.WriteLine("TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;");
}
}
}
}
/// <summary>Emits the body of the TryMatchAtCurrentPosition.</summary>
- private static RequiredHelperFunctions EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, string id, AnalysisResults analysis)
+ private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, AnalysisResults analysis, Dictionary<string, string[]> requiredHelpers)
{
// In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled
// version of what RegexInterpreter would process. The RegexNode tree would be turned into a series of opcodes via
RegexOptions options = (RegexOptions)rm.Options;
RegexTree regexTree = rm.Tree;
- RequiredHelperFunctions requiredHelpers = RequiredHelperFunctions.None;
// Helper to define names. Names start unadorned, but as soon as there's repetition,
// they begin to have a numbered suffix.
writer.WriteLine("base.Capture(0, start, end);");
writer.WriteLine("base.runtextpos = end;");
writer.WriteLine("return true;");
- return requiredHelpers;
+ return;
case RegexNodeKind.Empty:
// This case isn't common in production, but it's very common when first getting started with the
// it from a learning perspective, this is very common, as it's the empty string you start with.
writer.WriteLine("base.Capture(0, base.runtextpos, base.runtextpos);");
writer.WriteLine("return true;");
- return requiredHelpers;
+ return;
}
// In some cases, we need to emit declarations at the beginning of the method, but we only discover we need them later.
}
}
- return requiredHelpers;
+ return;
// Helper to create a name guaranteed to be unique within the function.
string ReserveName(string prefix)
{
if (defineLocal)
{
- writer.Write("global::System.ReadOnlySpan<char> ");
+ writer.Write("ReadOnlySpan<char> ");
}
writer.WriteLine($"{sliceSpan} = inputSpan.Slice(pos);");
}
// If we're case-sensitive, we can simply validate that the remaining length of the slice is sufficient
// to possibly match, and then do a SequenceEqual against the matched text.
writer.WriteLine($"if ({sliceSpan}.Length < matchLength || ");
- using (EmitBlock(writer, $" !global::System.MemoryExtensions.SequenceEqual(inputSpan.Slice(base.MatchIndex({capnum}), matchLength), {sliceSpan}.Slice(0, matchLength)))"))
+ using (EmitBlock(writer, $" !inputSpan.Slice(base.MatchIndex({capnum}), matchLength).SequenceEqual({sliceSpan}.Slice(0, matchLength)))"))
{
Goto(doneLabel);
}
if (node.IsSetFamily)
{
- expr = $"{MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: true, additionalDeclarations, ref requiredHelpers)}";
+ expr = $"{MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: true, additionalDeclarations, requiredHelpers)}";
}
else
{
// Emits the code to handle a boundary check on a character.
void EmitBoundary(RegexNode node)
{
- Debug.Assert(node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary, $"Unexpected type: {node.Kind}");
+ Debug.Assert(node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary, $"Unexpected kind: {node.Kind}");
- string call = node.Kind switch
+ string call;
+ if (node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary)
{
- RegexNodeKind.Boundary => "!IsBoundary",
- RegexNodeKind.NonBoundary => "IsBoundary",
- RegexNodeKind.ECMABoundary => "!IsECMABoundary",
- _ => "IsECMABoundary",
- };
-
- RequiredHelperFunctions boundaryFunctionRequired = node.Kind switch
+ call = node.Kind is RegexNodeKind.Boundary ?
+ $"!{HelpersTypeName}.IsBoundary" :
+ $"{HelpersTypeName}.IsBoundary";
+ AddIsBoundaryHelper(requiredHelpers);
+ }
+ else
{
- RegexNodeKind.Boundary or
- RegexNodeKind.NonBoundary => RequiredHelperFunctions.IsBoundary | RequiredHelperFunctions.IsWordChar, // IsBoundary internally uses IsWordChar
- _ => RequiredHelperFunctions.IsECMABoundary
- };
-
- requiredHelpers |= boundaryFunctionRequired;
+ call = node.Kind is RegexNodeKind.ECMABoundary ?
+ $"!{HelpersTypeName}.IsECMABoundary" :
+ $"{HelpersTypeName}.IsECMABoundary";
+ AddIsECMABoundaryHelper(requiredHelpers);
+ }
using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}))"))
{
else
{
string sourceSpan = sliceStaticPos > 0 ? $"{sliceSpan}.Slice({sliceStaticPos})" : sliceSpan;
- string clause = $"!global::System.MemoryExtensions.StartsWith({sourceSpan}, {Literal(str)})";
+ string clause = $"!{sourceSpan}.StartsWith({Literal(str)})";
if (clauseOnly)
{
writer.Write(clause);
{
writer.WriteLine($"if ({startingPos} >= {endingPos} ||");
using (EmitBlock(writer,
- literal.Item2 is not null ? $" ({endingPos} = global::System.MemoryExtensions.LastIndexOf(inputSpan.Slice({startingPos}, global::System.Math.Min(inputSpan.Length, {endingPos} + {literal.Item2.Length - 1}) - {startingPos}), {Literal(literal.Item2)})) < 0)" :
- literal.Item3 is null ? $" ({endingPos} = global::System.MemoryExtensions.LastIndexOf(inputSpan.Slice({startingPos}, {endingPos} - {startingPos}), {Literal(literal.Item1)})) < 0)" :
+ literal.Item2 is not null ? $" ({endingPos} = inputSpan.Slice({startingPos}, Math.Min(inputSpan.Length, {endingPos} + {literal.Item2.Length - 1}) - {startingPos}).LastIndexOf({Literal(literal.Item2)})) < 0)" :
+ literal.Item3 is null ? $" ({endingPos} = inputSpan.Slice({startingPos}, {endingPos} - {startingPos}).LastIndexOf({Literal(literal.Item1)})) < 0)" :
literal.Item3.Length switch
{
- 2 => $" ({endingPos} = global::System.MemoryExtensions.LastIndexOfAny(inputSpan.Slice({startingPos}, {endingPos} - {startingPos}), {Literal(literal.Item3[0])}, {Literal(literal.Item3[1])})) < 0)",
- 3 => $" ({endingPos} = global::System.MemoryExtensions.LastIndexOfAny(inputSpan.Slice({startingPos}, {endingPos} - {startingPos}), {Literal(literal.Item3[0])}, {Literal(literal.Item3[1])}, {Literal(literal.Item3[2])})) < 0)",
- _ => $" ({endingPos} = global::System.MemoryExtensions.LastIndexOfAny(inputSpan.Slice({startingPos}, {endingPos} - {startingPos}), {Literal(literal.Item3)})) < 0)",
+ 2 => $" ({endingPos} = inputSpan.Slice({startingPos}, {endingPos} - {startingPos}).LastIndexOfAny({Literal(literal.Item3[0])}, {Literal(literal.Item3[1])})) < 0)",
+ 3 => $" ({endingPos} = inputSpan.Slice({startingPos}, {endingPos} - {startingPos}).LastIndexOfAny({Literal(literal.Item3[0])}, {Literal(literal.Item3[1])}, {Literal(literal.Item3[2])})) < 0)",
+ _ => $" ({endingPos} = inputSpan.Slice({startingPos}, {endingPos} - {startingPos}).LastIndexOfAny({Literal(literal.Item3)})) < 0)",
}))
{
Goto(doneLabel);
// We can implement it to search for either that char or the literal, whichever comes first.
// If it ends up being that node.Ch, the loop fails (we're only here if we're backtracking).
writer.WriteLine(
- literal.Item2 is not null ? $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(node.Ch)}, {Literal(literal.Item2[0])});" :
- literal.Item3 is null ? $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(node.Ch)}, {Literal(literal.Item1)});" :
+ literal.Item2 is not null ? $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(node.Ch)}, {Literal(literal.Item2[0])});" :
+ literal.Item3 is null ? $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(node.Ch)}, {Literal(literal.Item1)});" :
literal.Item3.Length switch
{
- 2 => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(node.Ch)}, {Literal(literal.Item3[0])}, {Literal(literal.Item3[1])});",
- _ => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(node.Ch + literal.Item3)});",
+ 2 => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(node.Ch)}, {Literal(literal.Item3[0])}, {Literal(literal.Item3[1])});",
+ _ => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(node.Ch + literal.Item3)});",
});
using (EmitBlock(writer, $"if ((uint){startingPos} >= (uint){sliceSpan}.Length || {sliceSpan}[{startingPos}] == {Literal(node.Ch)})"))
{
// This lazy loop will consume all characters until the subsequent literal. If the subsequent literal
// isn't found, the loop fails. We can implement it to just search for that literal.
writer.WriteLine(
- literal2.Item2 is not null ? $"{startingPos} = global::System.MemoryExtensions.IndexOf({sliceSpan}, {Literal(literal2.Item2)});" :
- literal2.Item3 is null ? $"{startingPos} = global::System.MemoryExtensions.IndexOf({sliceSpan}, {Literal(literal2.Item1)});" :
+ literal2.Item2 is not null ? $"{startingPos} = {sliceSpan}.IndexOf({Literal(literal2.Item2)});" :
+ literal2.Item3 is null ? $"{startingPos} = {sliceSpan}.IndexOf({Literal(literal2.Item1)});" :
literal2.Item3.Length switch
{
- 2 => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(literal2.Item3[0])}, {Literal(literal2.Item3[1])});",
- 3 => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(literal2.Item3[0])}, {Literal(literal2.Item3[1])}, {Literal(literal2.Item3[2])});",
- _ => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(literal2.Item3)});",
+ 2 => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(literal2.Item3[0])}, {Literal(literal2.Item3[1])});",
+ 3 => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(literal2.Item3[0])}, {Literal(literal2.Item3[1])}, {Literal(literal2.Item3[2])});",
+ _ => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(literal2.Item3)});",
});
using (EmitBlock(writer, $"if ({startingPos} < 0)"))
{
}
string repeaterSpan = "repeaterSlice"; // As this repeater doesn't wrap arbitrary node emits, this shouldn't conflict with anything
- writer.WriteLine($"global::System.ReadOnlySpan<char> {repeaterSpan} = {sliceSpan}.Slice({sliceStaticPos}, {iterations});");
+ writer.WriteLine($"ReadOnlySpan<char> {repeaterSpan} = {sliceSpan}.Slice({sliceStaticPos}, {iterations});");
using (EmitBlock(writer, $"for (int i = 0; i < {repeaterSpan}.Length; i++)"))
{
EmitTimeoutCheck(writer, hasTimeout);
string expr = $"inputSpan[pos - {iterationLocal} - 1]";
if (node.IsSetFamily)
{
- expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, ref requiredHelpers);
+ expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, requiredHelpers);
}
else
{
// restriction is purely for simplicity; it could be removed in the future with additional code to
// handle the unbounded case.
- writer.Write($"int {iterationLocal} = global::System.MemoryExtensions.IndexOf({sliceSpan}");
+ writer.Write($"int {iterationLocal} = {sliceSpan}");
if (sliceStaticPos > 0)
{
writer.Write($".Slice({sliceStaticPos})");
}
- writer.WriteLine($", {Literal(node.Ch)});");
+ writer.WriteLine($".IndexOf({Literal(node.Ch)});");
using (EmitBlock(writer, $"if ({iterationLocal} < 0)"))
{
// As with the notoneloopatomic above, the unbounded constraint is purely for simplicity.
Debug.Assert(numSetChars > 1);
- writer.Write($"int {iterationLocal} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}");
+ writer.Write($"int {iterationLocal} = {sliceSpan}");
if (sliceStaticPos != 0)
{
writer.Write($".Slice({sliceStaticPos})");
}
writer.WriteLine(numSetChars switch
{
- 2 => $", {Literal(setChars[0])}, {Literal(setChars[1])});",
- 3 => $", {Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])});",
- _ => $", {Literal(setChars.Slice(0, numSetChars).ToString())});",
+ 2 => $".IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])});",
+ 3 => $".IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])});",
+ _ => $".IndexOfAny({Literal(setChars.Slice(0, numSetChars).ToString())});",
});
using (EmitBlock(writer, $"if ({iterationLocal} < 0)"))
{
string expr = $"{sliceSpan}[{iterationLocal}]";
if (node.IsSetFamily)
{
- expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, ref requiredHelpers);
+ expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, requiredHelpers);
}
else
{
if (node.IsSetFamily)
{
- expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, ref requiredHelpers);
+ expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, requiredHelpers);
}
else
{
// Emits code to unwind the capture stack until the crawl position specified in the provided local.
// The unwinding itself lives in a generated local function: it is added to additionalLocalFunctions,
// keyed by the function's name, only if no entry with that name exists yet. Each call to this emitter
// then writes an invocation of that function, passing the capturepos expression as its argument.
void EmitUncaptureUntil(string capturepos)
{
- string name = "UncaptureUntil";
+ const string UncaptureUntil = nameof(UncaptureUntil);
// Register the helper's source lines once; later calls find the existing entry and skip this.
- if (!additionalLocalFunctions.ContainsKey(name))
+ if (!additionalLocalFunctions.ContainsKey(UncaptureUntil))
{
- var lines = new string[9];
- lines[0] = "// <summary>Undo captures until we reach the specified capture position.</summary>";
- lines[1] = "[global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]";
- lines[2] = $"void {name}(int capturepos)";
- lines[3] = "{";
- lines[4] = " while (base.Crawlpos() > capturepos)";
- lines[5] = " {";
- lines[6] = " base.Uncapture();";
- lines[7] = " }";
- lines[8] = "}";
-
- additionalLocalFunctions.Add(name, lines);
+ additionalLocalFunctions.Add(UncaptureUntil, new string[]
+ {
+ $"// <summary>Undo captures until it reaches the specified capture position.</summary>",
+ $"[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+ $"void {UncaptureUntil}(int capturePosition)",
+ $"{{",
+ $" while (base.Crawlpos() > capturePosition)",
+ $" {{",
+ $" base.Uncapture();",
+ $" }}",
+ $"}}",
+ });
}
// Write the call to the helper into the generated code at the writer's current position.
- writer.WriteLine($"{name}({capturepos});");
+ writer.WriteLine($"{UncaptureUntil}({capturepos});");
}
/// <summary>Pushes values on to the backtracking stack.</summary>
void EmitStackPush(params string[] args)
{
Debug.Assert(args.Length is >= 1);
- string function = $"StackPush{args.Length}";
+ string methodName = $"StackPush{args.Length}";
additionalDeclarations.Add("int stackpos = 0;");
- if (!additionalLocalFunctions.ContainsKey(function))
+ if (!requiredHelpers.ContainsKey(methodName))
{
var lines = new string[24 + args.Length];
- lines[0] = $"// <summary>Push {args.Length} value{(args.Length == 1 ? "" : "s")} onto the backtracking stack.</summary>";
- lines[1] = $"[global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]";
- lines[2] = $"static void {function}(ref int[] stack, ref int pos{FormatN(", int arg{0}", args.Length)})";
+ lines[0] = $"// <summary>Pushes {args.Length} value{(args.Length == 1 ? "" : "s")} onto the backtracking stack.</summary>";
+ lines[1] = $"[MethodImpl(MethodImplOptions.AggressiveInlining)]";
+ lines[2] = $"internal static void {methodName}(ref int[] stack, ref int pos{FormatN(", int arg{0}", args.Length)})";
lines[3] = $"{{";
lines[4] = $" // If there's space available for {(args.Length > 1 ? $"all {args.Length} values, store them" : "the value, store it")}.";
lines[5] = $" int[] s = stack;";
lines[14 + args.Length] = $" WithResize(ref stack, ref pos{FormatN(", arg{0}", args.Length)});";
lines[15 + args.Length] = $"";
lines[16 + args.Length] = $" // <summary>Resize the backtracking stack array and push {args.Length} value{(args.Length == 1 ? "" : "s")} onto the stack.</summary>";
- lines[17 + args.Length] = $" [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.NoInlining)]";
+ lines[17 + args.Length] = $" [MethodImpl(MethodImplOptions.NoInlining)]";
lines[18 + args.Length] = $" static void WithResize(ref int[] stack, ref int pos{FormatN(", int arg{0}", args.Length)})";
lines[19 + args.Length] = $" {{";
- lines[20 + args.Length] = $" global::System.Array.Resize(ref stack, (pos + {args.Length - 1}) * 2);";
- lines[21 + args.Length] = $" {function}(ref stack, ref pos{FormatN(", arg{0}", args.Length)});";
+ lines[20 + args.Length] = $" Array.Resize(ref stack, (pos + {args.Length - 1}) * 2);";
+ lines[21 + args.Length] = $" {methodName}(ref stack, ref pos{FormatN(", arg{0}", args.Length)});";
lines[22 + args.Length] = $" }}";
lines[23 + args.Length] = $"}}";
- additionalLocalFunctions.Add(function, lines);
+ requiredHelpers.Add(methodName, lines);
}
- writer.WriteLine($"{function}(ref base.runstack!, ref stackpos, {string.Join(", ", args)});");
+ writer.WriteLine($"{HelpersTypeName}.{methodName}(ref base.runstack!, ref stackpos, {string.Join(", ", args)});");
}
/// <summary>Pops values from the backtracking stack into the specified locations.</summary>
return;
}
- string function = $"StackPop{args.Length}";
+ string methodName = $"StackPop{args.Length}";
- if (!additionalLocalFunctions.ContainsKey(function))
+ if (!requiredHelpers.ContainsKey(methodName))
{
var lines = new string[5 + args.Length];
- lines[0] = $"// <summary>Pop {args.Length} value{(args.Length == 1 ? "" : "s")} from the backtracking stack.</summary>";
- lines[1] = $"[global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]";
- lines[2] = $"static void {function}(int[] stack, ref int pos{FormatN(", out int arg{0}", args.Length)})";
+ lines[0] = $"// <summary>Pops {args.Length} value{(args.Length == 1 ? "" : "s")} from the backtracking stack.</summary>";
+ lines[1] = $"[MethodImpl(MethodImplOptions.AggressiveInlining)]";
+ lines[2] = $"internal static void {methodName}(int[] stack, ref int pos{FormatN(", out int arg{0}", args.Length)})";
lines[3] = $"{{";
for (int i = 0; i < args.Length; i++)
{
}
lines[4 + args.Length] = $"}}";
- additionalLocalFunctions.Add(function, lines);
+ requiredHelpers.Add(methodName, lines);
}
- writer.WriteLine($"{function}(base.runstack, ref stackpos, out {string.Join(", out ", args)});");
+ writer.WriteLine($"{HelpersTypeName}.{methodName}(base.runstack, ref stackpos, out {string.Join(", out ", args)});");
}
/// <summary>Expression for popping the next item from the backtracking stack.</summary>
{
if (analysis.HasIgnoreCase && ((RegexOptions)rm.Options & RegexOptions.CultureInvariant) == 0)
{
- writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;");
+ writer.WriteLine("TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;");
return true;
}
private static string ToLowerIfNeeded(bool hasTextInfo, RegexOptions options, string expression, bool toLower) => toLower ? ToLower(hasTextInfo, options, expression) : expression;
- private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options, string chExpr, string charClass, bool caseInsensitive, bool negate, HashSet<string> additionalDeclarations, ref RequiredHelperFunctions requiredHelpers)
+ private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options, string chExpr, string charClass, bool caseInsensitive, bool negate, HashSet<string> additionalDeclarations, Dictionary<string, string[]> requiredHelpers)
{
// We need to perform the equivalent of calling RegexRunner.CharInClass(ch, charClass),
// but that call is relatively expensive. Before we fall back to it, we try to optimize
case RegexCharClass.WordClass:
case RegexCharClass.NotWordClass:
- requiredHelpers |= RequiredHelperFunctions.IsWordChar;
+ AddIsWordCharHelper(requiredHelpers);
negate ^= charClass == RegexCharClass.NotWordClass;
- return $"{(negate ? "!" : "")}IsWordChar({chExpr})";
+ return $"{(negate ? "!" : "")}{HelpersTypeName}.IsWordChar({chExpr})";
}
// If we're meant to be doing a case-insensitive lookup, and if we're not using the invariant culture,
if (!invariant && RegexCharClass.TryGetSingleUnicodeCategory(charClass, out UnicodeCategory category, out bool negated))
{
negate ^= negated;
- return $"(char.GetUnicodeCategory({chExpr}) {(negate ? "!=" : "==")} global::System.Globalization.UnicodeCategory.{category})";
+ return $"(char.GetUnicodeCategory({chExpr}) {(negate ? "!=" : "==")} UnicodeCategory.{category})";
}
// Next, if there's only 2 or 3 chars in the set (fairly common due to the sets we create for prefixes),
// extend the analysis to produce a known lower-bound and compare against
// that rather than always using 128 as the pivot point.)
return negate ?
- $"((ch = {chExpr}) < 128 || !global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" :
- $"((ch = {chExpr}) >= 128 && global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))";
+ $"((ch = {chExpr}) < 128 || !RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" :
+ $"((ch = {chExpr}) >= 128 && RegexRunner.CharInClass((char)ch, {Literal(charClass)}))";
}
if (analysis.AllAsciiContained)
// if the class were the negated example from case 1 above:
// [^\p{IsGreek}\p{IsGreekExtended}].
return negate ?
- $"((ch = {chExpr}) >= 128 && !global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" :
- $"((ch = {chExpr}) < 128 || global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))";
+ $"((ch = {chExpr}) >= 128 && !RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" :
+ $"((ch = {chExpr}) < 128 || RegexRunner.CharInClass((char)ch, {Literal(charClass)}))";
}
}
// were [\w\d], so since ch >= 128, we need to fall back to calling CharInClass.
return (negate, invariant) switch
{
- (false, false) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))",
- (true, false) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0 : !global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))",
- (false, true) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : global::System.Text.RegularExpressions.RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))",
- (true, true) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0 : !global::System.Text.RegularExpressions.RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))",
+ (false, false) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, {Literal(charClass)}))",
+ (true, false) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0 : !RegexRunner.CharInClass((char)ch, {Literal(charClass)}))",
+ (false, true) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))",
+ (true, true) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0 : !RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))",
};
}
// The options were formatted as an int, which means the runtime couldn't
// produce a textual representation. So just output casting the value as an int.
Debug.Fail("This shouldn't happen, as we should only get to the point of emitting code if RegexOptions was valid.");
- return $"(global::System.Text.RegularExpressions.RegexOptions)({(int)options})";
+ return $"(RegexOptions)({(int)options})";
}
// Parse the runtime-generated "Option1, Option2" into each piece and then concat
string[] parts = s.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
for (int i = 0; i < parts.Length; i++)
{
- parts[i] = "global::System.Text.RegularExpressions.RegexOptions." + parts[i].Trim();
+ parts[i] = "RegexOptions." + parts[i].Trim();
}
return string.Join(" | ", parts);
}
}
}
}
-
- /// <summary>Bit flags indicating which additional helpers should be emitted into the regex class.</summary>
- [Flags]
- private enum RequiredHelperFunctions
- {
- /// <summary>No additional functions are required.</summary>
- None = 0b0,
- /// <summary>The IsWordChar helper is required.</summary>
- IsWordChar = 0b1,
- /// <summary>The IsBoundary helper is required.</summary>
- IsBoundary = 0b10,
- /// <summary>The IsECMABoundary helper is required.</summary>
- IsECMABoundary = 0b100
- }
}
}
// The .NET Foundation licenses this file to you under the MIT license.
using System;
+using System.CodeDom.Compiler;
using System.Collections.Generic;
using System.Collections.Immutable;
using System.Diagnostics;
using System.Diagnostics.CodeAnalysis;
using System.Diagnostics.Tracing;
+using System.IO;
using System.Linq;
using System.Runtime.CompilerServices;
using System.Text;
[Generator(LanguageNames.CSharp)]
public partial class RegexGenerator : IIncrementalGenerator
{
- public void Initialize(IncrementalGeneratorInitializationContext context)
+ /// <summary>Name of the type emitted to contain helpers used by the generated code.</summary>
+ private const string HelpersTypeName = "Utilities";
+ /// <summary>Namespace containing all the generated code.</summary>
+ private const string GeneratedNamespace = "System.Text.RegularExpressions.Generated";
+ /// <summary>Code for a [GeneratedCode] attribute to put on the top-level generated members.</summary>
+ private static readonly string s_generatedCodeAttribute = $"GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")";
+ /// <summary>Header comments and usings to include at the top of every generated file.</summary>
+ private static readonly string[] s_headers = new string[]
{
- // To avoid invalidating the generator's output when anything from the compilation
- // changes, we will extract from it the only thing we care about: whether unsafe
- // code is allowed.
- IncrementalValueProvider<bool> allowUnsafeProvider =
- context.CompilationProvider
- .Select((x, _) => x.Options is CSharpCompilationOptions { AllowUnsafe: true });
+ "// <auto-generated/>",
+ "#nullable enable",
+ "#pragma warning disable CS0162 // Unreachable code",
+ "#pragma warning disable CS0164 // Unreferenced label",
+ "#pragma warning disable CS0219 // Variable assigned but never used",
+ };
- // Contains one entry per regex method, either the generated code for that regex method,
- // a diagnostic to fail with, or null if no action should be taken for that regex.
+ public void Initialize(IncrementalGeneratorInitializationContext context)
+ {
+ // Produces one entry per generated regex. This may be:
+ // - Diagnostic in the case of a failure that should end the compilation
+ // - (RegexMethod regexMethod, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers) in the case of valid regex
+ // - (RegexMethod regexMethod, string reason, Diagnostic diagnostic) in the case of a limited-support regex
IncrementalValueProvider<ImmutableArray<object?>> codeOrDiagnostics =
context.SyntaxProvider
- // Find all MethodDeclarationSyntax nodes attributed with RegexGenerator and gather the required information
+ // Find all MethodDeclarationSyntax nodes attributed with RegexGenerator and gather the required information.
.CreateSyntaxProvider(IsSyntaxTargetForGeneration, GetSemanticTargetForGeneration)
.Where(static m => m is not null)
- // Pair each with whether unsafe code is allowed
- .Combine(allowUnsafeProvider)
-
- // Get the resulting code string or error Diagnostic for
- // each MethodDeclarationSyntax/allow-unsafe-blocks pair
+ // Generate the RunnerFactory for each regex, if possible. This is where the bulk of the implementation occurs.
.Select((state, _) =>
{
- Debug.Assert(state.Left is not null);
- return state.Left is RegexType regexType ? EmitRegexType(regexType, state.Right) : state.Left;
+ if (state is not RegexMethod regexMethod)
+ {
+ Debug.Assert(state is Diagnostic);
+ return state;
+ }
+
+ // If we're unable to generate a full implementation for this regex, report a diagnostic.
+ // We'll still output a limited implementation that just caches a new Regex(...).
+ if (!regexMethod.Tree.Root.SupportsCompilation(out string? reason))
+ {
+ return (regexMethod, reason, Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, regexMethod.MethodSyntax.GetLocation()));
+ }
+
+ // Generate the core logic for the regex.
+ Dictionary<string, string[]> requiredHelpers = new();
+ var sw = new StringWriter();
+ var writer = new IndentedTextWriter(sw);
+ writer.Indent += 3;
+ writer.WriteLine();
+ EmitRegexDerivedTypeRunnerFactory(writer, regexMethod, requiredHelpers);
+ writer.Indent -= 3;
+ return (regexMethod, sw.ToString(), requiredHelpers);
})
.Collect();
+ // To avoid invalidating every regex's output when anything from the compilation changes,
+ // we extract from it the only things we care about: whether unsafe code is allowed,
+ // and a name based on the assembly's name, and only that information is then fed into
+ // RegisterSourceOutput along with all of the cached generated data from each regex.
+ IncrementalValueProvider<(bool AllowUnsafe, string? AssemblyName)> compilationDataProvider =
+ context.CompilationProvider
+ .Select((x, _) => (x.Options is CSharpCompilationOptions { AllowUnsafe: true }, x.AssemblyName));
+
// When there is something to output, take all the generated strings and concatenate them to output,
// and raise all of the created diagnostics.
- context.RegisterSourceOutput(codeOrDiagnostics, static (context, results) =>
+ context.RegisterSourceOutput(codeOrDiagnostics.Combine(compilationDataProvider), static (context, compilationDataAndResults) =>
{
- var code = new List<string>(s_headers.Length + results.Length);
-
- // Add file header and required usings
- code.AddRange(s_headers);
+ ImmutableArray<object?> results = compilationDataAndResults.Left;
+ // Report any top-level diagnostics.
+ bool allFailures = true;
foreach (object? result in results)
{
- switch (result)
+ if (result is Diagnostic d)
{
- case Diagnostic d:
- context.ReportDiagnostic(d);
- break;
+ context.ReportDiagnostic(d);
+ }
+ else
+ {
+ allFailures = false;
+ }
+ }
+ if (allFailures)
+ {
+ return;
+ }
+
+ // At this point we'll be emitting code. Create a writer to hold it all.
+ var sw = new StringWriter();
+ IndentedTextWriter writer = new(sw);
+
+ // Add file headers and required usings.
+ foreach (string header in s_headers)
+ {
+ writer.WriteLine(header);
+ }
+ writer.WriteLine();
- case ValueTuple<string, ImmutableArray<Diagnostic>> t:
- code.Add(t.Item1);
- foreach (Diagnostic d in t.Item2)
+ // For every generated type, we give it an incrementally increasing ID, in order to create
+ // unique type names even in situations where method names were the same, while also keeping
+ // the type names short. Note that this is why we only generate the RunnerFactory implementations
+ // earlier in the pipeline... we want to avoid generating code that relies on the class names
+ // until we're able to iterate through them linearly keeping track of a deterministic ID
+ // used to name them. The boilerplate code generation that happens here is minimal when compared to
+ // the work required to generate the actual matching code for the regex.
+ int id = 0;
+ string generatedClassName = $"__{ComputeStringHash(compilationDataAndResults.Right.AssemblyName ?? ""):x}";
+
+ // If we have any (RegexMethod regexMethod, string reason, Diagnostic diagnostic), these are regexes for which we have
+ // limited support and need to simply output boilerplate. We need to emit their diagnostics.
+ // If we have any (RegexMethod regexMethod, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers),
+ // those are generated implementations to be emitted. We need to gather up their required helpers.
+ Dictionary<string, string[]> requiredHelpers = new();
+ foreach (object? result in results)
+ {
+ RegexMethod? regexMethod = null;
+ if (result is ValueTuple<RegexMethod, string, Diagnostic> limitedSupportResult)
+ {
+ context.ReportDiagnostic(limitedSupportResult.Item3);
+ regexMethod = limitedSupportResult.Item1;
+ }
+ else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
+ {
+ foreach (KeyValuePair<string, string[]> helper in regexImpl.Item3)
+ {
+ if (!requiredHelpers.ContainsKey(helper.Key))
{
- context.ReportDiagnostic(d);
+ requiredHelpers.Add(helper.Key, helper.Value);
}
- break;
+ }
+
+ regexMethod = regexImpl.Item1;
+ }
+
+ if (regexMethod is not null)
+ {
+ regexMethod.GeneratedId = id++;
+ EmitRegexPartialMethod(regexMethod, writer, generatedClassName);
+ writer.WriteLine();
+ }
+ }
+
+ // At this point we've emitted all the partial method definitions, but we still need to emit the actual regex-derived implementations.
+ // These are all emitted inside of our generated class.
+ // TODO https://github.com/dotnet/csharplang/issues/5529:
+ // When C# provides a mechanism for shielding generated code from the rest of the project, it should be employed
+ // here for the generated class. At that point, the generated class wrapper can be removed, and all of the types
+ // generated inside of it (one for each regex as well as the helpers type) should be shielded.
+
+ writer.WriteLine($"namespace {GeneratedNamespace}");
+ writer.WriteLine($"{{");
+
+ // We emit usings here now that we're inside of a namespace block and are no longer emitting code into
+ // a user's partial type. We can now rely on binding rules mapping to these usings and don't need to
+ // use global-qualified names for the rest of the implementation.
+ writer.WriteLine($" using System;");
+ writer.WriteLine($" using System.CodeDom.Compiler;");
+ writer.WriteLine($" using System.Collections;");
+ writer.WriteLine($" using System.ComponentModel;");
+ writer.WriteLine($" using System.Globalization;");
+ writer.WriteLine($" using System.Runtime.CompilerServices;");
+ writer.WriteLine($" using System.Text.RegularExpressions;");
+ writer.WriteLine($" using System.Threading;");
+ writer.WriteLine($"");
+ if (compilationDataAndResults.Right.AllowUnsafe)
+ {
+ writer.WriteLine($" [SkipLocalsInit]");
+ }
+ writer.WriteLine($" [{s_generatedCodeAttribute}]");
+ writer.WriteLine($" [EditorBrowsable(EditorBrowsableState.Never)]");
+ writer.WriteLine($" internal static class {generatedClassName}");
+ writer.WriteLine($" {{");
+
+ // Emit each Regex-derived type.
+ writer.Indent += 2;
+ foreach (object? result in results)
+ {
+ if (result is ValueTuple<RegexMethod, string, Diagnostic> limitedSupportResult)
+ {
+ EmitRegexLimitedBoilerplate(writer, limitedSupportResult.Item1, limitedSupportResult.Item1.GeneratedId, limitedSupportResult.Item2);
+ writer.WriteLine();
+ }
+ else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
+ {
+ EmitRegexDerivedImplementation(writer, regexImpl.Item1, regexImpl.Item1.GeneratedId, regexImpl.Item2);
+ writer.WriteLine();
+ }
+ }
+ writer.Indent -= 2;
+
+ // If any of the Regex-derived types asked for helper methods, emit those now.
+ if (requiredHelpers.Count != 0)
+ {
+ writer.Indent += 2;
+ writer.WriteLine($"private static class {HelpersTypeName}");
+ writer.WriteLine($"{{");
+ writer.Indent++;
+ foreach (KeyValuePair<string, string[]> helper in requiredHelpers)
+ {
+ foreach (string value in helper.Value)
+ {
+ writer.WriteLine(value);
+ }
+ writer.WriteLine();
}
+ writer.Indent--;
+ writer.WriteLine($"}}");
+ writer.Indent -= 2;
}
- context.AddSource("RegexGenerator.g.cs", string.Join(Environment.NewLine, code));
+ writer.WriteLine($" }}");
+ writer.WriteLine($"}}");
+
+ // Save out the source
+ context.AddSource("RegexGenerator.g.cs", sw.ToString());
});
}
+
+ /// <summary>Computes a deterministic 32-bit hash of the string.</summary>
+ /// <param name="s">The text to hash; each <see cref="char"/> is folded into the hash in order.</param>
+ /// <returns>The computed hash. An empty string yields the initial value 2166136261.</returns>
+ /// <remarks>
+ /// Currently an FNV-1a style hash function, applied per <see cref="char"/> rather than per byte.
+ /// The actual algorithm used doesn't matter; just something simple to create a deterministic,
+ /// pseudo-random value that's based on input text.
+ /// </remarks>
+ private static uint ComputeStringHash(string s)
+ {
+     const uint OffsetBasis = 2166136261;
+     const uint Prime = 16777619;
+
+     uint hashCode = OffsetBasis;
+
+     // The multiplication is expected to wrap around. Make that explicit so the result is the same
+     // (and no OverflowException is thrown) even if this code is compiled with overflow checking enabled.
+     unchecked
+     {
+         foreach (char c in s)
+         {
+             hashCode = (c ^ hashCode) * Prime;
+         }
+     }
+
+     return hashCode;
+ }
}
}