Overhaul how regex generated code is structured (#66432)
author: Stephen Toub <stoub@microsoft.com>
Thu, 10 Mar 2022 23:42:19 +0000 (18:42 -0500)
committer: GitHub <noreply@github.com>
Thu, 10 Mar 2022 23:42:19 +0000 (18:42 -0500)
* Overhaul how regex generated code is structured

* Address PR feedback

* Change parser to return RegexMethod

Clean up how the data is structured.

* Add a test for same method name in multiple types

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs
src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.cs
src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj
src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorHelper.netcoreapp.cs
src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs

index 88812c2..d7f11af 100644 (file)
@@ -10,12 +10,13 @@ using System.Diagnostics;
 using System.Globalization;
 using System.IO;
 using System.Linq;
+using System.Net.Cache;
 using System.Runtime.InteropServices;
 using System.Threading;
 using Microsoft.CodeAnalysis;
 using Microsoft.CodeAnalysis.CSharp;
 
-// NOTE: The logic in this file is largely a copy of logic in RegexCompiler, emitting C# instead of MSIL.
+// NOTE: The logic in this file is largely a duplicate of logic in RegexCompiler, emitting C# instead of MSIL.
 // Most changes made to this file should be kept in sync, so far as bug fixes and relevant optimizations
 // are concerned.
 
@@ -23,40 +24,24 @@ namespace System.Text.RegularExpressions.Generator
 {
     public partial class RegexGenerator
     {
-        /// <summary>Code for a [GeneratedCode] attribute to put on the top-level generated members.</summary>
-        private static readonly string s_generatedCodeAttribute = $"[global::System.CodeDom.Compiler.GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")]";
-        /// <summary>Header comments and usings to include at the top of every generated file.</summary>
-        private static readonly string[] s_headers = new string[]
+        /// <summary>Emits the definition of the partial method. This method just delegates to the property cache on the generated Regex-derived type.</summary>
+        private static void EmitRegexPartialMethod(RegexMethod regexMethod, IndentedTextWriter writer, string generatedClassName)
         {
-            "// <auto-generated/>",
-            "#nullable enable",
-            "#pragma warning disable CS0162 // Unreachable code",
-            "#pragma warning disable CS0164 // Unreferenced label",
-            "#pragma warning disable CS0219 // Variable assigned but never used",
-            "",
-        };
-
-        /// <summary>Generates the code for one regular expression class.</summary>
-        private static (string, ImmutableArray<Diagnostic>) EmitRegexType(RegexType regexClass, bool allowUnsafe)
-        {
-            var sb = new StringBuilder(1024);
-            var writer = new IndentedTextWriter(new StringWriter(sb));
-
-            // Emit the namespace
-            if (!string.IsNullOrWhiteSpace(regexClass.Namespace))
+            // Emit the namespace.
+            RegexType? parent = regexMethod.DeclaringType;
+            if (!string.IsNullOrWhiteSpace(parent.Namespace))
             {
-                writer.WriteLine($"namespace {regexClass.Namespace}");
+                writer.WriteLine($"namespace {parent.Namespace}");
                 writer.WriteLine("{");
                 writer.Indent++;
             }
 
-            // Emit containing types
-            RegexType? parent = regexClass.ParentClass;
+            // Emit containing types.
             var parentClasses = new Stack<string>();
             while (parent is not null)
             {
                 parentClasses.Push($"partial {parent.Keyword} {parent.Name}");
-                parent = parent.ParentClass;
+                parent = parent.Parent;
             }
             while (parentClasses.Count != 0)
             {
@@ -65,92 +50,59 @@ namespace System.Text.RegularExpressions.Generator
                 writer.Indent++;
             }
 
-            // Emit the direct parent type
-            writer.WriteLine($"partial {regexClass.Keyword} {regexClass.Name}");
-            writer.WriteLine("{");
-            writer.Indent++;
-
-            // Generate a name to describe the regex instance.  This includes the method name
-            // the user provided and a non-randomized (for determinism) hash of it to try to make
-            // the name that much harder to predict.
-            Debug.Assert(regexClass.Method is not null);
-            string generatedName = $"GeneratedRegex_{regexClass.Method.MethodName}_";
-            generatedName += ComputeStringHash(generatedName).ToString("X");
-
-            // Generate the regex type
-            ImmutableArray<Diagnostic> diagnostics = EmitRegexMethod(writer, regexClass.Method, generatedName, allowUnsafe);
+            // Emit the partial method definition.
+            writer.WriteLine($"[global::System.CodeDom.Compiler.{s_generatedCodeAttribute}]");
+            writer.WriteLine($"{regexMethod.Modifiers} global::System.Text.RegularExpressions.Regex {regexMethod.MethodName}() => global::{GeneratedNamespace}.{generatedClassName}.{regexMethod.GeneratedName}.Instance;");
 
+            // Unwind all scopes
             while (writer.Indent != 0)
             {
                 writer.Indent--;
                 writer.WriteLine("}");
             }
-
-            writer.Flush();
-            return (sb.ToString(), diagnostics);
-
-            // FNV-1a hash function.  The actual algorithm used doesn't matter; just something simple
-            // to create a deterministic, pseudo-random value that's based on input text.
-            static uint ComputeStringHash(string s)
-            {
-                uint hashCode = 2166136261;
-                foreach (char c in s)
-                {
-                    hashCode = (c ^ hashCode) * 16777619;
-                }
-                return hashCode;
-            }
         }
 
-        /// <summary>Generates the code for a regular expression method.</summary>
-        private static ImmutableArray<Diagnostic> EmitRegexMethod(IndentedTextWriter writer, RegexMethod rm, string id, bool allowUnsafe)
+        /// <summary>Emits the Regex-derived type for a method where we're unable to generate custom code.</summary>
+        private static void EmitRegexLimitedBoilerplate(
+            IndentedTextWriter writer, RegexMethod rm, int id, string reason)
         {
-            string patternExpression = Literal(rm.Pattern);
-            string optionsExpression = Literal(rm.Options);
-            string timeoutExpression = rm.MatchTimeout == Timeout.Infinite ?
-                "global::System.Threading.Timeout.InfiniteTimeSpan" :
-                $"global::System.TimeSpan.FromMilliseconds({rm.MatchTimeout.ToString(CultureInfo.InvariantCulture)})";
-
-            writer.WriteLine(s_generatedCodeAttribute);
-            writer.WriteLine($"{rm.Modifiers} global::System.Text.RegularExpressions.Regex {rm.MethodName}() => {id}.Instance;");
-            writer.WriteLine();
-            writer.WriteLine(s_generatedCodeAttribute);
-            writer.WriteLine("[global::System.ComponentModel.EditorBrowsable(global::System.ComponentModel.EditorBrowsableState.Never)]");
-            writer.WriteLine($"{(writer.Indent != 0 ? "private" : "internal")} sealed class {id} : global::System.Text.RegularExpressions.Regex");
-            writer.WriteLine("{");
-            writer.Write("    public static global::System.Text.RegularExpressions.Regex Instance { get; } = ");
-
-            // If we can't support custom generation for this regex, spit out a Regex constructor call.
-            if (!rm.Tree.Root.SupportsCompilation(out string? reason))
-            {
-                writer.WriteLine();
-                writer.WriteLine($"        // Cannot generate Regex-derived implementation because {reason}.");
-                writer.WriteLine($"        new global::System.Text.RegularExpressions.Regex({patternExpression}, {optionsExpression}, {timeoutExpression});");
-                writer.WriteLine("}");
-                return ImmutableArray.Create(Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, rm.MethodSyntax.GetLocation()));
-            }
-
-            AnalysisResults analysis = RegexTreeAnalyzer.Analyze(rm.Tree);
+            writer.WriteLine($"/// <summary>Caches a <see cref=\"Regex\"/> instance for the {rm.MethodName} method.</summary>");
+            writer.WriteLine($"/// <remarks>A custom Regex-derived type could not be generated because {reason}.</remarks>");
+            writer.WriteLine($"internal sealed class {rm.GeneratedName} : Regex");
+            writer.WriteLine($"{{");
+            writer.WriteLine($"    /// <summary>Cached, thread-safe singleton instance.</summary>");
+            writer.WriteLine($"    internal static Regex Instance {{ get; }} = new({Literal(rm.Pattern)}, {Literal(rm.Options)}, {GetTimeoutExpression(rm.MatchTimeout)});");
+            writer.WriteLine($"}}");
+        }
 
-            writer.WriteLine($"new {id}();");
-            writer.WriteLine();
-            writer.WriteLine($"    private {id}()");
+        /// <summary>Emits the Regex-derived type for a method whose RunnerFactory implementation was generated into <paramref name="runnerFactoryImplementation"/>.</summary>
+        private static void EmitRegexDerivedImplementation(
+            IndentedTextWriter writer, RegexMethod rm, int id, string runnerFactoryImplementation)
+        {
+            writer.WriteLine($"/// <summary>Custom <see cref=\"Regex\"/>-derived type for the {rm.MethodName} method.</summary>");
+            writer.WriteLine($"internal sealed class {rm.GeneratedName} : Regex");
+            writer.WriteLine($"{{");
+            writer.WriteLine($"    /// <summary>Cached, thread-safe singleton instance.</summary>");
+            writer.WriteLine($"    internal static {rm.GeneratedName} Instance {{ get; }} = new();");
+            writer.WriteLine($"");
+            writer.WriteLine($"    /// <summary>Initializes the instance.</summary>");
+            writer.WriteLine($"    private {rm.GeneratedName}()");
             writer.WriteLine($"    {{");
-            writer.WriteLine($"        base.pattern = {patternExpression};");
-            writer.WriteLine($"        base.roptions = {optionsExpression};");
-            writer.WriteLine($"        base.internalMatchTimeout = {timeoutExpression};");
+            writer.WriteLine($"        base.pattern = {Literal(rm.Pattern)};");
+            writer.WriteLine($"        base.roptions = {Literal(rm.Options)};");
+            writer.WriteLine($"        base.internalMatchTimeout = {GetTimeoutExpression(rm.MatchTimeout)};");
             writer.WriteLine($"        base.factory = new RunnerFactory();");
             if (rm.Tree.CaptureNumberSparseMapping is not null)
             {
-                writer.Write("        base.Caps = new global::System.Collections.Hashtable {");
+                writer.Write("        base.Caps = new Hashtable {");
                 AppendHashtableContents(writer, rm.Tree.CaptureNumberSparseMapping);
-                writer.WriteLine(" };");
+                writer.WriteLine($" }};");
             }
             if (rm.Tree.CaptureNameToNumberMapping is not null)
             {
-                writer.Write("        base.CapNames = new global::System.Collections.Hashtable {");
+                writer.Write("        base.CapNames = new Hashtable {");
                 AppendHashtableContents(writer, rm.Tree.CaptureNameToNumberMapping);
-                writer.WriteLine(" };");
+                writer.WriteLine($" }};");
             }
             if (rm.Tree.CaptureNames is not null)
             {
@@ -162,119 +114,12 @@ namespace System.Text.RegularExpressions.Generator
                     writer.Write(Literal(s));
                     separator = ", ";
                 }
-                writer.WriteLine(" };");
+                writer.WriteLine($" }};");
             }
             writer.WriteLine($"        base.capsize = {rm.Tree.CaptureCount};");
             writer.WriteLine($"    }}");
-            writer.WriteLine("    ");
-            writer.WriteLine($"    private sealed class RunnerFactory : global::System.Text.RegularExpressions.RegexRunnerFactory");
-            writer.WriteLine($"    {{");
-            writer.WriteLine($"        protected override global::System.Text.RegularExpressions.RegexRunner CreateInstance() => new Runner();");
-            writer.WriteLine();
-            writer.WriteLine($"        private sealed class Runner : global::System.Text.RegularExpressions.RegexRunner");
-            writer.WriteLine($"        {{");
-
-            // Main implementation methods
-            writer.WriteLine("            // Description:");
-            DescribeExpression(writer, rm.Tree.Root.Child(0), "            // ", analysis); // skip implicit root capture
-            writer.WriteLine();
-
-            writer.WriteLine($"            protected override void Scan(global::System.ReadOnlySpan<char> text)");
-            writer.WriteLine($"            {{");
-            writer.Indent += 4;
-            EmitScan(writer, rm, id);
-            writer.Indent -= 4;
-            writer.WriteLine($"            }}");
-            writer.WriteLine();
-
-            writer.WriteLine($"            private bool TryFindNextPossibleStartingPosition(global::System.ReadOnlySpan<char> inputSpan)");
-            writer.WriteLine($"            {{");
-            writer.Indent += 4;
-            RequiredHelperFunctions requiredHelpers = EmitTryFindNextPossibleStartingPosition(writer, rm, id);
-            writer.Indent -= 4;
-            writer.WriteLine($"            }}");
-            writer.WriteLine();
-            if (allowUnsafe)
-            {
-                writer.WriteLine($"            [global::System.Runtime.CompilerServices.SkipLocalsInit]");
-            }
-            writer.WriteLine($"            private bool TryMatchAtCurrentPosition(global::System.ReadOnlySpan<char> inputSpan)");
-            writer.WriteLine($"            {{");
-            writer.Indent += 4;
-            requiredHelpers |= EmitTryMatchAtCurrentPosition(writer, rm, id, analysis);
-            writer.Indent -= 4;
-            writer.WriteLine($"            }}");
-
-            if ((requiredHelpers & RequiredHelperFunctions.IsWordChar) != 0)
-            {
-                writer.WriteLine();
-                writer.WriteLine($"            /// <summary>Determines whether the character is part of the [\\w] set.</summary>");
-                writer.WriteLine($"            [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
-                writer.WriteLine($"            private static bool IsWordChar(char ch)");
-                writer.WriteLine($"            {{");
-                writer.WriteLine($"                global::System.ReadOnlySpan<byte> ascii = new byte[]");
-                writer.WriteLine($"                {{");
-                writer.WriteLine($"                    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,");
-                writer.WriteLine($"                    0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07");
-                writer.WriteLine($"                }};");
-                writer.WriteLine();
-                writer.WriteLine($"                int chDiv8 = ch >> 3;");
-                writer.WriteLine($"                return (uint)chDiv8 < (uint)ascii.Length ?");
-                writer.WriteLine($"                    (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :");
-                writer.WriteLine($"                    global::System.Globalization.CharUnicodeInfo.GetUnicodeCategory(ch) switch");
-                writer.WriteLine($"                    {{");
-                writer.WriteLine($"                        global::System.Globalization.UnicodeCategory.UppercaseLetter or");
-                writer.WriteLine($"                        global::System.Globalization.UnicodeCategory.LowercaseLetter or");
-                writer.WriteLine($"                        global::System.Globalization.UnicodeCategory.TitlecaseLetter or");
-                writer.WriteLine($"                        global::System.Globalization.UnicodeCategory.ModifierLetter or");
-                writer.WriteLine($"                        global::System.Globalization.UnicodeCategory.OtherLetter or");
-                writer.WriteLine($"                        global::System.Globalization.UnicodeCategory.NonSpacingMark or");
-                writer.WriteLine($"                        global::System.Globalization.UnicodeCategory.DecimalDigitNumber or");
-                writer.WriteLine($"                        global::System.Globalization.UnicodeCategory.ConnectorPunctuation => true,");
-                writer.WriteLine($"                        _ => false,");
-                writer.WriteLine($"                    }};");
-                writer.WriteLine($"            }}");
-            }
-
-            if ((requiredHelpers & RequiredHelperFunctions.IsBoundary) != 0)
-            {
-                writer.WriteLine();
-                writer.WriteLine($"            /// <summary>Determines whether the character at the specified index is a boundary.</summary>");
-                writer.WriteLine($"            [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
-                writer.WriteLine($"            private static bool IsBoundary(global::System.ReadOnlySpan<char> inputSpan, int index)");
-                writer.WriteLine($"            {{");
-                writer.WriteLine($"                int indexM1 = index - 1;");
-                writer.WriteLine($"                return ((uint)indexM1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexM1])) !=");
-                writer.WriteLine($"                       ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));");
-                writer.WriteLine();
-                writer.WriteLine($"                static bool IsBoundaryWordChar(char ch) =>");
-                writer.WriteLine($"                    IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');");
-                writer.WriteLine($"            }}");
-            }
-
-            if ((requiredHelpers & RequiredHelperFunctions.IsECMABoundary) != 0)
-            {
-                writer.WriteLine();
-                writer.WriteLine($"            /// <summary>Determines whether the character at the specified index is a boundary.</summary>");
-                writer.WriteLine($"            [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]");
-                writer.WriteLine($"            private static bool IsECMABoundary(global::System.ReadOnlySpan<char> inputSpan, int index)");
-                writer.WriteLine($"            {{");
-                writer.WriteLine($"                int indexM1 = index - 1;");
-                writer.WriteLine($"                return ((uint)indexM1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexM1])) !=");
-                writer.WriteLine($"                       ((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));");
-                writer.WriteLine();
-                writer.WriteLine($"                static bool IsECMAWordChar(char ch) =>");
-                writer.WriteLine($"                    ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter");
-                writer.WriteLine($"                    (((uint)ch - '0') < 10) || // digit");
-                writer.WriteLine($"                    ch == '_' || // underscore");
-                writer.WriteLine($"                    ch == '\\u0130'; // latin capital letter I with dot above");
-                writer.WriteLine($"            }}");
-            }
-
-            writer.WriteLine($"        }}");
-            writer.WriteLine($"    }}");
-            writer.WriteLine("}");
-            return ImmutableArray<Diagnostic>.Empty;
+            writer.WriteLine(runnerFactoryImplementation);
+            writer.WriteLine($"}}");
 
             static void AppendHashtableContents(IndentedTextWriter writer, Hashtable ht)
             {
@@ -299,12 +144,161 @@ namespace System.Text.RegularExpressions.Generator
             }
         }
 
+        /// <summary>Emits the code for the RunnerFactory.  This is the actual logic for the regular expression.</summary>
+        private static void EmitRegexDerivedTypeRunnerFactory(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
+        {
+            AnalysisResults analysis = RegexTreeAnalyzer.Analyze(rm.Tree);
+
+            writer.WriteLine($"/// <summary>Provides a factory for creating <see cref=\"RegexRunner\"/> instances to be used by methods on <see cref=\"Regex\"/>.</summary>");
+            writer.WriteLine($"private sealed class RunnerFactory : RegexRunnerFactory");
+            writer.WriteLine($"{{");
+            writer.WriteLine($"    /// <summary>Creates an instance of a <see cref=\"RegexRunner\"/> used by methods on <see cref=\"Regex\"/>.</summary>");
+            writer.WriteLine($"    protected override RegexRunner CreateInstance() => new Runner();");
+            writer.WriteLine();
+            writer.WriteLine($"    /// <summary>Provides the runner that contains the custom logic implementing the specified regular expression.</summary>");
+            writer.WriteLine($"    private sealed class Runner : RegexRunner");
+            writer.WriteLine($"    {{");
+
+            // Main implementation methods
+            writer.WriteLine($"        // Description:");
+            DescribeExpression(writer, rm.Tree.Root.Child(0), "        // ", analysis); // skip implicit root capture
+            writer.WriteLine();
+
+            writer.WriteLine($"        /// <summary>Scan the <paramref name=\"inputSpan\"/> starting from base.runtextstart for the next match.</summary>");
+            writer.WriteLine($"        /// <param name=\"inputSpan\">The text being scanned by the regular expression.</param>");
+            writer.WriteLine($"        protected override void Scan(ReadOnlySpan<char> inputSpan)");
+            writer.WriteLine($"        {{");
+            writer.Indent += 3;
+            EmitScan(writer, rm);
+            writer.Indent -= 3;
+            writer.WriteLine($"        }}");
+            writer.WriteLine();
+
+            writer.WriteLine($"        /// <summary>Search <paramref name=\"inputSpan\"/> starting from base.runtextpos for the next location a match could possibly start.</summary>");
+            writer.WriteLine($"        /// <param name=\"inputSpan\">The text being scanned by the regular expression.</param>");
+            writer.WriteLine($"        /// <returns>true if a possible match was found; false if no more matches are possible.</returns>");
+            writer.WriteLine($"        private bool TryFindNextPossibleStartingPosition(ReadOnlySpan<char> inputSpan)");
+            writer.WriteLine($"        {{");
+            writer.Indent += 3;
+            EmitTryFindNextPossibleStartingPosition(writer, rm, requiredHelpers);
+            writer.Indent -= 3;
+            writer.WriteLine($"        }}");
+            writer.WriteLine();
+            writer.WriteLine($"        /// <summary>Determine whether <paramref name=\"inputSpan\"/> at base.runtextpos is a match for the regular expression.</summary>");
+            writer.WriteLine($"        /// <param name=\"inputSpan\">The text being scanned by the regular expression.</param>");
+            writer.WriteLine($"        /// <returns>true if the regular expression matches at the current position; otherwise, false.</returns>");
+            writer.WriteLine($"        private bool TryMatchAtCurrentPosition(ReadOnlySpan<char> inputSpan)");
+            writer.WriteLine($"        {{");
+            writer.Indent += 3;
+            EmitTryMatchAtCurrentPosition(writer, rm, analysis, requiredHelpers);
+            writer.Indent -= 3;
+            writer.WriteLine($"        }}");
+            writer.WriteLine($"    }}");
+            writer.WriteLine($"}}");
+        }
+
+        /// <summary>Gets a C# expression representing the specified timeout value.</summary>
+        private static string GetTimeoutExpression(int matchTimeout) =>
+            matchTimeout == Timeout.Infinite ?
+                "Timeout.InfiniteTimeSpan" :
+                $"TimeSpan.FromMilliseconds({matchTimeout.ToString(CultureInfo.InvariantCulture)})";
+
+        /// <summary>Adds the IsWordChar helper to the required helpers collection.</summary>
+        private static void AddIsWordCharHelper(Dictionary<string, string[]> requiredHelpers)
+        {
+            const string IsWordChar = nameof(IsWordChar);
+            if (!requiredHelpers.ContainsKey(IsWordChar))
+            {
+                requiredHelpers.Add(IsWordChar, new string[]
+                {
+                    "/// <summary>Determines whether the character is part of the [\\w] set.</summary>",
+                    "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+                    "internal static bool IsWordChar(char ch)",
+                    "{",
+                    "    // Bitmap for whether each character 0 through 127 is in [\\w]",
+                    "    ReadOnlySpan<byte> ascii = new byte[]",
+                    "    {",
+                    "        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,",
+                    "        0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07",
+                    "    };",
+                    "",
+                    "    // If the char is ASCII, look it up in the bitmap. Otherwise, query its Unicode category.",
+                    "    int chDiv8 = ch >> 3;",
+                    "    return (uint)chDiv8 < (uint)ascii.Length ?",
+                    "        (ascii[chDiv8] & (1 << (ch & 0x7))) != 0 :",
+                    "        CharUnicodeInfo.GetUnicodeCategory(ch) switch",
+                    "        {",
+                    "            UnicodeCategory.UppercaseLetter or",
+                    "            UnicodeCategory.LowercaseLetter or",
+                    "            UnicodeCategory.TitlecaseLetter or",
+                    "            UnicodeCategory.ModifierLetter or",
+                    "            UnicodeCategory.OtherLetter or",
+                    "            UnicodeCategory.NonSpacingMark or",
+                    "            UnicodeCategory.DecimalDigitNumber or",
+                    "            UnicodeCategory.ConnectorPunctuation => true,",
+                    "            _ => false,",
+                    "        };",
+                    "}",
+                });
+            }
+        }
+
+        /// <summary>Adds the IsBoundary helper to the required helpers collection.</summary>
+        private static void AddIsBoundaryHelper(Dictionary<string, string[]> requiredHelpers)
+        {
+            const string IsBoundary = nameof(IsBoundary);
+            if (!requiredHelpers.ContainsKey(IsBoundary))
+            {
+                requiredHelpers.Add(IsBoundary, new string[]
+                {
+                    "/// <summary>Determines whether the specified index is a boundary.</summary>",
+                    "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+                    "internal static bool IsBoundary(ReadOnlySpan<char> inputSpan, int index)",
+                    "{",
+                    "    int indexMinus1 = index - 1;",
+                    "    return ((uint)indexMinus1 < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[indexMinus1])) !=",
+                    "           ((uint)index < (uint)inputSpan.Length && IsBoundaryWordChar(inputSpan[index]));",
+                    "",
+                    "    static bool IsBoundaryWordChar(char ch) => IsWordChar(ch) || (ch == '\\u200C' | ch == '\\u200D');",
+                    "}",
+                });
+
+                AddIsWordCharHelper(requiredHelpers);
+            }
+        }
+
+        /// <summary>Adds the IsECMABoundary helper to the required helpers collection.</summary>
+        private static void AddIsECMABoundaryHelper(Dictionary<string, string[]> requiredHelpers)
+        {
+            const string IsECMABoundary = nameof(IsECMABoundary);
+            if (!requiredHelpers.ContainsKey(IsECMABoundary))
+            {
+                requiredHelpers.Add(IsECMABoundary, new string[]
+                {
+                    "/// <summary>Determines whether the specified index is a boundary (ECMAScript).</summary>",
+                    "[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+                    "internal static bool IsECMABoundary(ReadOnlySpan<char> inputSpan, int index)",
+                    "{",
+                    "    int indexMinus1 = index - 1;",
+                    "    return ((uint)indexMinus1 < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[indexMinus1])) !=",
+                    "           ((uint)index < (uint)inputSpan.Length && IsECMAWordChar(inputSpan[index]));",
+                    "",
+                    "    static bool IsECMAWordChar(char ch) =>",
+                    "        ((((uint)ch - 'A') & ~0x20) < 26) || // ASCII letter",
+                    "        (((uint)ch - '0') < 10) || // digit",
+                    "        ch == '_' || // underscore",
+                    "        ch == '\\u0130'; // latin capital letter I with dot above",
+                    "}",
+                });
+            }
+        }
+
         /// <summary>Emits the body of the Scan method override.</summary>
-        private static void EmitScan(IndentedTextWriter writer, RegexMethod rm, string id)
+        private static void EmitScan(IndentedTextWriter writer, RegexMethod rm)
         {
             bool rtl = (rm.Options & RegexOptions.RightToLeft) != 0;
 
-            using (EmitBlock(writer, "while (TryFindNextPossibleStartingPosition(text))"))
+            using (EmitBlock(writer, "while (TryFindNextPossibleStartingPosition(inputSpan))"))
             {
                 if (rm.MatchTimeout != Timeout.Infinite)
                 {
@@ -312,8 +306,8 @@ namespace System.Text.RegularExpressions.Generator
                     writer.WriteLine();
                 }
 
-                writer.WriteLine("// If we find a match on the current position, or we have reached the end of the input, we are done.");
-                using (EmitBlock(writer, $"if (TryMatchAtCurrentPosition(text) || base.runtextpos == {(!rtl ? "text.Length" : "0")})"))
+                writer.WriteLine("// If we find a match at the current position, or we have reached the end of the input, we are done.");
+                using (EmitBlock(writer, $"if (TryMatchAtCurrentPosition(inputSpan) || base.runtextpos == {(!rtl ? "inputSpan.Length" : "0")})"))
                 {
                     writer.WriteLine("return;");
                 }
@@ -324,12 +318,11 @@ namespace System.Text.RegularExpressions.Generator
         }
 
         /// <summary>Emits the body of the TryFindNextPossibleStartingPosition.</summary>
-        private static RequiredHelperFunctions EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, string id)
+        private static void EmitTryFindNextPossibleStartingPosition(IndentedTextWriter writer, RegexMethod rm, Dictionary<string, string[]> requiredHelpers)
         {
             RegexOptions options = (RegexOptions)rm.Options;
             RegexTree regexTree = rm.Tree;
             bool hasTextInfo = false;
-            RequiredHelperFunctions requiredHelpers = RequiredHelperFunctions.None;
             bool rtl = (options & RegexOptions.RightToLeft) != 0;
 
             // In some cases, we need to emit declarations at the beginning of the method, but we only discover we need them later.
@@ -422,7 +415,7 @@ namespace System.Text.RegularExpressions.Generator
 
             // We're done.  Patch up any additional declarations.
             ReplaceAdditionalDeclarations(writer, additionalDeclarations, additionalDeclarationsPosition, additionalDeclarationsIndent);
-            return requiredHelpers;
+            return;
 
             // Emit a goto for the specified label.
             void Goto(string label) => writer.WriteLine($"goto {label};");
@@ -545,7 +538,7 @@ namespace System.Text.RegularExpressions.Generator
                         writer.WriteLine("// Beginning-of-line anchor");
                         using (EmitBlock(writer, "if (pos > 0 && inputSpan[pos - 1] != '\\n')"))
                         {
-                            writer.WriteLine("int newlinePos = global::System.MemoryExtensions.IndexOf(inputSpan.Slice(pos), '\\n');");
+                            writer.WriteLine("int newlinePos = inputSpan.Slice(pos).IndexOf('\\n');");
                             using (EmitBlock(writer, "if ((uint)newlinePos > inputSpan.Length - pos - 1)"))
                             {
                                 Goto(NoStartingPositionFound);
@@ -594,7 +587,7 @@ namespace System.Text.RegularExpressions.Generator
             // Emits a case-sensitive prefix search for a string at the beginning of the pattern.
             void EmitIndexOf_LeftToRight(string prefix)
             {
-                writer.WriteLine($"int i = global::System.MemoryExtensions.IndexOf(inputSpan.Slice(pos), {Literal(prefix)});");
+                writer.WriteLine($"int i = inputSpan.Slice(pos).IndexOf({Literal(prefix)});");
                 writer.WriteLine("if (i >= 0)");
                 writer.WriteLine("{");
                 writer.WriteLine("    base.runtextpos = pos + i;");
@@ -605,7 +598,7 @@ namespace System.Text.RegularExpressions.Generator
             // Emits a case-sensitive right-to-left prefix search for a string at the beginning of the pattern.
             void EmitIndexOf_RightToLeft(string prefix)
             {
-                writer.WriteLine($"pos = global::System.MemoryExtensions.LastIndexOf(inputSpan.Slice(0, pos), {Literal(prefix)});");
+                writer.WriteLine($"pos = inputSpan.Slice(0, pos).LastIndexOf({Literal(prefix)});");
                 writer.WriteLine("if (pos >= 0)");
                 writer.WriteLine("{");
                 writer.WriteLine($"    base.runtextpos = pos + {prefix.Length};");
@@ -631,7 +624,7 @@ namespace System.Text.RegularExpressions.Generator
                 FinishEmitScope loopBlock = default;
                 if (needLoop)
                 {
-                    writer.WriteLine("global::System.ReadOnlySpan<char> span = inputSpan.Slice(pos);");
+                    writer.WriteLine("ReadOnlySpan<char> span = inputSpan.Slice(pos);");
                     string upperBound = "span.Length" + (setsToUse > 1 || primarySet.Distance != 0 ? $" - {minRequiredLength - 1}" : "");
                     loopBlock = EmitBlock(writer, $"for (int i = 0; i < {upperBound}; i++)");
                 }
@@ -652,10 +645,10 @@ namespace System.Text.RegularExpressions.Generator
 
                     string indexOf = primarySet.Chars!.Length switch
                     {
-                        1 => $"global::System.MemoryExtensions.IndexOf({span}, {Literal(primarySet.Chars[0])})",
-                        2 => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])})",
-                        3 => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])}, {Literal(primarySet.Chars[2])})",
-                        _ => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(new string(primarySet.Chars))})",
+                        1 => $"{span}.IndexOf({Literal(primarySet.Chars[0])})",
+                        2 => $"{span}.IndexOfAny({Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])})",
+                        3 => $"{span}.IndexOfAny({Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])}, {Literal(primarySet.Chars[2])})",
+                        _ => $"{span}.IndexOfAny({Literal(new string(primarySet.Chars))})",
                     };
 
                     if (needLoop)
@@ -703,7 +696,7 @@ namespace System.Text.RegularExpressions.Generator
                         for (; setIndex < setsToUse; setIndex++)
                         {
                             string spanIndex = $"span[i{(sets[setIndex].Distance > 0 ? $" + {sets[setIndex].Distance}" : "")}]";
-                            string charInClassExpr = MatchCharacterClass(hasTextInfo, options, spanIndex, sets[setIndex].Set, sets[setIndex].CaseInsensitive, negate: false, additionalDeclarations, ref requiredHelpers);
+                            string charInClassExpr = MatchCharacterClass(hasTextInfo, options, spanIndex, sets[setIndex].Set, sets[setIndex].CaseInsensitive, negate: false, additionalDeclarations, requiredHelpers);
 
                             if (setIndex == start)
                             {
@@ -738,7 +731,7 @@ namespace System.Text.RegularExpressions.Generator
 
                 if (set.Chars is { Length: 1 } && !set.CaseInsensitive)
                 {
-                    writer.WriteLine($"pos = global::System.MemoryExtensions.LastIndexOf(inputSpan.Slice(0, pos), {Literal(set.Chars[0])});");
+                    writer.WriteLine($"pos = inputSpan.Slice(0, pos).LastIndexOf({Literal(set.Chars[0])});");
                     writer.WriteLine("if (pos >= 0)");
                     writer.WriteLine("{");
                     writer.WriteLine("    base.runtextpos = pos + 1;");
@@ -749,7 +742,7 @@ namespace System.Text.RegularExpressions.Generator
                 {
                     using (EmitBlock(writer, "while ((uint)--pos < (uint)inputSpan.Length)"))
                     {
-                        using (EmitBlock(writer, $"if ({MatchCharacterClass(hasTextInfo, options, "inputSpan[pos]", set.Set, set.CaseInsensitive, negate: false, additionalDeclarations, ref requiredHelpers)})"))
+                        using (EmitBlock(writer, $"if ({MatchCharacterClass(hasTextInfo, options, "inputSpan[pos]", set.Set, set.CaseInsensitive, negate: false, additionalDeclarations, requiredHelpers)})"))
                         {
                             writer.WriteLine("base.runtextpos = pos + 1;");
                             writer.WriteLine("return true;");
@@ -769,19 +762,19 @@ namespace System.Text.RegularExpressions.Generator
 
                 using (EmitBlock(writer, "while (true)"))
                 {
-                    writer.WriteLine($"global::System.ReadOnlySpan<char> slice = inputSpan.Slice(pos);");
+                    writer.WriteLine($"ReadOnlySpan<char> slice = inputSpan.Slice(pos);");
                     writer.WriteLine();
 
                     // Find the literal.  If we can't find it, we're done searching.
-                    writer.Write("int i = global::System.MemoryExtensions.");
+                    writer.Write("int i = slice.");
                     writer.WriteLine(
-                        target.Literal.String is string literalString ? $"IndexOf(slice, {Literal(literalString)});" :
-                        target.Literal.Chars is not char[] literalChars ? $"IndexOf(slice, {Literal(target.Literal.Char)});" :
+                        target.Literal.String is string literalString ? $"IndexOf({Literal(literalString)});" :
+                        target.Literal.Chars is not char[] literalChars ? $"IndexOf({Literal(target.Literal.Char)});" :
                         literalChars.Length switch
                         {
-                            2 => $"IndexOfAny(slice, {Literal(literalChars[0])}, {Literal(literalChars[1])});",
-                            3 => $"IndexOfAny(slice, {Literal(literalChars[0])}, {Literal(literalChars[1])}, {Literal(literalChars[2])});",
-                            _ => $"IndexOfAny(slice, {Literal(new string(literalChars))});",
+                            2 => $"IndexOfAny({Literal(literalChars[0])}, {Literal(literalChars[1])});",
+                            3 => $"IndexOfAny({Literal(literalChars[0])}, {Literal(literalChars[1])}, {Literal(literalChars[2])});",
+                            _ => $"IndexOfAny({Literal(new string(literalChars))});",
                         });
                     using (EmitBlock(writer, $"if (i < 0)"))
                     {
@@ -791,7 +784,7 @@ namespace System.Text.RegularExpressions.Generator
 
                     // We found the literal.  Walk backwards from it finding as many matches as we can against the loop.
                     writer.WriteLine("int prev = i;");
-                    writer.WriteLine($"while ((uint)--prev < (uint)slice.Length && {MatchCharacterClass(hasTextInfo, options, "slice[prev]", target.LoopNode.Str!, caseInsensitive: false, negate: false, additionalDeclarations, ref requiredHelpers)});");
+                    writer.WriteLine($"while ((uint)--prev < (uint)slice.Length && {MatchCharacterClass(hasTextInfo, options, "slice[prev]", target.LoopNode.Str!, caseInsensitive: false, negate: false, additionalDeclarations, requiredHelpers)});");
 
                     if (target.LoopNode.M > 0)
                     {
@@ -833,14 +826,14 @@ namespace System.Text.RegularExpressions.Generator
                     if (needsCulture)
                     {
                         hasTextInfo = true;
-                        writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;");
+                        writer.WriteLine("TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;");
                     }
                 }
             }
         }
 
         /// <summary>Emits the body of the TryMatchAtCurrentPosition.</summary>
-        private static RequiredHelperFunctions EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, string id, AnalysisResults analysis)
+        private static void EmitTryMatchAtCurrentPosition(IndentedTextWriter writer, RegexMethod rm, AnalysisResults analysis, Dictionary<string, string[]> requiredHelpers)
         {
             // In .NET Framework and up through .NET Core 3.1, the code generated for RegexOptions.Compiled was effectively an unrolled
             // version of what RegexInterpreter would process.  The RegexNode tree would be turned into a series of opcodes via
@@ -868,7 +861,6 @@ namespace System.Text.RegularExpressions.Generator
 
             RegexOptions options = (RegexOptions)rm.Options;
             RegexTree regexTree = rm.Tree;
-            RequiredHelperFunctions requiredHelpers = RequiredHelperFunctions.None;
 
             // Helper to define names.  Names start unadorned, but as soon as there's repetition,
             // they begin to have a numbered suffix.
@@ -893,7 +885,7 @@ namespace System.Text.RegularExpressions.Generator
                     writer.WriteLine("base.Capture(0, start, end);");
                     writer.WriteLine("base.runtextpos = end;");
                     writer.WriteLine("return true;");
-                    return requiredHelpers;
+                    return;
 
                 case RegexNodeKind.Empty:
                     // This case isn't common in production, but it's very common when first getting started with the
@@ -901,7 +893,7 @@ namespace System.Text.RegularExpressions.Generator
                     // it from a learning perspective, this is very common, as it's the empty string you start with.
                     writer.WriteLine("base.Capture(0, base.runtextpos, base.runtextpos);");
                     writer.WriteLine("return true;");
-                    return requiredHelpers;
+                    return;
             }
 
             // In some cases, we need to emit declarations at the beginning of the method, but we only discover we need them later.
@@ -975,7 +967,7 @@ namespace System.Text.RegularExpressions.Generator
                 }
             }
 
-            return requiredHelpers;
+            return;
 
             // Helper to create a name guaranteed to be unique within the function.
             string ReserveName(string prefix)
@@ -1031,7 +1023,7 @@ namespace System.Text.RegularExpressions.Generator
             {
                 if (defineLocal)
                 {
-                    writer.Write("global::System.ReadOnlySpan<char> ");
+                    writer.Write("ReadOnlySpan<char> ");
                 }
                 writer.WriteLine($"{sliceSpan} = inputSpan.Slice(pos);");
             }
@@ -1489,7 +1481,7 @@ namespace System.Text.RegularExpressions.Generator
                             // If we're case-sensitive, we can simply validate that the remaining length of the slice is sufficient
                             // to possibly match, and then do a SequenceEqual against the matched text.
                             writer.WriteLine($"if ({sliceSpan}.Length < matchLength || ");
-                            using (EmitBlock(writer, $"    !global::System.MemoryExtensions.SequenceEqual(inputSpan.Slice(base.MatchIndex({capnum}), matchLength), {sliceSpan}.Slice(0, matchLength)))"))
+                            using (EmitBlock(writer, $"    !inputSpan.Slice(base.MatchIndex({capnum}), matchLength).SequenceEqual({sliceSpan}.Slice(0, matchLength)))"))
                             {
                                 Goto(doneLabel);
                             }
@@ -2378,7 +2370,7 @@ namespace System.Text.RegularExpressions.Generator
 
                 if (node.IsSetFamily)
                 {
-                    expr = $"{MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: true, additionalDeclarations, ref requiredHelpers)}";
+                    expr = $"{MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: true, additionalDeclarations, requiredHelpers)}";
                 }
                 else
                 {
@@ -2416,24 +2408,23 @@ namespace System.Text.RegularExpressions.Generator
             // Emits the code to handle a boundary check on a character.
             void EmitBoundary(RegexNode node)
             {
-                Debug.Assert(node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary, $"Unexpected type: {node.Kind}");
+                Debug.Assert(node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary or RegexNodeKind.ECMABoundary or RegexNodeKind.NonECMABoundary, $"Unexpected kind: {node.Kind}");
 
-                string call = node.Kind switch
+                string call;
+                if (node.Kind is RegexNodeKind.Boundary or RegexNodeKind.NonBoundary)
                 {
-                    RegexNodeKind.Boundary => "!IsBoundary",
-                    RegexNodeKind.NonBoundary => "IsBoundary",
-                    RegexNodeKind.ECMABoundary => "!IsECMABoundary",
-                    _ => "IsECMABoundary",
-                };
-
-                RequiredHelperFunctions boundaryFunctionRequired = node.Kind switch
+                    call = node.Kind is RegexNodeKind.Boundary ?
+                        $"!{HelpersTypeName}.IsBoundary" :
+                        $"{HelpersTypeName}.IsBoundary";
+                    AddIsBoundaryHelper(requiredHelpers);
+                }
+                else
                 {
-                    RegexNodeKind.Boundary or
-                    RegexNodeKind.NonBoundary => RequiredHelperFunctions.IsBoundary | RequiredHelperFunctions.IsWordChar, // IsBoundary internally uses IsWordChar
-                    _ => RequiredHelperFunctions.IsECMABoundary
-                };
-
-                requiredHelpers |= boundaryFunctionRequired;
+                    call = node.Kind is RegexNodeKind.ECMABoundary ?
+                        $"!{HelpersTypeName}.IsECMABoundary" :
+                        $"{HelpersTypeName}.IsECMABoundary";
+                    AddIsECMABoundaryHelper(requiredHelpers);
+                }
 
                 using (EmitBlock(writer, $"if ({call}(inputSpan, pos{(sliceStaticPos > 0 ? $" + {sliceStaticPos}" : "")}))"))
                 {
@@ -2561,7 +2552,7 @@ namespace System.Text.RegularExpressions.Generator
                 else
                 {
                     string sourceSpan = sliceStaticPos > 0 ? $"{sliceSpan}.Slice({sliceStaticPos})" : sliceSpan;
-                    string clause = $"!global::System.MemoryExtensions.StartsWith({sourceSpan}, {Literal(str)})";
+                    string clause = $"!{sourceSpan}.StartsWith({Literal(str)})";
                     if (clauseOnly)
                     {
                         writer.Write(clause);
@@ -2643,13 +2634,13 @@ namespace System.Text.RegularExpressions.Generator
                 {
                     writer.WriteLine($"if ({startingPos} >= {endingPos} ||");
                     using (EmitBlock(writer,
-                        literal.Item2 is not null ? $"    ({endingPos} = global::System.MemoryExtensions.LastIndexOf(inputSpan.Slice({startingPos}, global::System.Math.Min(inputSpan.Length, {endingPos} + {literal.Item2.Length - 1}) - {startingPos}), {Literal(literal.Item2)})) < 0)" :
-                        literal.Item3 is null ? $"    ({endingPos} = global::System.MemoryExtensions.LastIndexOf(inputSpan.Slice({startingPos}, {endingPos} - {startingPos}), {Literal(literal.Item1)})) < 0)" :
+                        literal.Item2 is not null ? $"    ({endingPos} = inputSpan.Slice({startingPos}, Math.Min(inputSpan.Length, {endingPos} + {literal.Item2.Length - 1}) - {startingPos}).LastIndexOf({Literal(literal.Item2)})) < 0)" :
+                        literal.Item3 is null ? $"    ({endingPos} = inputSpan.Slice({startingPos}, {endingPos} - {startingPos}).LastIndexOf({Literal(literal.Item1)})) < 0)" :
                         literal.Item3.Length switch
                         {
-                            2 => $"    ({endingPos} = global::System.MemoryExtensions.LastIndexOfAny(inputSpan.Slice({startingPos}, {endingPos} - {startingPos}), {Literal(literal.Item3[0])}, {Literal(literal.Item3[1])})) < 0)",
-                            3 => $"    ({endingPos} = global::System.MemoryExtensions.LastIndexOfAny(inputSpan.Slice({startingPos}, {endingPos} - {startingPos}), {Literal(literal.Item3[0])}, {Literal(literal.Item3[1])}, {Literal(literal.Item3[2])})) < 0)",
-                            _ => $"    ({endingPos} = global::System.MemoryExtensions.LastIndexOfAny(inputSpan.Slice({startingPos}, {endingPos} - {startingPos}), {Literal(literal.Item3)})) < 0)",
+                            2 => $"    ({endingPos} = inputSpan.Slice({startingPos}, {endingPos} - {startingPos}).LastIndexOfAny({Literal(literal.Item3[0])}, {Literal(literal.Item3[1])})) < 0)",
+                            3 => $"    ({endingPos} = inputSpan.Slice({startingPos}, {endingPos} - {startingPos}).LastIndexOfAny({Literal(literal.Item3[0])}, {Literal(literal.Item3[1])}, {Literal(literal.Item3[2])})) < 0)",
+                            _ => $"    ({endingPos} = inputSpan.Slice({startingPos}, {endingPos} - {startingPos}).LastIndexOfAny({Literal(literal.Item3)})) < 0)",
                         }))
                     {
                         Goto(doneLabel);
@@ -2789,12 +2780,12 @@ namespace System.Text.RegularExpressions.Generator
                         // We can implement it to search for either that char or the literal, whichever comes first.
                         // If it ends up being that node.Ch, the loop fails (we're only here if we're backtracking).
                         writer.WriteLine(
-                            literal.Item2 is not null ? $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(node.Ch)}, {Literal(literal.Item2[0])});" :
-                            literal.Item3 is null ? $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(node.Ch)}, {Literal(literal.Item1)});" :
+                            literal.Item2 is not null ? $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(node.Ch)}, {Literal(literal.Item2[0])});" :
+                            literal.Item3 is null ? $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(node.Ch)}, {Literal(literal.Item1)});" :
                             literal.Item3.Length switch
                             {
-                                2 => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(node.Ch)}, {Literal(literal.Item3[0])}, {Literal(literal.Item3[1])});",
-                                _ => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(node.Ch + literal.Item3)});",
+                                2 => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(node.Ch)}, {Literal(literal.Item3[0])}, {Literal(literal.Item3[1])});",
+                                _ => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(node.Ch + literal.Item3)});",
                             });
                         using (EmitBlock(writer, $"if ((uint){startingPos} >= (uint){sliceSpan}.Length || {sliceSpan}[{startingPos}] == {Literal(node.Ch)})"))
                         {
@@ -2812,13 +2803,13 @@ namespace System.Text.RegularExpressions.Generator
                         // This lazy loop will consume all characters until the subsequent literal. If the subsequent literal
                         // isn't found, the loop fails. We can implement it to just search for that literal.
                         writer.WriteLine(
-                            literal2.Item2 is not null ? $"{startingPos} = global::System.MemoryExtensions.IndexOf({sliceSpan}, {Literal(literal2.Item2)});" :
-                            literal2.Item3 is null ? $"{startingPos} = global::System.MemoryExtensions.IndexOf({sliceSpan}, {Literal(literal2.Item1)});" :
+                            literal2.Item2 is not null ? $"{startingPos} = {sliceSpan}.IndexOf({Literal(literal2.Item2)});" :
+                            literal2.Item3 is null ? $"{startingPos} = {sliceSpan}.IndexOf({Literal(literal2.Item1)});" :
                             literal2.Item3.Length switch
                             {
-                                2 => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(literal2.Item3[0])}, {Literal(literal2.Item3[1])});",
-                                3 => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(literal2.Item3[0])}, {Literal(literal2.Item3[1])}, {Literal(literal2.Item3[2])});",
-                                _ => $"{startingPos} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}, {Literal(literal2.Item3)});",
+                                2 => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(literal2.Item3[0])}, {Literal(literal2.Item3[1])});",
+                                3 => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(literal2.Item3[0])}, {Literal(literal2.Item3[1])}, {Literal(literal2.Item3[2])});",
+                                _ => $"{startingPos} = {sliceSpan}.IndexOfAny({Literal(literal2.Item3)});",
                             });
                         using (EmitBlock(writer, $"if ({startingPos} < 0)"))
                         {
@@ -3146,7 +3137,7 @@ namespace System.Text.RegularExpressions.Generator
                     }
 
                     string repeaterSpan = "repeaterSlice"; // As this repeater doesn't wrap arbitrary node emits, this shouldn't conflict with anything
-                    writer.WriteLine($"global::System.ReadOnlySpan<char> {repeaterSpan} = {sliceSpan}.Slice({sliceStaticPos}, {iterations});");
+                    writer.WriteLine($"ReadOnlySpan<char> {repeaterSpan} = {sliceSpan}.Slice({sliceStaticPos}, {iterations});");
                     using (EmitBlock(writer, $"for (int i = 0; i < {repeaterSpan}.Length; i++)"))
                     {
                         EmitTimeoutCheck(writer, hasTimeout);
@@ -3198,7 +3189,7 @@ namespace System.Text.RegularExpressions.Generator
                     string expr = $"inputSpan[pos - {iterationLocal} - 1]";
                     if (node.IsSetFamily)
                     {
-                        expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, ref requiredHelpers);
+                        expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, requiredHelpers);
                     }
                     else
                     {
@@ -3226,12 +3217,12 @@ namespace System.Text.RegularExpressions.Generator
                     // restriction is purely for simplicity; it could be removed in the future with additional code to
                     // handle the unbounded case.
 
-                    writer.Write($"int {iterationLocal} = global::System.MemoryExtensions.IndexOf({sliceSpan}");
+                    writer.Write($"int {iterationLocal} = {sliceSpan}");
                     if (sliceStaticPos > 0)
                     {
                         writer.Write($".Slice({sliceStaticPos})");
                     }
-                    writer.WriteLine($"{Literal(node.Ch)});");
+                    writer.WriteLine($".IndexOf({Literal(node.Ch)});");
                     
                     using (EmitBlock(writer, $"if ({iterationLocal} < 0)"))
                     {
@@ -3252,16 +3243,16 @@ namespace System.Text.RegularExpressions.Generator
                     // As with the notoneloopatomic above, the unbounded constraint is purely for simplicity.
                     Debug.Assert(numSetChars > 1);
 
-                    writer.Write($"int {iterationLocal} = global::System.MemoryExtensions.IndexOfAny({sliceSpan}");
+                    writer.Write($"int {iterationLocal} = {sliceSpan}");
                     if (sliceStaticPos != 0)
                     {
                         writer.Write($".Slice({sliceStaticPos})");
                     }
                     writer.WriteLine(numSetChars switch
                     {
-                        2 => $"{Literal(setChars[0])}, {Literal(setChars[1])});",
-                        3 => $"{Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])});",
-                        _ => $"{Literal(setChars.Slice(0, numSetChars).ToString())});",
+                        2 => $".IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])});",
+                        3 => $".IndexOfAny({Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])});",
+                        _ => $".IndexOfAny({Literal(setChars.Slice(0, numSetChars).ToString())});",
                     });
                     using (EmitBlock(writer, $"if ({iterationLocal} < 0)"))
                     {
@@ -3286,7 +3277,7 @@ namespace System.Text.RegularExpressions.Generator
                     string expr = $"{sliceSpan}[{iterationLocal}]";
                     if (node.IsSetFamily)
                     {
-                        expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, ref requiredHelpers);
+                        expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, requiredHelpers);
                     }
                     else
                     {
@@ -3356,7 +3347,7 @@ namespace System.Text.RegularExpressions.Generator
 
                 if (node.IsSetFamily)
                 {
-                    expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, ref requiredHelpers);
+                    expr = MatchCharacterClass(hasTextInfo, options, expr, node.Str!, IsCaseInsensitive(node), negate: false, additionalDeclarations, requiredHelpers);
                 }
                 else
                 {
@@ -3581,41 +3572,41 @@ namespace System.Text.RegularExpressions.Generator
             // Emits code to unwind the capture stack until the crawl position specified in the provided local.
             void EmitUncaptureUntil(string capturepos)
             {
-                string name = "UncaptureUntil";
+                const string UncaptureUntil = nameof(UncaptureUntil);
 
-                if (!additionalLocalFunctions.ContainsKey(name))
+                if (!additionalLocalFunctions.ContainsKey(UncaptureUntil))
                 {
-                    var lines = new string[9];
-                    lines[0] = "// <summary>Undo captures until we reach the specified capture position.</summary>";
-                    lines[1] = "[global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]";
-                    lines[2] = $"void {name}(int capturepos)";
-                    lines[3] = "{";
-                    lines[4] = "    while (base.Crawlpos() > capturepos)";
-                    lines[5] = "    {";
-                    lines[6] = "        base.Uncapture();";
-                    lines[7] = "    }";
-                    lines[8] = "}";
-
-                    additionalLocalFunctions.Add(name, lines);
+                    additionalLocalFunctions.Add(UncaptureUntil, new string[]
+                    {
+                        $"// <summary>Undo captures until it reaches the specified capture position.</summary>",
+                        $"[MethodImpl(MethodImplOptions.AggressiveInlining)]",
+                        $"void {UncaptureUntil}(int capturePosition)",
+                        $"{{",
+                        $"    while (base.Crawlpos() > capturePosition)",
+                        $"    {{",
+                        $"        base.Uncapture();",
+                        $"    }}",
+                        $"}}",
+                    });
                 }
 
-                writer.WriteLine($"{name}({capturepos});");
+                writer.WriteLine($"{UncaptureUntil}({capturepos});");
             }
 
             /// <summary>Pushes values on to the backtracking stack.</summary>
             void EmitStackPush(params string[] args)
             {
                 Debug.Assert(args.Length is >= 1);
-                string function = $"StackPush{args.Length}";
+                string methodName = $"StackPush{args.Length}";
 
                 additionalDeclarations.Add("int stackpos = 0;");
 
-                if (!additionalLocalFunctions.ContainsKey(function))
+                if (!requiredHelpers.ContainsKey(methodName))
                 {
                     var lines = new string[24 + args.Length];
-                    lines[0] = $"// <summary>Push {args.Length} value{(args.Length == 1 ? "" : "s")} onto the backtracking stack.</summary>";
-                    lines[1] = $"[global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]";
-                    lines[2] = $"static void {function}(ref int[] stack, ref int pos{FormatN(", int arg{0}", args.Length)})";
+                    lines[0] = $"// <summary>Pushes {args.Length} value{(args.Length == 1 ? "" : "s")} onto the backtracking stack.</summary>";
+                    lines[1] = $"[MethodImpl(MethodImplOptions.AggressiveInlining)]";
+                    lines[2] = $"internal static void {methodName}(ref int[] stack, ref int pos{FormatN(", int arg{0}", args.Length)})";
                     lines[3] = $"{{";
                     lines[4] = $"    // If there's space available for {(args.Length > 1 ? $"all {args.Length} values, store them" : "the value, store it")}.";
                     lines[5] = $"    int[] s = stack;";
@@ -3634,18 +3625,18 @@ namespace System.Text.RegularExpressions.Generator
                     lines[14 + args.Length] = $"    WithResize(ref stack, ref pos{FormatN(", arg{0}", args.Length)});";
                     lines[15 + args.Length] = $"";
                     lines[16 + args.Length] = $"    // <summary>Resize the backtracking stack array and push {args.Length} value{(args.Length == 1 ? "" : "s")} onto the stack.</summary>";
-                    lines[17 + args.Length] = $"    [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.NoInlining)]";
+                    lines[17 + args.Length] = $"    [MethodImpl(MethodImplOptions.NoInlining)]";
                     lines[18 + args.Length] = $"    static void WithResize(ref int[] stack, ref int pos{FormatN(", int arg{0}", args.Length)})";
                     lines[19 + args.Length] = $"    {{";
-                    lines[20 + args.Length] = $"        global::System.Array.Resize(ref stack, (pos + {args.Length - 1}) * 2);";
-                    lines[21 + args.Length] = $"        {function}(ref stack, ref pos{FormatN(", arg{0}", args.Length)});";
+                    lines[20 + args.Length] = $"        Array.Resize(ref stack, (pos + {args.Length - 1}) * 2);";
+                    lines[21 + args.Length] = $"        {methodName}(ref stack, ref pos{FormatN(", arg{0}", args.Length)});";
                     lines[22 + args.Length] = $"    }}";
                     lines[23 + args.Length] = $"}}";
 
-                    additionalLocalFunctions.Add(function, lines);
+                    requiredHelpers.Add(methodName, lines);
                 }
 
-                writer.WriteLine($"{function}(ref base.runstack!, ref stackpos, {string.Join(", ", args)});");
+                writer.WriteLine($"{HelpersTypeName}.{methodName}(ref base.runstack!, ref stackpos, {string.Join(", ", args)});");
             }
 
             /// <summary>Pops values from the backtracking stack into the specified locations.</summary>
@@ -3659,14 +3650,14 @@ namespace System.Text.RegularExpressions.Generator
                     return;
                 }
 
-                string function = $"StackPop{args.Length}";
+                string methodName = $"StackPop{args.Length}";
 
-                if (!additionalLocalFunctions.ContainsKey(function))
+                if (!requiredHelpers.ContainsKey(methodName))
                 {
                     var lines = new string[5 + args.Length];
-                    lines[0] = $"// <summary>Pop {args.Length} value{(args.Length == 1 ? "" : "s")} from the backtracking stack.</summary>";
-                    lines[1] = $"[global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]";
-                    lines[2] = $"static void {function}(int[] stack, ref int pos{FormatN(", out int arg{0}", args.Length)})";
+                    lines[0] = $"// <summary>Pops {args.Length} value{(args.Length == 1 ? "" : "s")} from the backtracking stack.</summary>";
+                    lines[1] = $"[MethodImpl(MethodImplOptions.AggressiveInlining)]";
+                    lines[2] = $"internal static void {methodName}(int[] stack, ref int pos{FormatN(", out int arg{0}", args.Length)})";
                     lines[3] = $"{{";
                     for (int i = 0; i < args.Length; i++)
                     {
@@ -3674,10 +3665,10 @@ namespace System.Text.RegularExpressions.Generator
                     }
                     lines[4 + args.Length] = $"}}";
 
-                    additionalLocalFunctions.Add(function, lines);
+                    requiredHelpers.Add(methodName, lines);
                 }
 
-                writer.WriteLine($"{function}(base.runstack, ref stackpos, out {string.Join(", out ", args)});");
+                writer.WriteLine($"{HelpersTypeName}.{methodName}(base.runstack, ref stackpos, out {string.Join(", out ", args)});");
             }
 
             /// <summary>Expression for popping the next item from the backtracking stack.</summary>
@@ -3721,7 +3712,7 @@ namespace System.Text.RegularExpressions.Generator
         {
             if (analysis.HasIgnoreCase && ((RegexOptions)rm.Options & RegexOptions.CultureInvariant) == 0)
             {
-                writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;");
+                writer.WriteLine("TextInfo textInfo = CultureInfo.CurrentCulture.TextInfo;");
                 return true;
             }
 
@@ -3734,7 +3725,7 @@ namespace System.Text.RegularExpressions.Generator
 
         private static string ToLowerIfNeeded(bool hasTextInfo, RegexOptions options, string expression, bool toLower) => toLower ? ToLower(hasTextInfo, options, expression) : expression;
 
-        private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options, string chExpr, string charClass, bool caseInsensitive, bool negate, HashSet<string> additionalDeclarations, ref RequiredHelperFunctions requiredHelpers)
+        private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options, string chExpr, string charClass, bool caseInsensitive, bool negate, HashSet<string> additionalDeclarations, Dictionary<string, string[]> requiredHelpers)
         {
             // We need to perform the equivalent of calling RegexRunner.CharInClass(ch, charClass),
             // but that call is relatively expensive.  Before we fall back to it, we try to optimize
@@ -3762,9 +3753,9 @@ namespace System.Text.RegularExpressions.Generator
 
                 case RegexCharClass.WordClass:
                 case RegexCharClass.NotWordClass:
-                    requiredHelpers |= RequiredHelperFunctions.IsWordChar;
+                    AddIsWordCharHelper(requiredHelpers);
                     negate ^= charClass == RegexCharClass.NotWordClass;
-                    return $"{(negate ? "!" : "")}IsWordChar({chExpr})";
+                    return $"{(negate ? "!" : "")}{HelpersTypeName}.IsWordChar({chExpr})";
             }
 
             // If we're meant to be doing a case-insensitive lookup, and if we're not using the invariant culture,
@@ -3798,7 +3789,7 @@ namespace System.Text.RegularExpressions.Generator
             if (!invariant && RegexCharClass.TryGetSingleUnicodeCategory(charClass, out UnicodeCategory category, out bool negated))
             {
                 negate ^= negated;
-                return $"(char.GetUnicodeCategory({chExpr}) {(negate ? "!=" : "==")} global::System.Globalization.UnicodeCategory.{category})";
+                return $"(char.GetUnicodeCategory({chExpr}) {(negate ? "!=" : "==")} UnicodeCategory.{category})";
             }
 
             // Next, if there are only 2 or 3 chars in the set (fairly common due to the sets we create for prefixes),
@@ -3849,8 +3840,8 @@ namespace System.Text.RegularExpressions.Generator
                     // extend the analysis to produce a known lower-bound and compare against
                     // that rather than always using 128 as the pivot point.)
                     return negate ?
-                        $"((ch = {chExpr}) < 128 || !global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" :
-                        $"((ch = {chExpr}) >= 128 && global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))";
+                        $"((ch = {chExpr}) < 128 || !RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" :
+                        $"((ch = {chExpr}) >= 128 && RegexRunner.CharInClass((char)ch, {Literal(charClass)}))";
                 }
 
                 if (analysis.AllAsciiContained)
@@ -3859,8 +3850,8 @@ namespace System.Text.RegularExpressions.Generator
                     // if the class were the negated example from case 1 above:
                     // [^\p{IsGreek}\p{IsGreekExtended}].
                     return negate ?
-                        $"((ch = {chExpr}) >= 128 && !global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" :
-                        $"((ch = {chExpr}) < 128 || global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))";
+                        $"((ch = {chExpr}) >= 128 && !RegexRunner.CharInClass((char)ch, {Literal(charClass)}))" :
+                        $"((ch = {chExpr}) < 128 || RegexRunner.CharInClass((char)ch, {Literal(charClass)}))";
                 }
             }
 
@@ -3922,10 +3913,10 @@ namespace System.Text.RegularExpressions.Generator
             // were [\w\d], so since ch >= 128, we need to fall back to calling CharInClass.
             return (negate, invariant) switch
             {
-                (false, false) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))",
-                (true,  false) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0 : !global::System.Text.RegularExpressions.RegexRunner.CharInClass((char)ch, {Literal(charClass)}))",
-                (false, true)  => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : global::System.Text.RegularExpressions.RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))",
-                (true,  true)  => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0 : !global::System.Text.RegularExpressions.RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))",
+                (false, false) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass((char)ch, {Literal(charClass)}))",
+                (true,  false) => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0 : !RegexRunner.CharInClass((char)ch, {Literal(charClass)}))",
+                (false, true)  => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) != 0 : RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))",
+                (true,  true)  => $"((ch = {chExpr}) < 128 ? ({Literal(bitVectorString)}[ch >> 4] & (1 << (ch & 0xF))) == 0 : !RegexRunner.CharInClass(char.ToLowerInvariant((char)ch), {Literal(charClass)}))",
             };
         }
 
@@ -3970,7 +3961,7 @@ namespace System.Text.RegularExpressions.Generator
                 // The options were formatted as an int, which means the runtime couldn't
                 // produce a textual representation.  So just output casting the value as an int.
                 Debug.Fail("This shouldn't happen, as we should only get to the point of emitting code if RegexOptions was valid.");
-                return $"(global::System.Text.RegularExpressions.RegexOptions)({(int)options})";
+                return $"(RegexOptions)({(int)options})";
             }
 
             // Parse the runtime-generated "Option1, Option2" into each piece and then concat
@@ -3978,7 +3969,7 @@ namespace System.Text.RegularExpressions.Generator
             string[] parts = s.Split(new[] { ',' }, StringSplitOptions.RemoveEmptyEntries);
             for (int i = 0; i < parts.Length; i++)
             {
-                parts[i] = "global::System.Text.RegularExpressions.RegexOptions." + parts[i].Trim();
+                parts[i] = "RegexOptions." + parts[i].Trim();
             }
             return string.Join(" | ", parts);
         }
@@ -4198,19 +4189,5 @@ namespace System.Text.RegularExpressions.Generator
                 }
             }
         }
-
-        /// <summary>Bit flags indicating which additional helpers should be emitted into the regex class.</summary>
-        [Flags]
-        private enum RequiredHelperFunctions
-        {
-            /// <summary>No additional functions are required.</summary>
-            None = 0b0,
-            /// <summary>The IsWordChar helper is required.</summary>
-            IsWordChar = 0b1,
-            /// <summary>The IsBoundary helper is required.</summary>
-            IsBoundary = 0b10,
-            /// <summary>The IsECMABoundary helper is required.</summary>
-            IsECMABoundary = 0b100
-        }
     }
 }
index 7c3121e..d6e1a01 100644 (file)
@@ -178,10 +178,10 @@ namespace System.Text.RegularExpressions.Generator
             }
 
             // Parse the input pattern
-            RegexTree tree;
+            RegexTree regexTree;
             try
             {
-                tree = RegexParser.Parse(pattern, regexOptions | RegexOptions.Compiled, culture); // make sure Compiled is included to get all optimizations applied to it
+                regexTree = RegexParser.Parse(pattern, regexOptions | RegexOptions.Compiled, culture); // make sure Compiled is included to get all optimizations applied to it
             }
             catch (Exception e)
             {
@@ -192,37 +192,36 @@ namespace System.Text.RegularExpressions.Generator
             string? ns = regexMethodSymbol.ContainingType?.ContainingNamespace?.ToDisplayString(
                 SymbolDisplayFormat.FullyQualifiedFormat.WithGlobalNamespaceStyle(SymbolDisplayGlobalNamespaceStyle.Omitted));
 
+            var regexType = new RegexType(
+                typeDec is RecordDeclarationSyntax rds ? $"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText,
+                ns ?? string.Empty,
+                $"{typeDec.Identifier}{typeDec.TypeParameterList}");
+
             var regexMethod = new RegexMethod(
+                regexType,
                 methodSyntax,
                 regexMethodSymbol.Name,
                 methodSyntax.Modifiers.ToString(),
                 pattern,
                 regexOptions,
                 matchTimeout ?? Timeout.Infinite,
-                tree);
-
-            var regexType = new RegexType(
-                regexMethod,
-                typeDec is RecordDeclarationSyntax rds ? $"{typeDec.Keyword.ValueText} {rds.ClassOrStructKeyword}" : typeDec.Keyword.ValueText,
-                ns ?? string.Empty,
-                $"{typeDec.Identifier}{typeDec.TypeParameterList}");
+                regexTree);
 
             RegexType current = regexType;
             var parent = typeDec.Parent as TypeDeclarationSyntax;
 
             while (parent is not null && IsAllowedKind(parent.Kind()))
             {
-                current.ParentClass = new RegexType(
-                    null,
+                current.Parent = new RegexType(
                     parent is RecordDeclarationSyntax rds2 ? $"{parent.Keyword.ValueText} {rds2.ClassOrStructKeyword}" : parent.Keyword.ValueText,
                     ns ?? string.Empty,
                     $"{parent.Identifier}{parent.TypeParameterList}");
 
-                current = current.ParentClass;
+                current = current.Parent;
                 parent = parent.Parent as TypeDeclarationSyntax;
             }
 
-            return regexType;
+            return regexMethod;
 
             static bool IsAllowedKind(SyntaxKind kind) =>
                 kind == SyntaxKind.ClassDeclaration ||
@@ -233,12 +232,16 @@ namespace System.Text.RegularExpressions.Generator
         }
 
         /// <summary>A regex method.</summary>
-        internal sealed record RegexMethod(MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexTree Tree);
+        internal sealed record RegexMethod(RegexType DeclaringType, MethodDeclarationSyntax MethodSyntax, string MethodName, string Modifiers, string Pattern, RegexOptions Options, int MatchTimeout, RegexTree Tree)
+        {
+            public int GeneratedId { get; set; }
+            public string GeneratedName => $"{MethodName}_{GeneratedId}";
+        }
 
         /// <summary>A type holding a regex method.</summary>
-        internal sealed record RegexType(RegexMethod? Method, string Keyword, string Namespace, string Name)
+        internal sealed record RegexType(string Keyword, string Namespace, string Name)
         {
-            public RegexType? ParentClass { get; set; }
+            public RegexType? Parent { get; set; }
         }
     }
 }
index b840aff..69b8f55 100644 (file)
@@ -2,11 +2,13 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 
 using System;
+using System.CodeDom.Compiler;
 using System.Collections.Generic;
 using System.Collections.Immutable;
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Diagnostics.Tracing;
+using System.IO;
 using System.Linq;
 using System.Runtime.CompilerServices;
 using System.Text;
@@ -23,65 +25,239 @@ namespace System.Text.RegularExpressions.Generator
     [Generator(LanguageNames.CSharp)]
     public partial class RegexGenerator : IIncrementalGenerator
     {
-        public void Initialize(IncrementalGeneratorInitializationContext context)
+        /// <summary>Name of the type emitted to contain helpers used by the generated code.</summary>
+        private const string HelpersTypeName = "Utilities";
+        /// <summary>Namespace containing all the generated code.</summary>
+        private const string GeneratedNamespace = "System.Text.RegularExpressions.Generated";
+        /// <summary>Code for a [GeneratedCode] attribute to put on the top-level generated members.</summary>
+        private static readonly string s_generatedCodeAttribute = $"GeneratedCodeAttribute(\"{typeof(RegexGenerator).Assembly.GetName().Name}\", \"{typeof(RegexGenerator).Assembly.GetName().Version}\")";
+        /// <summary>Header comments and usings to include at the top of every generated file.</summary>
+        private static readonly string[] s_headers = new string[]
         {
-            // To avoid invalidating the generator's output when anything from the compilation
-            // changes, we will extract from it the only thing we care about: whether unsafe
-            // code is allowed.
-            IncrementalValueProvider<bool> allowUnsafeProvider =
-                context.CompilationProvider
-                .Select((x, _) => x.Options is CSharpCompilationOptions { AllowUnsafe: true });
+            "// <auto-generated/>",
+            "#nullable enable",
+            "#pragma warning disable CS0162 // Unreachable code",
+            "#pragma warning disable CS0164 // Unreferenced label",
+            "#pragma warning disable CS0219 // Variable assigned but never used",
+        };
 
-            // Contains one entry per regex method, either the generated code for that regex method,
-            // a diagnostic to fail with, or null if no action should be taken for that regex.
+        public void Initialize(IncrementalGeneratorInitializationContext context)
+        {
+            // Produces one entry per generated regex.  This may be:
+            // - Diagnostic in the case of a failure that should end the compilation
+            // - (RegexMethod regexMethod, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers) in the case of valid regex
+            // - (RegexMethod regexMethod, string reason, Diagnostic diagnostic) in the case of a limited-support regex
             IncrementalValueProvider<ImmutableArray<object?>> codeOrDiagnostics =
                 context.SyntaxProvider
 
-                // Find all MethodDeclarationSyntax nodes attributed with RegexGenerator and gather the required information
+                // Find all MethodDeclarationSyntax nodes attributed with RegexGenerator and gather the required information.
                 .CreateSyntaxProvider(IsSyntaxTargetForGeneration, GetSemanticTargetForGeneration)
                 .Where(static m => m is not null)
 
-                // Pair each with whether unsafe code is allowed
-                .Combine(allowUnsafeProvider)
-
-                // Get the resulting code string or error Diagnostic for
-                // each MethodDeclarationSyntax/allow-unsafe-blocks pair
+                // Generate the RunnerFactory for each regex, if possible.  This is where the bulk of the implementation occurs.
                 .Select((state, _) =>
                 {
-                    Debug.Assert(state.Left is not null);
-                    return state.Left is RegexType regexType ? EmitRegexType(regexType, state.Right) : state.Left;
+                    if (state is not RegexMethod regexMethod)
+                    {
+                        Debug.Assert(state is Diagnostic);
+                        return state;
+                    }
+
+                    // If we're unable to generate a full implementation for this regex, report a diagnostic.
+                    // We'll still output a limited implementation that just caches a new Regex(...).
+                    if (!regexMethod.Tree.Root.SupportsCompilation(out string? reason))
+                    {
+                        return (regexMethod, reason, Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, regexMethod.MethodSyntax.GetLocation()));
+                    }
+
+                    // Generate the core logic for the regex.
+                    Dictionary<string, string[]> requiredHelpers = new();
+                    var sw = new StringWriter();
+                    var writer = new IndentedTextWriter(sw);
+                    writer.Indent += 3;
+                    writer.WriteLine();
+                    EmitRegexDerivedTypeRunnerFactory(writer, regexMethod, requiredHelpers);
+                    writer.Indent -= 3;
+                    return (regexMethod, sw.ToString(), requiredHelpers);
                 })
                 .Collect();
 
+            // To avoid invalidating every regex's output when anything from the compilation changes,
+            // we extract from it the only things we care about: whether unsafe code is allowed
+            // and the assembly's name.  Only that information is then fed into RegisterSourceOutput,
+            // along with all of the cached generated data from each regex.
+            IncrementalValueProvider<(bool AllowUnsafe, string? AssemblyName)> compilationDataProvider =
+                context.CompilationProvider
+                .Select((x, _) => (x.Options is CSharpCompilationOptions { AllowUnsafe: true }, x.AssemblyName));
+
             // When there is something to output, take all the generated strings and concatenate them to output,
             // and raise all of the created diagnostics.
-            context.RegisterSourceOutput(codeOrDiagnostics, static (context, results) =>
+            context.RegisterSourceOutput(codeOrDiagnostics.Combine(compilationDataProvider), static (context, compilationDataAndResults) =>
             {
-                var code = new List<string>(s_headers.Length + results.Length);
-
-                // Add file header and required usings
-                code.AddRange(s_headers);
+                ImmutableArray<object?> results = compilationDataAndResults.Left;
 
+                // Report any top-level diagnostics.
+                bool allFailures = true;
                 foreach (object? result in results)
                 {
-                    switch (result)
+                    if (result is Diagnostic d)
                     {
-                        case Diagnostic d:
-                            context.ReportDiagnostic(d);
-                            break;
+                        context.ReportDiagnostic(d);
+                    }
+                    else
+                    {
+                        allFailures = false;
+                    }
+                }
+                if (allFailures)
+                {
+                    return;
+                }
+
+                // At this point we'll be emitting code.  Create a writer to hold it all.
+                var sw = new StringWriter();
+                IndentedTextWriter writer = new(sw);
+
+                // Add file headers and required usings.
+                foreach (string header in s_headers)
+                {
+                    writer.WriteLine(header);
+                }
+                writer.WriteLine();
 
-                        case ValueTuple<string, ImmutableArray<Diagnostic>> t:
-                            code.Add(t.Item1);
-                            foreach (Diagnostic d in t.Item2)
+                // For every generated type, we give it an incrementally increasing ID, in order to create
+                // unique type names even in situations where method names were the same, while also keeping
+                // the type names short.  Note that this is why we only generate the RunnerFactory implementations
+                // earlier in the pipeline... we want to avoid generating code that relies on the class names
+                // until we're able to iterate through them linearly keeping track of a deterministic ID
+                // used to name them.  The boilerplate code generation that happens here is minimal when compared to
+                // the work required to generate the actual matching code for the regex.
+                int id = 0;
+                string generatedClassName = $"__{ComputeStringHash(compilationDataAndResults.Right.AssemblyName ?? ""):x}";
+
+                // If we have any (RegexMethod regexMethod, string reason, Diagnostic diagnostic), these are regexes for which we have
+                // limited support and need to simply output boilerplate.  We need to emit their diagnostics.
+                // If we have any (RegexMethod regexMethod, string runnerFactoryImplementation, Dictionary<string, string[]> requiredHelpers),
+                // those are generated implementations to be emitted.  We need to gather up their required helpers.
+                Dictionary<string, string[]> requiredHelpers = new();
+                foreach (object? result in results)
+                {
+                    RegexMethod? regexMethod = null;
+                    if (result is ValueTuple<RegexMethod, string, Diagnostic> limitedSupportResult)
+                    {
+                        context.ReportDiagnostic(limitedSupportResult.Item3);
+                        regexMethod = limitedSupportResult.Item1;
+                    }
+                    else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
+                    {
+                        foreach (KeyValuePair<string, string[]> helper in regexImpl.Item3)
+                        {
+                            if (!requiredHelpers.ContainsKey(helper.Key))
                             {
-                                context.ReportDiagnostic(d);
+                                requiredHelpers.Add(helper.Key, helper.Value);
                             }
-                            break;
+                        }
+
+                        regexMethod = regexImpl.Item1;
+                    }
+
+                    if (regexMethod is not null)
+                    {
+                        regexMethod.GeneratedId = id++;
+                        EmitRegexPartialMethod(regexMethod, writer, generatedClassName);
+                        writer.WriteLine();
+                    }
+                }
+
+                // At this point we've emitted all the partial method definitions, but we still need to emit the actual regex-derived implementations.
+                // These are all emitted inside of our generated class.
+                // TODO https://github.com/dotnet/csharplang/issues/5529:
+                // When C# provides a mechanism for shielding generated code from the rest of the project, it should be employed
+                // here for the generated class.  At that point, the generated class wrapper can be removed, and all of the types
+                // generated inside of it (one for each regex as well as the helpers type) should be shielded.
+
+                writer.WriteLine($"namespace {GeneratedNamespace}");
+                writer.WriteLine($"{{");
+
+                // We emit usings here now that we're inside of a namespace block and are no longer emitting code into
+                // a user's partial type.  We can now rely on binding rules mapping to these usings and don't need to
+                // use global-qualified names for the rest of the implementation.
+                writer.WriteLine($"    using System;");
+                writer.WriteLine($"    using System.CodeDom.Compiler;");
+                writer.WriteLine($"    using System.Collections;");
+                writer.WriteLine($"    using System.ComponentModel;");
+                writer.WriteLine($"    using System.Globalization;");
+                writer.WriteLine($"    using System.Runtime.CompilerServices;");
+                writer.WriteLine($"    using System.Text.RegularExpressions;");
+                writer.WriteLine($"    using System.Threading;");
+                writer.WriteLine($"");
+                if (compilationDataAndResults.Right.AllowUnsafe)
+                {
+                    writer.WriteLine($"    [SkipLocalsInit]");
+                }
+                writer.WriteLine($"    [{s_generatedCodeAttribute}]");
+                writer.WriteLine($"    [EditorBrowsable(EditorBrowsableState.Never)]");
+                writer.WriteLine($"    internal static class {generatedClassName}");
+                writer.WriteLine($"    {{");
+
+                // Emit each Regex-derived type.
+                writer.Indent += 2;
+                foreach (object? result in results)
+                {
+                    if (result is ValueTuple<RegexMethod, string, Diagnostic> limitedSupportResult)
+                    {
+                        EmitRegexLimitedBoilerplate(writer, limitedSupportResult.Item1, limitedSupportResult.Item1.GeneratedId, limitedSupportResult.Item2);
+                        writer.WriteLine();
+                    }
+                    else if (result is ValueTuple<RegexMethod, string, Dictionary<string, string[]>> regexImpl)
+                    {
+                        EmitRegexDerivedImplementation(writer, regexImpl.Item1, regexImpl.Item1.GeneratedId, regexImpl.Item2);
+                        writer.WriteLine();
+                    }
+                }
+                writer.Indent -= 2;
+
+                // If any of the Regex-derived types asked for helper methods, emit those now.
+                if (requiredHelpers.Count != 0)
+                {
+                    writer.Indent += 2;
+                    writer.WriteLine($"private static class {HelpersTypeName}");
+                    writer.WriteLine($"{{");
+                    writer.Indent++;
+                    foreach (KeyValuePair<string, string[]> helper in requiredHelpers)
+                    {
+                        foreach (string value in helper.Value)
+                        {
+                            writer.WriteLine(value);
+                        }
+                        writer.WriteLine();
                     }
+                    writer.Indent--;
+                    writer.WriteLine($"}}");
+                    writer.Indent -= 2;
                 }
 
-                context.AddSource("RegexGenerator.g.cs", string.Join(Environment.NewLine, code));
+                writer.WriteLine($"    }}");
+                writer.WriteLine($"}}");
+
+                // Save out the source
+                context.AddSource("RegexGenerator.g.cs", sw.ToString());
             });
         }
+
+        /// <summary>Computes a hash of the string.</summary>
+        /// <remarks>
+        /// Currently an FNV-1a hash function. The actual algorithm used doesn't matter; just something
+        /// simple to create a deterministic, pseudo-random value that's based on input text.
+        /// </remarks>
+        private static uint ComputeStringHash(string s)
+        {
+            uint hashCode = 2166136261;
+            foreach (char c in s)
+            {
+                hashCode = (c ^ hashCode) * 16777619;
+            }
+            return hashCode;
+        }
     }
 }
index 73f51b8..8e910a1 100644 (file)
@@ -13,6 +13,7 @@
     <IsNETCoreAppAnalyzer>true</IsNETCoreAppAnalyzer>
     <AnalyzerLanguage>cs</AnalyzerLanguage>
     <IsPackable>false</IsPackable>
+    <LangVersion>Preview</LangVersion>
   </PropertyGroup>
 
   <ItemGroup>
index c2d3070..5fc3364 100644 (file)
@@ -95,10 +95,12 @@ namespace System.Text.RegularExpressions.Tests
 
             comp = comp.AddSyntaxTrees(generatorResults.GeneratedTrees.ToArray());
             EmitResult results = comp.Emit(Stream.Null, cancellationToken: cancellationToken);
-            if (!results.Success || results.Diagnostics.Length != 0 || generatorResults.Diagnostics.Length != 0)
+            ImmutableArray<Diagnostic> generatorDiagnostics = generatorResults.Diagnostics.RemoveAll(d => d.Severity <= DiagnosticSeverity.Hidden);
+            ImmutableArray<Diagnostic> resultsDiagnostics = results.Diagnostics.RemoveAll(d => d.Severity <= DiagnosticSeverity.Hidden);
+            if (!results.Success || resultsDiagnostics.Length != 0 || generatorDiagnostics.Length != 0)
             {
                 throw new ArgumentException(
-                    string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorResults.Diagnostics)) + Environment.NewLine +
+                    string.Join(Environment.NewLine, resultsDiagnostics.Concat(generatorDiagnostics)) + Environment.NewLine +
                     string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => t.ToString())));
             }
 
@@ -196,11 +198,12 @@ namespace System.Text.RegularExpressions.Tests
             var dll = new MemoryStream();
             comp = comp.AddSyntaxTrees(generatorResults.GeneratedTrees.ToArray());
             EmitResult results = comp.Emit(dll, options: s_emitOptions, cancellationToken: cancellationToken);
-            if (!results.Success || results.Diagnostics.Length != 0)
+            ImmutableArray<Diagnostic> resultsDiagnostics = results.Diagnostics.RemoveAll(d => d.Severity <= DiagnosticSeverity.Hidden);
+            if (!results.Success || resultsDiagnostics.Length != 0)
             {
                 throw new ArgumentException(
                     string.Join(Environment.NewLine, generatorResults.GeneratedTrees.Select(t => NumberLines(t.ToString()))) + Environment.NewLine +
-                    string.Join(Environment.NewLine, results.Diagnostics.Concat(generatorDiagnostics)));
+                    string.Join(Environment.NewLine, resultsDiagnostics.Concat(generatorDiagnostics)));
             }
             dll.Position = 0;
 
index 63e592f..5627467 100644 (file)
@@ -518,6 +518,52 @@ namespace System.Text.RegularExpressions.Tests
             ", compile: true));
         }
 
+        [Fact]
+        public async Task Valid_SameMethodNameInMultipleTypes() // asserts RunGenerator returns nothing when partial methods named Valid are declared in several different types
+        {
+            Assert.Empty(await RegexGeneratorHelper.RunGenerator(@"
+                using System.Text.RegularExpressions;
+                namespace A
+                {
+                    public partial class B<U>
+                    {
+                        private partial class C<T>
+                        {
+                            [RegexGenerator(""1"")]
+                            public partial Regex Valid();
+                        }
+
+                        private partial class C<T1,T2>
+                        {
+                            [RegexGenerator(""2"")]
+                            private static partial Regex Valid();
+
+                            private partial class D
+                            {
+                                [RegexGenerator(""3"")]
+                                internal partial Regex Valid();
+                            }
+                        }
+
+                        private partial class E
+                        {
+                            [RegexGenerator(""4"")]
+                            private static partial Regex Valid();
+                        }
+                    }
+                }
+
+                partial class F
+                {
+                    [RegexGenerator(""5"")]
+                    public partial Regex Valid();
+
+                    [RegexGenerator(""6"")]
+                    public partial Regex Valid2();
+                }
+            ", compile: true)); // NOTE(review): compile: true presumably also compiles the generated output - confirm against RegexGeneratorHelper.RunGenerator
+        }
+
         public static IEnumerable<object[]> Valid_Modifiers_MemberData()
         {
             foreach (string type in new[] { "class", "struct", "record", "record struct", "record class", "interface" })