Add a comment in source-generated regexes explaining why they cannot have optimized...
authorTheodore Tsirpanis <teo@tsirpanis.gr>
Thu, 3 Mar 2022 16:26:18 +0000 (18:26 +0200)
committerGitHub <noreply@github.com>
Thu, 3 Mar 2022 16:26:18 +0000 (11:26 -0500)
* Add a comment in source-generated regexes explaining why they cannot have optimized code.

* Apply suggestions from code review

Co-authored-by: Stephen Toub <stoub@microsoft.com>
Co-authored-by: Stephen Toub <stoub@microsoft.com>
src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs

index 556ac84..743f8a6 100644 (file)
@@ -103,11 +103,11 @@ namespace System.Text.RegularExpressions.Generator
         }
 
         /// <summary>Gets whether a given regular expression method is supported by the code generator.</summary>
-        private static bool SupportsCodeGeneration(RegexMethod rm)
+        private static bool SupportsCodeGeneration(RegexMethod rm, out string? reason)
         {
             RegexNode root = rm.Tree.Root;
 
-            if (!root.SupportsCompilation())
+            if (!root.SupportsCompilation(out reason))
             {
                 return false;
             }
@@ -119,6 +119,7 @@ namespace System.Text.RegularExpressions.Generator
                 // Place an artificial limit on max tree depth in order to mitigate such issues.
                 // The allowed depth can be tweaked as needed;its exceedingly rare to find
                 // expressions with such deep trees.
+                reason = "the regex will result in code that may exceed C# compiler limits";
                 return false;
             }
 
@@ -163,8 +164,10 @@ namespace System.Text.RegularExpressions.Generator
             writer.Write("    public static global::System.Text.RegularExpressions.Regex Instance { get; } = ");
 
             // If we can't support custom generation for this regex, spit out a Regex constructor call.
-            if (!SupportsCodeGeneration(rm))
+            if (!SupportsCodeGeneration(rm, out string? reason))
             {
+                writer.WriteLine();
+                writer.WriteLine($"// Cannot generate Regex-derived implementation because {reason}.");
                 writer.WriteLine($"new global::System.Text.RegularExpressions.Regex({patternExpression}, {optionsExpression}, {timeoutExpression});");
                 writer.WriteLine("}");
                 return ImmutableArray.Create(Diagnostic.Create(DiagnosticDescriptors.LimitedSourceGeneration, rm.MethodSyntax.GetLocation()));
index 60b1398..145d708 100644 (file)
@@ -32,7 +32,7 @@ namespace System.Text.RegularExpressions
         /// <summary>The top-level driver. Initializes everything then calls the Generate* methods.</summary>
         public RegexRunnerFactory? FactoryInstanceFromCode(string pattern, RegexTree regexTree, RegexOptions options, bool hasTimeout)
         {
-            if (!regexTree.Root.SupportsCompilation())
+            if (!regexTree.Root.SupportsCompilation(out _))
             {
                 return null;
             }
index 93df3ae..e2bb2f4 100644 (file)
@@ -1223,7 +1223,7 @@ namespace System.Text.RegularExpressions
 
                     // Now compare the rest of the branches against it.
                     int endingIndex = startingIndex + 1;
-                    for ( ; endingIndex < children.Count; endingIndex++)
+                    for (; endingIndex < children.Count; endingIndex++)
                     {
                         // Get the starting node of the next branch.
                         startingNode = children[endingIndex].FindBranchOneOrMultiStart();
@@ -2566,18 +2566,27 @@ namespace System.Text.RegularExpressions
         }
 
         // Determines whether the node supports a compilation / code generation strategy based on walking the node tree.
-        internal bool SupportsCompilation()
+        // Also returns a human-readable string to explain the reason (it will be emitted by the source generator, hence
+        // there's no need to localize).
+        internal bool SupportsCompilation([NotNullWhen(false)] out string? reason)
         {
             if (!StackHelper.TryEnsureSufficientExecutionStack())
             {
-                // If we can't recur further, code generation isn't supported as the tree is too deep.
+                reason = "run-time limits were exceeded";
                 return false;
             }
 
-            if ((Options & (RegexOptions.RightToLeft | RegexOptions.NonBacktracking)) != 0)
+            // NonBacktracking isn't supported, nor RightToLeft.  The latter applies to both the top-level
+            // options as well as when used to specify positive and negative lookbehinds.
+            if ((Options & RegexOptions.NonBacktracking) != 0)
+            {
+                reason = "RegexOptions.NonBacktracking was specified";
+                return false;
+            }
+
+            if ((Options & RegexOptions.RightToLeft) != 0)
             {
-                // NonBacktracking isn't supported, nor RightToLeft.  The latter applies to both the top-level
-                // options as well as when used to specify positive and negative lookbehinds.
+                reason = "RegexOptions.RightToLeft or a positive/negative lookbehind was used";
                 return false;
             }
 
@@ -2585,13 +2594,14 @@ namespace System.Text.RegularExpressions
             for (int i = 0; i < childCount; i++)
             {
                 // The node isn't supported if any of its children aren't supported.
-                if (!Child(i).SupportsCompilation())
+                if (!Child(i).SupportsCompilation(out reason))
                 {
                     return false;
                 }
             }
 
             // Supported.
+            reason = null;
             return true;
         }