Fix code coverage gaps in RegexGenerator and fix RegexCompiler bug (#66321)
authorStephen Toub <stoub@microsoft.com>
Tue, 8 Mar 2022 19:24:48 +0000 (14:24 -0500)
committerGitHub <noreply@github.com>
Tue, 8 Mar 2022 19:24:48 +0000 (14:24 -0500)
Noticed some easily-fillable gaps in code coverage for RegexGenerator.  In adding those tests, found and fixed a bug in RegexCompiler where we were incorrectly using `Call(s_spanIndexOfSpan)` instead of `Call(s_spanIndexOfAnySpan)`.

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreterCode.cs
src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexGeneratorParserTests.cs

index 7275ad9..0220a77 100644 (file)
@@ -1923,11 +1923,6 @@ namespace System.Text.RegularExpressions.Generator
                     string backtrack = ReserveName($"CaptureBacktrack");
                     MarkLabel(backtrack, emitSemicolon: false);
                     EmitStackPop(startingPos);
-                    if (!childBacktracks)
-                    {
-                        writer.WriteLine($"pos = {startingPos};");
-                        SliceInputSpan(writer);
-                    }
                     Goto(doneLabel);
                     writer.WriteLine();
 
@@ -2154,7 +2149,8 @@ namespace System.Text.RegularExpressions.Generator
                             EmitExpressionConditional(node);
                             break;
 
-                        case RegexNodeKind.Atomic when analysis.MayBacktrack(node.Child(0)):
+                        case RegexNodeKind.Atomic:
+                            Debug.Assert(analysis.MayBacktrack(node.Child(0)));
                             EmitAtomic(node, subsequent);
                             return;
 
@@ -3934,10 +3930,11 @@ namespace System.Text.RegularExpressions.Generator
         private static string Literal(RegexOptions options)
         {
             string s = options.ToString();
-            if (int.TryParse(s, out _))
+            if (int.TryParse(s, NumberStyles.Integer, CultureInfo.InvariantCulture, out _))
             {
                 // The options were formatted as an int, which means the runtime couldn't
                 // produce a textual representation.  So just output casting the value as an int.
+                Debug.Fail("This shouldn't happen, as we should only get to the point of emitting code if RegexOptions was valid.");
                 return $"(global::System.Text.RegularExpressions.RegexOptions)({(int)options})";
             }
 
index 717923c..573e5e4 100644 (file)
@@ -2198,13 +2198,6 @@ namespace System.Text.RegularExpressions
                     MarkLabel(backtrack);
                     EmitStackPop();
                     Stloc(startingPos);
-                    if (!childBacktracks)
-                    {
-                        // pos = startingPos
-                        Ldloc(startingPos);
-                        Stloc(pos);
-                        SliceInputSpan();
-                    }
 
                     // goto doneLabel;
                     BrFar(doneLabel);
@@ -3987,7 +3980,7 @@ namespace System.Text.RegularExpressions
                         default:
                             Ldstr(setChars.Slice(0, numSetChars).ToString());
                             Call(s_stringAsSpanMethod);
-                            Call(s_spanIndexOfSpan);
+                            Call(s_spanIndexOfAnySpan);
                             break;
                     }
                     Stloc(iterationLocal);
index 87eb2e8..8d3ac5e 100644 (file)
@@ -66,8 +66,10 @@ namespace System.Text.RegularExpressions
             }
         }
 
+#if DEBUG
         /// <summary>Gets the number of integers required to store an operation represented by the specified opcode (including the opcode).</summary>
         /// <returns>Values range from 1 (just the opcode) to 3 (the opcode plus up to two operands).</returns>
+        [ExcludeFromCodeCoverage]
         public static int OpcodeSize(RegexOpcode opcode)
         {
             opcode &= RegexOpcode.OperatorMask;
@@ -134,7 +136,6 @@ namespace System.Text.RegularExpressions
             }
         }
 
-#if DEBUG
         [ExcludeFromCodeCoverage]
         public override string ToString()
         {
index 2375d8f..8bdcf61 100644 (file)
@@ -97,12 +97,9 @@ namespace System.Text.RegularExpressions.Tests
                     yield return (@"(?<=^123 abc)def", "123 abcdef", RegexOptions.None, 0, 10, true, "def");
                     yield return (@"(?<=^123 abc)def", "123 abcdef", RegexOptions.Multiline, 0, 10, true, "def");
                     yield return (@"(?<=123$\nabc)def", "123\nabcdef", RegexOptions.Multiline, 0, 10, true, "def");
-                    yield return (@"123(?<!$) abcdef", "123 abcdef", RegexOptions.None, 0, 10, true, "123 abcdef");
                     yield return (@"\w{3}(?<=^xyz|^abc)defg", "abcdefg", RegexOptions.None, 0, 7, true, "abcdefg");
                     yield return (@"(abc)\w(?<=(?(1)d|e))", "abcdabc", RegexOptions.None, 0, 7, true, "abcd");
-                    yield return (@"(abc)\w(?<!(?(1)e|d))", "abcdabc", RegexOptions.None, 0, 7, true, "abcd");
                     yield return (@"(abc)\w(?<=(?(cd)d|e))", "abcdabc", RegexOptions.None, 0, 7, true, "abcd");
-                    yield return (@"(abc)\w(?<!(?(cd)e|d))", "abcdabc", RegexOptions.None, 0, 7, true, "abcd");
                     yield return (@"\w{3}(?<=(\w){6,8})", "abcdefghijklmnop", RegexOptions.None, 0, 16, true, "def");
                     yield return (@"\w{3}(?<=(?:\d\w){4})", "a1b2c3d4e5", RegexOptions.None, 0, 10, true, "d4e");
                     yield return (@"\w{3}(?<=(\w){6,8}?)", "abcdefghijklmnop", RegexOptions.None, 0, 16, true, "def");
@@ -117,6 +114,9 @@ namespace System.Text.RegularExpressions.Tests
 
                     // Zero-width negative lookbehind assertion: Actual - "(\\w){6}(?<!XXX)def"
                     yield return (@"(\w){6}(?<!XXX)def", "XXXabcdef", RegexOptions.None, 0, 9, true, "XXXabcdef");
+                    yield return (@"123(?<!$) abcdef", "123 abcdef", RegexOptions.None, 0, 10, true, "123 abcdef");
+                    yield return (@"(abc)\w(?<!(?(1)e|d))", "abcdabc", RegexOptions.None, 0, 7, true, "abcd");
+                    yield return (@"(abc)\w(?<!(?(cd)e|d))", "abcdabc", RegexOptions.None, 0, 7, true, "abcd");
 
                     // Nonbacktracking subexpression: Actual - "[^0-9]+(?>[0-9]+)3"
                     // The last 3 causes the match to fail, since the non backtracking subexpression does not give up the last digit it matched
@@ -146,6 +146,13 @@ namespace System.Text.RegularExpressions.Tests
                         yield return (Case("(?>[^z]+)z"), "zzzzxyxyxyz123", options, 4, 9, true, "xyxyxyz");
                         yield return (Case("(?>(?>[^z]+))z"), "zzzzxyxyxyz123", options, 4, 9, true, "xyxyxyz");
                         yield return (Case("(?>[^z]*)z123"), "zzzzxyxyxyz123", options, 4, 10, true, "xyxyxyz123");
+
+                        yield return (Case("(?>[^12]+)1"), "121231", options, 0, 6, true, "31");
+                        yield return (Case("(?>[^123]+)1"), "12312341", options, 0, 8, true, "41");
+                        yield return (Case("(?>[^1234]+)1"), "1234123451", options, 0, 10, true, "51");
+                        yield return (Case("(?>[^12345]+)1"), "123451234561", options, 0, 12, true, "61");
+                        yield return (Case("(?>[^123456]+)1"), "12345612345671", options, 0, 14, true, "71");
+
                         yield return (Case("(?>a+)123"), "aa1234", options, 0, 5, true, "aa123");
                         yield return (Case("(?>a*)123"), "aa1234", options, 0, 5, true, "aa123");
                         yield return (Case("(?>(?>a*))123"), "aa1234", options, 0, 5, true, "aa123");
@@ -193,7 +200,10 @@ namespace System.Text.RegularExpressions.Tests
                 yield return (@"((\w+))@\w+.com", "abc@def.com", RegexOptions.None, 0, 11, true, "abc@def.com");
                 yield return (@"(\w+)c@\w+.com", "abc@def.comabcdef", RegexOptions.None, 0, 17, true, "abc@def.com");
                 yield return (@"\w+://\w+\.\w+", "test https://dot.net test", RegexOptions.None, 0, 25, true, "https://dot.net");
-                yield return (@"\w+[:|$*&]//\w+\.\w+", "test https://dot.net test", RegexOptions.None, 0, 25, true, "https://dot.net");
+                yield return (@"\w+[:|]//\w+\.\w+", "test https://dot.net test", RegexOptions.None, 0, 25, true, "https://dot.net");
+                yield return (@"\w+[|:$]//\w+\.\w+", "test https://dot.net test", RegexOptions.None, 0, 25, true, "https://dot.net");
+                yield return (@"\w+[|$:*]//\w+\.\w+", "test https://dot.net test", RegexOptions.None, 0, 25, true, "https://dot.net");
+                yield return (@"\w+[|$*:&]//\w+\.\w+", "test https://dot.net test", RegexOptions.None, 0, 25, true, "https://dot.net");
                 yield return (@".+a", "baa", RegexOptions.None, 0, 3, true, "baa");
                 yield return (@"[ab]+a", "cacbaac", RegexOptions.None, 0, 7, true, "baa");
                 yield return (@"^(\d{2,3}){2}$", "1234", RegexOptions.None, 0, 4, true, "1234");
@@ -366,8 +376,11 @@ namespace System.Text.RegularExpressions.Tests
                     yield return (@"^abc|def|^efg", " def", anchorOptions, 0, 4, true, "def");
                     yield return (@"^abc|def", " def", anchorOptions, 0, 4, true, "def");
                     yield return (@"abcd$", "1234567890abcd", anchorOptions, 0, 14, true, "abcd");
-                    yield return (@"abc{1,4}d$", "1234567890abcd", anchorOptions, 0, 14, true, "abcd");
-                    yield return (@"abc{1,4}d$", "1234567890abccccd", anchorOptions, 0, 17, true, "abccccd");
+                    foreach (string endAnchor in new[] { @"$", @"\Z", @"\z" })
+                    {
+                        yield return (@"abc{1,4}d" + endAnchor, "1234567890abcd", anchorOptions, 0, 14, true, "abcd");
+                        yield return (@"abc{1,4}d" + endAnchor, "1234567890abccccd", anchorOptions, 0, 17, true, "abccccd");
+                    }
                 }
                 if (!RegexHelpers.IsNonBacktracking(engine))
                 {
@@ -582,10 +595,22 @@ namespace System.Text.RegularExpressions.Tests
                     yield return (@"(?(abc)\w+|\w{0,2})dog", "catdog", RegexOptions.None, 0, 6, true, "atdog");
                     yield return (@"(?(abc)cat|\w{0,2})dog", "catdog", RegexOptions.None, 0, 6, true, "atdog");
                     yield return ("(a|ab|abc|abcd)d", "abcd", RegexOptions.RightToLeft, 0, 4, true, "abcd");
+
                     yield return ("(?>(?:a|ab|abc|abcd))d", "abcd", RegexOptions.None, 0, 4, false, string.Empty);
                     yield return ("(?>(?:a|ab|abc|abcd))d", "abcd", RegexOptions.RightToLeft, 0, 4, true, "abcd");
+
+                    yield return (@"(...)(?(1)\w*)[a1 ]", "zabcaaaaaaa", RegexOptions.None, 0, 11, true, "zabcaaaaaaa");
+                    yield return (@"(...)(?(1)\w*)[a1 ]", "zabcaaaaaaa", RegexOptions.RightToLeft, 0, 11, true, "aaaa");
+                    yield return (@"(...)(?(1)\w*)[a1 ]", "           ", RegexOptions.None, 0, 11, true, "    ");
+                    yield return (@"(...)(?(1)\w*)[a1 ]", "           ", RegexOptions.RightToLeft, 0, 11, true, "    ");
+                    yield return (@"(...)(?(1)\w*|\s*)[a1 ]", "zabcaaaaaaa", RegexOptions.None, 0, 11, true, "zabcaaaaaaa");
+                    yield return (@"(...)(?(1)\w*|\s*)[a1 ]", "----       ", RegexOptions.None, 0, 11, true, "--- ");
+                    yield return (@"(...)(?(1)\w*|\s*)[a1 ]", "zabcaaaaaaa", RegexOptions.RightToLeft, 0, 11, true, "aaaa");
+                    yield return (@"(...)(?(1)\w*|\s*)[a1 ]", "----       ", RegexOptions.RightToLeft, 0, 11, true, "---       ");
                 }
 
+                // Character Class Substraction
+
                 // No Negation
                 yield return ("[abcd-[abcd]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
                 yield return ("[1234-[1234]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
@@ -641,6 +666,7 @@ namespace System.Text.RegularExpressions.Tests
                 yield return (@"[^\P{Lu}-[^\P{Lu}]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
                 yield return (@"[^\p{Nd}-[^\p{Nd}]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
                 yield return (@"[^\P{Nd}-[^\P{Nd}]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
+                yield return (@"[^\p{IsGreek}\p{IsGreekExtended}]", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, true, "a");
 
                 // MixedNegation
                 yield return (@"[^\p{Ll}-[\P{Ll}]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
@@ -650,7 +676,6 @@ namespace System.Text.RegularExpressions.Tests
                 yield return (@"[^\p{Nd}-[\P{Nd}]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
                 yield return (@"[\p{Nd}-[^\P{Nd}]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
 
-                // Character Class Substraction
                 yield return (@"[ab\-\[cd-[-[]]]]", "[]]", RegexOptions.None, 0, 3, false, string.Empty);
                 yield return (@"[ab\-\[cd-[-[]]]]", "-]]", RegexOptions.None, 0, 3, false, string.Empty);
                 yield return (@"[ab\-\[cd-[-[]]]]", "`]]", RegexOptions.None, 0, 3, false, string.Empty);
index b59aaee..63e592f 100644 (file)
@@ -213,6 +213,53 @@ namespace System.Text.RegularExpressions.Tests
         }
 
         [Fact]
+        public async Task Diagnostic_CustomRegexGeneratorAttribute_ZeroArgCtor()
+        {
+            IReadOnlyList<Diagnostic> diagnostics = await RegexGeneratorHelper.RunGenerator(@"
+                using System.Text.RegularExpressions;
+                partial class C
+                {
+                    [RegexGenerator]
+                    private static partial Regex InvalidCtor();
+                }
+
+                namespace System.Text.RegularExpressions
+                {
+                    [AttributeUsage(AttributeTargets.Method, AllowMultiple = false, Inherited = false)]
+                    public sealed class RegexGeneratorAttribute : Attribute
+                    {
+                    }
+                }
+            ");
+
+            Assert.Equal("SYSLIB1040", Assert.Single(diagnostics).Id);
+        }
+
+        [Fact]
+        public async Task Diagnostic_CustomRegexGeneratorAttribute_FourArgCtor()
+        {
+            IReadOnlyList<Diagnostic> diagnostics = await RegexGeneratorHelper.RunGenerator(@"
+                using System.Text.RegularExpressions;
+                partial class C
+                {
+                    [RegexGenerator(""a"", RegexOptions.None, -1, ""b""]
+                    private static partial Regex InvalidCtor();
+                }
+
+                namespace System.Text.RegularExpressions
+                {
+                    [AttributeUsage(AttributeTargets.Method, AllowMultiple = false, Inherited = false)]
+                    public sealed class RegexGeneratorAttribute : Attribute
+                    {
+                        public RegexGeneratorAttribute(string pattern, RegexOptions options, int timeout, string somethingElse) { }
+                    }
+                }
+            ");
+
+            Assert.Equal("SYSLIB1040", Assert.Single(diagnostics).Id);
+        }
+
+        [Fact]
         public async Task Valid_ClassWithoutNamespace()
         {
             Assert.Empty(await RegexGeneratorHelper.RunGenerator(@"
@@ -286,6 +333,22 @@ namespace System.Text.RegularExpressions.Tests
             ", compile: true));
         }
 
+        [Fact]
+        public async Task Valid_AdditionalAttributes()
+        {
+            Assert.Empty(await RegexGeneratorHelper.RunGenerator($@"
+                using System.Text.RegularExpressions;
+                using System.Diagnostics.CodeAnalysis;
+                partial class C
+                {{
+                    [SuppressMessage(""CATEGORY1"", ""SOMEID1"")]
+                    [RegexGenerator(""abc"")]
+                    [SuppressMessage(""CATEGORY2"", ""SOMEID2"")]
+                    private static partial Regex AdditionalAttributes();
+                }}
+            ", compile: true));
+        }
+
         [Theory]
         [InlineData(false)]
         [InlineData(true)]