Fix length check for Regex BOL FindFirstChar optimization (#55574)
author Stephen Toub <stoub@microsoft.com>
Tue, 13 Jul 2021 21:10:18 +0000 (17:10 -0400)
committer GitHub <noreply@github.com>
Tue, 13 Jul 2021 21:10:18 +0000 (17:10 -0400)
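For a beginning-of-line anchor, FindFirstChar uses IndexOf to quickly skip ahead to the next \n. However, we neglected to check whether that skip moved us past an explicitly specified end position (runtextend). This change adds the missing check: if the position after the newline would exceed runtextend, the search now fails instead of continuing beyond the requested range.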

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs

index c06b5af67c635b25bf2e48ff0ef096733bf447ff..b9b6e791ed5720af7d1f0965d63b806c7e4a3582 100644 (file)
@@ -1240,19 +1240,21 @@ namespace System.Text.RegularExpressions
                             {
                                 Stloc(newlinePos);
 
-                                // if (newlinePos == -1)
+                                // if (newlinePos == -1 || newlinePos + 1 > runtextend)
                                 // {
                                 //     runtextpos = runtextend;
                                 //     return false;
                                 // }
-                                Label foundNextLine = DefineLabel();
                                 Ldloc(newlinePos);
                                 Ldc(-1);
-                                Bne(foundNextLine);
-                                BrFar(returnFalse);
+                                Beq(returnFalse);
+                                Ldloc(newlinePos);
+                                Ldc(1);
+                                Add();
+                                Ldloc(_runtextendLocal);
+                                Bgt(returnFalse);
 
                                 // runtextpos = newlinePos + 1;
-                                MarkLabel(foundNextLine);
                                 Ldloc(newlinePos);
                                 Ldc(1);
                                 Add();
index 4cc3dc528605b95d88ea8670f0d87433a9fe76de..d557ec6c3aa734e6e747c779b9f5f9b42d67285e 100644 (file)
@@ -406,7 +406,7 @@ namespace System.Text.RegularExpressions
                 if (runtextpos > runtextbeg && runtext![runtextpos - 1] != '\n')
                 {
                     int newline = runtext.IndexOf('\n', runtextpos);
-                    if (newline == -1)
+                    if (newline == -1 || newline + 1 > runtextend)
                     {
                         runtextpos = runtextend;
                         return false;
@@ -457,7 +457,7 @@ namespace System.Text.RegularExpressions
                     if (!_code.LeadingCharClasses[0].CaseInsensitive)
                     {
                         // singleton, left-to-right, case-sensitive
-                        int i = runtext.AsSpan(runtextpos, runtextend - runtextpos).IndexOf(ch);
+                        int i = span.IndexOf(ch);
                         if (i >= 0)
                         {
                             runtextpos += i;
index d5d690b0c29c1f912c9ac789383f9d8df7b9d42d..719f280c91726b49ea733465f72651c49af05ffc 100644 (file)
@@ -152,6 +152,9 @@ namespace System.Text.RegularExpressions.Tests
             // Using beginning/end of string chars \A, \Z: Actual - "\\Aaaa\\w+zzz\\Z"
             yield return new object[] { @"\Aaaa\w+zzz\Z", "aaaasdfajsdlfjzzza", RegexOptions.None, 0, 18, false, string.Empty };
 
+            // Anchors and multiline
+            yield return new object[] { @"^A$", "ABC\n", RegexOptions.Multiline, 0, 2, false, string.Empty };
+
             // Using beginning/end of string chars \A, \Z: Actual - "\\Aaaa\\w+zzz\\Z"
             yield return new object[] { @"\A(line2\n)line3\Z", "line2\nline3\n", RegexOptions.Multiline, 0, 12, true, "line2\nline3" };
 
@@ -813,7 +816,7 @@ namespace System.Text.RegularExpressions.Tests
                 }
             };
 
-            // Mutliline
+            // Multiline
             yield return new object[]
             {
                 "(line2$\n)line3", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24,
@@ -824,7 +827,7 @@ namespace System.Text.RegularExpressions.Tests
                 }
             };
 
-            // Mutliline
+            // Multiline
             yield return new object[]
             {
                 "(line2\n^)line3", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24,
@@ -835,7 +838,7 @@ namespace System.Text.RegularExpressions.Tests
                 }
             };
 
-            // Mutliline
+            // Multiline
             yield return new object[]
             {
                 "(line3\n$\n)line4", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24,
@@ -846,7 +849,7 @@ namespace System.Text.RegularExpressions.Tests
                 }
             };
 
-            // Mutliline
+            // Multiline
             yield return new object[]
             {
                 "(line3\n^\n)line4", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24,
@@ -857,7 +860,7 @@ namespace System.Text.RegularExpressions.Tests
                 }
             };
 
-            // Mutliline
+            // Multiline
             yield return new object[]
             {
                 "(line2$\n^)line3", "line1\nline2\nline3\n\nline4", RegexOptions.Multiline, 0, 24,