Add missing regex position check after BOL optimization (#66216)
author Stephen Toub <stoub@microsoft.com>
Mon, 7 Mar 2022 00:13:14 +0000 (19:13 -0500)
committer GitHub <noreply@github.com>
Mon, 7 Mar 2022 00:13:14 +0000 (19:13 -0500)
* Add missing regex position check after BOL optimization

* Address PR feedback

src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.MultipleMatches.Tests.cs

index 053cbcf..2c72270 100644 (file)
@@ -535,6 +535,17 @@ namespace System.Text.RegularExpressions.Generator
                                 Goto(NoStartingPositionFound);
                             }
                             writer.WriteLine("pos = newlinePos + pos + 1;");
+
+                            // We've updated the position.  Make sure there's still enough room in the input for a possible match.
+                            using (EmitBlock(writer, minRequiredLength switch
+                            {
+                                0 => "if (pos > inputSpan.Length)",
+                                1 => "if (pos >= inputSpan.Length)",
+                                _ => $"if (pos > inputSpan.Length - {minRequiredLength})"
+                            }))
+                            {
+                                Goto(NoStartingPositionFound);
+                            }
                         }
                         writer.WriteLine();
                         break;
index 765be58..670b76f 100644 (file)
@@ -622,6 +622,18 @@ namespace System.Text.RegularExpressions
                                 Ldc(1);
                                 Add();
                                 Stloc(pos);
+
+                                // We've updated the position.  Make sure there's still enough room in the input for a possible match.
+                                // if (pos > inputSpan.Length - minRequiredLength) returnFalse;
+                                Ldloca(inputSpan);
+                                Call(s_spanGetLengthMethod);
+                                if (minRequiredLength != 0)
+                                {
+                                    Ldc(minRequiredLength);
+                                    Sub();
+                                }
+                                Ldloc(pos);
+                                BltFar(returnFalse);
                             }
 
                             MarkLabel(label);
index fc1b1ad..fafa720 100644 (file)
@@ -281,7 +281,13 @@ namespace System.Text.RegularExpressions
                         return false;
                     }
 
+                    // We've updated the position.  Make sure there's still enough room in the input for a possible match.
                     pos = newline + 1 + pos;
+                    if (pos > textSpan.Length - MinRequiredLength)
+                    {
+                        pos = textSpan.Length;
+                        return false;
+                    }
                 }
             }
 
index 34f60c6..e3884f1 100644 (file)
@@ -176,7 +176,7 @@ namespace System.Text.RegularExpressions.Tests
                     }
                 };
 
-                // Using ^ with multiline
+                // Using ^ and $ with multiline
                 yield return new object[]
                 {
                     engine,
@@ -244,6 +244,38 @@ namespace System.Text.RegularExpressions.Tests
                     }
                 };
 
+                yield return new object[]
+                {
+                    engine,
+                     @"^[^a]a", "bar\n", RegexOptions.Multiline,
+                     new[]
+                     {
+                         new CaptureData("ba", 0, 2)
+                     }
+                };
+
+                yield return new object[]
+                {
+                    engine,
+                     @"^[^a]a", "car\nbar\n", RegexOptions.Multiline,
+                     new[]
+                     {
+                         new CaptureData("ca", 0, 2),
+                         new CaptureData("ba", 4, 2)
+                     }
+                };
+
+                yield return new object[]
+                {
+                    engine,
+                     @"[0-9]cat$", "1cat\n2cat", RegexOptions.Multiline,
+                     new[]
+                     {
+                         new CaptureData("1cat", 0, 4),
+                         new CaptureData("2cat", 5, 4)
+                     }
+                };
+
                 if (!PlatformDetection.IsNetFramework)
                 {
                     // .NET Framework missing fix in https://github.com/dotnet/runtime/pull/1075