Add experimental Regex test based on SampleMatches (#79364)
author Stephen Toub <stoub@microsoft.com>
Thu, 8 Dec 2022 01:23:59 +0000 (20:23 -0500)
committer GitHub <noreply@github.com>
Thu, 8 Dec 2022 01:23:59 +0000 (20:23 -0500)
This adds a (currently disabled) test that takes the ~15K regexes in our pattern database, generates inputs for each using the NonBacktracking engine's sampler, and then validates all the engines with that pattern.  The sampler currently hangs on some patterns, and it asserts on others, so this is manual-only until such issues can be fixed.  Still, I was able to run another ~10K patterns through all the engines before it fell over with an assert.

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.Sample.cs
src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.KnownPattern.Tests.cs

index 3bc9ce7..571a1c4 100644 (file)
@@ -30,6 +30,8 @@ namespace System.Text.RegularExpressions.Symbolic
         [ExcludeFromCodeCoverage(Justification = "Currently only used for testing")]
         public override IEnumerable<string> SampleMatches(int k, int randomseed)
         {
+            var results = new List<string>();
+
             lock (this)
             {
                 // Zero is treated as no seed, instead using a system provided one
@@ -119,7 +121,7 @@ namespace System.Text.RegularExpressions.Symbolic
                             // Choose to stop here based on a coin-toss
                             if (FlipBiasedCoin(random, SampleMatchesStoppingProbability))
                             {
-                                yield return latestCandidate.ToString();
+                                results.Add(latestCandidate.ToString());
                                 break;
                             }
                         }
@@ -153,12 +155,14 @@ namespace System.Text.RegularExpressions.Symbolic
                             // such as @"no\bway" or due to poor choice of c -- no anchor is enabled -- so this is a deadend.
                             if (latestCandidate != null)
                             {
-                                yield return latestCandidate.ToString();
+                                results.Add(latestCandidate.ToString());
                             }
                             break;
                         }
                     }
                 }
+
+                return results;
             }
 
             static BDD ToBDD(TSet set, ISolver<TSet> solver, CharSetSolver charSetSolver) => solver.ConvertToBDD(set, charSetSolver);
index fa43702..16e333a 100644 (file)
@@ -5,8 +5,12 @@ using System.Collections.Generic;
 using System.Globalization;
 using System.IO;
 using System.Linq;
+using System.Reflection;
+using System.Runtime;
 using System.Text.Json;
+using System.Threading;
 using System.Threading.Tasks;
+using Microsoft.DotNet.XUnitExtensions;
 using Xunit;
 
 namespace System.Text.RegularExpressions.Tests
@@ -1556,6 +1560,64 @@ namespace System.Text.RegularExpressions.Tests
                     chunk.Select(r => (r.Pattern, (CultureInfo?)null, (RegexOptions?)r.Options, (TimeSpan?)null)).ToArray()).GetAwaiter().GetResult();
             });
         }
+
+        [ActiveIssue("Manual execution only for now until stability is improved")]
+        [OuterLoop("Super slow")]
+        [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.Is64BitProcess))] // restricted to 64-bit processes because the test consumes a lot of memory
+        public async Task PatternsDataSet_GenerateInputsWithNonBacktracking_MatchWithAllEngines() // for each data-set pattern: sample inputs via the NonBacktracking regex, then assert every available engine matches them
+        {
+            MethodInfo? sampleMatchesMI = typeof(Regex).GetMethod("SampleMatches", BindingFlags.NonPublic | BindingFlags.Instance); // SampleMatches is looked up via reflection as a non-public instance method
+            if (sampleMatchesMI is null)
+            {
+                throw new SkipTestException("Could not find Regex.SampleMatches");
+            }
+            Func<Regex, int, int, IEnumerable<string>> sampleMatches = sampleMatchesMI.CreateDelegate<Func<Regex, int, int, IEnumerable<string>>>(); // no target is bound, so the Regex instance is supplied as the first argument on each call
+
+            DataSetExpression[] entries = s_patternsDataSet.Value;
+            for (int i = 0; i < entries.Length; i++) // indexed loop so that `i` can identify the entry in diagnostic output
+            {
+                DataSetExpression entry = entries[i];
+
+                Regex generator;
+                try
+                {
+                    generator = new Regex(entry.Pattern, RegexHelpers.RegexOptionNonBacktracking | entry.Options);
+                }
+                catch (Exception e) when (e is NotSupportedException or ArgumentOutOfRangeException) // entries whose construction throws one of these two exceptions are skipped
+                {
+                    continue;
+                }
+
+                const int NumInputs = 3; // passed as the first int argument of SampleMatches; presumably the number of samples requested -- TODO confirm
+                const int Seed = 42; // fixed value passed as the second int argument of SampleMatches (the random seed)
+                IEnumerable<string> expectedMatchInputs = null; // assigned inside the ControlledExecution.Run callback below
+                try
+                {
+#pragma warning disable SYSLIB0046 // ControlledExecution.Run is flagged by SYSLIB0046; used temporarily because some SampleMatches calls hang
+                    using var cts = new CancellationTokenSource(TimeSpan.FromSeconds(3)); // token is canceled after 3 seconds
+                    ControlledExecution.Run(() => expectedMatchInputs = sampleMatches(generator, NumInputs, Seed), cts.Token);
+#pragma warning restore SYSLIB0046
+                }
+                catch (OperationCanceledException) // cancellation is reported on stderr and the entry is skipped rather than failing the test
+                {
+                    Console.Error.WriteLine($"*** SampleMatches hung on entry {i} ***");
+                    continue;
+                }
+
+                foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
+                {
+                    Regex r = engine == RegexEngine.NonBacktracking ? // reuse the already-constructed NonBacktracking instance instead of building a second one
+                        generator :
+                        await RegexHelpers.GetRegexAsync(engine, entry.Pattern, entry.Options);
+
+                    foreach (string input in expectedMatchInputs)
+                    {
+                        Console.WriteLine($"[{i}-{engine}] {r} <= {input}"); // logs entry index, engine, regex and input before each assertion
+                        Assert.True(r.IsMatch(input)); // each sampled input is expected to match under every engine
+                    }
+                }
+            }
+        }
 #endif
     }
 }