Add more regex tests (and fix one atomicity bug) (#33458)
author: Stephen Toub <stoub@microsoft.com>
Wed, 11 Mar 2020 21:28:02 +0000 (17:28 -0400)
committer: GitHub <noreply@github.com>
Wed, 11 Mar 2020 21:28:02 +0000 (17:28 -0400)
* Avoid automatically making Regex loops followed by a lookbehind atomic

We allow a positive lookahead to be used to determine whether a loop can be upgraded to be atomic, but we're missing the RTL check that distinguishes positive lookaheads from positive lookbehinds, so we're erroneously making some loops followed by positive lookbehinds atomic when we shouldn't.  Fix that by ensuring we only traverse Require nodes when they're lookaheads rather than lookbehinds.

Also, just for additional safety, change a subsequent check to ensure that the two nodes being compared have identical options.  Today we're just checking for case-sensitivity, but it's more robust (and doesn't hurt) to just check all options.

* Add more tests to boost code coverage

Plus lookaround tests for min length computation

12 files changed:
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
src/libraries/System.Text.RegularExpressions/tests/GroupCollectionTests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.CompileToAssembly.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.EscapeUnescape.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/RegexCharacterSetTests.cs
src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs
src/libraries/System.Text.RegularExpressions/tests/RegexReductionTests.cs

index 8ba5abd..64d7052 100644 (file)
@@ -1455,7 +1455,7 @@ namespace System.Text.RegularExpressions
                     case Concatenate:
                     case Capture:
                     case Atomic:
-                    case Require:
+                    case Require when (subsequent.Options & RegexOptions.RightToLeft) == 0: // only lookaheads, not lookbehinds (represented as RTL Require nodes)
                     case Loop when subsequent.M > 0:
                     case Lazyloop when subsequent.M > 0:
                         subsequent = subsequent.Child(0);
@@ -1465,10 +1465,8 @@ namespace System.Text.RegularExpressions
                 break;
             }
 
-            // If the two nodes don't agree on case-insensitivity, don't try to optimize.
-            // If they're both case sensitive or both case insensitive, then their tokens
-            // will be comparable.
-            if ((node.Options & RegexOptions.IgnoreCase) != (subsequent.Options & RegexOptions.IgnoreCase))
+            // If the two nodes don't agree on options in any way, don't try to optimize them.
+            if (node.Options != subsequent.Options)
             {
                 return false;
             }
index 638fc4a..99ab75e 100644 (file)
@@ -3,6 +3,7 @@
 // See the LICENSE file in the project root for more information.
 
 using System.Collections;
+using System.Collections.Generic;
 using Xunit;
 
 namespace System.Text.RegularExpressions.Tests
@@ -32,10 +33,34 @@ namespace System.Text.RegularExpressions.Tests
         }
 
         [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")]
+        public static void GetEnumerator_Generic()
+        {
+            Regex regex = new Regex(@"(?<A1>a*)(?<A2>b*)(?<A3>c*)");
+            Match match = regex.Match("aaabbccccccccccaaaabc");
+
+            GroupCollection groups = match.Groups;
+            IEnumerator<KeyValuePair<string, Group>> enumerator = ((IEnumerable<KeyValuePair<string, Group>>)groups).GetEnumerator();
+            for (int i = 0; i < 2; i++)
+            {
+                int counter = 0;
+                while (enumerator.MoveNext())
+                {
+                    Assert.Equal(groups[counter], enumerator.Current.Value);
+                    counter++;
+                }
+                Assert.False(enumerator.MoveNext());
+                Assert.Equal(groups.Count, counter);
+                enumerator.Reset();
+            }
+        }
+
+        [Fact]
         public static void GetEnumerator_Invalid()
         {
             Regex regex = new Regex(@"(?<A1>a*)(?<A2>b*)(?<A3>c*)");
             Match match = regex.Match("aaabbccccccccccaaaabc");
+
             IEnumerator enumerator = match.Groups.GetEnumerator();
 
             Assert.Throws<InvalidOperationException>(() => enumerator.Current);
@@ -48,6 +73,24 @@ namespace System.Text.RegularExpressions.Tests
         }
 
         [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")]
+        public static void GetEnumerator_Generic_Invalid()
+        {
+            Regex regex = new Regex(@"(?<A1>a*)(?<A2>b*)(?<A3>c*)");
+            Match match = regex.Match("aaabbccccccccccaaaabc");
+
+            IEnumerator<KeyValuePair<string, Group>> enumerator = ((IEnumerable<KeyValuePair<string, Group>>)match.Groups).GetEnumerator();
+
+            Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+
+            while (enumerator.MoveNext()) ;
+            Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+
+            enumerator.Reset();
+            Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+        }
+
+        [Fact]
         public static void Item_Get()
         {
             GroupCollection collection = CreateCollection();
@@ -56,6 +99,17 @@ namespace System.Text.RegularExpressions.Tests
             Assert.Equal("555-6666", collection[2].ToString());
         }
 
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")]
+        public static void ContainsKey()
+        {
+            IReadOnlyDictionary<string, Group> collection = (IReadOnlyDictionary<string, Group>)CreateCollection();
+            Assert.True(collection.ContainsKey("0"));
+            Assert.True(collection.ContainsKey("1"));
+            Assert.True(collection.ContainsKey("2"));
+            Assert.False(collection.ContainsKey("3"));
+        }
+
         [Theory]
         [InlineData(-1)]
         [InlineData(4)]
index 3a97133..653fefa 100644 (file)
@@ -5,6 +5,7 @@
 using System.IO;
 using System.Linq;
 using System.Reflection;
+using System.Reflection.Emit;
 using Xunit;
 
 namespace System.Text.RegularExpressions.Tests
@@ -21,6 +22,19 @@ namespace System.Text.RegularExpressions.Tests
             AssertExtensions.Throws<ArgumentNullException>("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd")));
             AssertExtensions.Throws<ArgumentNullException>("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd"), null));
             AssertExtensions.Throws<ArgumentNullException>("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd"), null, null));
+
+            // We currently build more code for CompileToAssembly into debug builds, which changes this particular exception type based on Debug vs Release.
+            // Until that changes, for the tests just allow them both.
+            AssertThrows<PlatformNotSupportedException, ArgumentNullException>(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { null }, new AssemblyName("abcd")));
+            AssertThrows<PlatformNotSupportedException, ArgumentNullException>(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { new RegexCompilationInfo("abc", RegexOptions.None, "abc", "", true), null }, new AssemblyName("abcd")));
+            AssertThrows<PlatformNotSupportedException, ArgumentNullException>(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { null }, new AssemblyName("abcd"), new CustomAttributeBuilder[0]));
+
+            static void AssertThrows<TException1, TException2>(Action action)
+            {
+                Exception e = Record.Exception(action);
+                Assert.NotNull(e);
+                Assert.True(e is TException1 || e is TException2);
+            }
         }
 
         [Fact]
@@ -29,8 +43,41 @@ namespace System.Text.RegularExpressions.Tests
         {
             Assert.Throws<PlatformNotSupportedException>(() =>
                 Regex.CompileToAssembly(
-                    new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) },
+                    new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true) },
                     new AssemblyName("abcd")));
+
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo("abcd", RegexOptions.CultureInvariant, "abcd", "", true, TimeSpan.FromMinutes(1)) },
+                    new AssemblyName("abcdWithTimeout")));
+
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo("(?<FirstTwoLetters>ab)cd", RegexOptions.None, "abcd", "", true, TimeSpan.FromMinutes(1)) },
+                    new AssemblyName("abcdWithNamedCapture")));
+
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo(".*\\B(\\d+)(?<output>SUCCESS)\\B.*", RegexOptions.None, "withCaptures", "", true) },
+                    new AssemblyName("withCaptures")));
+
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) },
+                    new AssemblyName("abcdWithCustomAttribute"),
+                    new[] { new CustomAttributeBuilder(typeof(AssemblyCompanyAttribute).GetConstructor(new[] { typeof(string) }), new[] { "TestCompany" }) }));
+        }
+
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
+        public void CompileToAssembly_ResourceFile_PNSE()
+        {
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) },
+                    new AssemblyName("abcdWithUnsupportedResourceFile"),
+                    attributes: null,
+                    "unsupportedResourceFile"));
         }
 
         [Fact]
index 68f4c47..aeab31d 100644 (file)
@@ -2,9 +2,14 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Collections;
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.IO;
+using System.Linq;
 using System.Runtime.InteropServices;
+using System.Runtime.Serialization;
+using System.Runtime.Serialization.Formatters.Binary;
 using System.Threading;
 using Microsoft.DotNet.RemoteExecutor;
 using Xunit;
@@ -61,10 +66,17 @@ namespace System.Text.RegularExpressions.Tests
             r = new Regex("[abc]def(ghi|jkl)", options | (RegexOptions)0x80 /*RegexOptions.Debug*/);
             Assert.False(r.Match("a").Success);
             Assert.True(r.Match("adefghi").Success);
+            Assert.Equal("123456789", r.Replace("123adefghi789", "456"));
 
             r = new Regex("(ghi|jkl)*ghi", options | (RegexOptions)0x80 /*RegexOptions.Debug*/);
             Assert.False(r.Match("jkl").Success);
             Assert.True(r.Match("ghi").Success);
+            Assert.Equal("123456789", r.Replace("123ghi789", "456"));
+
+            r = new Regex("(ghi|jkl)*ghi", options | (RegexOptions)0x80 /*RegexOptions.Debug*/, TimeSpan.FromDays(1));
+            Assert.False(r.Match("jkl").Success);
+            Assert.True(r.Match("ghi").Success);
+            Assert.Equal("123456789", r.Replace("123ghi789", "456"));
         }
 
         [Fact]
@@ -121,9 +133,81 @@ namespace System.Text.RegularExpressions.Tests
             Assert.Throws<NotSupportedException>(() => r.InitializeReferences());
         }
 
+        [Fact]
+        public void Ctor_CapNames_ReturnsDefaultValues()
+        {
+            var r = new DerivedRegex(@"(?<Name>\w*)");
+
+            Assert.Null(r.Caps);
+
+            IDictionary capNames = r.CapNames;
+            Assert.NotNull(capNames);
+            Assert.Same(capNames, r.CapNames);
+            Assert.True(capNames.Contains("Name"));
+
+            AssertExtensions.Throws<ArgumentNullException>("value", () => r.Caps = null);
+            AssertExtensions.Throws<ArgumentNullException>("value", () => r.CapNames = null);
+
+            r.Caps = new Dictionary<string, string>();
+            Assert.IsType<Hashtable>(r.Caps);
+
+            r.CapNames = new Dictionary<string, string>();
+            Assert.IsType<Hashtable>(r.CapNames);
+
+            var newHashtable = new Hashtable();
+
+            r.CapNames = newHashtable;
+            Assert.Same(newHashtable, r.CapNames);
+
+            r.Caps = newHashtable;
+            Assert.Same(newHashtable, r.Caps);
+        }
+
         private sealed class DerivedRegex : Regex
         {
+            public DerivedRegex() { }
+            public DerivedRegex(string pattern) : base(pattern) { }
+
             public new void InitializeReferences() => base.InitializeReferences();
+
+            public new IDictionary Caps { get => base.Caps; set => base.Caps = value; }
+            public new IDictionary CapNames { get => base.CapNames; set => base.CapNames = value; }
+        }
+
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
+        public void Serialization_ThrowsNotSupported()
+        {
+            var r = new SerializableDerivedRegex();
+            Assert.Throws<PlatformNotSupportedException>(() => new SerializableDerivedRegex(default, default));
+            Assert.Throws<PlatformNotSupportedException>(() => ((ISerializable)r).GetObjectData(default, default));
+        }
+
+        [Serializable]
+        private sealed class SerializableDerivedRegex : Regex
+        {
+            public SerializableDerivedRegex() : base("") { }
+            public SerializableDerivedRegex(SerializationInfo info, StreamingContext context) : base(info, context) { }
+        }
+
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
+        public void Ctor_PatternInName()
+        {
+            RemoteExecutor.Invoke(() =>
+            {
+                // Just make sure setting the environment variable doesn't cause problems.
+                Environment.SetEnvironmentVariable("DOTNET_SYSTEM_TEXT_REGULAREXPRESSIONS_PATTERNINNAME", "1");
+
+                // Short pattern
+                var r = new Regex("abc", RegexOptions.Compiled);
+                Assert.True(r.IsMatch("123abc456"));
+
+                // Long pattern
+                string pattern = string.Concat(Enumerable.Repeat("1234567890", 20));
+                r = new Regex(pattern, RegexOptions.Compiled);
+                Assert.True(r.IsMatch("abc" + pattern + "abc"));
+            }).Dispose();
         }
     }
 }
index a8e9459..3b3521b 100644 (file)
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Linq;
 using Xunit;
 
 namespace System.Text.RegularExpressions.Tests
@@ -17,6 +18,12 @@ namespace System.Text.RegularExpressions.Tests
         public static void Escape(string str, string expected)
         {
             Assert.Equal(expected, Regex.Escape(str));
+
+            if (expected.Length > 0)
+            {
+                const int Count = 100;
+                Assert.Equal(string.Concat(Enumerable.Repeat(expected, Count)), Regex.Escape(string.Concat(Enumerable.Repeat(str, Count))));
+            }
         }
 
         [Fact]
@@ -35,6 +42,12 @@ namespace System.Text.RegularExpressions.Tests
         public void Unescape(string str, string expected)
         {
             Assert.Equal(expected, Regex.Unescape(str));
+
+            if (expected.Length > 0)
+            {
+                const int Count = 100;
+                Assert.Equal(string.Concat(Enumerable.Repeat(expected, Count)), Regex.Unescape(string.Concat(Enumerable.Repeat(str, Count))));
+            }
         }
 
         [Fact]
index 2c31d51..1a028ad 100644 (file)
@@ -734,9 +734,9 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}){2}", "acdfbcdfacefbcefbcefbcdfacdef", RegexOptions.None, new string[] { "acdfbcdfacefbcefbcefbcdf" } };
             yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}hello){2}", "aaaaaacdfbcdfacefhellobcefbcefbcdfhellooooo", RegexOptions.None, new string[] { "acdfbcdfacefhellobcefbcefbcdfhello" } };
             yield return new object[] { null, @"CN=(.*[^,]+).*", "CN=localhost", RegexOptions.Singleline, new string[] { "CN=localhost", "localhost" } };
-
             // Nested atomic
             yield return new object[] { null, @"(?>abc[def]gh(i*))", "123abceghiii456", RegexOptions.None, new string[] { "abceghiii", "iii" } };
+            yield return new object[] { null, @"(?>(?:abc)*)", "abcabcabc", RegexOptions.None, new string[] { "abcabcabc" } };
 
             // Anchoring loops beginning with .* / .+
             yield return new object[] { null, @".*", "", RegexOptions.None, new string[] { "" } };
index 7ccfb3b..ff01ae5 100644 (file)
@@ -172,6 +172,7 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { @"\s+\d+", "sdf 12sad", RegexOptions.RightToLeft, 0, 9, true, " 12" };
             yield return new object[] { @"\s+\d+", " asdf12 ", RegexOptions.RightToLeft, 0, 6, false, string.Empty };
             yield return new object[] { "aaa", "aaabbb", RegexOptions.None, 3, 3, false, string.Empty };
+            yield return new object[] { "abc|def", "123def456", RegexOptions.RightToLeft | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 9, true, "def" };
 
             yield return new object[] { @"foo\d+", "0123456789foo4567890foo         ", RegexOptions.RightToLeft, 10, 3, false, string.Empty };
             yield return new object[] { @"foo\d+", "0123456789foo4567890foo         ", RegexOptions.RightToLeft, 11, 21, false, string.Empty };
@@ -383,6 +384,7 @@ namespace System.Text.RegularExpressions.Tests
                 VerifyMatch(r.Match(input), expectedSuccess, expectedValue);
                 VerifyMatch(Regex.Match(input, pattern, options), expectedSuccess, expectedValue);
 
+                Assert.Equal(expectedSuccess, r.IsMatch(input));
                 Assert.Equal(expectedSuccess, Regex.IsMatch(input, pattern, options));
             }
 
@@ -462,17 +464,34 @@ namespace System.Text.RegularExpressions.Tests
             Assert.Equal("a", match.Value);
         }
 
-        [Fact]
-        public void Match_Timeout_Throws()
+        [Theory]
+        [InlineData(RegexOptions.None)]
+        [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)]
+        [InlineData(RegexOptions.Compiled)]
+        [InlineData(RegexOptions.Compiled | (RegexOptions)0x80 /* Debug */)]
+        public void Match_Timeout_Throws(RegexOptions options)
+        {
+            const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$";
+            string input = new string('a', 50) + "@a.a";
+
+            Assert.Throws<RegexMatchTimeoutException>(() => new Regex(Pattern, options, TimeSpan.FromMilliseconds(100)).Match(input));
+        }
+
+        [Theory]
+        [InlineData(RegexOptions.None)]
+        [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)]
+        [InlineData(RegexOptions.Compiled)]
+        [InlineData(RegexOptions.Compiled | (RegexOptions)0x80 /* Debug */)]
+        public void Match_DefaultTimeout_Throws(RegexOptions options)
         {
-            RemoteExecutor.Invoke(() =>
+            RemoteExecutor.Invoke(optionsString =>
             {
                 const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$";
                 string input = new string('a', 50) + "@a.a";
 
                 AppDomain.CurrentDomain.SetData(RegexHelpers.DefaultMatchTimeout_ConfigKeyName, TimeSpan.FromMilliseconds(100));
-                Assert.Throws<RegexMatchTimeoutException>(() => new Regex(Pattern).Match(input));
-            }).Dispose();
+                Assert.Throws<RegexMatchTimeoutException>(() => new Regex(Pattern, (RegexOptions)int.Parse(optionsString, CultureInfo.InvariantCulture)).Match(input));
+            }, ((int)options).ToString(CultureInfo.InvariantCulture)).Dispose();
         }
 
         // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions.
@@ -492,8 +511,8 @@ namespace System.Text.RegularExpressions.Tests
         // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions.
         [OuterLoop("Can take several seconds")]
         [ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))]
-        [InlineData(RegexOptions.Compiled)]
         [InlineData(RegexOptions.None)]
+        [InlineData(RegexOptions.Compiled)]
         public void Match_Timeout_Repetition_Throws(RegexOptions options)
         {
             int repetitionCount = 800_000_000;
index a97ec74..e6ebb46 100644 (file)
@@ -12,6 +12,8 @@ namespace System.Text.RegularExpressions.Tests
     {
         public static IEnumerable<object[]> Replace_String_TestData()
         {
+            yield return new object[] { @"a", "bbbb", "c", RegexOptions.None, 4, 3, "bbbb" };
+            yield return new object[] { @"", "   ", "123", RegexOptions.None, 4, 0, "123 123 123 123" };
             yield return new object[] { @"[^ ]+\s(?<time>)", "08/10/99 16:00", "${time}", RegexOptions.None, 14, 0, "16:00" };
             yield return new object[] { "icrosoft", "MiCrOsOfT", "icrosoft", RegexOptions.IgnoreCase, 9, 0, "Microsoft" };
             yield return new object[] { "dog", "my dog has fleas", "CAT", RegexOptions.IgnoreCase, 16, 0, "my CAT has fleas" };
@@ -78,6 +80,8 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { @"(?<cat>cat)\s*(?<dog>dog)", "slkfjsdcat dogkljeah", "START${catTWO}dogcat${dogTWO}END", RegexOptions.None, 20, 0, "slkfjsdSTART${catTWO}dogcat${dogTWO}ENDkljeah" };
 
             // RightToLeft
+            yield return new object[] { @"a", "bbbb", "c", RegexOptions.RightToLeft, 4, 3, "bbbb" };
+            yield return new object[] { @"", "   ", "123", RegexOptions.RightToLeft, 4, 3, "123 123 123 123" };
             yield return new object[] { @"foo\s+", "0123456789foo4567890foo         ", "bar", RegexOptions.RightToLeft, 32, 32, "0123456789foo4567890bar" };
             yield return new object[] { @"\d", "0123456789foo4567890foo         ", "#", RegexOptions.RightToLeft, 17, 32, "##########foo#######foo         " };
             yield return new object[] { @"\d", "0123456789foo4567890foo         ", "#", RegexOptions.RightToLeft, 7, 32, "0123456789foo#######foo         " };
@@ -139,6 +143,7 @@ namespace System.Text.RegularExpressions.Tests
 
         public static IEnumerable<object[]> Replace_MatchEvaluator_TestData()
         {
+            yield return new object[] { "a", "bbbb", new MatchEvaluator(match => "uhoh"), RegexOptions.None, 4, 0, "bbbb" };
             yield return new object[] { "(Big|Small)", "Big mountain", new MatchEvaluator(MatchEvaluator1), RegexOptions.None, 12, 0, "Huge mountain" };
             yield return new object[] { "(Big|Small)", "Small village", new MatchEvaluator(MatchEvaluator1), RegexOptions.None, 13, 0, "Tiny village" };
 
@@ -162,6 +167,7 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { @"\u0915\u0930.*?\b", boldInput, new MatchEvaluator(MatchEvaluatorBold), RegexOptions.CultureInvariant | RegexOptions.Singleline, boldInput.Length, 0, boldExpected };
 
             // RightToLeft
+            yield return new object[] { "a", "bbbb", new MatchEvaluator(match => "uhoh"), RegexOptions.RightToLeft, 4, 3, "bbbb" };
             yield return new object[] { @"foo\s+", "0123456789foo4567890foo         ", new MatchEvaluator(MatchEvaluatorBar), RegexOptions.RightToLeft, 32, 32, "0123456789foo4567890bar" };
             yield return new object[] { @"\d", "0123456789foo4567890foo         ", new MatchEvaluator(MatchEvaluatorPoundSign), RegexOptions.RightToLeft, 17, 32, "##########foo#######foo         " };
             yield return new object[] { @"\d", "0123456789foo4567890foo         ", new MatchEvaluator(MatchEvaluatorPoundSign), RegexOptions.RightToLeft, 7, 32, "0123456789foo#######foo         " };
index c480f4b..52b4bd8 100644 (file)
@@ -9,8 +9,11 @@ namespace System.Text.RegularExpressions.Tests
 {
     public class RegexSplitTests
     {
-        public static IEnumerable<object[]> Split_NonCompiled_TestData()
+        public static IEnumerable<object[]> Split_TestData()
         {
+            yield return new object[] { "", "", RegexOptions.None, 0, 0, new string[] { "", "" } };
+            yield return new object[] { "123", "abc", RegexOptions.None, 3, 0, new string[] { "abc" } };
+
             yield return new object[] { "    ", "word0    word1    word2    word3", RegexOptions.None, 32, 0, new string[] { "word0", "word1", "word2", "word3" } };
 
             yield return new object[] { ":", "kkk:lll:mmm:nnn:ooo", RegexOptions.None, 19, 0, new string[] { "kkk", "lll", "mmm", "nnn", "ooo" } };
@@ -27,6 +30,9 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { "a(?<dot1>.)c(.)e", "123abcde456aBCDe789", RegexOptions.IgnoreCase, 19, 0, new string[] { "123", "d", "b", "456", "D", "B", "789" } };
 
             // RightToLeft
+            yield return new object[] { "", "", RegexOptions.RightToLeft, 0, 0, new string[] { "", "" } };
+            yield return new object[] { "123", "abc", RegexOptions.RightToLeft, 3, 0, new string[] { "abc" } };
+
             yield return new object[] { "a(.)c(.)e", "123abcde456aBCDe789", RegexOptions.RightToLeft, 19, 19, new string[] { "123", "d", "b", "456aBCDe789" } };
             yield return new object[] { "a(.)c(.)e", "123abcde456aBCDe789", RegexOptions.RightToLeft | RegexOptions.IgnoreCase, 19, 19, new string[] { "123", "d", "b", "456", "D", "B", "789" } };
 
@@ -50,8 +56,8 @@ namespace System.Text.RegularExpressions.Tests
         }
 
         [Theory]
-        [MemberData(nameof(Split_NonCompiled_TestData))]
-        [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Split_NonCompiled_TestData), 2, MemberType = typeof(RegexCompilationHelper))]
+        [MemberData(nameof(Split_TestData))]
+        [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Split_TestData), 2, MemberType = typeof(RegexCompilationHelper))]
         public void Split(string pattern, string input, RegexOptions options, int count, int start, string[] expected)
         {
             bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, start);
index a13484f..e1ef5ce 100644 (file)
@@ -343,6 +343,20 @@ namespace System.Text.RegularExpressions.Tests
             }
         }
 
+        [Theory]
+        [InlineData("ab", 1, false)]
+        [InlineData("a b", 1, true)]
+        [InlineData("a b", 2, true)]
+        [InlineData("\u200Da", 1, false)]
+        [InlineData("\u200D\u200C", 1, false)]
+        [InlineData("\u200Ca", 1, false)]
+        [InlineData("\u200C a", 1, true)]
+        public void IsBoundary_ReturnsExpectedResult(string text, int pos, bool expectedBoundary)
+        {
+            var r = new DerivedRunner(text);
+            Assert.Equal(expectedBoundary, r.IsBoundary(pos, 0, text.Length));
+        }
+
         private static HashSet<char> ComputeIncludedSet(Func<char, bool> func)
         {
             var included = new HashSet<char>();
@@ -371,6 +385,45 @@ namespace System.Text.RegularExpressions.Tests
             Assert.Throws<XunitException>(() => ValidateSet("[b]", RegexOptions.None, null, new HashSet<char>() { 'b' }, validateEveryChar: true));
         }
 
+        [Fact]
+        public void RegexRunner_Legacy_CharInSet()
+        {
+            Assert.True(DerivedRunner.CharInSet('a', "ab", ""));
+            Assert.False(DerivedRunner.CharInSet('x', "ab", ""));
+
+            Assert.True(DerivedRunner.CharInSet('x', "\0\0ab", ""));
+            Assert.False(DerivedRunner.CharInSet('a', "\0\0ab", ""));
+
+            Assert.True(DerivedRunner.CharInSet('4', "", "\x0009"));
+            Assert.False(DerivedRunner.CharInSet('a', "", "\x0009"));
+
+            Assert.True(DerivedRunner.CharInSet('4', "xz", "\x0009"));
+            Assert.True(DerivedRunner.CharInSet('a', "az", "\x0009"));
+            Assert.False(DerivedRunner.CharInSet('a', "xz", "\x0009"));
+        }
+
+        private sealed class DerivedRunner : RegexRunner
+        {
+            public DerivedRunner() { }
+
+            public DerivedRunner(string text)
+            {
+                runtext = text;
+                runtextbeg = 0;
+                runtextstart = 0;
+                runtextend = text.Length;
+                runtextpos = 0;
+            }
+
+            public new bool IsBoundary(int index, int startpos, int endpos) => base.IsBoundary(index, startpos, endpos);
+
+            public static new bool CharInSet(char ch, string set, string category) => RegexRunner.CharInSet(ch, set, category);
+
+            protected override bool FindFirstChar() => throw new NotImplementedException();
+            protected override void Go() => throw new NotImplementedException();
+            protected override void InitTrackCount() => throw new NotImplementedException();
+        }
+
         private static void ValidateSet(string regex, RegexOptions options, HashSet<char> included, HashSet<char> excluded, bool validateEveryChar = false)
         {
             Assert.True((included != null) ^ (excluded != null));
index e16f2d3..a7f1197 100644 (file)
@@ -349,6 +349,7 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData(@"[a-[b]] ", RegexOptions.None, null)]
         [InlineData(@"[a-b-[c]] ", RegexOptions.None, null)]
         [InlineData(@"[a-[b]-c] ", RegexOptions.None, RegexParseError.SubtractionMustBeLast)]
+        [InlineData(@"[a-z-[b]12]", RegexOptions.None, RegexParseError.SubtractionMustBeLast)]
         [InlineData(@"[[a]-b] ", RegexOptions.None, null)]
         [InlineData(@"[[a]-[b]] ", RegexOptions.None, null)]
         [InlineData(@"[\w-a] ", RegexOptions.None, null)]
@@ -587,6 +588,8 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData(@"(?imn", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
         [InlineData(@"(?'cat'", RegexOptions.None, RegexParseError.NotEnoughParentheses)]
         [InlineData(@"(?'", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
+        [InlineData(@"(?'=)", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
+        [InlineData(@"(?'!)", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
         [InlineData(@"[^", RegexOptions.None, RegexParseError.UnterminatedBracket)]
         [InlineData(@"[cat", RegexOptions.None, RegexParseError.UnterminatedBracket)]
         [InlineData(@"[^cat", RegexOptions.None, RegexParseError.UnterminatedBracket)]
index a70d9d1..a4fd802 100644 (file)
@@ -172,6 +172,23 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("[^a]{1,3}[^a]+", "[^a]{2,}")]
         [InlineData("[^a]{1,3}[^a]?", "[^a]{1,4}")]
         [InlineData("[^a]{1,3}[^a]{1,3}", "[^a]{2,6}")]
+        // Two lazy notone loops
+        [InlineData("[^a]*?[^a]*?", "[^a]*?")]
+        [InlineData("[^a]*?[^a]+?", "[^a]+?")]
+        [InlineData("[^a]*?[^a]??", "[^a]*?")]
+        [InlineData("[^a]*?[^a]{1,3}?", "[^a]+?")]
+        [InlineData("[^a]+?[^a]*?", "[^a]+?")]
+        [InlineData("[^a]+?[^a]+?", "[^a]{2,}?")]
+        [InlineData("[^a]+?[^a]??", "[^a]+?")]
+        [InlineData("[^a]+?[^a]{1,3}?", "[^a]{2,}?")]
+        [InlineData("[^a]??[^a]*?", "[^a]*?")]
+        [InlineData("[^a]??[^a]+?", "[^a]+?")]
+        [InlineData("[^a]??[^a]??", "[^a]{0,2}?")]
+        [InlineData("[^a]??[^a]{1,3}?", "[^a]{1,4}?")]
+        [InlineData("[^a]{1,3}?[^a]*?", "[^a]+?")]
+        [InlineData("[^a]{1,3}?[^a]+?", "[^a]{2,}?")]
+        [InlineData("[^a]{1,3}?[^a]??", "[^a]{1,4}?")]
+        [InlineData("[^a]{1,3}?[^a]{1,3}?", "[^a]{2,6}?")]
         // Two atomic notone loops
         [InlineData("(?>[^a]*)(?>[^a]*)", "(?>[^a]*)")]
         [InlineData("(?>[^a]*)(?>[^a]+)", "(?>[^a]+)")]
@@ -189,7 +206,7 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("(?>[^a]{1,3})(?>[^a]+)", "(?>[^a]{2,})")]
         [InlineData("(?>[^a]{1,3})(?>[^a]?)", "(?>[^a]{1,4})")]
         [InlineData("(?>[^a]{1,3})(?>[^a]{1,3})", "(?>[^a]{2,6})")]
-        // Greedy notone loop and one
+        // Greedy notone loop and notone
         [InlineData("[^a]*[^a]", "[^a]+")]
         [InlineData("[^a]+[^a]", "[^a]{2,}")]
         [InlineData("[^a]?[^a]", "[^a]{1,2}")]
@@ -198,7 +215,16 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("[^a][^a]+", "[^a]{2,}")]
         [InlineData("[^a][^a]?", "[^a]{1,2}")]
         [InlineData("[^a][^a]{1,3}", "[^a]{2,4}")]
-        // Atomic notone loop and one
+        // Lazy notone loop and notone
+        [InlineData("[^a]*?[^a]", "[^a]+?")]
+        [InlineData("[^a]+?[^a]", "[^a]{2,}?")]
+        [InlineData("[^a]??[^a]", "[^a]{1,2}?")]
+        [InlineData("[^a]{1,3}?[^a]", "[^a]{2,4}?")]
+        [InlineData("[^a][^a]*?", "[^a]+?")]
+        [InlineData("[^a][^a]+?", "[^a]{2,}?")]
+        [InlineData("[^a][^a]??", "[^a]{1,2}?")]
+        [InlineData("[^a][^a]{1,3}?", "[^a]{2,4}?")]
+        // Atomic notone loop and notone
         [InlineData("(?>[^a]*)[^a]", "(?>[^a]+)")]
         [InlineData("(?>[^a]+)[^a]", "(?>[^a]{2,})")]
         [InlineData("(?>[^a]?)[^a]", "(?>[^a]{1,2})")]
@@ -261,6 +287,23 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("[0-9]{1,3}?[0-9]+?", "[0-9]{2,}?")]
         [InlineData("[0-9]{1,3}?[0-9]??", "[0-9]{1,4}?")]
         [InlineData("[0-9]{1,3}?[0-9]{1,3}?", "[0-9]{2,6}?")]
+        // Two atomic set loops
+        [InlineData("(?>[0-9]*)(?>[0-9]*)", "(?>[0-9]*)")]
+        [InlineData("(?>[0-9]*)(?>[0-9]+)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]*)(?>[0-9]?)", "(?>[0-9]*)")]
+        [InlineData("(?>[0-9]*)(?>[0-9]{1,3})", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]+)(?>[0-9]*)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]+)(?>[0-9]+)", "(?>[0-9]{2,})")]
+        [InlineData("(?>[0-9]+)(?>[0-9]?)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]+)(?>[0-9]{1,3})", "(?>[0-9]{2,})")]
+        [InlineData("(?>[0-9]?)(?>[0-9]*)", "(?>[0-9]*)")]
+        [InlineData("(?>[0-9]?)(?>[0-9]+)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]?)(?>[0-9]?)", "(?>[0-9]{0,2})")]
+        [InlineData("(?>[0-9]?)(?>[0-9]{1,3})", "(?>[0-9]{1,4})")]
+        [InlineData("(?>[0-9]{1,3})(?>[0-9]*)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]{1,3})(?>[0-9]+)", "(?>[0-9]{2,})")]
+        [InlineData("(?>[0-9]{1,3})(?>[0-9]?)", "(?>[0-9]{1,4})")]
+        [InlineData("(?>[0-9]{1,3})(?>[0-9]{1,3})", "(?>[0-9]{2,6})")]
         // Lazy set loop and set
         [InlineData("[0-9]*?[0-9]", "[0-9]+?")]
         [InlineData("[0-9]+?[0-9]", "[0-9]{2,}?")]
@@ -375,10 +418,6 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("a*a*?", "a*")]
         [InlineData("a*?a*", "a*")]
         [InlineData("a*[^a]*", "a*")]
-        [InlineData("[ab]*[^a]", "(?>[ab]*)[^a]")]
-        [InlineData("[ab]*[^a]*", "(?>[ab]*)[^a]*")]
-        [InlineData("[ab]*[^a]*?", "(?>[ab]*)[^a]*?")]
-        [InlineData("[ab]*(?>[^a]*)", "(?>[ab]*)(?>[^a]*)")]
         [InlineData("[^a]*a*", "a*")]
         [InlineData("a{2147483646}a", "a{2147483647}")]
         [InlineData("a{2147483647}a", "a{2147483647}")]
@@ -407,9 +446,18 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("abc(?:(?i:e)|f)", "abc[ef]")]
         // Not applying auto-atomicity
         [InlineData("a*b*", "(?>a*)b*")]
+        [InlineData("[ab]*[^a]", "(?>[ab]*)[^a]")]
+        [InlineData("[ab]*[^a]*", "(?>[ab]*)[^a]*")]
+        [InlineData("[ab]*[^a]*?", "(?>[ab]*)[^a]*?")]
+        [InlineData("[ab]*(?>[^a]*)", "(?>[ab]*)(?>[^a]*)")]
         [InlineData("[^\n]*\n*", "(?>[^\n]*)\n")]
         [InlineData("(a[bcd]a*)*fg", "(a[bcd](?>a*))*fg")]
         [InlineData("(\\w[bcd]\\d*)*fg", "(\\w[bcd](?>\\d*))*fg")]
+        [InlineData("a*(?<=[^a])b", "(?>a*)(?<=[^a])b")]
+        [InlineData("[\x0000-\xFFFF]*[a-z]", "(?>[\x0000-\xFFFF]*)[a-z]")]
+        [InlineData("[a-z]*[\x0000-\xFFFF]+", "(?>[a-z]*)[\x0000-\xFFFF]+")]
+        [InlineData("[^a-c]*[e-g]", "(?>[^a-c]*)[e-g]")]
+        [InlineData("[^a-c]*[^e-g]", "(?>[^a-c]*)[^e-g]")]
         public void PatternsReduceDifferently(string pattern1, string pattern2)
         {
             var r1 = new Regex(pattern1);
@@ -443,6 +491,10 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData(@"a*a*a*a*a*a*a*b*", 0)]
         [InlineData(@"((a{1,2}){4}){3,7}", 12)]
         [InlineData(@"\b\w{4}\b", 4)]
+        [InlineData(@"abcd(?=efgh)efgh", 8)]
+        [InlineData(@"abcd(?<=cd)efgh", 8)]
+        [InlineData(@"abcd(?!ab)efgh", 8)]
+        [InlineData(@"abcd(?<!ef)efgh", 8)]
         // we stop computing after a certain depth; if that logic changes in the future, these tests can be updated
         [InlineData(@"((((((((((((((((((((((((((((((ab|cd+)|ef+)|gh+)|ij+)|kl+)|mn+)|op+)|qr+)|st+)|uv+)|wx+)|yz+)|01+)|23+)|45+)|67+)|89+)|AB+)|CD+)|EF+)|GH+)|IJ+)|KL+)|MN+)|OP+)|QR+)|ST+)|UV+)|WX+)|YZ)", 0)]
         [InlineData(@"(YZ+|(WX+|(UV+|(ST+|(QR+|(OP+|(MN+|(KL+|(IJ+|(GH+|(EF+|(CD+|(AB+|(89+|(67+|(45+|(23+|(01+|(yz+|(wx+|(uv+|(st+|(qr+|(op+|(mn+|(kl+|(ij+|(gh+|(ef+|(de+|(a|bc+)))))))))))))))))))))))))))))))", 0)]