Add more regex tests (and fix one atomicity bug) (#33458)
author Stephen Toub <stoub@microsoft.com>
Wed, 11 Mar 2020 21:28:02 +0000 (17:28 -0400)
committer GitHub <noreply@github.com>
Wed, 11 Mar 2020 21:28:02 +0000 (17:28 -0400)
* Avoid automatically making Regex loops followed by a lookbehind atomic

We allow a positive lookahead to be traversed when determining whether a loop can be upgraded to atomic, but we're missing the RightToLeft (RTL) check that distinguishes positive lookaheads from positive lookbehinds. As a result, some loops followed by a positive lookbehind are erroneously made atomic when they shouldn't be.  Fix that by ensuring we only traverse Require nodes when they represent lookaheads rather than lookbehinds.

Also, for additional safety, change a later check so that it requires the two nodes being compared to have identical options.  Today we only check that they agree on case-sensitivity, but it's more robust (and doesn't hurt) to compare all options.

* Add more tests to boost code coverage

Plus lookaround tests for min length computation

12 files changed:
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
src/libraries/System.Text.RegularExpressions/tests/GroupCollectionTests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.CompileToAssembly.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.EscapeUnescape.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/Regex.Split.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/RegexCharacterSetTests.cs
src/libraries/System.Text.RegularExpressions/tests/RegexParserTests.cs
src/libraries/System.Text.RegularExpressions/tests/RegexReductionTests.cs

index 8ba5abde70b8333615730446e31f9f52cd2e12cf..64d7052da7a034e4cf62b46e8aa416015205909c 100644 (file)
@@ -1455,7 +1455,7 @@ namespace System.Text.RegularExpressions
                     case Concatenate:
                     case Capture:
                     case Atomic:
-                    case Require:
+                    case Require when (subsequent.Options & RegexOptions.RightToLeft) == 0: // only lookaheads, not lookbehinds (represented as RTL Require nodes)
                     case Loop when subsequent.M > 0:
                     case Lazyloop when subsequent.M > 0:
                         subsequent = subsequent.Child(0);
@@ -1465,10 +1465,8 @@ namespace System.Text.RegularExpressions
                 break;
             }
 
-            // If the two nodes don't agree on case-insensitivity, don't try to optimize.
-            // If they're both case sensitive or both case insensitive, then their tokens
-            // will be comparable.
-            if ((node.Options & RegexOptions.IgnoreCase) != (subsequent.Options & RegexOptions.IgnoreCase))
+            // If the two nodes don't agree on options in any way, don't try to optimize them.
+            if (node.Options != subsequent.Options)
             {
                 return false;
             }
index 638fc4a99af27ee5e3ef64fdf5ce7b1f6d30af56..99ab75e08a40c6fc7c9fd7dc2658bf9f6d437019 100644 (file)
@@ -3,6 +3,7 @@
 // See the LICENSE file in the project root for more information.
 
 using System.Collections;
+using System.Collections.Generic;
 using Xunit;
 
 namespace System.Text.RegularExpressions.Tests
@@ -31,11 +32,35 @@ namespace System.Text.RegularExpressions.Tests
             }
         }
 
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")]
+        public static void GetEnumerator_Generic()
+        {
+            Regex regex = new Regex(@"(?<A1>a*)(?<A2>b*)(?<A3>c*)");
+            Match match = regex.Match("aaabbccccccccccaaaabc");
+
+            GroupCollection groups = match.Groups;
+            IEnumerator<KeyValuePair<string, Group>> enumerator = ((IEnumerable<KeyValuePair<string, Group>>)groups).GetEnumerator();
+            for (int i = 0; i < 2; i++)
+            {
+                int counter = 0;
+                while (enumerator.MoveNext())
+                {
+                    Assert.Equal(groups[counter], enumerator.Current.Value);
+                    counter++;
+                }
+                Assert.False(enumerator.MoveNext());
+                Assert.Equal(groups.Count, counter);
+                enumerator.Reset();
+            }
+        }
+
         [Fact]
         public static void GetEnumerator_Invalid()
         {
             Regex regex = new Regex(@"(?<A1>a*)(?<A2>b*)(?<A3>c*)");
             Match match = regex.Match("aaabbccccccccccaaaabc");
+
             IEnumerator enumerator = match.Groups.GetEnumerator();
 
             Assert.Throws<InvalidOperationException>(() => enumerator.Current);
@@ -47,6 +72,24 @@ namespace System.Text.RegularExpressions.Tests
             Assert.Throws<InvalidOperationException>(() => enumerator.Current);
         }
 
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")]
+        public static void GetEnumerator_Generic_Invalid()
+        {
+            Regex regex = new Regex(@"(?<A1>a*)(?<A2>b*)(?<A3>c*)");
+            Match match = regex.Match("aaabbccccccccccaaaabc");
+
+            IEnumerator<KeyValuePair<string, Group>> enumerator = ((IEnumerable<KeyValuePair<string, Group>>)match.Groups).GetEnumerator();
+
+            Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+
+            while (enumerator.MoveNext()) ;
+            Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+
+            enumerator.Reset();
+            Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+        }
+
         [Fact]
         public static void Item_Get()
         {
@@ -56,6 +99,17 @@ namespace System.Text.RegularExpressions.Tests
             Assert.Equal("555-6666", collection[2].ToString());
         }
 
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")]
+        public static void ContainsKey()
+        {
+            IReadOnlyDictionary<string, Group> collection = (IReadOnlyDictionary<string, Group>)CreateCollection();
+            Assert.True(collection.ContainsKey("0"));
+            Assert.True(collection.ContainsKey("1"));
+            Assert.True(collection.ContainsKey("2"));
+            Assert.False(collection.ContainsKey("3"));
+        }
+
         [Theory]
         [InlineData(-1)]
         [InlineData(4)]
index 3a97133501cbff4e6282e1151c6dad250ca4d341..653fefa3a9cc6b23b94201a82a2712dc338c0cc6 100644 (file)
@@ -5,6 +5,7 @@
 using System.IO;
 using System.Linq;
 using System.Reflection;
+using System.Reflection.Emit;
 using Xunit;
 
 namespace System.Text.RegularExpressions.Tests
@@ -21,6 +22,19 @@ namespace System.Text.RegularExpressions.Tests
             AssertExtensions.Throws<ArgumentNullException>("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd")));
             AssertExtensions.Throws<ArgumentNullException>("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd"), null));
             AssertExtensions.Throws<ArgumentNullException>("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd"), null, null));
+
+            // We currently build more code for CompileToAssembly into debug builds, which changes this particular exception type based on Debug vs Release.
+            // Until that changes, for the tests just allow them both.
+            AssertThrows<PlatformNotSupportedException, ArgumentNullException>(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { null }, new AssemblyName("abcd")));
+            AssertThrows<PlatformNotSupportedException, ArgumentNullException>(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { new RegexCompilationInfo("abc", RegexOptions.None, "abc", "", true), null }, new AssemblyName("abcd")));
+            AssertThrows<PlatformNotSupportedException, ArgumentNullException>(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { null }, new AssemblyName("abcd"), new CustomAttributeBuilder[0]));
+
+            static void AssertThrows<TException1, TException2>(Action action)
+            {
+                Exception e = Record.Exception(action);
+                Assert.NotNull(e);
+                Assert.True(e is TException1 || e is TException2);
+            }
         }
 
         [Fact]
@@ -29,8 +43,41 @@ namespace System.Text.RegularExpressions.Tests
         {
             Assert.Throws<PlatformNotSupportedException>(() =>
                 Regex.CompileToAssembly(
-                    new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) },
+                    new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true) },
                     new AssemblyName("abcd")));
+
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo("abcd", RegexOptions.CultureInvariant, "abcd", "", true, TimeSpan.FromMinutes(1)) },
+                    new AssemblyName("abcdWithTimeout")));
+
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo("(?<FirstTwoLetters>ab)cd", RegexOptions.None, "abcd", "", true, TimeSpan.FromMinutes(1)) },
+                    new AssemblyName("abcdWithNamedCapture")));
+
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo(".*\\B(\\d+)(?<output>SUCCESS)\\B.*", RegexOptions.None, "withCaptures", "", true) },
+                    new AssemblyName("withCaptures")));
+
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) },
+                    new AssemblyName("abcdWithCustomAttribute"),
+                    new[] { new CustomAttributeBuilder(typeof(AssemblyCompanyAttribute).GetConstructor(new[] { typeof(string) }), new[] { "TestCompany" }) }));
+        }
+
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
+        public void CompileToAssembly_ResourceFile_PNSE()
+        {
+            Assert.Throws<PlatformNotSupportedException>(() =>
+                Regex.CompileToAssembly(
+                    new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) },
+                    new AssemblyName("abcdWithUnsupportedResourceFile"),
+                    attributes: null,
+                    "unsupportedResourceFile"));
         }
 
         [Fact]
index 68f4c4776745f6efe46bd25ec33e962ff5f51876..aeab31dc21b80c040dfddc63d4c3d1c4b28bc2fe 100644 (file)
@@ -2,9 +2,14 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Collections;
 using System.Collections.Generic;
 using System.Diagnostics;
+using System.IO;
+using System.Linq;
 using System.Runtime.InteropServices;
+using System.Runtime.Serialization;
+using System.Runtime.Serialization.Formatters.Binary;
 using System.Threading;
 using Microsoft.DotNet.RemoteExecutor;
 using Xunit;
@@ -61,10 +66,17 @@ namespace System.Text.RegularExpressions.Tests
             r = new Regex("[abc]def(ghi|jkl)", options | (RegexOptions)0x80 /*RegexOptions.Debug*/);
             Assert.False(r.Match("a").Success);
             Assert.True(r.Match("adefghi").Success);
+            Assert.Equal("123456789", r.Replace("123adefghi789", "456"));
 
             r = new Regex("(ghi|jkl)*ghi", options | (RegexOptions)0x80 /*RegexOptions.Debug*/);
             Assert.False(r.Match("jkl").Success);
             Assert.True(r.Match("ghi").Success);
+            Assert.Equal("123456789", r.Replace("123ghi789", "456"));
+
+            r = new Regex("(ghi|jkl)*ghi", options | (RegexOptions)0x80 /*RegexOptions.Debug*/, TimeSpan.FromDays(1));
+            Assert.False(r.Match("jkl").Success);
+            Assert.True(r.Match("ghi").Success);
+            Assert.Equal("123456789", r.Replace("123ghi789", "456"));
         }
 
         [Fact]
@@ -121,9 +133,81 @@ namespace System.Text.RegularExpressions.Tests
             Assert.Throws<NotSupportedException>(() => r.InitializeReferences());
         }
 
+        [Fact]
+        public void Ctor_CapNames_ReturnsDefaultValues()
+        {
+            var r = new DerivedRegex(@"(?<Name>\w*)");
+
+            Assert.Null(r.Caps);
+
+            IDictionary capNames = r.CapNames;
+            Assert.NotNull(capNames);
+            Assert.Same(capNames, r.CapNames);
+            Assert.True(capNames.Contains("Name"));
+
+            AssertExtensions.Throws<ArgumentNullException>("value", () => r.Caps = null);
+            AssertExtensions.Throws<ArgumentNullException>("value", () => r.CapNames = null);
+
+            r.Caps = new Dictionary<string, string>();
+            Assert.IsType<Hashtable>(r.Caps);
+
+            r.CapNames = new Dictionary<string, string>();
+            Assert.IsType<Hashtable>(r.CapNames);
+
+            var newHashtable = new Hashtable();
+
+            r.CapNames = newHashtable;
+            Assert.Same(newHashtable, r.CapNames);
+
+            r.Caps = newHashtable;
+            Assert.Same(newHashtable, r.Caps);
+        }
+
         private sealed class DerivedRegex : Regex
         {
+            public DerivedRegex() { }
+            public DerivedRegex(string pattern) : base(pattern) { }
+
             public new void InitializeReferences() => base.InitializeReferences();
+
+            public new IDictionary Caps { get => base.Caps; set => base.Caps = value; }
+            public new IDictionary CapNames { get => base.CapNames; set => base.CapNames = value; }
+        }
+
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
+        public void Serialization_ThrowsNotSupported()
+        {
+            var r = new SerializableDerivedRegex();
+            Assert.Throws<PlatformNotSupportedException>(() => new SerializableDerivedRegex(default, default));
+            Assert.Throws<PlatformNotSupportedException>(() => ((ISerializable)r).GetObjectData(default, default));
+        }
+
+        [Serializable]
+        private sealed class SerializableDerivedRegex : Regex
+        {
+            public SerializableDerivedRegex() : base("") { }
+            public SerializableDerivedRegex(SerializationInfo info, StreamingContext context) : base(info, context) { }
+        }
+
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
+        public void Ctor_PatternInName()
+        {
+            RemoteExecutor.Invoke(() =>
+            {
+                // Just make sure setting the environment variable doesn't cause problems.
+                Environment.SetEnvironmentVariable("DOTNET_SYSTEM_TEXT_REGULAREXPRESSIONS_PATTERNINNAME", "1");
+
+                // Short pattern
+                var r = new Regex("abc", RegexOptions.Compiled);
+                Assert.True(r.IsMatch("123abc456"));
+
+                // Long pattern
+                string pattern = string.Concat(Enumerable.Repeat("1234567890", 20));
+                r = new Regex(pattern, RegexOptions.Compiled);
+                Assert.True(r.IsMatch("abc" + pattern + "abc"));
+            }).Dispose();
         }
     }
 }
index a8e9459cf7a4edaace356f6fa17e5ab29171a65e..3b3521b88ea7ad99a44e38029827d1e4afef8774 100644 (file)
@@ -2,6 +2,7 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Linq;
 using Xunit;
 
 namespace System.Text.RegularExpressions.Tests
@@ -17,6 +18,12 @@ namespace System.Text.RegularExpressions.Tests
         public static void Escape(string str, string expected)
         {
             Assert.Equal(expected, Regex.Escape(str));
+
+            if (expected.Length > 0)
+            {
+                const int Count = 100;
+                Assert.Equal(string.Concat(Enumerable.Repeat(expected, Count)), Regex.Escape(string.Concat(Enumerable.Repeat(str, Count))));
+            }
         }
 
         [Fact]
@@ -35,6 +42,12 @@ namespace System.Text.RegularExpressions.Tests
         public void Unescape(string str, string expected)
         {
             Assert.Equal(expected, Regex.Unescape(str));
+
+            if (expected.Length > 0)
+            {
+                const int Count = 100;
+                Assert.Equal(string.Concat(Enumerable.Repeat(expected, Count)), Regex.Unescape(string.Concat(Enumerable.Repeat(str, Count))));
+            }
         }
 
         [Fact]
index 2c31d515b6a91ca3e411a1f2b33b6efebb901796..1a028ad9542e383714e44da2e6fe75116175cd11 100644 (file)
@@ -734,9 +734,9 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}){2}", "acdfbcdfacefbcefbcefbcdfacdef", RegexOptions.None, new string[] { "acdfbcdfacefbcefbcefbcdf" } };
             yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}hello){2}", "aaaaaacdfbcdfacefhellobcefbcefbcdfhellooooo", RegexOptions.None, new string[] { "acdfbcdfacefhellobcefbcefbcdfhello" } };
             yield return new object[] { null, @"CN=(.*[^,]+).*", "CN=localhost", RegexOptions.Singleline, new string[] { "CN=localhost", "localhost" } };
-
             // Nested atomic
             yield return new object[] { null, @"(?>abc[def]gh(i*))", "123abceghiii456", RegexOptions.None, new string[] { "abceghiii", "iii" } };
+            yield return new object[] { null, @"(?>(?:abc)*)", "abcabcabc", RegexOptions.None, new string[] { "abcabcabc" } };
 
             // Anchoring loops beginning with .* / .+
             yield return new object[] { null, @".*", "", RegexOptions.None, new string[] { "" } };
index 7ccfb3b6bb26580f2b53a1b7ba92809481719a2e..ff01ae5a34de196deef92682f4db493e3eb08471 100644 (file)
@@ -172,6 +172,7 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { @"\s+\d+", "sdf 12sad", RegexOptions.RightToLeft, 0, 9, true, " 12" };
             yield return new object[] { @"\s+\d+", " asdf12 ", RegexOptions.RightToLeft, 0, 6, false, string.Empty };
             yield return new object[] { "aaa", "aaabbb", RegexOptions.None, 3, 3, false, string.Empty };
+            yield return new object[] { "abc|def", "123def456", RegexOptions.RightToLeft | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 9, true, "def" };
 
             yield return new object[] { @"foo\d+", "0123456789foo4567890foo         ", RegexOptions.RightToLeft, 10, 3, false, string.Empty };
             yield return new object[] { @"foo\d+", "0123456789foo4567890foo         ", RegexOptions.RightToLeft, 11, 21, false, string.Empty };
@@ -383,6 +384,7 @@ namespace System.Text.RegularExpressions.Tests
                 VerifyMatch(r.Match(input), expectedSuccess, expectedValue);
                 VerifyMatch(Regex.Match(input, pattern, options), expectedSuccess, expectedValue);
 
+                Assert.Equal(expectedSuccess, r.IsMatch(input));
                 Assert.Equal(expectedSuccess, Regex.IsMatch(input, pattern, options));
             }
 
@@ -462,17 +464,34 @@ namespace System.Text.RegularExpressions.Tests
             Assert.Equal("a", match.Value);
         }
 
-        [Fact]
-        public void Match_Timeout_Throws()
+        [Theory]
+        [InlineData(RegexOptions.None)]
+        [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)]
+        [InlineData(RegexOptions.Compiled)]
+        [InlineData(RegexOptions.Compiled | (RegexOptions)0x80 /* Debug */)]
+        public void Match_Timeout_Throws(RegexOptions options)
+        {
+            const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$";
+            string input = new string('a', 50) + "@a.a";
+
+            Assert.Throws<RegexMatchTimeoutException>(() => new Regex(Pattern, options, TimeSpan.FromMilliseconds(100)).Match(input));
+        }
+
+        [Theory]
+        [InlineData(RegexOptions.None)]
+        [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)]
+        [InlineData(RegexOptions.Compiled)]
+        [InlineData(RegexOptions.Compiled | (RegexOptions)0x80 /* Debug */)]
+        public void Match_DefaultTimeout_Throws(RegexOptions options)
         {
-            RemoteExecutor.Invoke(() =>
+            RemoteExecutor.Invoke(optionsString =>
             {
                 const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$";
                 string input = new string('a', 50) + "@a.a";
 
                 AppDomain.CurrentDomain.SetData(RegexHelpers.DefaultMatchTimeout_ConfigKeyName, TimeSpan.FromMilliseconds(100));
-                Assert.Throws<RegexMatchTimeoutException>(() => new Regex(Pattern).Match(input));
-            }).Dispose();
+                Assert.Throws<RegexMatchTimeoutException>(() => new Regex(Pattern, (RegexOptions)int.Parse(optionsString, CultureInfo.InvariantCulture)).Match(input));
+            }, ((int)options).ToString(CultureInfo.InvariantCulture)).Dispose();
         }
 
         // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions.
@@ -492,8 +511,8 @@ namespace System.Text.RegularExpressions.Tests
         // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions.
         [OuterLoop("Can take several seconds")]
         [ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))]
-        [InlineData(RegexOptions.Compiled)]
         [InlineData(RegexOptions.None)]
+        [InlineData(RegexOptions.Compiled)]
         public void Match_Timeout_Repetition_Throws(RegexOptions options)
         {
             int repetitionCount = 800_000_000;
index a97ec741df4d1a885f23ded1a2fec6f2f30b3aba..e6ebb469202d2588e135518ac1698c45394a6d23 100644 (file)
@@ -12,6 +12,8 @@ namespace System.Text.RegularExpressions.Tests
     {
         public static IEnumerable<object[]> Replace_String_TestData()
         {
+            yield return new object[] { @"a", "bbbb", "c", RegexOptions.None, 4, 3, "bbbb" };
+            yield return new object[] { @"", "   ", "123", RegexOptions.None, 4, 0, "123 123 123 123" };
             yield return new object[] { @"[^ ]+\s(?<time>)", "08/10/99 16:00", "${time}", RegexOptions.None, 14, 0, "16:00" };
             yield return new object[] { "icrosoft", "MiCrOsOfT", "icrosoft", RegexOptions.IgnoreCase, 9, 0, "Microsoft" };
             yield return new object[] { "dog", "my dog has fleas", "CAT", RegexOptions.IgnoreCase, 16, 0, "my CAT has fleas" };
@@ -78,6 +80,8 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { @"(?<cat>cat)\s*(?<dog>dog)", "slkfjsdcat dogkljeah", "START${catTWO}dogcat${dogTWO}END", RegexOptions.None, 20, 0, "slkfjsdSTART${catTWO}dogcat${dogTWO}ENDkljeah" };
 
             // RightToLeft
+            yield return new object[] { @"a", "bbbb", "c", RegexOptions.RightToLeft, 4, 3, "bbbb" };
+            yield return new object[] { @"", "   ", "123", RegexOptions.RightToLeft, 4, 3, "123 123 123 123" };
             yield return new object[] { @"foo\s+", "0123456789foo4567890foo         ", "bar", RegexOptions.RightToLeft, 32, 32, "0123456789foo4567890bar" };
             yield return new object[] { @"\d", "0123456789foo4567890foo         ", "#", RegexOptions.RightToLeft, 17, 32, "##########foo#######foo         " };
             yield return new object[] { @"\d", "0123456789foo4567890foo         ", "#", RegexOptions.RightToLeft, 7, 32, "0123456789foo#######foo         " };
@@ -139,6 +143,7 @@ namespace System.Text.RegularExpressions.Tests
 
         public static IEnumerable<object[]> Replace_MatchEvaluator_TestData()
         {
+            yield return new object[] { "a", "bbbb", new MatchEvaluator(match => "uhoh"), RegexOptions.None, 4, 0, "bbbb" };
             yield return new object[] { "(Big|Small)", "Big mountain", new MatchEvaluator(MatchEvaluator1), RegexOptions.None, 12, 0, "Huge mountain" };
             yield return new object[] { "(Big|Small)", "Small village", new MatchEvaluator(MatchEvaluator1), RegexOptions.None, 13, 0, "Tiny village" };
 
@@ -162,6 +167,7 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { @"\u0915\u0930.*?\b", boldInput, new MatchEvaluator(MatchEvaluatorBold), RegexOptions.CultureInvariant | RegexOptions.Singleline, boldInput.Length, 0, boldExpected };
 
             // RighToLeft
+            yield return new object[] { "a", "bbbb", new MatchEvaluator(match => "uhoh"), RegexOptions.RightToLeft, 4, 3, "bbbb" };
             yield return new object[] { @"foo\s+", "0123456789foo4567890foo         ", new MatchEvaluator(MatchEvaluatorBar), RegexOptions.RightToLeft, 32, 32, "0123456789foo4567890bar" };
             yield return new object[] { @"\d", "0123456789foo4567890foo         ", new MatchEvaluator(MatchEvaluatorPoundSign), RegexOptions.RightToLeft, 17, 32, "##########foo#######foo         " };
             yield return new object[] { @"\d", "0123456789foo4567890foo         ", new MatchEvaluator(MatchEvaluatorPoundSign), RegexOptions.RightToLeft, 7, 32, "0123456789foo#######foo         " };
index c480f4baa8d99e0b93811a8125508e17dba17f51..52b4bd8a232656fd2e2b5b0301a3434d596130b2 100644 (file)
@@ -9,8 +9,11 @@ namespace System.Text.RegularExpressions.Tests
 {
     public class RegexSplitTests
     {
-        public static IEnumerable<object[]> Split_NonCompiled_TestData()
+        public static IEnumerable<object[]> Split_TestData()
         {
+            yield return new object[] { "", "", RegexOptions.None, 0, 0, new string[] { "", "" } };
+            yield return new object[] { "123", "abc", RegexOptions.None, 3, 0, new string[] { "abc" } };
+
             yield return new object[] { "    ", "word0    word1    word2    word3", RegexOptions.None, 32, 0, new string[] { "word0", "word1", "word2", "word3" } };
 
             yield return new object[] { ":", "kkk:lll:mmm:nnn:ooo", RegexOptions.None, 19, 0, new string[] { "kkk", "lll", "mmm", "nnn", "ooo" } };
@@ -27,6 +30,9 @@ namespace System.Text.RegularExpressions.Tests
             yield return new object[] { "a(?<dot1>.)c(.)e", "123abcde456aBCDe789", RegexOptions.IgnoreCase, 19, 0, new string[] { "123", "d", "b", "456", "D", "B", "789" } };
 
             // RightToLeft
+            yield return new object[] { "", "", RegexOptions.RightToLeft, 0, 0, new string[] { "", "" } };
+            yield return new object[] { "123", "abc", RegexOptions.RightToLeft, 3, 0, new string[] { "abc" } };
+
             yield return new object[] { "a(.)c(.)e", "123abcde456aBCDe789", RegexOptions.RightToLeft, 19, 19, new string[] { "123", "d", "b", "456aBCDe789" } };
             yield return new object[] { "a(.)c(.)e", "123abcde456aBCDe789", RegexOptions.RightToLeft | RegexOptions.IgnoreCase, 19, 19, new string[] { "123", "d", "b", "456", "D", "B", "789" } };
 
@@ -50,8 +56,8 @@ namespace System.Text.RegularExpressions.Tests
         }
 
         [Theory]
-        [MemberData(nameof(Split_NonCompiled_TestData))]
-        [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Split_NonCompiled_TestData), 2, MemberType = typeof(RegexCompilationHelper))]
+        [MemberData(nameof(Split_TestData))]
+        [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Split_TestData), 2, MemberType = typeof(RegexCompilationHelper))]
         public void Split(string pattern, string input, RegexOptions options, int count, int start, string[] expected)
         {
             bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, start);
index a13484f2ab340855f0e6245c23c7b14d7dc1626e..e1ef5ce3c681df133ae90b21d301c524bc5a502e 100644 (file)
@@ -343,6 +343,20 @@ namespace System.Text.RegularExpressions.Tests
             }
         }
 
+        [Theory]
+        [InlineData("ab", 1, false)]
+        [InlineData("a b", 1, true)]
+        [InlineData("a b", 2, true)]
+        [InlineData("\u200Da", 1, false)]
+        [InlineData("\u200D\u200C", 1, false)]
+        [InlineData("\u200Ca", 1, false)]
+        [InlineData("\u200C a", 1, true)]
+        public void IsBoundary_ReturnsExpectedResult(string text, int pos, bool expectedBoundary)
+        {
+            var r = new DerivedRunner(text);
+            Assert.Equal(expectedBoundary, r.IsBoundary(pos, 0, text.Length));
+        }
+
         private static HashSet<char> ComputeIncludedSet(Func<char, bool> func)
         {
             var included = new HashSet<char>();
@@ -371,6 +385,45 @@ namespace System.Text.RegularExpressions.Tests
             Assert.Throws<XunitException>(() => ValidateSet("[b]", RegexOptions.None, null, new HashSet<char>() { 'b' }, validateEveryChar: true));
         }
 
+        [Fact]
+        public void RegexRunner_Legacy_CharInSet()
+        {
+            Assert.True(DerivedRunner.CharInSet('a', "ab", ""));
+            Assert.False(DerivedRunner.CharInSet('x', "ab", ""));
+
+            Assert.True(DerivedRunner.CharInSet('x', "\0\0ab", ""));
+            Assert.False(DerivedRunner.CharInSet('a', "\0\0ab", ""));
+
+            Assert.True(DerivedRunner.CharInSet('4', "", "\x0009"));
+            Assert.False(DerivedRunner.CharInSet('a', "", "\x0009"));
+
+            Assert.True(DerivedRunner.CharInSet('4', "xz", "\x0009"));
+            Assert.True(DerivedRunner.CharInSet('a', "az", "\x0009"));
+            Assert.False(DerivedRunner.CharInSet('a', "xz", "\x0009"));
+        }
+
+        private sealed class DerivedRunner : RegexRunner
+        {
+            public DerivedRunner() { }
+
+            public DerivedRunner(string text)
+            {
+                runtext = text;
+                runtextbeg = 0;
+                runtextstart = 0;
+                runtextend = text.Length;
+                runtextpos = 0;
+            }
+
+            public new bool IsBoundary(int index, int startpos, int endpos) => base.IsBoundary(index, startpos, endpos);
+
+            public static new bool CharInSet(char ch, string set, string category) => RegexRunner.CharInSet(ch, set, category);
+
+            protected override bool FindFirstChar() => throw new NotImplementedException();
+            protected override void Go() => throw new NotImplementedException();
+            protected override void InitTrackCount() => throw new NotImplementedException();
+        }
+
         private static void ValidateSet(string regex, RegexOptions options, HashSet<char> included, HashSet<char> excluded, bool validateEveryChar = false)
         {
             Assert.True((included != null) ^ (excluded != null));
index e16f2d37855635c1ad2d6306fb7ce3cf1553be0d..a7f11978c719d3377b152b1c0d04ad5529d08961 100644 (file)
@@ -349,6 +349,7 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData(@"[a-[b]] ", RegexOptions.None, null)]
         [InlineData(@"[a-b-[c]] ", RegexOptions.None, null)]
         [InlineData(@"[a-[b]-c] ", RegexOptions.None, RegexParseError.SubtractionMustBeLast)]
+        [InlineData(@"[a-z-[b]12]", RegexOptions.None, RegexParseError.SubtractionMustBeLast)]
         [InlineData(@"[[a]-b] ", RegexOptions.None, null)]
         [InlineData(@"[[a]-[b]] ", RegexOptions.None, null)]
         [InlineData(@"[\w-a] ", RegexOptions.None, null)]
@@ -587,6 +588,8 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData(@"(?imn", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
         [InlineData(@"(?'cat'", RegexOptions.None, RegexParseError.NotEnoughParentheses)]
         [InlineData(@"(?'", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
+        [InlineData(@"(?'=)", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
+        [InlineData(@"(?'!)", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
         [InlineData(@"[^", RegexOptions.None, RegexParseError.UnterminatedBracket)]
         [InlineData(@"[cat", RegexOptions.None, RegexParseError.UnterminatedBracket)]
         [InlineData(@"[^cat", RegexOptions.None, RegexParseError.UnterminatedBracket)]
index a70d9d1dbc8c5303b9199897fca74d6b1a7f0aab..a4fd8028ac1ec6c459d6f82a3926cb1c9ed06ed2 100644 (file)
@@ -172,6 +172,23 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("[^a]{1,3}[^a]+", "[^a]{2,}")]
         [InlineData("[^a]{1,3}[^a]?", "[^a]{1,4}")]
         [InlineData("[^a]{1,3}[^a]{1,3}", "[^a]{2,6}")]
+        // Two lazy notone loops
+        [InlineData("[^a]*?[^a]*?", "[^a]*?")]
+        [InlineData("[^a]*?[^a]+?", "[^a]+?")]
+        [InlineData("[^a]*?[^a]??", "[^a]*?")]
+        [InlineData("[^a]*?[^a]{1,3}?", "[^a]+?")]
+        [InlineData("[^a]+?[^a]*?", "[^a]+?")]
+        [InlineData("[^a]+?[^a]+?", "[^a]{2,}?")]
+        [InlineData("[^a]+?[^a]??", "[^a]+?")]
+        [InlineData("[^a]+?[^a]{1,3}?", "[^a]{2,}?")]
+        [InlineData("[^a]??[^a]*?", "[^a]*?")]
+        [InlineData("[^a]??[^a]+?", "[^a]+?")]
+        [InlineData("[^a]??[^a]??", "[^a]{0,2}?")]
+        [InlineData("[^a]??[^a]{1,3}?", "[^a]{1,4}?")]
+        [InlineData("[^a]{1,3}?[^a]*?", "[^a]+?")]
+        [InlineData("[^a]{1,3}?[^a]+?", "[^a]{2,}?")]
+        [InlineData("[^a]{1,3}?[^a]??", "[^a]{1,4}?")]
+        [InlineData("[^a]{1,3}?[^a]{1,3}?", "[^a]{2,6}?")]
         // Two atomic notone loops
         [InlineData("(?>[^a]*)(?>[^a]*)", "(?>[^a]*)")]
         [InlineData("(?>[^a]*)(?>[^a]+)", "(?>[^a]+)")]
@@ -189,7 +206,7 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("(?>[^a]{1,3})(?>[^a]+)", "(?>[^a]{2,})")]
         [InlineData("(?>[^a]{1,3})(?>[^a]?)", "(?>[^a]{1,4})")]
         [InlineData("(?>[^a]{1,3})(?>[^a]{1,3})", "(?>[^a]{2,6})")]
-        // Greedy notone loop and one
+        // Greedy notone loop and notone
         [InlineData("[^a]*[^a]", "[^a]+")]
         [InlineData("[^a]+[^a]", "[^a]{2,}")]
         [InlineData("[^a]?[^a]", "[^a]{1,2}")]
@@ -198,7 +215,16 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("[^a][^a]+", "[^a]{2,}")]
         [InlineData("[^a][^a]?", "[^a]{1,2}")]
         [InlineData("[^a][^a]{1,3}", "[^a]{2,4}")]
-        // Atomic notone loop and one
+        // Lazy notone loop and notone
+        [InlineData("[^a]*?[^a]", "[^a]+?")]
+        [InlineData("[^a]+?[^a]", "[^a]{2,}?")]
+        [InlineData("[^a]??[^a]", "[^a]{1,2}?")]
+        [InlineData("[^a]{1,3}?[^a]", "[^a]{2,4}?")]
+        [InlineData("[^a][^a]*?", "[^a]+?")]
+        [InlineData("[^a][^a]+?", "[^a]{2,}?")]
+        [InlineData("[^a][^a]??", "[^a]{1,2}?")]
+        [InlineData("[^a][^a]{1,3}?", "[^a]{2,4}?")]
+        // Atomic notone loop and notone
         [InlineData("(?>[^a]*)[^a]", "(?>[^a]+)")]
         [InlineData("(?>[^a]+)[^a]", "(?>[^a]{2,})")]
         [InlineData("(?>[^a]?)[^a]", "(?>[^a]{1,2})")]
@@ -261,6 +287,23 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("[0-9]{1,3}?[0-9]+?", "[0-9]{2,}?")]
         [InlineData("[0-9]{1,3}?[0-9]??", "[0-9]{1,4}?")]
         [InlineData("[0-9]{1,3}?[0-9]{1,3}?", "[0-9]{2,6}?")]
+        // Two atomic set loops
+        [InlineData("(?>[0-9]*)(?>[0-9]*)", "(?>[0-9]*)")]
+        [InlineData("(?>[0-9]*)(?>[0-9]+)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]*)(?>[0-9]?)", "(?>[0-9]*)")]
+        [InlineData("(?>[0-9]*)(?>[0-9]{1,3})", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]+)(?>[0-9]*)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]+)(?>[0-9]+)", "(?>[0-9]{2,})")]
+        [InlineData("(?>[0-9]+)(?>[0-9]?)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]+)(?>[0-9]{1,3})", "(?>[0-9]{2,})")]
+        [InlineData("(?>[0-9]?)(?>[0-9]*)", "(?>[0-9]*)")]
+        [InlineData("(?>[0-9]?)(?>[0-9]+)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]?)(?>[0-9]?)", "(?>[0-9]{0,2})")]
+        [InlineData("(?>[0-9]?)(?>[0-9]{1,3})", "(?>[0-9]{1,4})")]
+        [InlineData("(?>[0-9]{1,3})(?>[0-9]*)", "(?>[0-9]+)")]
+        [InlineData("(?>[0-9]{1,3})(?>[0-9]+)", "(?>[0-9]{2,})")]
+        [InlineData("(?>[0-9]{1,3})(?>[0-9]?)", "(?>[0-9]{1,4})")]
+        [InlineData("(?>[0-9]{1,3})(?>[0-9]{1,3})", "(?>[0-9]{2,6})")]
         // Lazy set loop and set
         [InlineData("[0-9]*?[0-9]", "[0-9]+?")]
         [InlineData("[0-9]+?[0-9]", "[0-9]{2,}?")]
@@ -375,10 +418,6 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("a*a*?", "a*")]
         [InlineData("a*?a*", "a*")]
         [InlineData("a*[^a]*", "a*")]
-        [InlineData("[ab]*[^a]", "(?>[ab]*)[^a]")]
-        [InlineData("[ab]*[^a]*", "(?>[ab]*)[^a]*")]
-        [InlineData("[ab]*[^a]*?", "(?>[ab]*)[^a]*?")]
-        [InlineData("[ab]*(?>[^a]*)", "(?>[ab]*)(?>[^a]*)")]
         [InlineData("[^a]*a*", "a*")]
         [InlineData("a{2147483646}a", "a{2147483647}")]
         [InlineData("a{2147483647}a", "a{2147483647}")]
@@ -407,9 +446,18 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("abc(?:(?i:e)|f)", "abc[ef]")]
         // Not applying auto-atomicity
         [InlineData("a*b*", "(?>a*)b*")]
+        [InlineData("[ab]*[^a]", "(?>[ab]*)[^a]")]
+        [InlineData("[ab]*[^a]*", "(?>[ab]*)[^a]*")]
+        [InlineData("[ab]*[^a]*?", "(?>[ab]*)[^a]*?")]
+        [InlineData("[ab]*(?>[^a]*)", "(?>[ab]*)(?>[^a]*)")]
         [InlineData("[^\n]*\n*", "(?>[^\n]*)\n")]
         [InlineData("(a[bcd]a*)*fg", "(a[bcd](?>a*))*fg")]
         [InlineData("(\\w[bcd]\\d*)*fg", "(\\w[bcd](?>\\d*))*fg")]
+        [InlineData("a*(?<=[^a])b", "(?>a*)(?<=[^a])b")]
+        [InlineData("[\x0000-\xFFFF]*[a-z]", "(?>[\x0000-\xFFFF]*)[a-z]")]
+        [InlineData("[a-z]*[\x0000-\xFFFF]+", "(?>[a-z]*)[\x0000-\xFFFF]+")]
+        [InlineData("[^a-c]*[e-g]", "(?>[^a-c]*)[e-g]")]
+        [InlineData("[^a-c]*[^e-g]", "(?>[^a-c]*)[^e-g]")]
         public void PatternsReduceDifferently(string pattern1, string pattern2)
         {
             var r1 = new Regex(pattern1);
@@ -443,6 +491,10 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData(@"a*a*a*a*a*a*a*b*", 0)]
         [InlineData(@"((a{1,2}){4}){3,7}", 12)]
         [InlineData(@"\b\w{4}\b", 4)]
+        [InlineData(@"abcd(?=efgh)efgh", 8)]
+        [InlineData(@"abcd(?<=cd)efgh", 8)]
+        [InlineData(@"abcd(?!ab)efgh", 8)]
+        [InlineData(@"abcd(?<!ef)efgh", 8)]
         // we stop computing after a certain depth; if that logic changes in the future, these tests can be updated
         [InlineData(@"((((((((((((((((((((((((((((((ab|cd+)|ef+)|gh+)|ij+)|kl+)|mn+)|op+)|qr+)|st+)|uv+)|wx+)|yz+)|01+)|23+)|45+)|67+)|89+)|AB+)|CD+)|EF+)|GH+)|IJ+)|KL+)|MN+)|OP+)|QR+)|ST+)|UV+)|WX+)|YZ)", 0)]
         [InlineData(@"(YZ+|(WX+|(UV+|(ST+|(QR+|(OP+|(MN+|(KL+|(IJ+|(GH+|(EF+|(CD+|(AB+|(89+|(67+|(45+|(23+|(01+|(yz+|(wx+|(uv+|(st+|(qr+|(op+|(mn+|(kl+|(ij+|(gh+|(ef+|(de+|(a|bc+)))))))))))))))))))))))))))))))", 0)]