From 83016f3bf2b5c4331bec431ef19a72064552779d Mon Sep 17 00:00:00 2001 From: Stephen Toub Date: Wed, 11 Mar 2020 17:28:02 -0400 Subject: [PATCH] Add more regex tests (and fix one atomicity bug) (#33458) * Avoid automatically making Regex loops followed by a lookbehind atomic We allow a positive lookahead to be used to determine whether a loop can be upgraded to be atomic, but we're currently missing the RTL check that distinguishes between positive lookaheads and positive lookbehinds, so we're currently erroneously making some loops followed by positive lookbehinds atomic when we shouldn't. Fix that just by ensuring we only traverse Require nodes when they're lookahead rather than behind. Also, just for additional safety, change a subsequent check to ensure that the two nodes being compared have identical options. Today we're just checking for case-sensitivity, but it's more robust (and doesn't hurt) to just check all options. * Add more tests to boost code coverage Plus lookaround tests for min length computation --- .../System/Text/RegularExpressions/RegexNode.cs | 8 +-- .../tests/GroupCollectionTests.cs | 54 ++++++++++++++ .../tests/Regex.CompileToAssembly.Tests.cs | 49 ++++++++++++- .../tests/Regex.Ctor.Tests.cs | 84 ++++++++++++++++++++++ .../tests/Regex.EscapeUnescape.Tests.cs | 13 ++++ .../tests/Regex.Groups.Tests.cs | 2 +- .../tests/Regex.Match.Tests.cs | 31 ++++++-- .../tests/Regex.Replace.Tests.cs | 6 ++ .../tests/Regex.Split.Tests.cs | 12 +++- .../tests/RegexCharacterSetTests.cs | 53 ++++++++++++++ .../tests/RegexParserTests.cs | 3 + .../tests/RegexReductionTests.cs | 64 +++++++++++++++-- 12 files changed, 357 insertions(+), 22 deletions(-) diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs index 8ba5abd..64d7052 100644 --- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs +++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs @@ -1455,7 +1455,7 @@ namespace System.Text.RegularExpressions case Concatenate: case Capture: case Atomic: - case Require: + case Require when (subsequent.Options & RegexOptions.RightToLeft) == 0: // only lookaheads, not lookbehinds (represented as RTL Require nodes) case Loop when subsequent.M > 0: case Lazyloop when subsequent.M > 0: subsequent = subsequent.Child(0); @@ -1465,10 +1465,8 @@ namespace System.Text.RegularExpressions break; } - // If the two nodes don't agree on case-insensitivity, don't try to optimize. - // If they're both case sensitive or both case insensitive, then their tokens - // will be comparable. - if ((node.Options & RegexOptions.IgnoreCase) != (subsequent.Options & RegexOptions.IgnoreCase)) + // If the two nodes don't agree on options in any way, don't try to optimize them. + if (node.Options != subsequent.Options) { return false; } diff --git a/src/libraries/System.Text.RegularExpressions/tests/GroupCollectionTests.cs b/src/libraries/System.Text.RegularExpressions/tests/GroupCollectionTests.cs index 638fc4a..99ab75e 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/GroupCollectionTests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/GroupCollectionTests.cs @@ -3,6 +3,7 @@ // See the LICENSE file in the project root for more information. using System.Collections; +using System.Collections.Generic; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -32,10 +33,34 @@ namespace System.Text.RegularExpressions.Tests } [Fact] + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")] + public static void GetEnumerator_Generic() + { + Regex regex = new Regex(@"(?a*)(?b*)(?c*)"); + Match match = regex.Match("aaabbccccccccccaaaabc"); + + GroupCollection groups = match.Groups; + IEnumerator> enumerator = ((IEnumerable>)groups).GetEnumerator(); + for (int i = 0; i < 2; i++) + { + int counter = 0; + while (enumerator.MoveNext()) + { + Assert.Equal(groups[counter], enumerator.Current.Value); + counter++; + } + Assert.False(enumerator.MoveNext()); + Assert.Equal(groups.Count, counter); + enumerator.Reset(); + } + } + + [Fact] public static void GetEnumerator_Invalid() { Regex regex = new Regex(@"(?a*)(?b*)(?c*)"); Match match = regex.Match("aaabbccccccccccaaaabc"); + IEnumerator enumerator = match.Groups.GetEnumerator(); Assert.Throws(() => enumerator.Current); @@ -48,6 +73,24 @@ namespace System.Text.RegularExpressions.Tests } [Fact] + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")] + public static void GetEnumerator_Generic_Invalid() + { + Regex regex = new Regex(@"(?a*)(?b*)(?c*)"); + Match match = regex.Match("aaabbccccccccccaaaabc"); + + IEnumerator> enumerator = ((IEnumerable>)match.Groups).GetEnumerator(); + + Assert.Throws(() => enumerator.Current); + + while (enumerator.MoveNext()) ; + Assert.Throws(() => enumerator.Current); + + enumerator.Reset(); + Assert.Throws(() => enumerator.Current); + } + + [Fact] public static void Item_Get() { GroupCollection collection = CreateCollection(); @@ -56,6 +99,17 @@ namespace System.Text.RegularExpressions.Tests Assert.Equal("555-6666", collection[2].ToString()); } + [Fact] + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")] + public static void ContainsKey() + { + IReadOnlyDictionary collection = (IReadOnlyDictionary)CreateCollection(); + Assert.True(collection.ContainsKey("0")); + Assert.True(collection.ContainsKey("1")); + Assert.True(collection.ContainsKey("2")); + Assert.False(collection.ContainsKey("3")); + } + [Theory] [InlineData(-1)] [InlineData(4)] diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.CompileToAssembly.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.CompileToAssembly.Tests.cs index 3a97133..653fefa 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.CompileToAssembly.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.CompileToAssembly.Tests.cs @@ -5,6 +5,7 @@ using System.IO; using System.Linq; using System.Reflection; +using System.Reflection.Emit; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -21,6 +22,19 @@ namespace System.Text.RegularExpressions.Tests AssertExtensions.Throws("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd"))); AssertExtensions.Throws("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd"), null)); AssertExtensions.Throws("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd"), null, null)); + + // We currently build more code for CompileToAssembly into debug builds, which changes this particular exception type based on Debug vs Release. + // Until that changes, for the tests just allow them both. + AssertThrows(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { null }, new AssemblyName("abcd"))); + AssertThrows(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { new RegexCompilationInfo("abc", RegexOptions.None, "abc", "", true), null }, new AssemblyName("abcd"))); + AssertThrows(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { null }, new AssemblyName("abcd"), new CustomAttributeBuilder[0])); + + static void AssertThrows(Action action) + { + Exception e = Record.Exception(action); + Assert.NotNull(e); + Assert.True(e is TException1 || e is TException2); + } } [Fact] @@ -29,8 +43,41 @@ namespace System.Text.RegularExpressions.Tests { Assert.Throws(() => Regex.CompileToAssembly( - new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) }, + new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true) }, new AssemblyName("abcd"))); + + Assert.Throws(() => + Regex.CompileToAssembly( + new[] { new RegexCompilationInfo("abcd", RegexOptions.CultureInvariant, "abcd", "", true, TimeSpan.FromMinutes(1)) }, + new AssemblyName("abcdWithTimeout"))); + + Assert.Throws(() => + Regex.CompileToAssembly( + new[] { new RegexCompilationInfo("(?ab)cd", RegexOptions.None, "abcd", "", true, TimeSpan.FromMinutes(1)) }, + new AssemblyName("abcdWithNamedCapture"))); + + Assert.Throws(() => + Regex.CompileToAssembly( + new[] { new RegexCompilationInfo(".*\\B(\\d+)(?SUCCESS)\\B.*", RegexOptions.None, "withCaptures", "", true) }, + new AssemblyName("withCaptures"))); + + Assert.Throws(() => + Regex.CompileToAssembly( + new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) }, + new AssemblyName("abcdWithCustomAttribute"), + new[] { new CustomAttributeBuilder(typeof(AssemblyCompanyAttribute).GetConstructor(new[] { typeof(string) }), new[] { "TestCompany" }) })); + } + + [Fact] + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] + public void CompileToAssembly_ResourceFile_PNSE() + { + Assert.Throws(() => + Regex.CompileToAssembly( + new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) }, + new AssemblyName("abcdWithUnsupportedResourceFile"), + attributes: null, + "unsupportedResourceFile")); } [Fact] diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs index 68f4c47..aeab31d 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Ctor.Tests.cs @@ -2,9 +2,14 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Collections; using System.Collections.Generic; using System.Diagnostics; +using System.IO; +using System.Linq; using System.Runtime.InteropServices; +using System.Runtime.Serialization; +using System.Runtime.Serialization.Formatters.Binary; using System.Threading; using Microsoft.DotNet.RemoteExecutor; using Xunit; @@ -61,10 +66,17 @@ namespace System.Text.RegularExpressions.Tests r = new Regex("[abc]def(ghi|jkl)", options | (RegexOptions)0x80 /*RegexOptions.Debug*/); Assert.False(r.Match("a").Success); Assert.True(r.Match("adefghi").Success); + Assert.Equal("123456789", r.Replace("123adefghi789", "456")); r = new Regex("(ghi|jkl)*ghi", options | (RegexOptions)0x80 /*RegexOptions.Debug*/); Assert.False(r.Match("jkl").Success); Assert.True(r.Match("ghi").Success); + Assert.Equal("123456789", r.Replace("123ghi789", "456")); + + r = new Regex("(ghi|jkl)*ghi", options | (RegexOptions)0x80 /*RegexOptions.Debug*/, TimeSpan.FromDays(1)); + Assert.False(r.Match("jkl").Success); + Assert.True(r.Match("ghi").Success); + Assert.Equal("123456789", r.Replace("123ghi789", "456")); } [Fact] @@ -121,9 +133,81 @@ namespace System.Text.RegularExpressions.Tests Assert.Throws(() => r.InitializeReferences()); } + [Fact] + public void Ctor_CapNames_ReturnsDefaultValues() + { + var r = new DerivedRegex(@"(?\w*)"); + + Assert.Null(r.Caps); + + IDictionary capNames = r.CapNames; + Assert.NotNull(capNames); + Assert.Same(capNames, r.CapNames); + Assert.True(capNames.Contains("Name")); + + AssertExtensions.Throws("value", () => r.Caps = null); + AssertExtensions.Throws("value", () => r.CapNames = null); + + r.Caps = new Dictionary(); + Assert.IsType(r.Caps); + + r.CapNames = new Dictionary(); + Assert.IsType(r.CapNames); + + var newHashtable = new Hashtable(); + + r.CapNames = newHashtable; + Assert.Same(newHashtable, r.CapNames); + + r.Caps = newHashtable; + Assert.Same(newHashtable, r.Caps); + } + private sealed class DerivedRegex : Regex { + public DerivedRegex() { } + public DerivedRegex(string pattern) : base(pattern) { } + public new void InitializeReferences() => base.InitializeReferences(); + + public new IDictionary Caps { get => base.Caps; set => base.Caps = value; } + public new IDictionary CapNames { get => base.CapNames; set => base.CapNames = value; } + } + + [Fact] + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] + public void Serialization_ThrowsNotSupported() + { + var r = new SerializableDerivedRegex(); + Assert.Throws(() => new SerializableDerivedRegex(default, default)); + Assert.Throws(() => ((ISerializable)r).GetObjectData(default, default)); + } + + [Serializable] + private sealed class SerializableDerivedRegex : Regex + { + public SerializableDerivedRegex() : base("") { } + public SerializableDerivedRegex(SerializationInfo info, StreamingContext context) : base(info, context) { } + } + + [Fact] + [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)] + public void Ctor_PatternInName() + { + RemoteExecutor.Invoke(() => + { + // Just make sure setting the environment variable doesn't cause problems. + Environment.SetEnvironmentVariable("DOTNET_SYSTEM_TEXT_REGULAREXPRESSIONS_PATTERNINNAME", "1"); + + // Short pattern + var r = new Regex("abc", RegexOptions.Compiled); + Assert.True(r.IsMatch("123abc456")); + + // Long pattern + string pattern = string.Concat(Enumerable.Repeat("1234567890", 20)); + r = new Regex(pattern, RegexOptions.Compiled); + Assert.True(r.IsMatch("abc" + pattern + "abc")); + }).Dispose(); } } } diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.EscapeUnescape.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.EscapeUnescape.Tests.cs index a8e9459..3b3521b 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.EscapeUnescape.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.EscapeUnescape.Tests.cs @@ -2,6 +2,7 @@ // The .NET Foundation licenses this file to you under the MIT license. // See the LICENSE file in the project root for more information. +using System.Linq; using Xunit; namespace System.Text.RegularExpressions.Tests @@ -17,6 +18,12 @@ namespace System.Text.RegularExpressions.Tests public static void Escape(string str, string expected) { Assert.Equal(expected, Regex.Escape(str)); + + if (expected.Length > 0) + { + const int Count = 100; + Assert.Equal(string.Concat(Enumerable.Repeat(expected, Count)), Regex.Escape(string.Concat(Enumerable.Repeat(str, Count)))); + } } [Fact] @@ -35,6 +42,12 @@ namespace System.Text.RegularExpressions.Tests public void Unescape(string str, string expected) { Assert.Equal(expected, Regex.Unescape(str)); + + if (expected.Length > 0) + { + const int Count = 100; + Assert.Equal(string.Concat(Enumerable.Repeat(expected, Count)), Regex.Unescape(string.Concat(Enumerable.Repeat(str, Count)))); + } } [Fact] diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs index 2c31d51..1a028ad 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs @@ -734,9 +734,9 @@ namespace System.Text.RegularExpressions.Tests yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}){2}", "acdfbcdfacefbcefbcefbcdfacdef", RegexOptions.None, new string[] { "acdfbcdfacefbcefbcefbcdf" } }; yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}hello){2}", "aaaaaacdfbcdfacefhellobcefbcefbcdfhellooooo", RegexOptions.None, new string[] { "acdfbcdfacefhellobcefbcefbcdfhello" } }; yield return new object[] { null, @"CN=(.*[^,]+).*", "CN=localhost", RegexOptions.Singleline, new string[] { "CN=localhost", "localhost" } }; - // Nested atomic yield return new object[] { null, @"(?>abc[def]gh(i*))", "123abceghiii456", RegexOptions.None, new string[] { "abceghiii", "iii" } }; + yield return new object[] { null, @"(?>(?:abc)*)", "abcabcabc", RegexOptions.None, new string[] { "abcabcabc" } }; // Anchoring loops beginning with .* / .+ yield return new object[] { null, @".*", "", RegexOptions.None, new string[] { "" } }; diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs index 7ccfb3b..ff01ae5 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs @@ -172,6 +172,7 @@ namespace System.Text.RegularExpressions.Tests yield return new object[] { @"\s+\d+", "sdf 12sad", RegexOptions.RightToLeft, 0, 9, true, " 12" }; yield return new object[] { @"\s+\d+", " asdf12 ", RegexOptions.RightToLeft, 0, 6, false, string.Empty }; yield return new object[] { "aaa", "aaabbb", RegexOptions.None, 3, 3, false, string.Empty }; + yield return new object[] { "abc|def", "123def456", RegexOptions.RightToLeft | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 9, true, "def" }; yield return new object[] { @"foo\d+", "0123456789foo4567890foo ", RegexOptions.RightToLeft, 10, 3, false, string.Empty }; yield return new object[] { @"foo\d+", "0123456789foo4567890foo ", RegexOptions.RightToLeft, 11, 21, false, string.Empty }; @@ -383,6 +384,7 @@ namespace System.Text.RegularExpressions.Tests VerifyMatch(r.Match(input), expectedSuccess, expectedValue); VerifyMatch(Regex.Match(input, pattern, options), expectedSuccess, expectedValue); + Assert.Equal(expectedSuccess, r.IsMatch(input)); Assert.Equal(expectedSuccess, Regex.IsMatch(input, pattern, options)); } @@ -462,17 +464,34 @@ namespace System.Text.RegularExpressions.Tests Assert.Equal("a", match.Value); } - [Fact] - public void Match_Timeout_Throws() + [Theory] + [InlineData(RegexOptions.None)] + [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)] + [InlineData(RegexOptions.Compiled)] + [InlineData(RegexOptions.Compiled | (RegexOptions)0x80 /* Debug */)] + public void Match_Timeout_Throws(RegexOptions options) + { + const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$"; + string input = new string('a', 50) + "@a.a"; + + Assert.Throws(() => new Regex(Pattern, options, TimeSpan.FromMilliseconds(100)).Match(input)); + } + + [Theory] + [InlineData(RegexOptions.None)] + [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)] + [InlineData(RegexOptions.Compiled)] + [InlineData(RegexOptions.Compiled | (RegexOptions)0x80 /* Debug */)] + public void Match_DefaultTimeout_Throws(RegexOptions options) { - RemoteExecutor.Invoke(() => + RemoteExecutor.Invoke(optionsString => { const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$"; string input = new string('a', 50) + "@a.a"; AppDomain.CurrentDomain.SetData(RegexHelpers.DefaultMatchTimeout_ConfigKeyName, TimeSpan.FromMilliseconds(100)); - Assert.Throws(() => new Regex(Pattern).Match(input)); - }).Dispose(); + Assert.Throws(() => new Regex(Pattern, (RegexOptions)int.Parse(optionsString, CultureInfo.InvariantCulture)).Match(input)); + }, ((int)options).ToString(CultureInfo.InvariantCulture)).Dispose(); } // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions. @@ -492,8 +511,8 @@ namespace System.Text.RegularExpressions.Tests // On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions. [OuterLoop("Can take several seconds")] [ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))] - [InlineData(RegexOptions.Compiled)] [InlineData(RegexOptions.None)] + [InlineData(RegexOptions.Compiled)] public void Match_Timeout_Repetition_Throws(RegexOptions options) { int repetitionCount = 800_000_000; diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs index a97ec74..e6ebb46 100644 --- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs +++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Replace.Tests.cs @@ -12,6 +12,8 @@ namespace System.Text.RegularExpressions.Tests { public static IEnumerable Replace_String_TestData() { + yield return new object[] { @"a", "bbbb", "c", RegexOptions.None, 4, 3, "bbbb" }; + yield return new object[] { @"", " ", "123", RegexOptions.None, 4, 0, "123 123 123 123" }; yield return new object[] { @"[^ ]+\s(?