case Concatenate:
case Capture:
case Atomic:
- case Require:
+ case Require when (subsequent.Options & RegexOptions.RightToLeft) == 0: // only lookaheads, not lookbehinds (represented as RTL Require nodes)
case Loop when subsequent.M > 0:
case Lazyloop when subsequent.M > 0:
subsequent = subsequent.Child(0);
- // If the two nodes don't agree on case-insensitivity, don't try to optimize.
- // If they're both case sensitive or both case insensitive, then their tokens
- // will be comparable.
- if ((node.Options & RegexOptions.IgnoreCase) != (subsequent.Options & RegexOptions.IgnoreCase))
+ // If the two nodes don't agree on options in any way, don't try to optimize them.
+ if (node.Options != subsequent.Options)
return false;
// See the LICENSE file in the project root for more information.
using System.Collections;
+using System.Collections.Generic;
using Xunit;
namespace System.Text.RegularExpressions.Tests
+        // Walks the groups of a match through the generic IEnumerable<KeyValuePair<string, Group>> view.
+        // The walk is done twice, with Reset() in between, and each pass must yield every group in index
+        // order and then report that it is exhausted.
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")]
+        public static void GetEnumerator_Generic()
+        {
+            Match match = new Regex(@"(?<A1>a*)(?<A2>b*)(?<A3>c*)").Match("aaabbccccccccccaaaabc");
+            GroupCollection groups = match.Groups;
+            var pairs = (IEnumerable<KeyValuePair<string, Group>>)groups;
+            IEnumerator<KeyValuePair<string, Group>> enumerator = pairs.GetEnumerator();
+
+            for (int pass = 0; pass < 2; pass++)
+            {
+                // The value of the index-th pair must be the group stored at that index.
+                int index = 0;
+                for (; enumerator.MoveNext(); index++)
+                {
+                    Assert.Equal(groups[index], enumerator.Current.Value);
+                }
+
+                // Once exhausted the enumerator stays exhausted, and it must have visited every group.
+                Assert.False(enumerator.MoveNext());
+                Assert.Equal(groups.Count, index);
+
+                // Rewind so the second pass starts from the beginning again.
+                enumerator.Reset();
+            }
+        }
public static void GetEnumerator_Invalid()
Regex regex = new Regex(@"(?<A1>a*)(?<A2>b*)(?<A3>c*)");
Match match = regex.Match("aaabbccccccccccaaaabc");
IEnumerator enumerator = match.Groups.GetEnumerator();
Assert.Throws<InvalidOperationException>(() => enumerator.Current);
Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+        // Reading Current from the generic enumerator must throw InvalidOperationException whenever the
+        // enumerator is not positioned on an element: before the first MoveNext(), after running off the
+        // end, and again after Reset().
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")]
+        public static void GetEnumerator_Generic_Invalid()
+        {
+            Match match = new Regex(@"(?<A1>a*)(?<A2>b*)(?<A3>c*)").Match("aaabbccccccccccaaaabc");
+            var pairs = (IEnumerable<KeyValuePair<string, Group>>)match.Groups;
+            IEnumerator<KeyValuePair<string, Group>> enumerator = pairs.GetEnumerator();
+
+            // Not started yet.
+            Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+
+            // Drain the enumerator so it sits past the last element.
+            while (enumerator.MoveNext())
+            {
+            }
+            Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+
+            // Rewound to before the first element.
+            enumerator.Reset();
+            Assert.Throws<InvalidOperationException>(() => enumerator.Current);
+        }
public static void Item_Get()
Assert.Equal("555-6666", collection[2].ToString());
+        // Through its IReadOnlyDictionary<string, Group> view, the collection built by CreateCollection()
+        // must report the keys "0", "1" and "2" as present and "3" as absent.
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Interface not implemented on .NET Framework")]
+        public static void ContainsKey()
+        {
+            var collection = (IReadOnlyDictionary<string, Group>)CreateCollection();
+
+            foreach (string presentKey in new[] { "0", "1", "2" })
+            {
+                Assert.True(collection.ContainsKey(presentKey));
+            }
+
+            Assert.False(collection.ContainsKey("3"));
+        }
using System.IO;
using System.Linq;
using System.Reflection;
+using System.Reflection.Emit;
using Xunit;
namespace System.Text.RegularExpressions.Tests
AssertExtensions.Throws<ArgumentNullException>("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd")));
AssertExtensions.Throws<ArgumentNullException>("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd"), null));
AssertExtensions.Throws<ArgumentNullException>("regexinfos", () => Regex.CompileToAssembly(null, new AssemblyName("abcd"), null, null));
+ // We currently build more code for CompileToAssembly into debug builds, which changes this particular exception type based on Debug vs Release.
+ // Until that changes, for the tests just allow them both.
+ AssertThrows<PlatformNotSupportedException, ArgumentNullException>(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { null }, new AssemblyName("abcd")));
+ AssertThrows<PlatformNotSupportedException, ArgumentNullException>(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { new RegexCompilationInfo("abc", RegexOptions.None, "abc", "", true), null }, new AssemblyName("abcd")));
+ AssertThrows<PlatformNotSupportedException, ArgumentNullException>(() => Regex.CompileToAssembly(new RegexCompilationInfo[] { null }, new AssemblyName("abcd"), new CustomAttributeBuilder[0]));
+            // Runs <action> and passes only when it throws an exception that is a TException1 or a
+            // TException2 (derived types are accepted because the check uses `is`).
+            // Fails when nothing is thrown or when some other exception type is thrown; in the latter
+            // case the failure message names the expected and the actual exception so the test log
+            // is actionable.
+            static void AssertThrows<TException1, TException2>(Action action)
+            {
+                Exception e = Record.Exception(action);
+                Assert.NotNull(e);
+                Assert.True(
+                    e is TException1 || e is TException2,
+                    $"Expected {typeof(TException1)} or {typeof(TException2)}, but got {e.GetType()}: {e.Message}");
+            }
Assert.Throws<PlatformNotSupportedException>(() =>
- new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) },
+ new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "SomeNamespace", true) },
new AssemblyName("abcd")));
+ Assert.Throws<PlatformNotSupportedException>(() =>
+ Regex.CompileToAssembly(
+ new[] { new RegexCompilationInfo("abcd", RegexOptions.CultureInvariant, "abcd", "", true, TimeSpan.FromMinutes(1)) },
+ new AssemblyName("abcdWithTimeout")));
+ Assert.Throws<PlatformNotSupportedException>(() =>
+ Regex.CompileToAssembly(
+ new[] { new RegexCompilationInfo("(?<FirstTwoLetters>ab)cd", RegexOptions.None, "abcd", "", true, TimeSpan.FromMinutes(1)) },
+ new AssemblyName("abcdWithNamedCapture")));
+ Assert.Throws<PlatformNotSupportedException>(() =>
+ Regex.CompileToAssembly(
+ new[] { new RegexCompilationInfo(".*\\B(\\d+)(?<output>SUCCESS)\\B.*", RegexOptions.None, "withCaptures", "", true) },
+ new AssemblyName("withCaptures")));
+ Assert.Throws<PlatformNotSupportedException>(() =>
+ Regex.CompileToAssembly(
+ new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) },
+ new AssemblyName("abcdWithCustomAttribute"),
+ new[] { new CustomAttributeBuilder(typeof(AssemblyCompanyAttribute).GetConstructor(new[] { typeof(string) }), new[] { "TestCompany" }) }));
+ }
+ [Fact]
+ [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
+ public void CompileToAssembly_ResourceFile_PNSE()
+ {
+ Assert.Throws<PlatformNotSupportedException>(() =>
+ Regex.CompileToAssembly(
+ new[] { new RegexCompilationInfo("abcd", RegexOptions.None, "abcd", "", true) },
+ new AssemblyName("abcdWithUnsupportedResourceFile"),
+ attributes: null,
+ "unsupportedResourceFile"));
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
+using System.IO;
+using System.Linq;
using System.Runtime.InteropServices;
+using System.Runtime.Serialization;
+using System.Runtime.Serialization.Formatters.Binary;
using System.Threading;
using Microsoft.DotNet.RemoteExecutor;
using Xunit;
r = new Regex("[abc]def(ghi|jkl)", options | (RegexOptions)0x80 /*RegexOptions.Debug*/);
+ Assert.Equal("123456789", r.Replace("123adefghi789", "456"));
r = new Regex("(ghi|jkl)*ghi", options | (RegexOptions)0x80 /*RegexOptions.Debug*/);
+ Assert.Equal("123456789", r.Replace("123ghi789", "456"));
+ r = new Regex("(ghi|jkl)*ghi", options | (RegexOptions)0x80 /*RegexOptions.Debug*/, TimeSpan.FromDays(1));
+ Assert.False(r.Match("jkl").Success);
+ Assert.True(r.Match("ghi").Success);
+ Assert.Equal("123456789", r.Replace("123ghi789", "456"));
Assert.Throws<NotSupportedException>(() => r.InitializeReferences());
+        // Exercises the Caps and CapNames members re-exposed by DerivedRegex: their values right after
+        // construction, rejection of null, and what the getters hand back after different IDictionary
+        // implementations have been assigned.
+        [Fact]
+        public void Ctor_CapNames_ReturnsDefaultValues()
+        {
+            var regex = new DerivedRegex(@"(?<Name>\w*)");
+
+            // Right after construction: Caps is null, CapNames is a stable instance containing "Name".
+            Assert.Null(regex.Caps);
+            IDictionary initialCapNames = regex.CapNames;
+            Assert.NotNull(initialCapNames);
+            Assert.Same(initialCapNames, regex.CapNames);
+            Assert.True(initialCapNames.Contains("Name"));
+
+            // Both setters reject null and report the offending parameter as "value".
+            AssertExtensions.Throws<ArgumentNullException>("value", () => regex.Caps = null);
+            AssertExtensions.Throws<ArgumentNullException>("value", () => regex.CapNames = null);
+
+            // Assigning a dictionary that is not a Hashtable: the getter must return a Hashtable.
+            regex.Caps = new Dictionary<string, string>();
+            Assert.IsType<Hashtable>(regex.Caps);
+            regex.CapNames = new Dictionary<string, string>();
+            Assert.IsType<Hashtable>(regex.CapNames);
+
+            // Assigning a Hashtable: the getter must return that very instance.
+            var replacement = new Hashtable();
+            regex.CapNames = replacement;
+            Assert.Same(replacement, regex.CapNames);
+            regex.Caps = replacement;
+            Assert.Same(replacement, regex.Caps);
+        }
private sealed class DerivedRegex : Regex
+ public DerivedRegex() { }
+ public DerivedRegex(string pattern) : base(pattern) { }
public new void InitializeReferences() => base.InitializeReferences();
+ public new IDictionary Caps { get => base.Caps; set => base.Caps = value; }
+ public new IDictionary CapNames { get => base.CapNames; set => base.CapNames = value; }
+ }
+        // Both serialization entry points of a Regex-derived type must throw PlatformNotSupportedException:
+        // the (SerializationInfo, StreamingContext) constructor and ISerializable.GetObjectData.
+        [Fact]
+        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
+        public void Serialization_ThrowsNotSupported()
+        {
+            // GetObjectData is invoked through the interface, so hold the instance as ISerializable.
+            ISerializable serializable = new SerializableDerivedRegex();
+
+            Assert.Throws<PlatformNotSupportedException>(
+                () => new SerializableDerivedRegex(default(SerializationInfo), default(StreamingContext)));
+            Assert.Throws<PlatformNotSupportedException>(
+                () => serializable.GetObjectData(default(SerializationInfo), default(StreamingContext)));
+        }
+        // Minimal [Serializable] Regex subclass that forwards to the base constructors, including the
+        // (SerializationInfo, StreamingContext) one, so tests can reach them.
+        [Serializable]
+        private sealed class SerializableDerivedRegex : Regex
+        {
+            // Builds a regex over the empty pattern.
+            public SerializableDerivedRegex()
+                : base(string.Empty)
+            {
+            }
+
+            // Forwards straight to the base serialization constructor.
+            public SerializableDerivedRegex(SerializationInfo info, StreamingContext context)
+                : base(info, context)
+            {
+            }
+        }
+ [Fact]
+ [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
+ public void Ctor_PatternInName()
+ {
+ RemoteExecutor.Invoke(() =>
+ {
+ // Just make sure setting the environment variable doesn't cause problems.
+ // Short pattern
+ var r = new Regex("abc", RegexOptions.Compiled);
+ Assert.True(r.IsMatch("123abc456"));
+ // Long pattern
+ string pattern = string.Concat(Enumerable.Repeat("1234567890", 20));
+ r = new Regex(pattern, RegexOptions.Compiled);
+ Assert.True(r.IsMatch("abc" + pattern + "abc"));
+ }).Dispose();
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.
+using System.Linq;
using Xunit;
namespace System.Text.RegularExpressions.Tests
public static void Escape(string str, string expected)
Assert.Equal(expected, Regex.Escape(str));
+ if (expected.Length > 0)
+ {
+ const int Count = 100;
+ Assert.Equal(string.Concat(Enumerable.Repeat(expected, Count)), Regex.Escape(string.Concat(Enumerable.Repeat(str, Count))));
+ }
public void Unescape(string str, string expected)
Assert.Equal(expected, Regex.Unescape(str));
+ if (expected.Length > 0)
+ {
+ const int Count = 100;
+ Assert.Equal(string.Concat(Enumerable.Repeat(expected, Count)), Regex.Unescape(string.Concat(Enumerable.Repeat(str, Count))));
+ }
yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}){2}", "acdfbcdfacefbcefbcefbcdfacdef", RegexOptions.None, new string[] { "acdfbcdfacefbcefbcefbcdf" } };
yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}hello){2}", "aaaaaacdfbcdfacefhellobcefbcefbcdfhellooooo", RegexOptions.None, new string[] { "acdfbcdfacefhellobcefbcefbcdfhello" } };
yield return new object[] { null, @"CN=(.*[^,]+).*", "CN=localhost", RegexOptions.Singleline, new string[] { "CN=localhost", "localhost" } };
// Nested atomic
yield return new object[] { null, @"(?>abc[def]gh(i*))", "123abceghiii456", RegexOptions.None, new string[] { "abceghiii", "iii" } };
+ yield return new object[] { null, @"(?>(?:abc)*)", "abcabcabc", RegexOptions.None, new string[] { "abcabcabc" } };
// Anchoring loops beginning with .* / .+
yield return new object[] { null, @".*", "", RegexOptions.None, new string[] { "" } };
yield return new object[] { @"\s+\d+", "sdf 12sad", RegexOptions.RightToLeft, 0, 9, true, " 12" };
yield return new object[] { @"\s+\d+", " asdf12 ", RegexOptions.RightToLeft, 0, 6, false, string.Empty };
yield return new object[] { "aaa", "aaabbb", RegexOptions.None, 3, 3, false, string.Empty };
+ yield return new object[] { "abc|def", "123def456", RegexOptions.RightToLeft | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 9, true, "def" };
yield return new object[] { @"foo\d+", "0123456789foo4567890foo ", RegexOptions.RightToLeft, 10, 3, false, string.Empty };
yield return new object[] { @"foo\d+", "0123456789foo4567890foo ", RegexOptions.RightToLeft, 11, 21, false, string.Empty };
VerifyMatch(r.Match(input), expectedSuccess, expectedValue);
VerifyMatch(Regex.Match(input, pattern, options), expectedSuccess, expectedValue);
+ Assert.Equal(expectedSuccess, r.IsMatch(input));
Assert.Equal(expectedSuccess, Regex.IsMatch(input, pattern, options));
Assert.Equal("a", match.Value);
- [Fact]
- public void Match_Timeout_Throws()
+        // With an explicit 100 ms match timeout, matching the pattern below against fifty 'a's followed
+        // by "@a.a" must throw RegexMatchTimeoutException for every option combination listed.
+        // NOTE(review): the pattern/input pair is presumably chosen to force heavy backtracking - confirm.
+        [Theory]
+        [InlineData(RegexOptions.None)]
+        [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)]
+        [InlineData(RegexOptions.Compiled)]
+        [InlineData(RegexOptions.Compiled | (RegexOptions)0x80 /* Debug */)]
+        public void Match_Timeout_Throws(RegexOptions options)
+        {
+            const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$";
+            TimeSpan matchTimeout = TimeSpan.FromMilliseconds(100);
+            string input = string.Concat(new string('a', 50), "@a.a");
+
+            // The Regex is built inside the delegate, so construction is covered by the same assertion.
+            Assert.Throws<RegexMatchTimeoutException>(() =>
+            {
+                var regex = new Regex(Pattern, options, matchTimeout);
+                regex.Match(input);
+            });
+        }
+ [Theory]
+ [InlineData(RegexOptions.None)]
+ [InlineData(RegexOptions.None | (RegexOptions)0x80 /* Debug */)]
+ [InlineData(RegexOptions.Compiled)]
+ [InlineData(RegexOptions.Compiled | (RegexOptions)0x80 /* Debug */)]
+ public void Match_DefaultTimeout_Throws(RegexOptions options)
- RemoteExecutor.Invoke(() =>
+ RemoteExecutor.Invoke(optionsString =>
const string Pattern = @"^([0-9a-zA-Z]([-.\w]*[0-9a-zA-Z])*@(([0-9a-zA-Z])+([-\w]*[0-9a-zA-Z])*\.)+[a-zA-Z]{2,9})$";
string input = new string('a', 50) + "@a.a";
AppDomain.CurrentDomain.SetData(RegexHelpers.DefaultMatchTimeout_ConfigKeyName, TimeSpan.FromMilliseconds(100));
- Assert.Throws<RegexMatchTimeoutException>(() => new Regex(Pattern).Match(input));
- }).Dispose();
+ Assert.Throws<RegexMatchTimeoutException>(() => new Regex(Pattern, (RegexOptions)int.Parse(optionsString, CultureInfo.InvariantCulture)).Match(input));
+ }, ((int)options).ToString(CultureInfo.InvariantCulture)).Dispose();
// On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions.
// On 32-bit we can't test these high inputs as they cause OutOfMemoryExceptions.
[OuterLoop("Can take several seconds")]
[ConditionalTheory(typeof(Environment), nameof(Environment.Is64BitProcess))]
- [InlineData(RegexOptions.Compiled)]
+ [InlineData(RegexOptions.Compiled)]
public void Match_Timeout_Repetition_Throws(RegexOptions options)
int repetitionCount = 800_000_000;
public static IEnumerable<object[]> Replace_String_TestData()
+ yield return new object[] { @"a", "bbbb", "c", RegexOptions.None, 4, 3, "bbbb" };
+ yield return new object[] { @"", " ", "123", RegexOptions.None, 4, 0, "123 123 123 123" };
yield return new object[] { @"[^ ]+\s(?<time>)", "08/10/99 16:00", "${time}", RegexOptions.None, 14, 0, "16:00" };
yield return new object[] { "icrosoft", "MiCrOsOfT", "icrosoft", RegexOptions.IgnoreCase, 9, 0, "Microsoft" };
yield return new object[] { "dog", "my dog has fleas", "CAT", RegexOptions.IgnoreCase, 16, 0, "my CAT has fleas" };
yield return new object[] { @"(?<cat>cat)\s*(?<dog>dog)", "slkfjsdcat dogkljeah", "START${catTWO}dogcat${dogTWO}END", RegexOptions.None, 20, 0, "slkfjsdSTART${catTWO}dogcat${dogTWO}ENDkljeah" };
// RightToLeft
+ yield return new object[] { @"a", "bbbb", "c", RegexOptions.RightToLeft, 4, 3, "bbbb" };
+ yield return new object[] { @"", " ", "123", RegexOptions.RightToLeft, 4, 3, "123 123 123 123" };
yield return new object[] { @"foo\s+", "0123456789foo4567890foo ", "bar", RegexOptions.RightToLeft, 32, 32, "0123456789foo4567890bar" };
yield return new object[] { @"\d", "0123456789foo4567890foo ", "#", RegexOptions.RightToLeft, 17, 32, "##########foo#######foo " };
yield return new object[] { @"\d", "0123456789foo4567890foo ", "#", RegexOptions.RightToLeft, 7, 32, "0123456789foo#######foo " };
public static IEnumerable<object[]> Replace_MatchEvaluator_TestData()
+ yield return new object[] { "a", "bbbb", new MatchEvaluator(match => "uhoh"), RegexOptions.None, 4, 0, "bbbb" };
yield return new object[] { "(Big|Small)", "Big mountain", new MatchEvaluator(MatchEvaluator1), RegexOptions.None, 12, 0, "Huge mountain" };
yield return new object[] { "(Big|Small)", "Small village", new MatchEvaluator(MatchEvaluator1), RegexOptions.None, 13, 0, "Tiny village" };
yield return new object[] { @"\u0915\u0930.*?\b", boldInput, new MatchEvaluator(MatchEvaluatorBold), RegexOptions.CultureInvariant | RegexOptions.Singleline, boldInput.Length, 0, boldExpected };
// RightToLeft
+ yield return new object[] { "a", "bbbb", new MatchEvaluator(match => "uhoh"), RegexOptions.RightToLeft, 4, 3, "bbbb" };
yield return new object[] { @"foo\s+", "0123456789foo4567890foo ", new MatchEvaluator(MatchEvaluatorBar), RegexOptions.RightToLeft, 32, 32, "0123456789foo4567890bar" };
yield return new object[] { @"\d", "0123456789foo4567890foo ", new MatchEvaluator(MatchEvaluatorPoundSign), RegexOptions.RightToLeft, 17, 32, "##########foo#######foo " };
yield return new object[] { @"\d", "0123456789foo4567890foo ", new MatchEvaluator(MatchEvaluatorPoundSign), RegexOptions.RightToLeft, 7, 32, "0123456789foo#######foo " };
public class RegexSplitTests
- public static IEnumerable<object[]> Split_NonCompiled_TestData()
+ public static IEnumerable<object[]> Split_TestData()
+ yield return new object[] { "", "", RegexOptions.None, 0, 0, new string[] { "", "" } };
+ yield return new object[] { "123", "abc", RegexOptions.None, 3, 0, new string[] { "abc" } };
yield return new object[] { " ", "word0 word1 word2 word3", RegexOptions.None, 32, 0, new string[] { "word0", "word1", "word2", "word3" } };
yield return new object[] { ":", "kkk:lll:mmm:nnn:ooo", RegexOptions.None, 19, 0, new string[] { "kkk", "lll", "mmm", "nnn", "ooo" } };
yield return new object[] { "a(?<dot1>.)c(.)e", "123abcde456aBCDe789", RegexOptions.IgnoreCase, 19, 0, new string[] { "123", "d", "b", "456", "D", "B", "789" } };
// RightToLeft
+ yield return new object[] { "", "", RegexOptions.RightToLeft, 0, 0, new string[] { "", "" } };
+ yield return new object[] { "123", "abc", RegexOptions.RightToLeft, 3, 0, new string[] { "abc" } };
yield return new object[] { "a(.)c(.)e", "123abcde456aBCDe789", RegexOptions.RightToLeft, 19, 19, new string[] { "123", "d", "b", "456aBCDe789" } };
yield return new object[] { "a(.)c(.)e", "123abcde456aBCDe789", RegexOptions.RightToLeft | RegexOptions.IgnoreCase, 19, 19, new string[] { "123", "d", "b", "456", "D", "B", "789" } };
- [MemberData(nameof(Split_NonCompiled_TestData))]
- [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Split_NonCompiled_TestData), 2, MemberType = typeof(RegexCompilationHelper))]
+ [MemberData(nameof(Split_TestData))]
+ [MemberData(nameof(RegexCompilationHelper.TransformRegexOptions), nameof(Split_TestData), 2, MemberType = typeof(RegexCompilationHelper))]
public void Split(string pattern, string input, RegexOptions options, int count, int start, string[] expected)
bool isDefaultStart = RegexHelpers.IsDefaultStart(input, options, start);
+        // Calls RegexRunner.IsBoundary (via DerivedRunner) for position `pos` of `text`, using the whole
+        // string [0, text.Length) as the search range, and compares the answer with the expected one.
+        [Theory]
+        [InlineData("ab", 1, false)]
+        [InlineData("a b", 1, true)]
+        [InlineData("a b", 2, true)]
+        [InlineData("\u200Da", 1, false)]
+        [InlineData("\u200D\u200C", 1, false)]
+        [InlineData("\u200Ca", 1, false)]
+        [InlineData("\u200C a", 1, true)]
+        public void IsBoundary_ReturnsExpectedResult(string text, int pos, bool expectedBoundary)
+        {
+            var runner = new DerivedRunner(text);
+            int rangeEnd = text.Length;
+
+            bool actualBoundary = runner.IsBoundary(pos, 0, rangeEnd);
+
+            Assert.Equal(expectedBoundary, actualBoundary);
+        }
private static HashSet<char> ComputeIncludedSet(Func<char, bool> func)
var included = new HashSet<char>();
Assert.Throws<XunitException>(() => ValidateSet("[b]", RegexOptions.None, null, new HashSet<char>() { 'b' }, validateEveryChar: true));
+        // Pins the results of the legacy RegexRunner.CharInSet(char, set, category) helper for a handful
+        // of set/category combinations.
+        [Fact]
+        public void RegexRunner_Legacy_CharInSet()
+        {
+            const string NoCategory = "";
+            const string Category = "\x0009"; // NOTE(review): presumably the decimal-digit category code - confirm
+
+            // Set only.
+            Assert.True(DerivedRunner.CharInSet('a', "ab", NoCategory));
+            Assert.False(DerivedRunner.CharInSet('x', "ab", NoCategory));
+
+            // Set with a "\0\0" prefix: the results flip (presumably a negation marker - confirm).
+            Assert.True(DerivedRunner.CharInSet('x', "\0\0ab", NoCategory));
+            Assert.False(DerivedRunner.CharInSet('a', "\0\0ab", NoCategory));
+
+            // Category only.
+            Assert.True(DerivedRunner.CharInSet('4', "", Category));
+            Assert.False(DerivedRunner.CharInSet('a', "", Category));
+
+            // Set and category together: a hit in either one is enough.
+            Assert.True(DerivedRunner.CharInSet('4', "xz", Category));
+            Assert.True(DerivedRunner.CharInSet('a', "az", Category));
+            Assert.False(DerivedRunner.CharInSet('a', "xz", Category));
+        }
+        // Test-only RegexRunner subclass that re-exposes IsBoundary and CharInSet publicly.
+        // The abstract matching hooks are not needed by these tests and simply throw.
+        private sealed class DerivedRunner : RegexRunner
+        {
+            public DerivedRunner()
+            {
+            }
+
+            // Points the runner at `text`: the whole string is the search range and every
+            // position field starts at 0.
+            public DerivedRunner(string text)
+            {
+                runtext = text;
+                runtextend = text.Length;
+                runtextbeg = runtextstart = runtextpos = 0;
+            }
+
+            // Public pass-through to the base IsBoundary.
+            public new bool IsBoundary(int index, int startpos, int endpos)
+            {
+                return base.IsBoundary(index, startpos, endpos);
+            }
+
+            // Public pass-through to the static RegexRunner.CharInSet.
+            public static new bool CharInSet(char ch, string set, string category)
+            {
+                return RegexRunner.CharInSet(ch, set, category);
+            }
+
+            protected override bool FindFirstChar()
+            {
+                throw new NotImplementedException();
+            }
+
+            protected override void Go()
+            {
+                throw new NotImplementedException();
+            }
+
+            protected override void InitTrackCount()
+            {
+                throw new NotImplementedException();
+            }
+        }
private static void ValidateSet(string regex, RegexOptions options, HashSet<char> included, HashSet<char> excluded, bool validateEveryChar = false)
Assert.True((included != null) ^ (excluded != null));
[InlineData(@"[a-[b]] ", RegexOptions.None, null)]
[InlineData(@"[a-b-[c]] ", RegexOptions.None, null)]
[InlineData(@"[a-[b]-c] ", RegexOptions.None, RegexParseError.SubtractionMustBeLast)]
+ [InlineData(@"[a-z-[b]12]", RegexOptions.None, RegexParseError.SubtractionMustBeLast)]
[InlineData(@"[[a]-b] ", RegexOptions.None, null)]
[InlineData(@"[[a]-[b]] ", RegexOptions.None, null)]
[InlineData(@"[\w-a] ", RegexOptions.None, null)]
[InlineData(@"(?imn", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
[InlineData(@"(?'cat'", RegexOptions.None, RegexParseError.NotEnoughParentheses)]
[InlineData(@"(?'", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
+ [InlineData(@"(?'=)", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
+ [InlineData(@"(?'!)", RegexOptions.None, RegexParseError.UnrecognizedGrouping)]
[InlineData(@"[^", RegexOptions.None, RegexParseError.UnterminatedBracket)]
[InlineData(@"[cat", RegexOptions.None, RegexParseError.UnterminatedBracket)]
[InlineData(@"[^cat", RegexOptions.None, RegexParseError.UnterminatedBracket)]
[InlineData("[^a]{1,3}[^a]+", "[^a]{2,}")]
[InlineData("[^a]{1,3}[^a]?", "[^a]{1,4}")]
[InlineData("[^a]{1,3}[^a]{1,3}", "[^a]{2,6}")]
+ // Two lazy notone loops
+ [InlineData("[^a]*?[^a]*?", "[^a]*?")]
+ [InlineData("[^a]*?[^a]+?", "[^a]+?")]
+ [InlineData("[^a]*?[^a]??", "[^a]*?")]
+ [InlineData("[^a]*?[^a]{1,3}?", "[^a]+?")]
+ [InlineData("[^a]+?[^a]*?", "[^a]+?")]
+ [InlineData("[^a]+?[^a]+?", "[^a]{2,}?")]
+ [InlineData("[^a]+?[^a]??", "[^a]+?")]
+ [InlineData("[^a]+?[^a]{1,3}?", "[^a]{2,}?")]
+ [InlineData("[^a]??[^a]*?", "[^a]*?")]
+ [InlineData("[^a]??[^a]+?", "[^a]+?")]
+ [InlineData("[^a]??[^a]??", "[^a]{0,2}?")]
+ [InlineData("[^a]??[^a]{1,3}?", "[^a]{1,4}?")]
+ [InlineData("[^a]{1,3}?[^a]*?", "[^a]+?")]
+ [InlineData("[^a]{1,3}?[^a]+?", "[^a]{2,}?")]
+ [InlineData("[^a]{1,3}?[^a]??", "[^a]{1,4}?")]
+ [InlineData("[^a]{1,3}?[^a]{1,3}?", "[^a]{2,6}?")]
// Two atomic notone loops
[InlineData("(?>[^a]*)(?>[^a]*)", "(?>[^a]*)")]
[InlineData("(?>[^a]*)(?>[^a]+)", "(?>[^a]+)")]
[InlineData("(?>[^a]{1,3})(?>[^a]+)", "(?>[^a]{2,})")]
[InlineData("(?>[^a]{1,3})(?>[^a]?)", "(?>[^a]{1,4})")]
[InlineData("(?>[^a]{1,3})(?>[^a]{1,3})", "(?>[^a]{2,6})")]
- // Greedy notone loop and one
+ // Greedy notone loop and notone
[InlineData("[^a]*[^a]", "[^a]+")]
[InlineData("[^a]+[^a]", "[^a]{2,}")]
[InlineData("[^a]?[^a]", "[^a]{1,2}")]
[InlineData("[^a][^a]+", "[^a]{2,}")]
[InlineData("[^a][^a]?", "[^a]{1,2}")]
[InlineData("[^a][^a]{1,3}", "[^a]{2,4}")]
- // Atomic notone loop and one
+ // Lazy notone loop and notone
+ [InlineData("[^a]*?[^a]", "[^a]+?")]
+ [InlineData("[^a]+?[^a]", "[^a]{2,}?")]
+ [InlineData("[^a]??[^a]", "[^a]{1,2}?")]
+ [InlineData("[^a]{1,3}?[^a]", "[^a]{2,4}?")]
+ [InlineData("[^a][^a]*?", "[^a]+?")]
+ [InlineData("[^a][^a]+?", "[^a]{2,}?")]
+ [InlineData("[^a][^a]??", "[^a]{1,2}?")]
+ [InlineData("[^a][^a]{1,3}?", "[^a]{2,4}?")]
+ // Atomic notone loop and notone
[InlineData("(?>[^a]*)[^a]", "(?>[^a]+)")]
[InlineData("(?>[^a]+)[^a]", "(?>[^a]{2,})")]
[InlineData("(?>[^a]?)[^a]", "(?>[^a]{1,2})")]
[InlineData("[0-9]{1,3}?[0-9]+?", "[0-9]{2,}?")]
[InlineData("[0-9]{1,3}?[0-9]??", "[0-9]{1,4}?")]
[InlineData("[0-9]{1,3}?[0-9]{1,3}?", "[0-9]{2,6}?")]
+ // Two atomic set loops
+ [InlineData("(?>[0-9]*)(?>[0-9]*)", "(?>[0-9]*)")]
+ [InlineData("(?>[0-9]*)(?>[0-9]+)", "(?>[0-9]+)")]
+ [InlineData("(?>[0-9]*)(?>[0-9]?)", "(?>[0-9]*)")]
+ [InlineData("(?>[0-9]*)(?>[0-9]{1,3})", "(?>[0-9]+)")]
+ [InlineData("(?>[0-9]+)(?>[0-9]*)", "(?>[0-9]+)")]
+ [InlineData("(?>[0-9]+)(?>[0-9]+)", "(?>[0-9]{2,})")]
+ [InlineData("(?>[0-9]+)(?>[0-9]?)", "(?>[0-9]+)")]
+ [InlineData("(?>[0-9]+)(?>[0-9]{1,3})", "(?>[0-9]{2,})")]
+ [InlineData("(?>[0-9]?)(?>[0-9]*)", "(?>[0-9]*)")]
+ [InlineData("(?>[0-9]?)(?>[0-9]+)", "(?>[0-9]+)")]
+ [InlineData("(?>[0-9]?)(?>[0-9]?)", "(?>[0-9]{0,2})")]
+ [InlineData("(?>[0-9]?)(?>[0-9]{1,3})", "(?>[0-9]{1,4})")]
+ [InlineData("(?>[0-9]{1,3})(?>[0-9]*)", "(?>[0-9]+)")]
+ [InlineData("(?>[0-9]{1,3})(?>[0-9]+)", "(?>[0-9]{2,})")]
+ [InlineData("(?>[0-9]{1,3})(?>[0-9]?)", "(?>[0-9]{1,4})")]
+ [InlineData("(?>[0-9]{1,3})(?>[0-9]{1,3})", "(?>[0-9]{2,6})")]
// Lazy set loop and set
[InlineData("[0-9]*?[0-9]", "[0-9]+?")]
[InlineData("[0-9]+?[0-9]", "[0-9]{2,}?")]
[InlineData("a*a*?", "a*")]
[InlineData("a*?a*", "a*")]
[InlineData("a*[^a]*", "a*")]
- [InlineData("[ab]*[^a]", "(?>[ab]*)[^a]")]
- [InlineData("[ab]*[^a]*", "(?>[ab]*)[^a]*")]
- [InlineData("[ab]*[^a]*?", "(?>[ab]*)[^a]*?")]
- [InlineData("[ab]*(?>[^a]*)", "(?>[ab]*)(?>[^a]*)")]
[InlineData("[^a]*a*", "a*")]
[InlineData("a{2147483646}a", "a{2147483647}")]
[InlineData("a{2147483647}a", "a{2147483647}")]
[InlineData("abc(?:(?i:e)|f)", "abc[ef]")]
// Not applying auto-atomicity
[InlineData("a*b*", "(?>a*)b*")]
+ [InlineData("[ab]*[^a]", "(?>[ab]*)[^a]")]
+ [InlineData("[ab]*[^a]*", "(?>[ab]*)[^a]*")]
+ [InlineData("[ab]*[^a]*?", "(?>[ab]*)[^a]*?")]
+ [InlineData("[ab]*(?>[^a]*)", "(?>[ab]*)(?>[^a]*)")]
[InlineData("[^\n]*\n*", "(?>[^\n]*)\n")]
[InlineData("(a[bcd]a*)*fg", "(a[bcd](?>a*))*fg")]
[InlineData("(\\w[bcd]\\d*)*fg", "(\\w[bcd](?>\\d*))*fg")]
+ [InlineData("a*(?<=[^a])b", "(?>a*)(?<=[^a])b")]
+ [InlineData("[\x0000-\xFFFF]*[a-z]", "(?>[\x0000-\xFFFF]*)[a-z]")]
+ [InlineData("[a-z]*[\x0000-\xFFFF]+", "(?>[a-z]*)[\x0000-\xFFFF]+")]
+ [InlineData("[^a-c]*[e-g]", "(?>[^a-c]*)[e-g]")]
+ [InlineData("[^a-c]*[^e-g]", "(?>[^a-c]*)[^e-g]")]
public void PatternsReduceDifferently(string pattern1, string pattern2)
var r1 = new Regex(pattern1);
[InlineData(@"a*a*a*a*a*a*a*b*", 0)]
[InlineData(@"((a{1,2}){4}){3,7}", 12)]
[InlineData(@"\b\w{4}\b", 4)]
+ [InlineData(@"abcd(?=efgh)efgh", 8)]
+ [InlineData(@"abcd(?<=cd)efgh", 8)]
+ [InlineData(@"abcd(?!ab)efgh", 8)]
+ [InlineData(@"abcd(?<!ef)efgh", 8)]
// we stop computing after a certain depth; if that logic changes in the future, these tests can be updated
[InlineData(@"((((((((((((((((((((((((((((((ab|cd+)|ef+)|gh+)|ij+)|kl+)|mn+)|op+)|qr+)|st+)|uv+)|wx+)|yz+)|01+)|23+)|45+)|67+)|89+)|AB+)|CD+)|EF+)|GH+)|IJ+)|KL+)|MN+)|OP+)|QR+)|ST+)|UV+)|WX+)|YZ)", 0)]
[InlineData(@"(YZ+|(WX+|(UV+|(ST+|(QR+|(OP+|(MN+|(KL+|(IJ+|(GH+|(EF+|(CD+|(AB+|(89+|(67+|(45+|(23+|(01+|(yz+|(wx+|(uv+|(st+|(qr+|(op+|(mn+|(kl+|(ij+|(gh+|(ef+|(de+|(a|bc+)))))))))))))))))))))))))))))))", 0)]