remove [::] treatment (#88590)
author: Dan Moseley <danmose@microsoft.com>
Tue, 11 Jul 2023 16:42:37 +0000 (11:42 -0500)
committer: GitHub <noreply@github.com>
Tue, 11 Jul 2023 16:42:37 +0000 (18:42 +0200)
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/Regex.Match.Tests.cs
src/libraries/System.Text.RegularExpressions/tests/FunctionalTests/RegexParserTests.netcoreapp.cs

index 4348c6b..1716511 100644 (file)
@@ -668,21 +668,6 @@ namespace System.Text.RegularExpressions
                             break; // this break will only break out of the switch
                     }
                 }
-                else if (ch == '[')
-                {
-                    // This is code for Posix style properties - [:Ll:] or [:IsTibetan:].
-                    // It currently doesn't do anything other than skip the whole thing!
-                    if (_pos < _pattern.Length && _pattern[_pos] == ':' && !inRange)
-                    {
-                        int savePos = _pos;
-
-                        _pos++;
-                        if (_pos + 1 >= _pattern.Length || _pattern[_pos++] != ':' || _pattern[_pos++] != ']')
-                        {
-                            _pos = savePos;
-                        }
-                    }
-                }
 
                 if (inRange)
                 {
index d3e2d45..43bd39f 100644 (file)
@@ -917,6 +917,15 @@ namespace System.Text.RegularExpressions.Tests
                 yield return (@"^(?i:[\u24B6-\u24D0])$", ((char)('\u24CF' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u24CF' + 26)).ToString());
             }
 
+            // [:XX:] inside a range has no special treatment; the [:XX: is literal, the ] closes the range
+            if (PlatformDetection.IsNetCore)
+            {
+                yield return (@"[[::]]", "x", RegexOptions.None, 0, 1, false, "");
+                yield return (@"[[:a:]]", "a]", RegexOptions.None, 0, 2, true, "a]");
+                yield return (@"[c[:ab:]", "c", RegexOptions.None, 0, 1, true, "c");
+                yield return (@"[c[:ab:]{3}d]", "abcd]", RegexOptions.None, 0, 5, true, "abcd]");
+            }
+
             // Long inputs
             string longCharacterRange = string.Concat(Enumerable.Range(1, 0x2000).Select(c => (char)c));
             foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.IgnoreCase })
index 13ff23b..a16b59a 100644 (file)
@@ -85,6 +85,10 @@ namespace System.Text.RegularExpressions.Tests
         [InlineData("a{0,2147483648}", RegexOptions.None, RegexParseError.QuantifierOrCaptureGroupOutOfRange, 14)]
         // Surrogate pair which is parsed as [char,char-char,char] as we operate on UTF-16 code units.
         [InlineData("[\uD82F\uDCA0-\uD82F\uDCA3]", RegexOptions.IgnoreCase, RegexParseError.ReversedCharacterRange, 5)]
+        // [ inside a range is treated literally
+        [InlineData(@"[[::]", RegexOptions.None, null)]
+        [InlineData(@"[[:X:]", RegexOptions.None, null)]
+        [InlineData(@"[[:ab:]", RegexOptions.None, null)]
 
         // Following are borrowed from Rust regex tests ============
         // https://github.com/rust-lang/regex/blob/master/tests/noparse.rs