Factor out and improve the vectorization of RegexInterpreter.FindFirstChar (#61490)

author Stephen Toub <stoub@microsoft.com>

Wed, 17 Nov 2021 16:41:12 +0000 (11:41 -0500)

committer GitHub <noreply@github.com>

Wed, 17 Nov 2021 16:41:12 +0000 (11:41 -0500)
author Stephen Toub <stoub@microsoft.com>
Wed, 17 Nov 2021 16:41:12 +0000 (11:41 -0500)
committer GitHub <noreply@github.com>
Wed, 17 Nov 2021 16:41:12 +0000 (11:41 -0500)
diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs

index 46eb36b..fd83096 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs
@@ -231,10 +231,8 @@ namespace System.Text.RegularExpressions.Generator
          {
              RegexOptions options = (RegexOptions)rm.Options;
              RegexCode code = rm.Code;
-            (string CharClass, bool CaseInsensitive)[]? lcc = code.LeadingCharClasses;
              bool rtl = code.RightToLeft;
              bool hasTextInfo = false;
-            bool textInfoEmitted = false;
  
              // Emit locals initialization
              writer.WriteLine("string runtext = base.runtext!;");
@@ -267,34 +265,52 @@ namespace System.Text.RegularExpressions.Generator
                  };
              using (EmitBlock(writer, clause))
              {
-                EmitAnchors();
-
-                if (code.BoyerMoorePrefix is RegexBoyerMoore { NegativeUnicode: null } rbm)
-                {
-                    if (rbm.PatternSupportsIndexOf)
-                    {
-                        EmitIndexOf(rbm.Pattern);
-                    }
-                    else
-                    {
-                        EmitBoyerMoore(rbm);
-                    }
-                }
-                else if (lcc is not null)
+                // Emit any anchors.
+                if (!EmitAnchors())
                  {
-                    if (rtl)
-                    {
-                        EmitLeadingCharacter_RightToLeft();
-                    }
-                    else
+                    // Either anchors weren't specified, or they don't completely root all matches to a specific location.
+
+                    // If whatever search operation we need to perform entails case-insensitive operations
+                    // that weren't already handled via creation of sets, we need to get and store the
+                    // TextInfo object to use (unless RegexOptions.CultureInvariant was specified).
+                    EmitTextInfo(writer, ref hasTextInfo, rm);
+
+                    // Emit the code for whatever find mode has been determined.
+                    switch (code.FindOptimizations.FindMode)
                      {
-                        EmitLeadingCharacter_LeftToRight();
+                        case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive:
+                            Debug.Assert(!string.IsNullOrEmpty(code.FindOptimizations.LeadingCaseSensitivePrefix));
+                            EmitIndexOf_LeftToRight(code.FindOptimizations.LeadingCaseSensitivePrefix);
+                            break;
+
+                        case FindNextStartingPositionMode.LeadingPrefix_RightToLeft_CaseSensitive:
+                            Debug.Assert(!string.IsNullOrEmpty(code.FindOptimizations.LeadingCaseSensitivePrefix));
+                            EmitIndexOf_RightToLeft(code.FindOptimizations.LeadingCaseSensitivePrefix);
+                            break;
+
+                        case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive:
+                        case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive:
+                        case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive:
+                        case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive:
+                            Debug.Assert(code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
+                            EmitFixedSet_LeftToRight();
+                            break;
+
+                        case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive:
+                        case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive:
+                            Debug.Assert(code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
+                            EmitFixedSet_RightToLeft();
+                            break;
+
+                        default:
+                            Debug.Fail($"Unexpected mode: {code.FindOptimizations.FindMode}");
+                            goto case FindNextStartingPositionMode.NoSearch;
+
+                        case FindNextStartingPositionMode.NoSearch:
+                            writer.WriteLine("return true;");
+                            break;
                      }
                  }
-                else
-                {
-                    writer.WriteLine("return true;");
-                }
              }
              writer.WriteLine();
  
@@ -303,15 +319,15 @@ namespace System.Text.RegularExpressions.Generator
              writer.WriteLine(!rm.Code.RightToLeft ? "base.runtextpos = runtextend;" : "base.runtextpos = runtextbeg;");
              writer.WriteLine("return false;");
  
-            void EmitAnchors()
+            // Emits any anchors.  Returns true if the anchor roots any match to a specific location and thus no further
+            // searching is required; otherwise, false.
+            bool EmitAnchors()
              {
                  // Generate anchor checks.
-                if ((code.LeadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End | RegexPrefixAnalyzer.Bol)) != 0)
+                if ((code.FindOptimizations.LeadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End | RegexPrefixAnalyzer.Bol)) != 0)
                  {
-                    // TODO: RegexInterpreter also factors in a Boyer-Moore prefix check in places Compiled just returns true.
-                    // Determine if we should do so here and in Compiled as well, and potentially update RegexInterpreter.
-                    // Interpreted and Compiled also differ in various places as to whether they update positions, as do LTR vs RTL. Determine why.
-                    switch (code.LeadingAnchor)
+                    // TODO: Interpreted and Compiled differ in various places as to whether they update positions, as do LTR vs RTL. Determine why.
+                    switch (code.FindOptimizations.LeadingAnchor)
                      {
                          case RegexPrefixAnalyzer.Beginning:
                              writer.WriteLine("// Beginning \\A anchor");
@@ -331,7 +347,7 @@ namespace System.Text.RegularExpressions.Generator
                                  }
                              }
                              writer.WriteLine("return true;");
-                            return;
+                            return true;
  
                          case RegexPrefixAnalyzer.Start:
                              writer.WriteLine("// Start \\G anchor");
@@ -351,7 +367,7 @@ namespace System.Text.RegularExpressions.Generator
                                  }
                              }
                              writer.WriteLine("return true;");
-                            return;
+                            return true;
  
                          case RegexPrefixAnalyzer.EndZ:
                              // TODO: Why are the LTR and RTL cases inconsistent here with RegexOptions.Compiled?
@@ -372,9 +388,9 @@ namespace System.Text.RegularExpressions.Generator
                                  }
                              }
                              writer.WriteLine("return true;");
-                            return;
+                            return true;
  
-                        case RegexPrefixAnalyzer.End when minRequiredLength == 0: // if it's > 0, we already output a more stringent check
+                        case RegexPrefixAnalyzer.End:
                              writer.WriteLine("// End \\z anchor");
                              if (!rtl)
                              {
@@ -391,14 +407,14 @@ namespace System.Text.RegularExpressions.Generator
                                  }
                              }
                              writer.WriteLine("return true;");
-                            return;
+                            return true;
  
-                        case RegexPrefixAnalyzer.Bol when !rtl: // Don't bother optimizing for the niche case of RegexOptions.RightToLeft | RegexOptions.Multiline
+                        case RegexPrefixAnalyzer.Bol:
                              // Optimize the handling of a Beginning-Of-Line (BOL) anchor.  BOL is special, in that unlike
                              // other anchors like Beginning, there are potentially multiple places a BOL can match.  So unlike
                              // the other anchors, which all skip all subsequent processing if found, with BOL we just use it
-                            // to boost our position to the next line, and then continue normally with any Boyer-Moore or
-                            // leading char class searches.
+                            // to boost our position to the next line, and then continue normally with any searches.
+                            Debug.Assert(!rtl, "RightToLeft isn't implemented and should have been filtered out previously");
                              writer.WriteLine("// Beginning-of-line anchor");
                              using (EmitBlock(writer, "if (runtextpos > runtextbeg && runtext[runtextpos - 1] != '\\n')"))
                              {
@@ -413,131 +429,12 @@ namespace System.Text.RegularExpressions.Generator
                              break;
                      }
                  }
-            }
-
-            void EmitBoyerMoore(RegexBoyerMoore rbm)
-            {
-                EmitTextInfoIfRequired(writer, ref textInfoEmitted, ref hasTextInfo, rm);
-
-                int beforefirst;
-                int last;
-                if (!rtl)
-                {
-                    //limitLocal = "runtextend";
-                    beforefirst = -1;
-                    last = rbm.Pattern.Length - 1;
-                }
-                else
-                {
-                    //limitLocal = "runtextbeg";
-                    beforefirst = rbm.Pattern.Length;
-                    last = 0;
-                }
-
-                int chLast = rbm.Pattern[last];
-
-                EmitAdd(writer, "runtextpos", !rtl ? rbm.Pattern.Length - 1 : -rbm.Pattern.Length);
-
-                using (EmitBlock(writer, $"while ({(!rtl ? "runtextpos < runtextend" : "runtextpos >= runtextbeg")})"))
-                {
-                    writer.WriteLine($"ch = {ToLowerIfNeeded(hasTextInfo, options, "runtext[runtextpos]", rbm.CaseInsensitive)};");
-
-                    using (EmitBlock(writer, $"if (ch != {Literal((char)chLast)})"))
-                    {
-                        writer.WriteLine($"ch -= {Literal((char)rbm.LowASCII)};");
-                        using (EmitBlock(writer, $"if ((uint)ch > ({Literal((char)rbm.HighASCII)} - {Literal((char)rbm.LowASCII)}))"))
-                        {
-                            EmitAdd(writer, "runtextpos", (!rtl ? rbm.Pattern.Length : -rbm.Pattern.Length));
-                            writer.WriteLine("continue;");
-                        }
-
-                        int negativeRange = rbm.HighASCII - rbm.LowASCII + 1;
-                        if (negativeRange > 1) // High > Low
-                        {
-                            // Create a string to store the lookup table we use to find the offset.
-                            // Store the offsets into the string.  RightToLeft has negative offsets, so to support it with chars (unsigned), we negate
-                            // the values to be stored in the string, and then at run time after looking up the offset in the string, negate it again.
-                            Debug.Assert(rbm.Pattern.Length <= char.MaxValue, "RegexBoyerMoore should have limited the size allowed.");
-                            Span<char> span = new char[negativeRange];
-                            for (int i = 0; i < span.Length; i++)
-                            {
-                                int offset = rbm.NegativeASCII[i + rbm.LowASCII];
-                                if (offset == beforefirst)
-                                {
-                                    offset = rbm.Pattern.Length;
-                                }
-                                else if (rtl)
-                                {
-                                    offset = -offset;
-                                }
-                                Debug.Assert(offset >= 0 && offset <= char.MaxValue);
-                                span[i] = (char)offset;
-                            }
-
-                            writer.WriteLine($"runtextpos {(rtl ? "-=" : "+=")} {Literal(span.ToString())}[ch];");
-                        }
-                        else
-                        {
-                            Debug.Assert(negativeRange == 1); // High == Low
-                            int offset = rbm.NegativeASCII[rbm.LowASCII];
-                            if (offset == beforefirst)
-                            {
-                                offset = rtl ? -rbm.Pattern.Length : rbm.Pattern.Length;
-                            }
-                            EmitAdd(writer, "runtextpos", offset);
-                        }
-                        writer.WriteLine("continue;");
-                    }
-                    writer.WriteLine();
-                    writer.WriteLine("int test = runtextpos;");
-                    writer.WriteLine();
-
-                    for (int i = rbm.Pattern.Length - 2; i >= 0; i--)
-                    {
-                        int charIndex = !rtl ? i : rbm.Pattern.Length - 1 - i;
-                        bool sameAsPrev = i < rbm.Pattern.Length - 2 && rbm.Positive[charIndex] == rbm.Positive[!rtl ? i + 1 : rbm.Pattern.Length - 1 - (i + 1)];
-                        bool sameAsNext = i > 0 && rbm.Positive[charIndex] == rbm.Positive[!rtl ? i - 1 : rbm.Pattern.Length - 1 - (i - 1)];
-
-                        string condition = $"{ToLowerIfNeeded(hasTextInfo, options, (!rtl ? "runtext[--test]" : "runtext[++test]"), rbm.CaseInsensitive && RegexCharClass.ParticipatesInCaseConversion(rbm.Pattern[charIndex]))} != {Literal(rbm.Pattern[charIndex])}";
-                        switch ((sameAsPrev, sameAsNext))
-                        {
-                            case (true, true):
-                                writer.WriteLine($"    {condition} ||");
-                                break;
-
-                            case (false, true):
-                                writer.WriteLine($"if ({condition} ||");
-                                break;
-
-                            case (true, false):
-                                writer.WriteLine($"    {condition})");
-                                using (EmitBlock(writer, null))
-                                {
-                                    EmitAdd(writer, "runtextpos", rbm.Positive[charIndex]);
-                                    writer.WriteLine("continue;");
-                                }
-                                writer.WriteLine();
-                                break;
  
-                            case (false, false):
-                                using (EmitBlock(writer, $"if ({condition})"))
-                                {
-                                    EmitAdd(writer, "runtextpos", rbm.Positive[charIndex]);
-                                    writer.WriteLine("continue;");
-                                }
-                                writer.WriteLine();
-                                break;
-                        }
-                    }
-
-                    writer.WriteLine(!rtl ?
-                        "base.runtextpos = test;" :
-                        "base.runtextpos = test + 1;");
-                    writer.WriteLine("return true;");
-                }
+                return false;
              }
  
-            void EmitIndexOf(string prefix)
+            // Emits a case-sensitive left-to-right prefix search for a string at the beginning of the pattern.
+            void EmitIndexOf_LeftToRight(string prefix)
              {
                  writer.WriteLine($"int i = global::System.MemoryExtensions.IndexOf(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, runtextend - runtextpos), {Literal(prefix)});");
                  writer.WriteLine("if (i >= 0)");
@@ -547,87 +444,89 @@ namespace System.Text.RegularExpressions.Generator
                  writer.WriteLine("}");
              }
  
-            void EmitLeadingCharacter_RightToLeft()
+            // Emits a case-sensitive right-to-left prefix search for a string at the beginning of the pattern.
+            void EmitIndexOf_RightToLeft(string prefix)
              {
-                EmitTextInfoIfRequired(writer, ref textInfoEmitted, ref hasTextInfo, rm);
+                writer.WriteLine($"int i = global::System.MemoryExtensions.LastIndexOf(global::System.MemoryExtensions.AsSpan(runtext, runtextbeg, runtextpos - runtextbeg), {Literal(prefix)});");
+                writer.WriteLine("if (i >= 0)");
+                writer.WriteLine("{");
+                writer.WriteLine($"    base.runtextpos = runtextbeg + i + {prefix.Length};");
+                writer.WriteLine("    return true;");
+                writer.WriteLine("}");
+            }
  
-                Debug.Assert(lcc.Length == 1, "Only the FirstChars and not MultiFirstChars computation is supported for RightToLeft");
-                string set = lcc[0].CharClass;
-                if (RegexCharClass.IsSingleton(set))
+            // Emits a right-to-left search for a set at a fixed position from the start of the pattern.
+            // (Currently that position will always be a distance of 0, meaning the start of the pattern itself.)
+            void EmitFixedSet_RightToLeft()
+            {
+                (char[]? Chars, string Set, int Distance, bool CaseInsensitive) set = code.FindOptimizations.FixedDistanceSets![0];
+                Debug.Assert(set.Distance == 0);
+
+                if (set.Chars is { Length: 1 } && !set.CaseInsensitive)
                  {
-                    char ch = RegexCharClass.SingletonChar(set);
-                    using (EmitBlock(writer, "for (int i = runtextpos - 1; i >= runtextbeg; i--)"))
-                    {
-                        using (EmitBlock(writer, $"if (runtext[i] == {ToLowerIfNeeded(hasTextInfo, options, Literal(ch), lcc[0].CaseInsensitive)})"))
-                        {
-                            writer.WriteLine("base.runtextpos = i + 1;");
-                            writer.WriteLine("return true;");
-                        }
-                    }
+                    writer.WriteLine($"int i = global::System.MemoryExtensions.LastIndexOf(global::System.MemoryExtensions.AsSpan(runtext, runtextbeg, runtextpos - runtextbeg), {Literal(set.Chars[0])});");
+                    writer.WriteLine("if (i >= 0)");
+                    writer.WriteLine("{");
+                    writer.WriteLine("    base.runtextpos = runtextbeg + i + 1;");
+                    writer.WriteLine("    return true;");
+                    writer.WriteLine("}");
                  }
                  else
                  {
                      using (EmitBlock(writer, "for (int i = runtextpos - 1; i >= runtextbeg; i--)"))
                      {
-                        using (EmitBlock(writer, $"if ({MatchCharacterClass(hasTextInfo, options, "runtext[i]", set, lcc[0].CaseInsensitive)})"))
+                        using (EmitBlock(writer, $"if ({MatchCharacterClass(hasTextInfo, options, "runtext[i]", set.Set, set.CaseInsensitive)})"))
                          {
-                            writer.WriteLine("runtextpos = i + 1;");
+                            writer.WriteLine("base.runtextpos = i + 1;");
                              writer.WriteLine("return true;");
                          }
                      }
                  }
              }
  
-            void EmitLeadingCharacter_LeftToRight()
+            // Emits a left-to-right search for a set at a fixed position from the start of the pattern,
+            // and potentially other sets at other fixed positions in the pattern.
+            void EmitFixedSet_LeftToRight()
              {
-                Debug.Assert(lcc is not null && lcc.Length > 0);
-
-                // If minRequiredLength > 0, we already output a more stringent check.  In the rare case
-                // where we were unable to get an accurate enough min required length to ensure it's larger
-                // than the prefixes we calculated, we also need to ensure we have enough space for those,
-                // as they also represent a min required length.
-                if (minRequiredLength < lcc.Length)
-                {
-                    writer.WriteLine($"// Validate at least {lcc.Length} characters are available to match");
-                    string endExpr = lcc.Length > 1 ? $"runtextend - {lcc.Length - 1}" : "runtextend";
-                    using (EmitBlock(writer, $"if (runtextpos >= {endExpr})"))
-                    {
-                        writer.WriteLine("goto ReturnFalse;");
-                    }
-                    writer.WriteLine();
-                }
-
-                writer.WriteLine("global::System.ReadOnlySpan<char> span = global::System.MemoryExtensions.AsSpan(runtext, runtextpos, runtextend - runtextpos);");
+                List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? sets = code.FindOptimizations.FixedDistanceSets;
+                (char[]? Chars, string Set, int Distance, bool CaseInsensitive) primarySet = sets![0];
+                const int MaxSets = 4;
+                int setsToUse = Math.Min(sets.Count, MaxSets);
  
                  // If we can use IndexOf{Any}, try to accelerate the skip loop via vectorization to match the first prefix.
                  // We can use it if this is a case-sensitive class with a small number of characters in the class.
-                Span<char> setChars = stackalloc char[3]; // up to 3 characters handled by IndexOf{Any} below
-                int setCharsCount = 0, charClassIndex = 0;
-                bool canUseIndexOf =
-                    !lcc[0].CaseInsensitive &&
-                    (setCharsCount = RegexCharClass.GetSetChars(lcc[0].CharClass, setChars)) > 0 &&
-                    !RegexCharClass.IsNegated(lcc[0].CharClass);
-                bool needLoop = !canUseIndexOf || lcc.Length > 1;
+                int setIndex = 0;
+                bool canUseIndexOf = !primarySet.CaseInsensitive && primarySet.Chars is not null;
+                bool needLoop = !canUseIndexOf || setsToUse > 1;
  
                  FinishEmitScope loopBlock = default;
                  if (needLoop)
                  {
-                    EmitTextInfoIfRequired(writer, ref textInfoEmitted, ref hasTextInfo, rm);
-                    writer.WriteLine();
-                    string upperBound = lcc.Length > 1 ? $"span.Length - {lcc.Length - 1}" : "span.Length";
+                    writer.WriteLine("global::System.ReadOnlySpan<char> span = global::System.MemoryExtensions.AsSpan(runtext, runtextpos, runtextend - runtextpos);");
+                    string upperBound = "span.Length" + (setsToUse > 1 || primarySet.Distance != 0 ? $" - {minRequiredLength - 1}" : "");
                      loopBlock = EmitBlock(writer, $"for (int i = 0; i < {upperBound}; i++)");
                  }
  
                  if (canUseIndexOf)
                  {
-                    charClassIndex = 1;
+                    string span = needLoop ?
+                        "span" :
+                        "global::System.MemoryExtensions.AsSpan(runtext, runtextpos, runtextend - runtextpos)";
  
-                    string span = needLoop ? "span.Slice(i)" : "span";
-                    string indexOf = setCharsCount switch
+                    span = (needLoop, primarySet.Distance) switch
                      {
-                        1 => $"global::System.MemoryExtensions.IndexOf({span}, {Literal(setChars[0])})",
-                        2 => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(setChars[0])}, {Literal(setChars[1])})",
-                        _ => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])})",
+                        (false, 0) => span,
+                        (true, 0) => $"{span}.Slice(i)",
+                        (false, _) => $"{span}.Slice({primarySet.Distance})",
+                        (true, _) => $"{span}.Slice(i + {primarySet.Distance})",
+                    };
+
+                    string indexOf = primarySet.Chars!.Length switch
+                    {
+                        1 => $"global::System.MemoryExtensions.IndexOf({span}, {Literal(primarySet.Chars[0])})",
+                        2 => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])})",
+                        3 => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(primarySet.Chars[0])}, {Literal(primarySet.Chars[1])}, {Literal(primarySet.Chars[2])})",
+                        _ => $"global::System.MemoryExtensions.IndexOfAny({span}, {Literal(new string(primarySet.Chars))})",
                      };
  
                      if (needLoop)
@@ -640,60 +539,93 @@ namespace System.Text.RegularExpressions.Generator
                          writer.WriteLine("i += indexOfPos;");
                          writer.WriteLine();
  
-                        if (lcc.Length > 1)
+                        if (setsToUse > 1)
                          {
-                            using (EmitBlock(writer, $"if (i >= span.Length - {lcc.Length - 1})"))
+                            using (EmitBlock(writer, $"if (i >= span.Length - {minRequiredLength - 1})"))
                              {
                                  writer.WriteLine("goto ReturnFalse;");
                              }
+                            writer.WriteLine();
                          }
                      }
                      else
                      {
                          writer.WriteLine($"int i = {indexOf};");
-                        using (EmitBlock(writer, "if (i < 0)"))
+                        using (EmitBlock(writer, "if (i >= 0)"))
                          {
-                            writer.WriteLine("goto ReturnFalse;");
+                            writer.WriteLine("base.runtextpos = runtextpos + i;");
+                            writer.WriteLine("return true;");
                          }
                      }
-                    writer.WriteLine();
+
+                    setIndex = 1;
                  }
  
-                Debug.Assert(charClassIndex == 0 || charClassIndex == 1);
-                bool hasCharClassConditions = false;
-                if (charClassIndex < lcc.Length)
+                if (needLoop)
                  {
-                    // if (CharInClass(textSpan[i + charClassIndex], prefix[0], "...") &&
-                    //     ...)
-                    Debug.Assert(needLoop);
-                    int start = charClassIndex;
-                    for (; charClassIndex < lcc.Length; charClassIndex++)
+                    Debug.Assert(setIndex == 0 || setIndex == 1);
+                    bool hasCharClassConditions = false;
+                    if (setIndex < setsToUse)
                      {
-                        string spanIndex = charClassIndex > 0 ? $"span[i + {charClassIndex}]" : "span[i]";
-                        string charInClassExpr = MatchCharacterClass(hasTextInfo, options, spanIndex, lcc[charClassIndex].CharClass, lcc[charClassIndex].CaseInsensitive);
-
-                        if (charClassIndex == start)
+                        // if (CharInClass(span[i + distance], "...") &&
+                        //     ...)
+                        Debug.Assert(needLoop);
+                        int start = setIndex;
+                        for (; setIndex < setsToUse; setIndex++)
                          {
-                            writer.Write($"if ({charInClassExpr}");
-                        }
-                        else
-                        {
-                            writer.WriteLine(" &&");
-                            writer.Write($"    {charInClassExpr}");
+                            string spanIndex = $"span[i{(sets[setIndex].Distance > 0 ? $" + {sets[setIndex].Distance}" : "")}]";
+                            string charInClassExpr = MatchCharacterClass(hasTextInfo, options, spanIndex, sets[setIndex].Set, sets[setIndex].CaseInsensitive);
+
+                            if (setIndex == start)
+                            {
+                                writer.Write($"if ({charInClassExpr}");
+                            }
+                            else
+                            {
+                                writer.WriteLine(" &&");
+                                writer.Write($"    {charInClassExpr}");
+                            }
                          }
+                        writer.WriteLine(")");
+                        hasCharClassConditions = true;
                      }
-                    writer.WriteLine(")");
-                    hasCharClassConditions = true;
-                }
  
-                using (hasCharClassConditions ? EmitBlock(writer, null) : default)
-                {
-                    writer.WriteLine("base.runtextpos = runtextpos + i;");
-                    writer.WriteLine("return true;");
+                    using (hasCharClassConditions ? EmitBlock(writer, null) : default)
+                    {
+                        writer.WriteLine("base.runtextpos = runtextpos + i;");
+                        writer.WriteLine("return true;");
+                    }
                  }
  
                  loopBlock.Dispose();
              }
+
+            // If a TextInfo is needed to perform ToLower operations, emits a local initialized to the TextInfo to use.
+            static void EmitTextInfo(IndentedTextWriter writer, ref bool hasTextInfo, RegexMethod rm)
+            {
+                // Emit local to store current culture if needed
+                if ((rm.Options & RegexOptions.CultureInvariant) == 0)
+                {
+                    bool needsCulture = rm.Code.FindOptimizations.FindMode switch
+                    {
+                        FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive or
+                        FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive or
+                        FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
+                        FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or
+                        FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive => true,
+
+                        _ when rm.Code.FindOptimizations.FixedDistanceSets is List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive),
+
+                        _ => false,
+                    };
+
+                    if (needsCulture)
+                    {
+                        hasTextInfo = true;
+                        writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;");
+                    }
+                }
+            }
          }
  
          /// <summary>Emits the body of the Go override.</summary>
@@ -750,9 +682,12 @@ namespace System.Text.RegularExpressions.Generator
          /// <summary>Emits the body of a simplified Go implementation that's possible when there's minimal backtracking required by the expression.</summary>
          private static void EmitSimplifiedGo(IndentedTextWriter writer, RegexMethod rm, string id)
          {
+            // Arbitrary limit for unrolling vs creating a loop.  We want to balance size in the generated
+            // code with other costs, like the (small) overhead of slicing to create the temp span to iterate.
+            const int MaxUnrollSize = 16;
+
              RegexOptions options = (RegexOptions)rm.Options;
              RegexCode code = rm.Code;
-            (string CharClass, bool CaseInsensitive)[]? lcc = code.LeadingCharClasses;
              bool rtl = code.RightToLeft;
              bool hasTimeout = false;
  
@@ -1267,26 +1202,7 @@ namespace System.Text.RegularExpressions.Generator
                          break;
  
                      case RegexNode.Concatenate:
-                        int childCount = node.ChildCount();
-                        for (int i = 0; i < childCount; i++)
-                        {
-                            if (emitLengthChecksIfRequired && node.TryGetJoinableLengthCheckChildRange(i, out int requiredLength, out int exclusiveEnd))
-                            {
-                                EmitSpanLengthCheck(requiredLength);
-                                writer.WriteLine();
-
-                                for (; i < exclusiveEnd; i++)
-                                {
-                                    EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: false);
-                                }
-
-                                i--;
-                            }
-                            else
-                            {
-                                EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: emitLengthChecksIfRequired);
-                            }
-                        }
+                        EmitConcatenation(node, subsequent, emitLengthChecksIfRequired);
                          break;
  
                      case RegexNode.Capture:
@@ -1360,8 +1276,91 @@ namespace System.Text.RegularExpressions.Generator
                  writer.WriteLine("base.runtextpos = runtextpos;");
              }
  
+            void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChecksIfRequired)
+            {
+                int childCount = node.ChildCount();
+                for (int i = 0; i < childCount; i++)
+                {
+                    if (emitLengthChecksIfRequired && node.TryGetJoinableLengthCheckChildRange(i, out int requiredLength, out int exclusiveEnd))
+                    {
+                        bool wroteClauses = true;
+                        writer.Write($"if ({SpanLengthCheck(requiredLength)}");
+
+                        while (i < exclusiveEnd)
+                        {
+                            for (; i < exclusiveEnd; i++)
+                            {
+                                void WriteSingleCharChild(RegexNode child)
+                                {
+                                    if (wroteClauses)
+                                    {
+                                        writer.WriteLine(" ||");
+                                        writer.Write("    ");
+                                    }
+                                    else
+                                    {
+                                        writer.Write("if (");
+                                    }
+                                    EmitSingleChar(child, emitLengthCheck: false, clauseOnly: true);
+                                    wroteClauses = true;
+                                }
+
+                                RegexNode child = node.Child(i);
+                                if (child.Type is RegexNode.One or RegexNode.Notone or RegexNode.Set)
+                                {
+                                    WriteSingleCharChild(child);
+                                    writer.Write($" /* {DescribeNode(child)} */");
+                                }
+                                else if (child.Type is RegexNode.Oneloop or RegexNode.Onelazy or RegexNode.Oneloopatomic or
+                                                       RegexNode.Setloop or RegexNode.Setlazy or RegexNode.Setloopatomic or
+                                                       RegexNode.Notoneloop or RegexNode.Notonelazy or RegexNode.Notoneloopatomic &&
+                                         child.M == child.N &&
+                                         child.M <= MaxUnrollSize)
+                                {
+                                    for (int c = 0; c < child.M; c++)
+                                    {
+                                        WriteSingleCharChild(child);
+                                        if (c == 0)
+                                        {
+                                            writer.Write($" /* {DescribeNode(child)} */");
+                                        }
+                                    }
+                                }
+                                else
+                                {
+                                    break;
+                                }
+                            }
+
+                            if (wroteClauses)
+                            {
+                                writer.WriteLine(")");
+                                using (EmitBlock(writer, null))
+                                {
+                                    writer.WriteLine($"goto {doneLabel};");
+                                }
+                                wroteClauses = false;
+                            }
+
+                            if (i < exclusiveEnd)
+                            {
+                                writer.WriteLine();
+                                EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: false);
+                                i++;
+                            }
+                        }
+
+                        i--;
+                    }
+                    else
+                    {
+                        EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: emitLengthChecksIfRequired);
+                    }
+                }
+            }
+
              // Emits the code to handle a single-character match.
-            void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset = null)
+            void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, string? offset = null, bool clauseOnly = false)
              {
                  // This only emits a single check, but it's called from the looping constructs in a loop
                  // to generate the code for a single check, so we map those looping constructs to the
@@ -1375,13 +1374,20 @@ namespace System.Text.RegularExpressions.Generator
                  }
                  else
                  {
-                    expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch));
+                    expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node));
                      expr = $"{expr} {(node.IsOneFamily ? "!=" : "==")} {Literal(node.Ch)}";
                  }
  
-                using (EmitBlock(writer, emitLengthCheck ? $"if ({SpanLengthCheck(1, offset)} || {expr})" : $"if ({expr})"))
+                if (clauseOnly)
                  {
-                    writer.WriteLine($"goto {doneLabel};");
+                    writer.Write(expr);
+                }
+                else
+                {
+                    using (EmitBlock(writer, emitLengthCheck ? $"if ({SpanLengthCheck(1, offset)} || {expr})" : $"if ({expr})"))
+                    {
+                        writer.WriteLine($"goto {doneLabel};");
+                    }
                  }
  
                  textSpanPos++;
@@ -1689,10 +1695,6 @@ namespace System.Text.RegularExpressions.Generator
                      EmitSpanLengthCheck(iterations);
                  }
  
-                // Arbitrary limit for unrolling vs creating a loop.  We want to balance size in the generated
-                // code with other costs, like the (small) overhead of slicing to create the temp span to iterate.
-                const int MaxUnrollSize = 16;
-
                  if (iterations <= MaxUnrollSize)
                  {
                      // if (textSpan[textSpanPos] != c1 ||
@@ -1775,13 +1777,13 @@ namespace System.Text.RegularExpressions.Generator
                  int minIterations = node.M;
                  int maxIterations = node.N;
  
-                Span<char> setChars = stackalloc char[3]; // 3 is max we can use with IndexOfAny
+                Span<char> setChars = stackalloc char[5]; // 5 is max optimized by IndexOfAny today
                  int numSetChars = 0;
  
                  string iterationLocal = NextLocalName("i");
                  if (node.IsNotoneFamily &&
                      maxIterations == int.MaxValue &&
-                    (!IsCaseInsensitive(node) || !RegexCharClass.ParticipatesInCaseConversion(node.Ch)))
+                    (!IsCaseInsensitive(node)))
                  {
                      // For Notone, we're looking for a specific character, as everything until we find
                      // it is consumed by the loop.  If we're unbounded, such as with ".*" and if we're case-sensitive,
@@ -1806,21 +1808,25 @@ namespace System.Text.RegularExpressions.Generator
                  else if (node.IsSetFamily &&
                      maxIterations == int.MaxValue &&
                      !IsCaseInsensitive(node) &&
-                    (numSetChars = RegexCharClass.GetSetChars(node.Str!, setChars)) > 1 &&
+                    (numSetChars = RegexCharClass.GetSetChars(node.Str!, setChars)) != 0 &&
                      RegexCharClass.IsNegated(node.Str!))
                  {
-                    // If the set is negated and contains only 2 or 3 characters (if it contained 1 and was negated, it would
+                    // If the set is negated and contains only a few characters (if it contained 1 and was negated, it should
                      // have been reduced to a Notone), we can use an IndexOfAny to find any of the target characters.
                      // As with the notoneloopatomic above, the unbounded constraint is purely for simplicity.
+                    Debug.Assert(numSetChars > 1);
  
                      writer.Write($"int {iterationLocal} = global::System.MemoryExtensions.IndexOfAny({textSpanLocal}");
                      if (textSpanPos != 0)
                      {
                          writer.Write($".Slice({textSpanPos})");
                      }
-                    writer.WriteLine(numSetChars == 2 ?
-                        $", {Literal(setChars[0])}, {Literal(setChars[1])});" :
-                        $", {Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])});");
+                    writer.WriteLine(numSetChars switch
+                    {
+                        2 => $", {Literal(setChars[0])}, {Literal(setChars[1])});",
+                        3 => $", {Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])});",
+                        _ => $", {Literal(setChars.Slice(0, numSetChars).ToString())});",
+                    });
                      using (EmitBlock(writer, $"if ({iterationLocal} == -1)"))
                      {
                          writer.WriteLine(textSpanPos > 0 ?
@@ -1848,7 +1854,7 @@ namespace System.Text.RegularExpressions.Generator
                      }
                      else
                      {
-                        expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch));
+                        expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node));
                          expr = $"{expr} {(node.IsOneFamily ? "==" : "!=")} {Literal(node.Ch)}";
                      }
  
@@ -1899,7 +1905,7 @@ namespace System.Text.RegularExpressions.Generator
                  }
                  else
                  {
-                    expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch));
+                    expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive(node));
                      expr = $"{expr} {(node.IsOneFamily ? "==" : "!=")} {Literal(node.Ch)}";
                  }
  
@@ -2542,7 +2548,7 @@ namespace System.Text.RegularExpressions.Generator
  
                              clause += Code() == RegexCode.Set ?
                                  $"!{MatchCharacterClass(hasTextInfo, options, expr, rm.Code.Strings[Operand(0)], IsCaseInsensitive())}" :
-                                $"{ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)))} {(Code() == RegexCode.One ? "!=" : "==")} {Operand(0)}";
+                                $"{ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive())} {(Code() == RegexCode.One ? "!=" : "==")} {Operand(0)}";
  
                              using (EmitBlock(writer, $"if ({clause})"))
                              {
@@ -2559,7 +2565,7 @@ namespace System.Text.RegularExpressions.Generator
                              writer.WriteLine($"if (runtextend - runtextpos < {str.Length} ||");
                              for (int i = 0; i < str.Length; i++)
                              {
-                                writer.Write($"    {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos{(i == 0 ? "" : $" + {i}")}]", IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(str[i]))} != {Literal(str[i])}");
+                                writer.Write($"    {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos{(i == 0 ? "" : $" + {i}")}]", IsCaseInsensitive())} != {Literal(str[i])}");
                                  writer.WriteLine(i < str.Length - 1 ? " ||" : ")");
                              }
                              using (EmitBlock(writer, null))
@@ -2579,7 +2585,7 @@ namespace System.Text.RegularExpressions.Generator
                              for (int i = str.Length; i > 0;)
                              {
                                  i--;
-                                writer.Write($"    {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos - {str.Length - i}]", IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(str[i]))} != {Literal(str[i])}");
+                                writer.Write($"    {ToLowerIfNeeded(hasTextInfo, options, $"runtext[runtextpos - {str.Length - i}]", IsCaseInsensitive())} != {Literal(str[i])}");
                                  writer.WriteLine(i == 0 ? ")" : " ||");
                              }
                              using (EmitBlock(writer, null))
@@ -2665,7 +2671,7 @@ namespace System.Text.RegularExpressions.Generator
                                      }
                                      else
                                      {
-                                        expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)));
+                                        expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive());
                                          expr = $"{expr} {(Code() == RegexCode.Onerep ? "!=" : "==")} {Literal((char)Operand(0))}";
                                      }
  
@@ -2712,7 +2718,7 @@ namespace System.Text.RegularExpressions.Generator
                                  }
                                  else
                                  {
-                                    expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)));
+                                    expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive());
                                      string op = Code() == RegexCode.Onerep ? "!=" : "==";
                                      using (EmitBlock(writer, $"if ({expr} {op} {Literal((char)Operand(0))})"))
                                      {
@@ -2773,14 +2779,14 @@ namespace System.Text.RegularExpressions.Generator
                                  }
  
                                  string? set = Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic ? rm.Code.Strings[Operand(0)] : null;
-                                Span<char> setChars = stackalloc char[3];
+                                Span<char> setChars = stackalloc char[5]; // max optimized by IndexOfAny today
                                  int numSetChars;
  
                                  // If this is a notoneloop{atomic} and we're left-to-right and case-sensitive,
                                  // we can use the vectorized IndexOf to search for the target character.
                                  if ((Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic) &&
                                      !IsRightToLeft() &&
-                                    (!IsCaseInsensitive() || !RegexCharClass.ParticipatesInCaseConversion(Operand(0))))
+                                    !IsCaseInsensitive())
                                  {
                                      writer.WriteLine($"{I} = global::System.MemoryExtensions.IndexOf(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal((char)Operand(0))}); // i");
                                      using (EmitBlock(writer, $"if ({I} == -1)"))
@@ -2797,20 +2803,19 @@ namespace System.Text.RegularExpressions.Generator
                                  else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) &&
                                      !IsRightToLeft() &&
                                      !IsCaseInsensitive() &&
-                                    (numSetChars = RegexCharClass.GetSetChars(set!, setChars)) > 1 &&
+                                    (numSetChars = RegexCharClass.GetSetChars(set!, setChars)) != 0 &&
                                      RegexCharClass.IsNegated(set!))
                                  {
                                      // Similarly, if this is a setloop{atomic} and we're left-to-right and case-sensitive,
-                                    // and if the set contains only 2 or 3 negated chars, we can use the vectorized IndexOfAny
+                                    // and if the set contains only a few negated chars, we can use the vectorized IndexOfAny
                                      // to search for those chars.
-
-                                    Debug.Assert(numSetChars is 2 or 3);
-                                    writer.Write($"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars[0])}, {Literal(setChars[1])}");
-                                    if (numSetChars == 3)
+                                    Debug.Assert(numSetChars > 1);
+                                    writer.WriteLine(numSetChars switch
                                      {
-                                        writer.Write($", {Literal(setChars[2])}");
-                                    }
-                                    writer.WriteLine("); // i");
+                                        2 => $"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars[0])}, {Literal(setChars[1])}); // i",
+                                        3 => $"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars[0])}, {Literal(setChars[1])}, {Literal(setChars[2])}); // i",
+                                        _ => $"{I} = global::System.MemoryExtensions.IndexOfAny(global::System.MemoryExtensions.AsSpan(runtext, runtextpos, {Len}), {Literal(setChars.Slice(0, numSetChars).ToString())}); // i",
+                                    });
                                      using (EmitBlock(writer, $"if ({I} == -1)"))
                                      {
                                          writer.WriteLine($"runtextpos += {Len};");
@@ -2850,7 +2855,7 @@ namespace System.Text.RegularExpressions.Generator
                                          else
                                          {
                                              string op = Code() == RegexCode.Oneloop || Code() == RegexCode.Oneloopatomic ? "!=" : "==";
-                                            expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)));
+                                            expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive());
                                              expr = $"{expr} {op} {Literal((char)Operand(0))}";
                                          }
  
@@ -2979,7 +2984,7 @@ namespace System.Text.RegularExpressions.Generator
                              }
                              else
                              {
-                                expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)));
+                                expr = ToLowerIfNeeded(hasTextInfo, options, expr, IsCaseInsensitive());
                                  expr = $"{expr} {(Code() == RegexCode.Onelazy ? "!=" : "==")} {Literal((char)Operand(0))}";
                              }
  
@@ -3227,40 +3232,6 @@ namespace System.Text.RegularExpressions.Generator
  
          private static string ToLowerIfNeeded(bool hasTextInfo, RegexOptions options, string expression, bool toLower) => toLower ? ToLower(hasTextInfo, options, expression) : expression;
  
-        private static void EmitTextInfoIfRequired(IndentedTextWriter writer, ref bool textInfoEmitted, ref bool hasTextInfo, RegexMethod rm)
-        {
-            if (textInfoEmitted)
-            {
-                return;
-            }
-            textInfoEmitted = true;
-
-            // Emit local to store current culture if needed
-            if ((((RegexOptions)rm.Options) & RegexOptions.CultureInvariant) == 0)
-            {
-                bool needsCulture = (((RegexOptions)rm.Options) & RegexOptions.IgnoreCase) != 0 || rm.Code.BoyerMoorePrefix?.CaseInsensitive == true;
-                if (!needsCulture && rm.Code.LeadingCharClasses is not null)
-                {
-                    for (int i = 0; i < rm.Code.LeadingCharClasses.Length; i++)
-                    {
-                        if (rm.Code.LeadingCharClasses[i].CaseInsensitive)
-                        {
-                            needsCulture = true;
-                            break;
-                        }
-                    }
-                }
-
-                if (needsCulture)
-                {
-                    hasTextInfo = true;
-                    writer.WriteLine("// IgnoreCase with CultureInfo.CurrentCulture");
-                    writer.WriteLine("global::System.Globalization.TextInfo textInfo = global::System.Globalization.CultureInfo.CurrentCulture.TextInfo;");
-                    writer.WriteLine();
-                }
-            }
-        }
-
          private static string MatchCharacterClass(bool hasTextInfo, RegexOptions options, string chExpr, string charClass, bool caseInsensitive)
          {
              // We need to perform the equivalent of calling RegexRunner.CharInClass(ch, charClass),
@@ -3323,22 +3294,32 @@ namespace System.Text.RegularExpressions.Generator
                  return $"(char.GetUnicodeCategory({chExpr}) {(negated ? "!=" : "==")} global::System.Globalization.UnicodeCategory.{category})";
              }
  
-            // Next, if there's only 2 or 3 chars in the set (fairly common due to the sets we create for prefixes),
-            // it's cheaper and smaller to compare against each than it is to use a lookup table.
-            if (!invariant)
+            // Next, if there are only 2, 3, or 4 chars in a non-negated set (fairly common due to the sets we create for prefixes),
+            // it may be cheaper and smaller to compare against each than it is to use a lookup table.  We can also special-case
+            // the very common case-insensitive situation where two adjacent chars in the set differ only by the 0x20 bit (e.g. the
+            // upper and lowercase ASCII variants of a letter), in which case we can use bit manipulation to avoid a comparison.
+            if (!invariant && !RegexCharClass.IsNegated(charClass))
              {
-                Span<char> setChars = stackalloc char[3];
-                int numChars = RegexCharClass.GetSetChars(charClass, setChars);
-                if (!RegexCharClass.IsNegated(charClass))
+                Span<char> setChars = stackalloc char[4];
+                switch (RegexCharClass.GetSetChars(charClass, setChars))
                  {
-                    switch (numChars)
-                    {
-                        case 2:
-                            return $"(((ch = {chExpr}) == {Literal(setChars[0])}) | (ch == {Literal(setChars[1])}))";
+                    case 2:
+                        return (setChars[0] | 0x20) == setChars[1] ?
+                            $"(({chExpr} | 0x20) == {Literal(setChars[1])})" :
+                            $"(((ch = {chExpr}) == {Literal(setChars[0])}) | (ch == {Literal(setChars[1])}))";
  
-                        case 3:
-                            return $"(((ch = {chExpr}) == {Literal(setChars[0])}) | (ch == {Literal(setChars[1])}) | (ch == {Literal(setChars[2])}))";
-                    }
+                    case 3:
+                        return (setChars[0] | 0x20) == setChars[1] ?
+                            $"((((ch = {chExpr}) | 0x20) == {Literal(setChars[1])}) | (ch == {Literal(setChars[2])}))" :
+                            $"(((ch = {chExpr}) == {Literal(setChars[0])}) | (ch == {Literal(setChars[1])}) | (ch == {Literal(setChars[2])}))";
+
+                    case 4:
+                        if (((setChars[0] | 0x20) == setChars[1]) &&
+                            ((setChars[2] | 0x20) == setChars[3]))
+                        {
+                            return $"(((ch = ({chExpr} | 0x20)) == {Literal(setChars[1])}) | (ch == {Literal(setChars[3])}))";
+                        }
+                        break;
                  }
              }
  
diff --git a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs

index 045dbd5..cb3bed4 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs
@@ -178,7 +178,7 @@ namespace System.Text.RegularExpressions.Generator
              RegexCode code;
              try
              {
-                code = RegexWriter.Write(RegexParser.Parse(pattern, regexOptions, culture));
+                code = RegexWriter.Write(RegexParser.Parse(pattern, regexOptions, culture), culture);
              }
              catch (Exception e)
              {
diff --git a/src/libraries/System.Text.RegularExpressions/gen/Stubs.cs b/src/libraries/System.Text.RegularExpressions/gen/Stubs.cs

index aefab1d..13626a4 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/gen/Stubs.cs
+++ b/src/libraries/System.Text.RegularExpressions/gen/Stubs.cs
@@ -54,13 +54,13 @@ namespace System.Threading
  {
      internal static class InterlockedExtensions
      {
-        public static int Or(ref int location1, int value)
+        public static uint Or(ref uint location1, uint value)
          {
-            int current = location1;
+            uint current = location1;
              while (true)
              {
-                int newValue = current | value;
-                int oldValue = Interlocked.CompareExchange(ref location1, newValue, current);
+                uint newValue = current | value;
+                uint oldValue = (uint)Interlocked.CompareExchange(ref Unsafe.As<uint, int>(ref location1), (int)newValue, (int)current);
                  if (oldValue == current)
                  {
                      return oldValue;
diff --git a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj

index 7f59e37..8e1ec70 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj
+++ b/src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj
@@ -30,10 +30,10 @@
      <Compile Include="$(CoreLibSharedDir)System\Collections\Generic\ValueListBuilder.cs" Link="Production\ValueListBuilder.cs" />
      <Compile Include="..\src\System\Collections\Generic\ValueListBuilder.Pop.cs" Link="Production\ValueListBuilder.Pop.cs" />
      <Compile Include="..\src\System\Threading\StackHelper.cs" Link="Production\StackHelper.cs" />
-    <Compile Include="..\src\System\Text\RegularExpressions\RegexBoyerMoore.cs" Link="Production\RegexBoyerMoore.cs" />
      <Compile Include="..\src\System\Text\RegularExpressions\RegexCharClass.cs" Link="Production\RegexCharClass.cs" />
      <Compile Include="..\src\System\Text\RegularExpressions\RegexCharClass.MappingTable.cs" Link="Production\RegexCharClass.MappingTable.cs" />
      <Compile Include="..\src\System\Text\RegularExpressions\RegexCode.cs" Link="Production\RegexCode.cs" />
+    <Compile Include="..\src\System\Text\RegularExpressions\RegexFindOptimizations.cs" Link="Production\RegexFindOptimizations.cs" />
      <Compile Include="..\src\System\Text\RegularExpressions\RegexNode.cs" Link="Production\RegexNode.cs" />
      <Compile Include="..\src\System\Text\RegularExpressions\RegexOptions.cs" Link="Production\RegexOptions.cs" />
      <Compile Include="..\src\System\Text\RegularExpressions\RegexParseError.cs" Link="Production\RegexParseError.cs" />
diff --git a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj

index da0f0ed..8537fd7 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
+++ b/src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj
@@ -23,11 +23,11 @@
      <Compile Include="System\Text\RegularExpressions\Regex.Replace.cs" />
      <Compile Include="System\Text\RegularExpressions\Regex.Split.cs" />
      <Compile Include="System\Text\RegularExpressions\Regex.Timeout.cs" />
-    <Compile Include="System\Text\RegularExpressions\RegexBoyerMoore.cs" />
      <Compile Include="System\Text\RegularExpressions\RegexCharClass.cs" />
      <Compile Include="System\Text\RegularExpressions\RegexCharClass.MappingTable.cs" />
      <Compile Include="System\Text\RegularExpressions\RegexCode.cs" />
      <Compile Include="System\Text\RegularExpressions\RegexCompilationInfo.cs" />
+    <Compile Include="System\Text\RegularExpressions\RegexFindOptimizations.cs" />
      <Compile Include="System\Text\RegularExpressions\RegexGeneratorAttribute.cs" />
      <Compile Include="System\Text\RegularExpressions\RegexInterpreter.cs" />
      <Compile Include="System\Text\RegularExpressions\RegexMatchTimeoutException.cs" />
@@ -100,6 +100,7 @@
      <Reference Include="System.Memory" />
      <Reference Include="System.Runtime" />
      <Reference Include="System.Runtime.Extensions" />
+    <Reference Include="System.Runtime.InteropServices" />
      <Reference Include="System.Threading" />
      <!-- References required for RegexOptions.Compiled -->
      <Reference Include="System.Reflection.Emit" />
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Cache.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Cache.cs

index edcb23b..66b1a81 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Cache.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Cache.cs
@@ -128,7 +128,7 @@ namespace System.Text.RegularExpressions
              Regex.ValidateOptions(options);
              Regex.ValidateMatchTimeout(matchTimeout);
  
-            CultureInfo culture = (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
+            CultureInfo culture = RegexParser.GetTargetCulture(options);
              Key key = new Key(pattern, culture.ToString(), options, matchTimeout);
  
              Regex? regex = Get(key);
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs

index 85d575c..ee276b3 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs
@@ -65,12 +65,12 @@ namespace System.Text.RegularExpressions
              // Call Init directly rather than delegating to a Regex ctor that takes
              // options to enable linking / tree shaking to remove the Regex compiler
              // and NonBacktracking implementation if it's not used.
-            Init(pattern, RegexOptions.None, s_defaultMatchTimeout, culture);
+            Init(pattern, RegexOptions.None, s_defaultMatchTimeout, culture ?? CultureInfo.CurrentCulture);
          }
  
          internal Regex(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo? culture)
          {
-            culture ??= GetTargetCulture(options);
+            culture ??= RegexParser.GetTargetCulture(options);
              Init(pattern, options, matchTimeout, culture);
  
              if ((options & RegexOptions.NonBacktracking) != 0)
@@ -87,10 +87,6 @@ namespace System.Text.RegularExpressions
              }
          }
  
-        /// <summary>Gets the culture to use based on the specified options.</summary>
-        private static CultureInfo GetTargetCulture(RegexOptions options) =>
-            (options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
-
          /// <summary>Initializes the instance.</summary>
          /// <remarks>
          /// This is separated out of the constructor so that an app only using 'new Regex(pattern)'
@@ -98,7 +94,7 @@ namespace System.Text.RegularExpressions
          /// compiler, such that a tree shaker / linker can trim it away if it's not otherwise used.
          /// </remarks>
          [MemberNotNull(nameof(_code))]
-        private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo? culture)
+        private void Init(string pattern, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
          {
              ValidatePattern(pattern);
              ValidateOptions(options);
@@ -107,7 +103,6 @@ namespace System.Text.RegularExpressions
              this.pattern = pattern;
              internalMatchTimeout = matchTimeout;
              roptions = options;
-            culture ??= GetTargetCulture(options);
  
  #if DEBUG
              if (IsDebug)
@@ -121,7 +116,7 @@ namespace System.Text.RegularExpressions
  
              // Generate the RegexCode from the node tree.  This is required for interpreting,
              // and is used as input into RegexOptions.Compiled and RegexOptions.NonBacktracking.
-            _code = RegexWriter.Write(tree);
+            _code = RegexWriter.Write(tree, culture);
  
              if ((options & RegexOptions.NonBacktracking) != 0)
              {
@@ -434,7 +429,7 @@ namespace System.Text.RegularExpressions
          /// <summary>Creates a new runner instance.</summary>
          private RegexRunner CreateRunner() =>
              factory?.CreateInstance() ??
-            new RegexInterpreter(_code!, GetTargetCulture(roptions));
+            new RegexInterpreter(_code!, RegexParser.GetTargetCulture(roptions));
  
          /// <summary>True if the <see cref="RegexOptions.Compiled"/> option was set.</summary>
          protected bool UseOptionC() => (roptions & RegexOptions.Compiled) != 0;
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs

deleted file mode 100644 (file)

index 7fc3fb1..0000000
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs
+++ /dev/null
@@ -1,404 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-
-// The RegexBoyerMoore object precomputes the Boyer-Moore
-// tables for fast string scanning. These tables allow
-// you to scan for the first occurrence of a string within
-// a large body of text without examining every character.
-// The performance of the heuristic depends on the actual
-// string and the text being searched, but usually, the longer
-// the string that is being searched for, the fewer characters
-// need to be examined.
-
-using System.Diagnostics;
-using System.Diagnostics.CodeAnalysis;
-using System.Globalization;
-
-namespace System.Text.RegularExpressions
-{
-    internal sealed class RegexBoyerMoore
-    {
-        public readonly int[] Positive;
-        public readonly int[] NegativeASCII;
-        public readonly int[][]? NegativeUnicode;
-        public readonly string Pattern;
-        public readonly int LowASCII;
-        public readonly int HighASCII;
-        public readonly bool RightToLeft;
-        public readonly bool CaseInsensitive;
-        private readonly CultureInfo _culture;
-
-        /// <summary>The maximum prefix string length for which we'll attempt to create a Boyer-Moore table.</summary>
-        /// <remarks>This is limited in order to minimize the overhead of constructing a Regex.</remarks>
-        public const int MaxLimit = 50_000; // must be <= char.MaxValue for RegexCompiler to compile Boyer-Moore correctly
-
-        /// <summary>
-        /// Constructs a Boyer-Moore state machine for searching for the string
-        /// pattern. The string must not be zero-length.
-        /// </summary>
-        public RegexBoyerMoore(string pattern, bool caseInsensitive, bool rightToLeft, CultureInfo culture)
-        {
-            // Sorry, you just can't use Boyer-Moore to find an empty pattern.
-            // We're doing this for your own protection. (Really, for speed.)
-            Debug.Assert(pattern.Length != 0, "RegexBoyerMoore called with an empty string. This is bad for perf");
-            Debug.Assert(pattern.Length <= MaxLimit, "RegexBoyerMoore can take a long time for large patterns");
-#if DEBUG
-            if (caseInsensitive)
-            {
-                foreach (char c in pattern)
-                {
-                    // We expect each individual character to have been lower-cased. We don't validate the whole
-                    // string at once because the rest of the library doesn't currently recognize/support surrogate pairs.
-                    Debug.Assert(c == culture.TextInfo.ToLower(c), "Pattern wasn't lowercased with provided culture");
-                }
-            }
-#endif
-
-            Pattern = pattern;
-            RightToLeft = rightToLeft;
-            CaseInsensitive = caseInsensitive;
-            _culture = culture;
-
-            int beforefirst;
-            int last;
-            int bump;
-
-            if (!rightToLeft)
-            {
-                beforefirst = -1;
-                last = pattern.Length - 1;
-                bump = 1;
-            }
-            else
-            {
-                beforefirst = pattern.Length;
-                last = 0;
-                bump = -1;
-            }
-
-            // PART I - the good-suffix shift table
-            //
-            // compute the positive requirement:
-            // if char "i" is the first one from the right that doesn't match,
-            // then we know the matcher can advance by _positive[i].
-            //
-            // This algorithm is a simplified variant of the standard
-            // Boyer-Moore good suffix calculation.
-
-            Positive = new int[pattern.Length];
-
-            int examine = last;
-            char ch = pattern[examine];
-            Positive[examine] = bump;
-            examine -= bump;
-            int scan;
-            int match;
-
-            while (true)
-            {
-                // find an internal char (examine) that matches the tail
-
-                while (true)
-                {
-                    if (examine == beforefirst)
-                        goto OuterloopBreak;
-                    if (pattern[examine] == ch)
-                        break;
-                    examine -= bump;
-                }
-
-                match = last;
-                scan = examine;
-
-                // find the length of the match
-
-                while (true)
-                {
-                    if (scan == beforefirst || pattern[match] != pattern[scan])
-                    {
-                        // at the end of the match, note the difference in _positive
-                        // this is not the length of the match, but the distance from the internal match
-                        // to the tail suffix.
-                        if (Positive[match] == 0)
-                            Positive[match] = match - scan;
-
-                        break;
-                    }
-
-                    scan -= bump;
-                    match -= bump;
-                }
-
-                examine -= bump;
-            }
-
-        OuterloopBreak:
-
-            match = last - bump;
-
-            // scan for the chars for which there are no shifts that yield a different candidate
-
-
-            // The inside of the if statement used to say
-            // "_positive[match] = last - beforefirst;"
-            // This is slightly less aggressive in how much we skip, but at worst it
-            // should mean a little more work rather than skipping a potential match.
-            while (match != beforefirst)
-            {
-                if (Positive[match] == 0)
-                    Positive[match] = bump;
-
-                match -= bump;
-            }
-
-            // PART II - the bad-character shift table
-            //
-            // compute the negative requirement:
-            // if char "ch" is the reject character when testing position "i",
-            // we can slide up by _negative[ch];
-            // (_negative[ch] = str.Length - 1 - str.LastIndexOf(ch))
-            //
-            // the lookup table is divided into ASCII and Unicode portions;
-            // only those parts of the Unicode 16-bit code set that actually
-            // appear in the string are in the table. (Maximum size with
-            // Unicode is 65K; ASCII only case is 512 bytes.)
-
-            NegativeASCII = new int[128];
-
-            for (int i = 0; i < 128; i++)
-                NegativeASCII[i] = last - beforefirst;
-
-            LowASCII = 127;
-            HighASCII = 0;
-
-            for (examine = last; examine != beforefirst; examine -= bump)
-            {
-                ch = pattern[examine];
-
-                if (ch < 128)
-                {
-                    if (LowASCII > ch)
-                        LowASCII = ch;
-
-                    if (HighASCII < ch)
-                        HighASCII = ch;
-
-                    if (NegativeASCII[ch] == last - beforefirst)
-                        NegativeASCII[ch] = last - examine;
-                }
-                else
-                {
-                    int i = ch >> 8;
-                    int j = ch & 0xFF;
-
-                    if (NegativeUnicode == null)
-                    {
-                        NegativeUnicode = new int[256][];
-                    }
-
-                    if (NegativeUnicode[i] == null)
-                    {
-                        int[] newarray = new int[256];
-
-                        for (int k = 0; k < newarray.Length; k++)
-                            newarray[k] = last - beforefirst;
-
-                        if (i == 0)
-                        {
-                            Array.Copy(NegativeASCII, newarray, 128);
-                            NegativeASCII = newarray;
-                        }
-
-                        NegativeUnicode[i] = newarray;
-                    }
-
-                    if (NegativeUnicode[i][j] == last - beforefirst)
-                        NegativeUnicode[i][j] = last - examine;
-                }
-            }
-        }
-
-        // TODO: We should be able to avoid producing the RegexBoyerMoore instance
-        // entirely if we're going to go down the code path of using IndexOf. That will
-        // require some refactoring, though.
-
-        /// <summary>Gets whether IndexOf could be used to perform the match.</summary>
-        public bool PatternSupportsIndexOf =>
-            !RightToLeft && (!CaseInsensitive || !RegexCharClass.ParticipatesInCaseConversion(Pattern));
-
-        /// <summary>
-        /// When a regex is anchored, we can do a quick IsMatch test instead of a Scan
-        /// </summary>
-        public bool IsMatch(string text, int index, int beglimit, int endlimit)
-        {
-            if (!RightToLeft)
-            {
-                if (index < beglimit || endlimit - index < Pattern.Length)
-                    return false;
-            }
-            else
-            {
-                if (index > endlimit || index - beglimit < Pattern.Length)
-                    return false;
-
-                index -= Pattern.Length;
-            }
-
-            if (CaseInsensitive)
-            {
-                TextInfo textinfo = _culture.TextInfo;
-
-                for (int i = 0; i < Pattern.Length; i++)
-                {
-                    if (Pattern[i] != textinfo.ToLower(text[index + i]))
-                    {
-                        return false;
-                    }
-                }
-
-                return true;
-            }
-
-            return Pattern.AsSpan().SequenceEqual(text.AsSpan(index, Pattern.Length));
-        }
-
-        /// <summary>
-        /// Scan uses the Boyer-Moore algorithm to find the first occurrence
-        /// of the specified string within text, beginning at index, and
-        /// constrained within beglimit and endlimit.
-        ///
-        /// The direction and case-sensitivity of the match is determined
-        /// by the arguments to the RegexBoyerMoore constructor.
-        /// </summary>
-        public int Scan(string text, int index, int beglimit, int endlimit)
-        {
-            int defadv;
-            int test;
-            int startmatch;
-            int endmatch;
-            int bump;
-
-            if (!RightToLeft)
-            {
-                defadv = Pattern.Length;
-                startmatch = Pattern.Length - 1;
-                endmatch = 0;
-                test = index + defadv - 1;
-                bump = 1;
-            }
-            else
-            {
-                defadv = -Pattern.Length;
-                startmatch = 0;
-                endmatch = -defadv - 1;
-                test = index + defadv;
-                bump = -1;
-            }
-
-            char chMatch = Pattern[startmatch];
-            char chTest;
-            int test2;
-            int match;
-            int advance;
-            int[] unicodeLookup;
-
-            while (true)
-            {
-                if (test >= endlimit || test < beglimit)
-                    return -1;
-
-                chTest = text[test];
-
-                if (CaseInsensitive)
-                    chTest = _culture.TextInfo.ToLower(chTest);
-
-                if (chTest != chMatch)
-                {
-                    if (chTest < 128)
-                        advance = NegativeASCII[chTest];
-                    else if (null != NegativeUnicode && (null != (unicodeLookup = NegativeUnicode[chTest >> 8])))
-                        advance = unicodeLookup[chTest & 0xFF];
-                    else
-                        advance = defadv;
-
-                    test += advance;
-                }
-                else
-                { // if (chTest == chMatch)
-                    test2 = test;
-                    match = startmatch;
-
-                    while (true)
-                    {
-                        if (match == endmatch)
-                            return (RightToLeft ? test2 + 1 : test2);
-
-                        match -= bump;
-                        test2 -= bump;
-
-                        chTest = text[test2];
-
-                        if (CaseInsensitive)
-                            chTest = _culture.TextInfo.ToLower(chTest);
-
-                        if (chTest != Pattern[match])
-                        {
-                            advance = Positive[match];
-                            if ((chTest & 0xFF80) == 0)
-                                test2 = (match - startmatch) + NegativeASCII[chTest];
-                            else if (null != NegativeUnicode && (null != (unicodeLookup = NegativeUnicode[chTest >> 8])))
-                                test2 = (match - startmatch) + unicodeLookup[chTest & 0xFF];
-                            else
-                            {
-                                test += advance;
-                                break;
-                            }
-
-                            if (RightToLeft ? test2 < advance : test2 > advance)
-                                advance = test2;
-
-                            test += advance;
-                            break;
-                        }
-                    }
-                }
-            }
-        }
-
-#if DEBUG
-        /// <summary>Used when dumping for debugging.</summary>
-        [ExcludeFromCodeCoverage]
-        public override string ToString() => Dump(string.Empty);
-
-        [ExcludeFromCodeCoverage]
-        public string Dump(string indent)
-        {
-            var sb = new StringBuilder();
-
-            sb.AppendLine($"{indent}BM Pattern: {Pattern}");
-
-            sb.Append($"{indent}Positive: ");
-            foreach (int i in Positive)
-            {
-                sb.Append($"{i} ");
-            }
-            sb.AppendLine();
-
-            if (NegativeASCII != null)
-            {
-                sb.Append($"{indent}Negative table: ");
-                for (int i = 0; i < NegativeASCII.Length; i++)
-                {
-                    if (NegativeASCII[i] != Pattern.Length)
-                    {
-                        sb.Append($" {{{Regex.Escape(((char)i).ToString())} {NegativeASCII[i]}}}");
-                    }
-                }
-            }
-            sb.AppendLine();
-
-            return sb.ToString();
-        }
-#endif
-    }
-}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs

index 96ccdd2..e78f23c 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
@@ -5,6 +5,7 @@ using System.Collections.Generic;
  using System.Diagnostics;
  using System.Diagnostics.CodeAnalysis;
  using System.Globalization;
+using System.Runtime.CompilerServices;
  using System.Threading;
  
  namespace System.Text.RegularExpressions
@@ -71,6 +72,7 @@ namespace System.Text.RegularExpressions
          internal const string NotECMADigitClass = "\x01\x02\x00" + ECMADigitSet;
  
          internal const string AnyClass = "\x00\x01\x00\x00";
+        private const string EmptyClass = "\x00\x00\x00";
  
          // UnicodeCategory is zero based, so we add one to each value and subtract it off later
          private const int DefinedCategoriesCapacity = 38;
@@ -874,9 +876,9 @@ namespace System.Text.RegularExpressions
              }
          }
  
-        /// <summary>Gets whether the specified string participates in case conversion.</summary>
-        /// <remarks>The string participates in case conversion if any of its characters do.</remarks>
-        public static bool ParticipatesInCaseConversion(string s)
+        /// <summary>Gets whether the specified span participates in case conversion.</summary>
+        /// <remarks>The span participates in case conversion if any of its characters do.</remarks>
+        public static bool ParticipatesInCaseConversion(ReadOnlySpan<char> s)
          {
              foreach (char c in s)
              {
@@ -890,6 +892,7 @@ namespace System.Text.RegularExpressions
          }
  
          /// <summary>Gets whether we can iterate through the set list pairs in order to completely enumerate the set's contents.</summary>
+        /// <remarks>This may enumerate negated characters if the set is negated.</remarks>
          private static bool CanEasilyEnumerateSetContents(string set) =>
              set.Length > SetStartIndex &&
              set[SetLengthIndex] > 0 &&
@@ -1013,61 +1016,69 @@ namespace System.Text.RegularExpressions
              }
          }
  
-        public static bool CharInClass(char ch, string set, ref int[]? asciiResultCache)
+        /// <summary>Determines a character's membership in a character class (via the string representation of the class).</summary>
+        /// <param name="ch">The character.</param>
+        /// <param name="set">The string representation of the character class.</param>
+        /// <param name="asciiLazyCache">A lazily-populated cache for ASCII results stored in a 256-bit array.</param>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static bool CharInClass(char ch, string set, ref uint[]? asciiLazyCache)
          {
-            // The int[] contains 8 ints, or 256 bits.  These are laid out as pairs, where the first bit ("known") in the pair
-            // says whether the second bit ("value") in the pair has already been computed.  Once a value is computed, it's never
+            // The uint[] contains 8 uints, or 256 bits.  These are laid out as pairs, where the first bit ("known") in the pair
+            // says whether the second bit ("value") in the pair has already been computed.  Once a value is computed, it's never
              // changed, so since Int32s are written/read atomically, we can trust the value bit if we see that the known bit
              // has been set.  If the known bit hasn't been set, then we proceed to look it up, and then swap in the result.
              const int CacheArrayLength = 8;
-            Debug.Assert(asciiResultCache is null || asciiResultCache.Length == CacheArrayLength, "set lookup should be able to store two bits for each of the first 128 characters");
+            Debug.Assert(asciiLazyCache is null || asciiLazyCache.Length == CacheArrayLength, "set lookup should be able to store two bits for each of the first 128 characters");
  
-            if (ch < 128)
+            // If the character is ASCII and the cache already has an answer for it, use that answer.
+            if (asciiLazyCache is uint[] cache)
              {
-                // Lazily-initialize the cache for this set.
-                if (asciiResultCache is null)
+                int index = ch >> 4;
+                if ((uint)index < (uint)cache.Length)
                  {
-                    Interlocked.CompareExchange(ref asciiResultCache, new int[CacheArrayLength], null);
+                    Debug.Assert(ch < 128);
+                    uint current = cache[index];
+                    uint bit = 1u << ((ch & 0xF) << 1);
+                    if ((current & bit) != 0)
+                    {
+                        return (current & (bit << 1)) != 0;
+                    }
                  }
+            }
  
-                // Determine which int in the lookup array contains the known and value bits for this character,
-                // and compute their bit numbers.
-                ref int slot = ref asciiResultCache[ch >> 4];
-                int knownBit = 1 << ((ch & 0xF) << 1);
-                int valueBit = knownBit << 1;
-
-                // If the value for this bit has already been computed, use it.
-                int current = slot;
-                if ((current & knownBit) != 0)
-                {
-                    return (current & valueBit) != 0;
-                }
+            // For ASCII, lazily initialize. For non-ASCII, just compute the value.
+            return ch < 128 ?
+                InitializeValue(ch, set, ref asciiLazyCache) :
+                CharInClassRecursive(ch, set, 0);
  
+            static bool InitializeValue(char ch, string set, ref uint[]? asciiLazyCache)
+            {
                  // (After warm-up, we should find ourselves rarely getting here.)
+                Debug.Assert(ch < 128);
  
-                // Otherwise, compute it normally.
+                // Compute the result, determine which bits to write back to the array, and then "or" those bits in using a thread-safe operation.
                  bool isInClass = CharInClass(ch, set);
-
-                // Determine which bits to write back to the array and "or" the bits back in a thread-safe manner.
-                int bitsToSet = knownBit;
+                uint bitsToSet = 1u << ((ch & 0xF) << 1);
                  if (isInClass)
                  {
-                    bitsToSet |= valueBit;
+                    bitsToSet |= bitsToSet << 1;
                  }
+
+                uint[]? cache = asciiLazyCache ?? Interlocked.CompareExchange(ref asciiLazyCache, new uint[CacheArrayLength], null) ?? asciiLazyCache;
  #if REGEXGENERATOR
-                InterlockedExtensions.Or(ref slot, bitsToSet);
+                InterlockedExtensions.Or(ref cache[ch >> 4], bitsToSet);
  #else
-                Interlocked.Or(ref slot, bitsToSet);
+                Interlocked.Or(ref cache[ch >> 4], bitsToSet);
  #endif
  
                  // Return the computed value.
                  return isInClass;
              }
-
-            // Non-ASCII.  Fall back to computing the answer.
-            return CharInClassRecursive(ch, set, 0);
          }
  
+        /// <summary>
+        /// Determines a character's membership in a character class (via the string representation of the class).
+        /// </summary>
          public static bool CharInClass(char ch, string set) =>
              CharInClassRecursive(ch, set, 0);
  
@@ -1279,6 +1290,283 @@ namespace System.Text.RegularExpressions
              return new RegexCharClass(IsNegated(charClass, start), ranges, categoriesBuilder, sub);
          }
  
+        #region Perf workaround until https://github.com/dotnet/runtime/issues/61048 and https://github.com/dotnet/runtime/issues/59492 are addressed
+        // TODO: https://github.com/dotnet/runtime/issues/61048
+        // The below functionality needs to be removed/replaced/generalized.  The goal is to avoid relying on
+        // ToLower and culture-based operation at match time, and instead be able to compute at construction
+        // time case folding equivalence classes that let us determine up-front the set of characters considered
+        // valid for a match.  For now, we do this just for ASCII, and for anything else fall back to the
+        // pre-existing mechanism whereby a culture is used at construction time to ToLower and then one is
+        // used at match time to ToLower.  We also skip 'i' and 'I', as the casing of those varies across cultures
+        // whereas every other ASCII value's casing is stable across cultures.  We could hardcode the values for
+        // when an invariant vs tr/az culture vs any other culture is used, and we likely will, but for now doing
+        // so would be a breaking change, as in doing so we'd be relying only on the culture present at the time
+        // of construction rather than the one at the time of match.  That will be resolved with
+        // https://github.com/dotnet/runtime/issues/59492.
+
+        /// <summary>Creates a set string for a single character, optionally factoring in case-insensitivity.</summary>
+        /// <param name="c">The character for which to create the set.</param>
+        /// <param name="caseInsensitive">null if case-sensitive; non-null if case-insensitive, in which case it's the culture to use.</param>
+        /// <param name="resultIsCaseInsensitive">false if the caller should strip out RegexOptions.IgnoreCase because it's now fully represented by the set; otherwise, true.</param>
+        /// <returns>The created set string.</returns>
+        public static string OneToStringClass(char c, CultureInfo? caseInsensitive, out bool resultIsCaseInsensitive)
+        {
+            var vsb = new ValueStringBuilder(stackalloc char[4]);
+
+            if (caseInsensitive is null)
+            {
+                resultIsCaseInsensitive = false;
+                vsb.Append(c);
+            }
+            else if (c < 128 && (c | 0x20) != 'i') // ASCII other than 'i'/'I' (c | 0x20 maps 'I' onto 'i')
+            {
+                resultIsCaseInsensitive = false;
+                switch (c)
+                {
+                    // These are the same in all cultures.  As with the rest of this support, we can generalize this
+                    // once we fix the aforementioned casing issues, e.g. by lazily populating an interning cache
+                    // rather than hardcoding the strings for these values, once almost all values will be the same
+                    // regardless of culture.
+                    case 'A': case 'a': return "\0\x0004\0ABab";
+                    case 'B': case 'b': return "\0\x0004\0BCbc";
+                    case 'C': case 'c': return "\0\x0004\0CDcd";
+                    case 'D': case 'd': return "\0\x0004\0DEde";
+                    case 'E': case 'e': return "\0\x0004\0EFef";
+                    case 'F': case 'f': return "\0\x0004\0FGfg";
+                    case 'G': case 'g': return "\0\x0004\0GHgh";
+                    case 'H': case 'h': return "\0\x0004\0HIhi";
+                    // no entry for 'I'/'i': the condition above excludes them, so they are handled by the later branches
+                    case 'J': case 'j': return "\0\x0004\0JKjk";
+                    case 'K': case 'k': return "\0\x0006\0KLkl\u212A\u212B";
+                    case 'L': case 'l': return "\0\x0004\0LMlm";
+                    case 'M': case 'm': return "\0\x0004\0MNmn";
+                    case 'N': case 'n': return "\0\x0004\0NOno";
+                    case 'O': case 'o': return "\0\x0004\0OPop";
+                    case 'P': case 'p': return "\0\x0004\0PQpq";
+                    case 'Q': case 'q': return "\0\x0004\0QRqr";
+                    case 'R': case 'r': return "\0\x0004\0RSrs";
+                    case 'S': case 's': return "\0\x0004\0STst";
+                    case 'T': case 't': return "\0\x0004\0TUtu";
+                    case 'U': case 'u': return "\0\x0004\0UVuv";
+                    case 'V': case 'v': return "\0\x0004\0VWvw";
+                    case 'W': case 'w': return "\0\x0004\0WXwx";
+                    case 'X': case 'x': return "\0\x0004\0XYxy";
+                    case 'Y': case 'y': return "\0\x0004\0YZyz";
+                    case 'Z': case 'z': return "\0\x0004\0Z[z{";
+
+                    // All the ASCII characters for which ParticipatesInCaseConversion is false
+                    case '\u0000': return "\0\u0002\0\u0000\u0001";
+                    case '\u0001': return "\0\u0002\0\u0001\u0002";
+                    case '\u0002': return "\0\u0002\0\u0002\u0003";
+                    case '\u0003': return "\0\u0002\0\u0003\u0004";
+                    case '\u0004': return "\0\u0002\0\u0004\u0005";
+                    case '\u0005': return "\0\u0002\0\u0005\u0006";
+                    case '\u0006': return "\0\u0002\0\u0006\u0007";
+                    case '\u0007': return "\0\u0002\0\u0007\u0008";
+                    case '\u0008': return "\0\u0002\0\u0008\u0009";
+                    case '\u0009': return "\0\u0002\0\u0009\u000A";
+                    case '\u000A': return "\0\u0002\0\u000A\u000B";
+                    case '\u000B': return "\0\u0002\0\u000B\u000C";
+                    case '\u000C': return "\0\u0002\0\u000C\u000D";
+                    case '\u000D': return "\0\u0002\0\u000D\u000E";
+                    case '\u000E': return "\0\u0002\0\u000E\u000F";
+                    case '\u000F': return "\0\u0002\0\u000F\u0010";
+                    case '\u0010': return "\0\u0002\0\u0010\u0011";
+                    case '\u0011': return "\0\u0002\0\u0011\u0012";
+                    case '\u0012': return "\0\u0002\0\u0012\u0013";
+                    case '\u0013': return "\0\u0002\0\u0013\u0014";
+                    case '\u0014': return "\0\u0002\0\u0014\u0015";
+                    case '\u0015': return "\0\u0002\0\u0015\u0016";
+                    case '\u0016': return "\0\u0002\0\u0016\u0017";
+                    case '\u0017': return "\0\u0002\0\u0017\u0018";
+                    case '\u0018': return "\0\u0002\0\u0018\u0019";
+                    case '\u0019': return "\0\u0002\0\u0019\u001A";
+                    case '\u001A': return "\0\u0002\0\u001A\u001B";
+                    case '\u001B': return "\0\u0002\0\u001B\u001C";
+                    case '\u001C': return "\0\u0002\0\u001C\u001D";
+                    case '\u001D': return "\0\u0002\0\u001D\u001E";
+                    case '\u001E': return "\0\u0002\0\u001E\u001F";
+                    case '\u001F': return "\0\u0002\0\u001F\u0020";
+                    case '\u0020': return "\0\u0002\0\u0020\u0021";
+                    case '\u0021': return "\0\u0002\0\u0021\u0022";
+                    case '\u0022': return "\0\u0002\0\u0022\u0023";
+                    case '\u0023': return "\0\u0002\0\u0023\u0024";
+                    case '\u0025': return "\0\u0002\0\u0025\u0026";
+                    case '\u0026': return "\0\u0002\0\u0026\u0027";
+                    case '\u0027': return "\0\u0002\0\u0027\u0028";
+                    case '\u0028': return "\0\u0002\0\u0028\u0029";
+                    case '\u0029': return "\0\u0002\0\u0029\u002A";
+                    case '\u002A': return "\0\u0002\0\u002A\u002B";
+                    case '\u002C': return "\0\u0002\0\u002C\u002D";
+                    case '\u002D': return "\0\u0002\0\u002D\u002E";
+                    case '\u002E': return "\0\u0002\0\u002E\u002F";
+                    case '\u002F': return "\0\u0002\0\u002F\u0030";
+                    case '\u0030': return "\0\u0002\0\u0030\u0031";
+                    case '\u0031': return "\0\u0002\0\u0031\u0032";
+                    case '\u0032': return "\0\u0002\0\u0032\u0033";
+                    case '\u0033': return "\0\u0002\0\u0033\u0034";
+                    case '\u0034': return "\0\u0002\0\u0034\u0035";
+                    case '\u0035': return "\0\u0002\0\u0035\u0036";
+                    case '\u0036': return "\0\u0002\0\u0036\u0037";
+                    case '\u0037': return "\0\u0002\0\u0037\u0038";
+                    case '\u0038': return "\0\u0002\0\u0038\u0039";
+                    case '\u0039': return "\0\u0002\0\u0039\u003A";
+                    case '\u003A': return "\0\u0002\0\u003A\u003B";
+                    case '\u003B': return "\0\u0002\0\u003B\u003C";
+                    case '\u003F': return "\0\u0002\0\u003F\u0040";
+                    case '\u0040': return "\0\u0002\0\u0040\u0041";
+                    case '\u005B': return "\0\u0002\0\u005B\u005C";
+                    case '\u005C': return "\0\u0002\0\u005C\u005D";
+                    case '\u005D': return "\0\u0002\0\u005D\u005E";
+                    case '\u005F': return "\0\u0002\0\u005F\u0060";
+                    case '\u007B': return "\0\u0002\0\u007B\u007C";
+                    case '\u007D': return "\0\u0002\0\u007D\u007E";
+                    case '\u007F': return "\0\u0002\0\u007F\u0080";
+                }
+                AddAsciiCharIgnoreCaseEquivalence(c, ref vsb, caseInsensitive); // ASCII chars with no precomputed string above (e.g. '$', '+') get their set built dynamically
+            }
+            else if (!ParticipatesInCaseConversion(c))
+            {
+                resultIsCaseInsensitive = false;
+                vsb.Append(c);
+            }
+            else
+            {
+                resultIsCaseInsensitive = true;
+                vsb.Append(char.ToLower(c, caseInsensitive));
+            }
+
+            string result = CharsToStringClass(vsb.AsSpan());
+            vsb.Dispose();
+            return result;
+        }
+
+        private static unsafe string CharsToStringClass(ReadOnlySpan<char> chars) // Builds a set string in which each supplied char becomes its own single-character range.
+        {
+#if DEBUG
+            // Make sure they're all sorted with no duplicates (strictly ascending order)
+            for (int index = 0; index < chars.Length - 1; index++)
+            {
+                Debug.Assert(chars[index] < chars[index + 1]);
+            }
+#endif
+
+            // If there aren't any chars, just return an empty class.
+            if (chars.Length == 0)
+            {
+                return EmptyClass;
+            }
+
+            // Count how many characters the set portion will need.  All but the very last possible
+            // char value will have two characters, one for the inclusive beginning of range
+            // and one for the exclusive end of range (LastChar gets no end, as c + 1 is not emitted for it).
+            int count = chars.Length * 2;
+            if (chars[chars.Length - 1] == LastChar)
+            {
+                count--;
+            }
+
+            // Get the pointer/length of the span to be able to pass it into string.Create.
+            fixed (char* charsPtr = chars)
+            {
+#if REGEXGENERATOR
+                return StringExtensions.Create(
+#else
+                return string.Create(
+#endif
+                    SetStartIndex + count, ((IntPtr)charsPtr, chars.Length), static (span, state) =>
+                {
+                    // Reconstruct the span now that we're inside of the lambda.
+                    ReadOnlySpan<char> chars = new ReadOnlySpan<char>((char*)state.Item1, state.Length);
+
+                    // Fill in the set string header: zero flags, zero category length, and the number of set chars that follow.
+                    span[FlagsIndex] = (char)0;
+                    span[CategoryLengthIndex] = (char)0;
+                    span[SetLengthIndex] = (char)(span.Length - SetStartIndex);
+                    int i = SetStartIndex;
+                    foreach (char c in chars)
+                    {
+                        span[i++] = c; // inclusive start of the range
+                        if (c != LastChar)
+                        {
+                            span[i++] = (char)(c + 1); // exclusive end of the range
+                        }
+                    }
+                    Debug.Assert(i == span.Length); // the length computed up front must match exactly what was written
+                });
+            }
+        }
+
+        /// <summary>Tries to create from a RegexOptions.IgnoreCase set string a new set string that can be used without RegexOptions.IgnoreCase.</summary>
+        /// <param name="set">The original set string from a RegexOptions.IgnoreCase node.</param>
+        /// <param name="culture">The culture in use.</param>
+        /// <returns>A new set string if one could be created; otherwise, null.</returns>
+        public static string? MakeCaseSensitiveIfPossible(string set, CultureInfo culture)
+        {
+            if (IsNegated(set))
+            {
+                return null;
+            }
+
+            // We'll eventually need a more robust way to do this for any set.  For now, we iterate through each character
+            // in the set, and to avoid spending lots of time doing so, we limit the number of characters.  This approach also
+            // limits the structure of the sets allowed, e.g. they can't be negated, can't use subtraction, etc.
+            Span<char> setChars = stackalloc char[64]; // arbitrary limit chosen to include common groupings like all ASCII letters and digits
+
+            // Try to get the set's characters.
+            int setCharsCount = GetSetChars(set, setChars);
+            if (setCharsCount == 0) // NOTE(review): presumably 0 means the set couldn't be enumerated into the buffer — confirm against GetSetChars
+            {
+                return null;
+            }
+
+            // Enumerate all the characters and add all characters that form their case folding equivalence class.
+            var rcc = new RegexCharClass();
+            var vsb = new ValueStringBuilder(stackalloc char[4]); // scratch buffer, reset and reused for each character
+            foreach (char c in setChars.Slice(0, setCharsCount))
+            {
+                if (c >= 128 || c == 'i' || c == 'I') // AddAsciiCharIgnoreCaseEquivalence only supports ASCII other than 'i'/'I'
+                {
+                    return null;
+                }
+
+                vsb.Length = 0;
+                AddAsciiCharIgnoreCaseEquivalence(c, ref vsb, culture);
+                foreach (char v in vsb.AsSpan())
+                {
+                    rcc.AddChar(v);
+                }
+            }
+
+            // Return the constructed class.
+            return rcc.ToStringClass();
+        }
+
+        private static void AddAsciiCharIgnoreCaseEquivalence(char c, ref ValueStringBuilder vsb, CultureInfo culture) // Appends c's upper/lower-case forms (plus the Kelvin sign for 'k'/'K') to vsb.
+        {
+            Debug.Assert(c < 128, $"Expected ASCII, got {(int)c}");
+            Debug.Assert(c != 'i' && c != 'I', "'i' currently doesn't work correctly in all cultures");
+
+            char upper = char.ToUpper(c, culture);
+            char lower = char.ToLower(c, culture);
+
+            if (upper < lower) // append in ascending order; when upper == lower only the single value is appended
+            {
+                vsb.Append(upper);
+            }
+            vsb.Append(lower);
+            if (upper > lower)
+            {
+                vsb.Append(upper);
+            }
+
+            if (c == 'k' || c == 'K')
+            {
+                vsb.Append((char)0x212A); // kelvin sign (U+212A), appended last as it is greater than any ASCII value
+            }
+        }
+        #endregion
+
          /// <summary>
          /// Constructs the string representation of the class.
          /// </summary>
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs

index d8700eb..6f6c8cd 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs
@@ -16,6 +16,7 @@
  using System.Collections;
  using System.Diagnostics;
  using System.Diagnostics.CodeAnalysis;
+using System.Globalization;
  
  namespace System.Text.RegularExpressions
  {
@@ -96,35 +97,25 @@ namespace System.Text.RegularExpressions
          public readonly RegexTree Tree;                                                 // the optimized parse tree
          public readonly int[] Codes;                                                    // the code
          public readonly string[] Strings;                                               // the string/set table
-        public readonly int[]?[] StringsAsciiLookup;                                    // the ASCII lookup table optimization for the sets in Strings
+        public readonly uint[]?[] StringsAsciiLookup;                                   // the ASCII lookup table optimization for the sets in Strings
          public readonly int TrackCount;                                                 // how many instructions use backtracking
          public readonly Hashtable? Caps;                                                // mapping of user group numbers -> impl group slots
          public readonly int CapSize;                                                    // number of impl group slots
-        public readonly (string CharClass, bool CaseInsensitive)[]? LeadingCharClasses; // the set of candidate first characters, if available.  Each entry corresponds to the next char in the input.
-        public int[]? LeadingCharClassAsciiLookup;                                      // the ASCII lookup table optimization for LeadingCharClasses[0], if it exists; only used by the interpreter
-        public readonly RegexBoyerMoore? BoyerMoorePrefix;                              // the fixed prefix string as a Boyer-Moore machine, if available
-        public readonly int LeadingAnchor;                                              // the leading anchor, if one exists (RegexPrefixAnalyzer.Bol, etc)
          public readonly bool RightToLeft;                                               // true if right to left
+        public readonly RegexFindOptimizations FindOptimizations;                       // the find-next-starting-position data (leading anchor, find mode, etc.) computed from the tree and culture
  
-        public RegexCode(RegexTree tree, int[] codes, string[] strings, int trackcount,
-                         Hashtable? caps, int capsize,
-                         RegexBoyerMoore? boyerMoorePrefix,
-                         (string CharClass, bool CaseInsensitive)[]? leadingCharClasses,
-                         int leadingAnchor, bool rightToLeft)
+        public RegexCode(RegexTree tree, CultureInfo culture, int[] codes, string[] strings, int trackcount,
+                         Hashtable? caps, int capsize)
          {
-            Debug.Assert(boyerMoorePrefix is null || leadingCharClasses is null);
-
              Tree = tree;
              Codes = codes;
              Strings = strings;
-            StringsAsciiLookup = new int[strings.Length][];
+            StringsAsciiLookup = new uint[strings.Length][]; // one lookup slot per entry in Strings; each inner array starts out null
              TrackCount = trackcount;
              Caps = caps;
              CapSize = capsize;
-            BoyerMoorePrefix = boyerMoorePrefix;
-            LeadingCharClasses = leadingCharClasses;
-            LeadingAnchor = leadingAnchor;
-            RightToLeft = rightToLeft;
+            RightToLeft = (tree.Options & RegexOptions.RightToLeft) != 0; // direction is now derived from the tree's options rather than passed in
+            FindOptimizations = new RegexFindOptimizations(tree, culture); // replaces the prefix/char-class/anchor values that used to be passed in
          }
  
          public static bool OpcodeBacktracks(int Op)
@@ -409,26 +400,8 @@ namespace System.Text.RegularExpressions
              var sb = new StringBuilder();
  
              sb.AppendLine($"Direction:  {(RightToLeft ? "right-to-left" : "left-to-right")}");
-            sb.AppendLine($"Anchor:     {RegexPrefixAnalyzer.AnchorDescription(LeadingAnchor)}");
+            sb.AppendLine($"Anchor:     {RegexPrefixAnalyzer.AnchorDescription(FindOptimizations.LeadingAnchor)}");
              sb.AppendLine();
-
-            if (BoyerMoorePrefix != null)
-            {
-                sb.AppendLine("Boyer-Moore:");
-                sb.AppendLine(BoyerMoorePrefix.Dump("    "));
-                sb.AppendLine();
-            }
-
-            if (LeadingCharClasses != null)
-            {
-                sb.AppendLine("First Chars:");
-                for (int i = 0; i < LeadingCharClasses.Length; i++)
-                {
-                    sb.AppendLine($"{i}: {RegexCharClass.SetDescription(LeadingCharClasses[i].CharClass)}");
-                }
-                sb.AppendLine();
-            }
-
              for (int i = 0; i < Codes.Length; i += OpcodeSize(Codes[i]))
              {
                  sb.AppendLine(OpcodeDescription(i));
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs

index e288567..49b88d3 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
@@ -62,6 +62,9 @@ namespace System.Text.RegularExpressions
          private static readonly MethodInfo s_spanIndexOfSpan = typeof(MemoryExtensions).GetMethod("IndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
          private static readonly MethodInfo s_spanIndexOfAnyCharChar = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char));
          private static readonly MethodInfo s_spanIndexOfAnyCharCharChar = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char));
+        private static readonly MethodInfo s_spanIndexOfAnySpan = typeof(MemoryExtensions).GetMethod("IndexOfAny", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
+        private static readonly MethodInfo s_spanLastIndexOfChar = typeof(MemoryExtensions).GetMethod("LastIndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), Type.MakeGenericMethodParameter(0) })!.MakeGenericMethod(typeof(char));
+        private static readonly MethodInfo s_spanLastIndexOfSpan = typeof(MemoryExtensions).GetMethod("LastIndexOf", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
          private static readonly MethodInfo s_spanSliceIntMethod = typeof(ReadOnlySpan<char>).GetMethod("Slice", new Type[] { typeof(int) })!;
          private static readonly MethodInfo s_spanSliceIntIntMethod = typeof(ReadOnlySpan<char>).GetMethod("Slice", new Type[] { typeof(int), typeof(int) })!;
          private static readonly MethodInfo s_spanStartsWith = typeof(MemoryExtensions).GetMethod("StartsWith", new Type[] { typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)), typeof(ReadOnlySpan<>).MakeGenericType(Type.MakeGenericMethodParameter(0)) })!.MakeGenericMethod(typeof(char));
@@ -90,9 +93,6 @@ namespace System.Text.RegularExpressions
          protected RegexCode? _code;                                                // the RegexCode object
          protected int[]? _codes;                                                   // the RegexCodes being translated
          protected string[]? _strings;                                              // the stringtable associated with the RegexCodes
-        protected (string CharClass, bool CaseInsensitive)[]? _leadingCharClasses; // the possible first chars computed by RegexPrefixAnalyzer
-        protected RegexBoyerMoore? _boyerMoorePrefix;                              // a prefix as a boyer-moore machine
-        protected int _leadingAnchor;                                              // the set of anchors
          protected bool _hasTimeout;                                                // whether the regex has a non-infinite timeout
  
          private Label[]? _labels;                                                  // a label for every operation in _codes
@@ -928,20 +928,20 @@ namespace System.Text.RegularExpressions
              }
              _runtextLocal = DeclareString();
              _textInfoLocal = null;
-            if (!_options.HasFlag(RegexOptions.CultureInvariant))
+            if ((_options & RegexOptions.CultureInvariant) == 0)
              {
-                bool needsCulture = _options.HasFlag(RegexOptions.IgnoreCase) || _boyerMoorePrefix?.CaseInsensitive == true;
-                if (!needsCulture && _leadingCharClasses != null)
+                bool needsCulture = _code.FindOptimizations.FindMode switch
                  {
-                    for (int i = 0; i < _leadingCharClasses.Length; i++)
-                    {
-                        if (_leadingCharClasses[i].CaseInsensitive)
-                        {
-                            needsCulture = true;
-                            break;
-                        }
-                    }
-                }
+                    FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive or
+                    FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive or
+                    FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive or
+                    FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive or
+                    FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive => true,
+
+                    _ when _code.FindOptimizations.FixedDistanceSets is List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets => sets.Exists(set => set.CaseInsensitive),
+
+                    _ => false,
+                };
  
                  if (needsCulture)
                  {
@@ -1012,43 +1012,59 @@ namespace System.Text.RegularExpressions
              Ret();
              MarkLabel(finishedLengthCheck);
  
-            GenerateAnchorChecks();
-
-            if (_boyerMoorePrefix is RegexBoyerMoore { NegativeUnicode: null } rbm)
-            {
-                if (rbm.PatternSupportsIndexOf)
-                {
-                    GenerateIndexOf(rbm.Pattern);
-                }
-                else
-                {
-                    GenerateBoyerMoore(rbm);
-                }
-            }
-            else if (_leadingCharClasses is not null)
+            // Emit any anchors.
+            if (GenerateAnchors())
              {
-                if (_code.RightToLeft)
-                {
-                    GenerateLeadingCharacter_RightToLeft();
-                }
-                else
-                {
-                    GenerateLeadingCharacter_LeftToRight();
-                }
+                return;
              }
-            else
+
+            // Either anchors weren't specified, or they don't completely root all matches to a specific location.
+
+            switch (_code.FindOptimizations.FindMode)
              {
-                // return true;
-                Ldc(1);
-                Ret();
+                case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive:
+                    Debug.Assert(!string.IsNullOrEmpty(_code.FindOptimizations.LeadingCaseSensitivePrefix));
+                    GenerateIndexOf_LeftToRight(_code.FindOptimizations.LeadingCaseSensitivePrefix);
+                    break;
+
+                case FindNextStartingPositionMode.LeadingPrefix_RightToLeft_CaseSensitive:
+                    Debug.Assert(!string.IsNullOrEmpty(_code.FindOptimizations.LeadingCaseSensitivePrefix));
+                    GenerateIndexOf_RightToLeft(_code.FindOptimizations.LeadingCaseSensitivePrefix);
+                    break;
+
+                case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive:
+                case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive:
+                case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive:
+                case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive:
+                    Debug.Assert(_code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
+                    GenerateFixedSet_LeftToRight();
+                    break;
+
+                case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive:
+                case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive:
+                    Debug.Assert(_code.FindOptimizations.FixedDistanceSets is { Count: > 0 });
+                    GenerateFixedSet_RightToLeft();
+                    break;
+
+                default:
+                    Debug.Fail($"Unexpected mode: {_code.FindOptimizations.FindMode}");
+                    goto case FindNextStartingPositionMode.NoSearch;
+
+                case FindNextStartingPositionMode.NoSearch:
+                    // return true;
+                    Ldc(1);
+                    Ret();
+                    break;
              }
  
-            void GenerateAnchorChecks()
+            // Emits any anchors.  Returns true if the anchor roots any match to a specific location and thus no further
+            // searching is required; otherwise, false.
+            bool GenerateAnchors()
              {
                  // Generate anchor checks.
-                if ((_leadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End | RegexPrefixAnalyzer.Bol)) != 0)
+                if ((_code.FindOptimizations.LeadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End | RegexPrefixAnalyzer.Bol)) != 0)
                  {
-                    switch (_leadingAnchor)
+                    switch (_code.FindOptimizations.LeadingAnchor)
                      {
                          case RegexPrefixAnalyzer.Beginning:
                              {
@@ -1072,7 +1088,7 @@ namespace System.Text.RegularExpressions
                              }
                              Ldc(1);
                              Ret();
-                            return;
+                            return true;
  
                          case RegexPrefixAnalyzer.Start:
                              {
@@ -1092,7 +1108,7 @@ namespace System.Text.RegularExpressions
                              }
                              Ldc(1);
                              Ret();
-                            return;
+                            return true;
  
                          case RegexPrefixAnalyzer.EndZ:
                              {
@@ -1134,9 +1150,9 @@ namespace System.Text.RegularExpressions
                              }
                              Ldc(1);
                              Ret();
-                            return;
+                            return true;
  
-                        case RegexPrefixAnalyzer.End when minRequiredLength == 0:  // if it's > 0, we already output a more stringent check
+                        case RegexPrefixAnalyzer.End:
                              {
                                  Label l1 = DefineLabel();
                                  Ldloc(_runtextposLocal);
@@ -1157,16 +1173,16 @@ namespace System.Text.RegularExpressions
                              }
                              Ldc(1);
                              Ret();
-                            return;
+                            return true;
  
-                        case RegexPrefixAnalyzer.Bol when !_code.RightToLeft: // don't bother optimizing for the niche case of RegexOptions.RightToLeft | RegexOptions.Multiline
+                        case RegexPrefixAnalyzer.Bol:
                              {
                                  // Optimize the handling of a Beginning-Of-Line (BOL) anchor.  BOL is special, in that unlike
                                  // other anchors like Beginning, there are potentially multiple places a BOL can match.  So unlike
                                  // the other anchors, which all skip all subsequent processing if found, with BOL we just use it
-                                // to boost our position to the next line, and then continue normally with any Boyer-Moore or
-                                // leading char class searches.
+                                // to boost our position to the next line, and then continue normally with any prefix or char class searches.
  
+                                Debug.Assert(!_code.RightToLeft, "RightToLeft isn't implemented and should have been filtered out previously");
                                  Label atBeginningOfLine = DefineLabel();
  
                                  // if (runtextpos > runtextbeg...
@@ -1218,218 +1234,11 @@ namespace System.Text.RegularExpressions
                              break;
                      }
                  }
-            }
-
-            void GenerateBoyerMoore(RegexBoyerMoore rbm)
-            {
-                LocalBuilder limitLocal;
-                int beforefirst;
-                int last;
-                if (!_code.RightToLeft)
-                {
-                    limitLocal = _runtextendLocal;
-                    beforefirst = -1;
-                    last = rbm.Pattern.Length - 1;
-                }
-                else
-                {
-                    limitLocal = _runtextbegLocal!;
-                    beforefirst = rbm.Pattern.Length;
-                    last = 0;
-                }
-
-                int chLast = rbm.Pattern[last];
  
-                // string runtext = this.runtext;
-                Mvfldloc(s_runtextField, _runtextLocal);
-
-                // runtextpos += pattern.Length - 1; // advance to match last character
-                Ldloc(_runtextposLocal);
-                if (!_code.RightToLeft)
-                {
-                    Ldc(rbm.Pattern.Length - 1);
-                    Add();
-                }
-                else
-                {
-                    Ldc(rbm.Pattern.Length);
-                    Sub();
-                }
-                Stloc(_runtextposLocal);
-
-                Label lStart = DefineLabel();
-                Br(lStart);
-
-                // DefaultAdvance:
-                // offset = pattern.Length;
-                Label lDefaultAdvance = DefineLabel();
-                MarkLabel(lDefaultAdvance);
-                Ldc(_code.RightToLeft ? -rbm.Pattern.Length : rbm.Pattern.Length);
-
-                // Advance:
-                // runtextpos += offset;
-                Label lAdvance = DefineLabel();
-                MarkLabel(lAdvance);
-                Ldloc(_runtextposLocal);
-                Add();
-                Stloc(_runtextposLocal);
-
-                // Start:
-                // if (runtextpos >= runtextend) goto returnFalse;
-                MarkLabel(lStart);
-                Ldloc(_runtextposLocal);
-                Ldloc(limitLocal);
-                if (!_code.RightToLeft)
-                {
-                    BgeFar(returnFalse);
-                }
-                else
-                {
-                    BltFar(returnFalse);
-                }
-
-                // ch = runtext[runtextpos];
-                Rightchar();
-                if (rbm.CaseInsensitive)
-                {
-                    CallToLower();
-                }
-
-                Label lPartialMatch = DefineLabel();
-                using (RentedLocalBuilder chLocal = RentInt32Local())
-                {
-                    Stloc(chLocal);
-                    Ldloc(chLocal);
-                    Ldc(chLast);
-
-                    // if (ch == lastChar) goto partialMatch;
-                    BeqFar(lPartialMatch);
-
-                    // ch -= lowAscii;
-                    // if (ch > (highAscii - lowAscii)) goto defaultAdvance;
-                    Ldloc(chLocal);
-                    Ldc(rbm.LowASCII);
-                    Sub();
-                    Stloc(chLocal);
-                    Ldloc(chLocal);
-                    Ldc(rbm.HighASCII - rbm.LowASCII);
-                    BgtUn(lDefaultAdvance);
-
-                    // int offset = "lookupstring"[num];
-                    // goto advance;
-                    int negativeRange = rbm.HighASCII - rbm.LowASCII + 1;
-                    if (negativeRange > 1)
-                    {
-                        // Create a string to store the lookup table we use to find the offset.
-                        Debug.Assert(rbm.Pattern.Length <= char.MaxValue, "RegexBoyerMoore should have limited the size allowed.");
-                        string negativeLookup = string.Create(negativeRange, (rbm, beforefirst), static (span, state) =>
-                        {
-                            // Store the offsets into the string.  RightToLeft has negative offsets, so to support it with chars (unsigned), we negate
-                            // the values to be stored in the string, and then at run time after looking up the offset in the string, negate it again.
-                            for (int i = 0; i < span.Length; i++)
-                            {
-                                int offset = state.rbm.NegativeASCII[i + state.rbm.LowASCII];
-                                if (offset == state.beforefirst)
-                                {
-                                    offset = state.rbm.Pattern.Length;
-                                }
-                                else if (state.rbm.RightToLeft)
-                                {
-                                    offset = -offset;
-                                }
-                                Debug.Assert(offset >= 0 && offset <= char.MaxValue);
-                                span[i] = (char)offset;
-                            }
-                        });
-
-                        // offset = lookupString[ch];
-                        // goto Advance;
-                        Ldstr(negativeLookup);
-                        Ldloc(chLocal);
-                        Call(s_stringGetCharsMethod);
-                        if (_code.RightToLeft)
-                        {
-                            Neg();
-                        }
-                    }
-                    else
-                    {
-                        // offset = value;
-                        Debug.Assert(negativeRange == 1);
-                        int offset = rbm.NegativeASCII[rbm.LowASCII];
-                        if (offset == beforefirst)
-                        {
-                            offset = _code.RightToLeft ? -rbm.Pattern.Length : rbm.Pattern.Length;
-                        }
-                        Ldc(offset);
-                    }
-                    BrFar(lAdvance);
-                }
-
-                // Emit a check for each character from the next to last down to the first.
-                MarkLabel(lPartialMatch);
-                Ldloc(_runtextposLocal);
-                using (RentedLocalBuilder testLocal = RentInt32Local())
-                {
-                    Stloc(testLocal);
-
-                    int prevLabelOffset = int.MaxValue;
-                    Label prevLabel = default;
-                    for (int i = rbm.Pattern.Length - 2; i >= 0; i--)
-                    {
-                        int charindex = _code.RightToLeft ? rbm.Pattern.Length - 1 - i : i;
-
-                        // if (runtext[--test] == pattern[index]) goto lNext;
-                        Ldloc(_runtextLocal);
-                        Ldloc(testLocal);
-                        Ldc(1);
-                        Sub(_code.RightToLeft);
-                        Stloc(testLocal);
-                        Ldloc(testLocal);
-                        Call(s_stringGetCharsMethod);
-                        if (rbm.CaseInsensitive && RegexCharClass.ParticipatesInCaseConversion(rbm.Pattern[charindex]))
-                        {
-                            CallToLower();
-                        }
-                        Ldc(rbm.Pattern[charindex]);
-
-                        if (prevLabelOffset == rbm.Positive[charindex])
-                        {
-                            BneFar(prevLabel);
-                        }
-                        else
-                        {
-                            Label lNext = DefineLabel();
-                            Beq(lNext);
-
-                            // offset = positive[ch];
-                            // goto advance;
-                            prevLabel = DefineLabel();
-                            prevLabelOffset = rbm.Positive[charindex];
-                            MarkLabel(prevLabel);
-                            Ldc(prevLabelOffset);
-                            BrFar(lAdvance);
-
-                            MarkLabel(lNext);
-                        }
-                    }
-
-                    // this.runtextpos = test;
-                    // return true;
-                    Ldthis();
-                    Ldloc(testLocal);
-                    if (_code.RightToLeft)
-                    {
-                        Ldc(1);
-                        Add();
-                    }
-                    Stfld(s_runtextposField);
-                    Ldc(1);
-                    Ret();
-                }
+                return false;
              }
  
-            void GenerateIndexOf(string prefix)
+            void GenerateIndexOf_LeftToRight(string prefix)
              {
                  using RentedLocalBuilder i = RentInt32Local();
  
@@ -1446,11 +1255,7 @@ namespace System.Text.RegularExpressions
                  Call(s_spanIndexOfSpan);
                  Stloc(i);
  
-                // if (i < 0)
-                // {
-                //     base.runtextpos = runtextend;
-                //     return false;
-                // }
+                // if (i < 0) goto ReturnFalse;
                  Ldloc(i);
                  Ldc(0);
                  BltFar(returnFalse);
@@ -1466,105 +1271,135 @@ namespace System.Text.RegularExpressions
                  Ret();
              }
  
-            void GenerateLeadingCharacter_RightToLeft()
+            void GenerateIndexOf_RightToLeft(string prefix) // Emits a LastIndexOf search for `prefix` in the text between runtextbeg and runtextpos.
              {
-                Debug.Assert(_leadingCharClasses.Length == 1, "Only the FirstChars and not MultiFirstChars computation is supported for RightToLeft");
-
-                using RentedLocalBuilder cLocal = RentInt32Local();
-
-                Label l1 = DefineLabel();
-                Label l2 = DefineLabel();
-                Label l3 = DefineLabel();
-                Label l4 = DefineLabel();
-                Label l5 = DefineLabel();
-
-                Mvfldloc(s_runtextField, _runtextLocal);
+                using RentedLocalBuilder i = RentInt32Local();
  
+                // int i = runtext.AsSpan(runtextbeg, runtextpos - runtextbeg).LastIndexOf(prefix);
+                Ldthis();
+                Ldfld(s_runtextField);
+                Ldloc(_runtextbegLocal!);
                  Ldloc(_runtextposLocal);
                  Ldloc(_runtextbegLocal!);
                  Sub();
-                Stloc(cLocal);
+                Call(s_stringAsSpanIntIntMethod);
+                Ldstr(prefix);
+                Call(s_stringAsSpanMethod);
+                Call(s_spanLastIndexOfSpan);
+                Stloc(i);
  
-                if (minRequiredLength == 0) // if minRequiredLength > 0, we already output a more stringent check
-                {
-                    Ldloc(cLocal);
-                    Ldc(0);
-                    BleFar(l4);
-                }
+                // if (i < 0) goto ReturnFalse;
+                Ldloc(i);
+                Ldc(0);
+                BltFar(returnFalse);
  
-                MarkLabel(l1);
-                Ldloc(cLocal);
+                // base.runtextpos = runtextbeg + i + prefix.Length; (i is relative to the span, which starts at runtextbeg)
+                // return true;
+                Ldthis();
+                Ldloc(_runtextbegLocal!);
+                Ldloc(i);
+                Add();
+                Ldc(prefix.Length);
+                Add();
+                Stfld(s_runtextposField);
                  Ldc(1);
-                Sub();
-                Stloc(cLocal);
+                Ret();
+            }
  
-                Leftcharnext();
+            void GenerateFixedSet_RightToLeft() // Emits a backward search from runtextpos toward runtextbeg for a char in the first fixed-distance set.
+            {
+                (char[]? Chars, string Set, int Distance, bool CaseInsensitive) set = _code.FindOptimizations.FixedDistanceSets![0];
+                Debug.Assert(set.Distance == 0); // only the first set is consulted here, and it must be at distance 0
  
-                if (!RegexCharClass.IsSingleton(_leadingCharClasses[0].CharClass))
-                {
-                    EmitMatchCharacterClass(_leadingCharClasses[0].CharClass, _leadingCharClasses[0].CaseInsensitive);
-                    Brtrue(l2);
-                }
-                else
+                using RentedLocalBuilder i = RentInt32Local();
+
+                if (set.Chars is { Length: 1 } && !set.CaseInsensitive) // exactly one case-sensitive char: use LastIndexOf instead of a loop
                  {
-                    Ldc(RegexCharClass.SingletonChar(_leadingCharClasses[0].CharClass));
-                    Beq(l2);
-                }
+                    // int i = runtext.AsSpan(runtextbeg, runtextpos - runtextbeg).LastIndexOf(set.Chars[0]);
+                    Ldthis();
+                    Ldfld(s_runtextField);
+                    Ldloc(_runtextbegLocal!);
+                    Ldloc(_runtextposLocal);
+                    Ldloc(_runtextbegLocal!);
+                    Sub();
+                    Call(s_stringAsSpanIntIntMethod);
+                    Ldc(set.Chars[0]);
+                    Call(s_spanLastIndexOfChar);
+                    Stloc(i);
  
-                MarkLabel(l5);
+                    // if (i < 0) goto ReturnFalse;
+                    Ldloc(i);
+                    Ldc(0);
+                    BltFar(returnFalse);
  
-                Ldloc(cLocal);
-                Ldc(0);
-                if (!RegexCharClass.IsSingleton(_leadingCharClasses[0].CharClass))
-                {
-                    BgtFar(l1);
+                    // base.runtextpos = runtextbeg + i + 1; (i is relative to the span, which starts at runtextbeg)
+                    // return true;
+                    Ldthis();
+                    Ldloc(_runtextbegLocal!);
+                    Ldloc(i);
+                    Add();
+                    Ldc(1);
+                    Add();
+                    Stfld(s_runtextposField);
+                    Ldc(1);
+                    Ret();
                  }
                  else
                  {
-                    Bgt(l1);
-                }
+                    Label condition = DefineLabel();
+                    Label increment = DefineLabel();
+                    Label body = DefineLabel();
  
-                Ldc(0);
-                Br(l3);
+                    Mvfldloc(s_runtextField, _runtextLocal);
  
-                MarkLabel(l2);
-
-                Ldloc(_runtextposLocal);
-                Ldc(1);
-                Sub(_code.RightToLeft);
-                Stloc(_runtextposLocal);
-                Ldc(1);
+                    // for (int i = runtextpos - 1; ...
+                    Ldloc(_runtextposLocal);
+                    Ldc(1);
+                    Sub();
+                    Stloc(i);
+                    BrFar(condition);
+
+                    // if (MatchCharClass(runtext[i], set))
+                    MarkLabel(body);
+                    Ldloc(_runtextLocal);
+                    Ldloc(i);
+                    Call(s_stringGetCharsMethod);
+                    EmitMatchCharacterClass(set.Set, set.CaseInsensitive);
+                    Brfalse(increment);
+
+                    // base.runtextpos = i + 1; (here i is an absolute index into runtext)
+                    // return true;
+                    Ldthis();
+                    Ldloc(i);
+                    Ldc(1);
+                    Add();
+                    Stfld(s_runtextposField);
+                    Ldc(1);
+                    Ret();
  
-                MarkLabel(l3);
+                    // for (...; ...; i--)
+                    MarkLabel(increment);
+                    Ldloc(i);
+                    Ldc(1);
+                    Sub();
+                    Stloc(i);
  
-                Mvlocfld(_runtextposLocal, s_runtextposField);
-                Ret();
+                    // for (...; i >= runtextbeg; ...)
+                    MarkLabel(condition);
+                    Ldloc(i);
+                    Ldloc(_runtextbegLocal!);
+                    BgeFar(body);
  
-                MarkLabel(l4);
-                Ldc(0);
-                Ret();
+                    BrFar(returnFalse); // loop ran down past runtextbeg without finding a char in the set
+                }
              }
  
-            void GenerateLeadingCharacter_LeftToRight()
+            void GenerateFixedSet_LeftToRight()
              {
-                Debug.Assert(_leadingCharClasses != null && _leadingCharClasses.Length > 0);
-
-                // If minRequiredLength > 0, we already output a more stringent check.  In the rare case
-                // where we were unable to get an accurate enough min required length to ensure it's larger
-                // than the prefixes we calculated, we also need to ensure we have enough spaces for those,
-                // as they also represent a min required length.
-                if (minRequiredLength < _leadingCharClasses.Length)
-                {
-                    // if (runtextpos >= runtextend - (_leadingCharClasses.Length - 1)) goto returnFalse;
-                    Ldloc(_runtextendLocal);
-                    if (_leadingCharClasses.Length > 1)
-                    {
-                        Ldc(_leadingCharClasses.Length - 1);
-                        Sub();
-                    }
-                    Ldloc(_runtextposLocal);
-                    BleFar(returnFalse);
-                }
+                List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? sets = _code.FindOptimizations.FixedDistanceSets;
+                (char[]? Chars, string Set, int Distance, bool CaseInsensitive) primarySet = sets![0];
+                const int MaxSets = 4;
+                int setsToUse = Math.Min(sets.Count, MaxSets);
  
                  using RentedLocalBuilder iLocal = RentInt32Local();
                  using RentedLocalBuilder textSpanLocal = RentReadOnlySpanCharLocal();
@@ -1580,13 +1415,9 @@ namespace System.Text.RegularExpressions
  
                  // If we can use IndexOf{Any}, try to accelerate the skip loop via vectorization to match the first prefix.
                  // We can use it if this is a case-sensitive class with a small number of characters in the class.
-                Span<char> setChars = stackalloc char[3]; // up to 3 characters handled by IndexOf{Any} below
-                int setCharsCount = 0, charClassIndex = 0;
-                bool canUseIndexOf =
-                    !_leadingCharClasses[0].CaseInsensitive &&
-                    (setCharsCount = RegexCharClass.GetSetChars(_leadingCharClasses[0].CharClass, setChars)) > 0 &&
-                    !RegexCharClass.IsNegated(_leadingCharClasses[0].CharClass);
-                bool needLoop = !canUseIndexOf || _leadingCharClasses.Length > 1;
+                int setIndex = 0;
+                bool canUseIndexOf = !primarySet.CaseInsensitive && primarySet.Chars is not null;
+                bool needLoop = !canUseIndexOf || setsToUse > 1;
  
                  Label checkSpanLengthLabel = default;
                  Label charNotInClassLabel = default;
@@ -1606,13 +1437,25 @@ namespace System.Text.RegularExpressions
  
                  if (canUseIndexOf)
                  {
-                    charClassIndex = 1;
+                    setIndex = 1;
  
                      if (needLoop)
                      {
-                        // textSpan.Slice(iLocal)
+                        // textSpan.Slice(iLocal + primarySet.Distance);
                          Ldloca(textSpanLocal);
                          Ldloc(iLocal);
+                        if (primarySet.Distance != 0)
+                        {
+                            Ldc(primarySet.Distance);
+                            Add();
+                        }
+                        Call(s_spanSliceIntMethod);
+                    }
+                    else if (primarySet.Distance != 0)
+                    {
+                        // textSpan.Slice(primarySet.Distance)
+                        Ldloca(textSpanLocal);
+                        Ldc(primarySet.Distance);
                          Call(s_spanSliceIntMethod);
                      }
                      else
@@ -1621,29 +1464,34 @@ namespace System.Text.RegularExpressions
                          Ldloc(textSpanLocal);
                      }
  
-                    switch (setCharsCount)
+                    switch (primarySet.Chars!.Length)
                      {
                          case 1:
                              // tmp = ...IndexOf(setChars[0]);
-                            Ldc(setChars[0]);
+                            Ldc(primarySet.Chars[0]);
                              Call(s_spanIndexOfChar);
                              break;
  
                          case 2:
                              // tmp = ...IndexOfAny(setChars[0], setChars[1]);
-                            Ldc(setChars[0]);
-                            Ldc(setChars[1]);
+                            Ldc(primarySet.Chars[0]);
+                            Ldc(primarySet.Chars[1]);
                              Call(s_spanIndexOfAnyCharChar);
                              break;
  
-                        default: // 3
+                        case 3:
                              // tmp = ...IndexOfAny(setChars[0], setChars[1], setChars[2]});
-                            Debug.Assert(setCharsCount == 3);
-                            Ldc(setChars[0]);
-                            Ldc(setChars[1]);
-                            Ldc(setChars[2]);
+                            Ldc(primarySet.Chars[0]);
+                            Ldc(primarySet.Chars[1]);
+                            Ldc(primarySet.Chars[2]);
                              Call(s_spanIndexOfAnyCharCharChar);
                              break;
+
+                        default:
+                            Ldstr(new string(primarySet.Chars));
+                            Call(s_stringAsSpanMethod);
+                            Call(s_spanIndexOfAnySpan);
+                            break;
                      }
  
                      if (needLoop)
@@ -1672,13 +1520,13 @@ namespace System.Text.RegularExpressions
                          BltFar(returnFalse);
                      }
  
-                    // if (i >= textSpan.Length - (_leadingCharClasses.Length - 1)) goto returnFalse;
-                    if (_leadingCharClasses.Length > 1)
+                    // if (i >= textSpan.Length - (minRequiredLength - 1)) goto returnFalse;
+                    if (sets.Count > 1)
                      {
                          Debug.Assert(needLoop);
                          Ldloca(textSpanLocal);
                          Call(s_spanGetLengthMethod);
-                        Ldc(_leadingCharClasses.Length - 1);
+                        Ldc(minRequiredLength - 1);
                          Sub();
                          Ldloc(iLocal);
                          BleFar(returnFalse);
@@ -1689,20 +1537,20 @@ namespace System.Text.RegularExpressions
                  // if (!CharInClass(textSpan[i + 1], prefix[1], "...")) continue;
                  // if (!CharInClass(textSpan[i + 2], prefix[2], "...")) continue;
                  // ...
-                Debug.Assert(charClassIndex == 0 || charClassIndex == 1);
-                for ( ; charClassIndex < _leadingCharClasses.Length; charClassIndex++)
+                Debug.Assert(setIndex == 0 || setIndex == 1);
+                for ( ; setIndex < sets.Count; setIndex++)
                  {
                      Debug.Assert(needLoop);
                      Ldloca(textSpanLocal);
                      Ldloc(iLocal);
-                    if (charClassIndex > 0)
+                    if (sets[setIndex].Distance != 0)
                      {
-                        Ldc(charClassIndex);
+                        Ldc(sets[setIndex].Distance);
                          Add();
                      }
                      Call(s_spanGetItemMethod);
                      LdindU2();
-                    EmitMatchCharacterClass(_leadingCharClasses[charClassIndex].CharClass, _leadingCharClasses[charClassIndex].CaseInsensitive);
+                    EmitMatchCharacterClass(sets[setIndex].Set, sets[setIndex].CaseInsensitive);
                      BrfalseFar(charNotInClassLabel);
                  }
  
@@ -1726,14 +1574,14 @@ namespace System.Text.RegularExpressions
                      Add();
                      Stloc(iLocal);
  
-                    // for (...; i < span.Length - (_leadingCharClasses.Length - 1); ...);
+                    // for (...; i < span.Length - (minRequiredLength - 1); ...);
                      MarkLabel(checkSpanLengthLabel);
                      Ldloc(iLocal);
                      Ldloca(textSpanLocal);
                      Call(s_spanGetLengthMethod);
-                    if (_leadingCharClasses.Length > 1)
+                    if (setsToUse > 1 || primarySet.Distance != 0)
                      {
-                        Ldc(_leadingCharClasses.Length - 1);
+                        Ldc(minRequiredLength - 1);
                          Sub();
                      }
                      BltFar(loopBody);
@@ -2278,23 +2126,7 @@ namespace System.Text.RegularExpressions
                          break;
  
                      case RegexNode.Concatenate:
-                        int childCount = node.ChildCount();
-                        for (int i = 0; i < childCount; i++)
-                        {
-                            if (emitLengthChecksIfRequired && node.TryGetJoinableLengthCheckChildRange(i, out int requiredLength, out int exclusiveEnd))
-                            {
-                                EmitSpanLengthCheck(requiredLength);
-                                for (; i < exclusiveEnd; i++)
-                                {
-                                    EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: false);
-                                }
-
-                                i--;
-                                continue;
-                            }
-
-                            EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent);
-                        }
+                        EmitConcatenation(node, subsequent, emitLengthChecksIfRequired);
                          break;
  
                      case RegexNode.Capture:
@@ -2339,6 +2171,28 @@ namespace System.Text.RegularExpressions
                  Stfld(s_runtextposField);
              }
  
+            // Emits code for a concatenation: each child of `node` is emitted in order; the node following child i is child i + 1, or `subsequent` for the last child.
+            void EmitConcatenation(RegexNode node, RegexNode? subsequent, bool emitLengthChecksIfRequired)
+            {
+                int childCount = node.ChildCount();
+                for (int i = 0; i < childCount; i++)
+                {
+                    if (emitLengthChecksIfRequired && node.TryGetJoinableLengthCheckChildRange(i, out int requiredLength, out int exclusiveEnd))
+                    {
+                        EmitSpanLengthCheck(requiredLength); // one length check up front for the whole range [i, exclusiveEnd)
+                        for (; i < exclusiveEnd; i++)
+                        {
+                            EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent, emitLengthChecksIfRequired: false); // children in the range skip their own length checks
+                        }
+
+                        i--; // the inner loop left i == exclusiveEnd; step back so the outer loop's i++ lands on exclusiveEnd again
+                        continue;
+                    }
+
+                    EmitNode(node.Child(i), i + 1 < childCount ? node.Child(i + 1) : subsequent);
+                }
+            }
+
              // Emits the code to handle a single-character match.
              void EmitSingleChar(RegexNode node, bool emitLengthCheck = true, LocalBuilder? offset = null)
              {
@@ -2362,7 +2216,7 @@ namespace System.Text.RegularExpressions
                  }
                  else
                  {
-                    if (IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch))
+                    if (IsCaseInsensitive(node))
                      {
                          CallToLower();
                      }
@@ -2595,7 +2449,7 @@ namespace System.Text.RegularExpressions
                      EmitTextSpanOffset();
                      textSpanPos++;
                      LdindU2();
-                    if (caseInsensitive && RegexCharClass.ParticipatesInCaseConversion(s[i]))
+                    if (caseInsensitive)
                      {
                          CallToLower();
                      }
@@ -2865,12 +2719,12 @@ namespace System.Text.RegularExpressions
  
                  Label atomicLoopDoneLabel = DefineLabel();
  
-                Span<char> setChars = stackalloc char[3]; // 3 is max we can use with IndexOfAny
+                Span<char> setChars = stackalloc char[5]; // max optimized by IndexOfAny today
                  int numSetChars = 0;
  
                  if (node.IsNotoneFamily &&
                      maxIterations == int.MaxValue &&
-                    (!IsCaseInsensitive(node) || !RegexCharClass.ParticipatesInCaseConversion(node.Ch)))
+                    (!IsCaseInsensitive(node)))
                  {
                      // For Notone, we're looking for a specific character, as everything until we find
                      // it is consumed by the loop.  If we're unbounded, such as with ".*" and if we're case-sensitive,
@@ -2911,14 +2765,15 @@ namespace System.Text.RegularExpressions
                  else if (node.IsSetFamily &&
                      maxIterations == int.MaxValue &&
                      !IsCaseInsensitive(node) &&
-                    (numSetChars = RegexCharClass.GetSetChars(node.Str!, setChars)) > 1 &&
+                    (numSetChars = RegexCharClass.GetSetChars(node.Str!, setChars)) != 0 &&
                      RegexCharClass.IsNegated(node.Str!))
                  {
-                    // If the set is negated and contains only 2 or 3 characters (if it contained 1 and was negated, it would
+                    // If the set is negated and contains only a few characters (if it contained 1 and was negated, it would
                      // have been reduced to a Notone), we can use an IndexOfAny to find any of the target characters.
                      // As with the notoneloopatomic above, the unbounded constraint is purely for simplicity.
+                    Debug.Assert(numSetChars > 1);
  
-                    // int i = textSpan.Slice(textSpanPos).IndexOfAny(ch1, ch2{, ch3});
+                    // int i = textSpan.Slice(textSpanPos).IndexOfAny(ch1, ch2, ...);
                      if (textSpanPos > 0)
                      {
                          Ldloca(textSpanLocal);
@@ -2929,17 +2784,26 @@ namespace System.Text.RegularExpressions
                      {
                          Ldloc(textSpanLocal);
                      }
-                    Ldc(setChars[0]);
-                    Ldc(setChars[1]);
-                    if (numSetChars == 2)
-                    {
-                        Call(s_spanIndexOfAnyCharChar);
-                    }
-                    else
+                    switch (numSetChars)
                      {
-                        Debug.Assert(numSetChars == 3);
-                        Ldc(setChars[2]);
-                        Call(s_spanIndexOfAnyCharCharChar);
+                        case 2:
+                            Ldc(setChars[0]);
+                            Ldc(setChars[1]);
+                            Call(s_spanIndexOfAnyCharChar);
+                            break;
+
+                        case 3:
+                            Ldc(setChars[0]);
+                            Ldc(setChars[1]);
+                            Ldc(setChars[2]);
+                            Call(s_spanIndexOfAnyCharCharChar);
+                            break;
+
+                        default:
+                            Ldstr(setChars.Slice(0, numSetChars).ToString());
+                            Call(s_stringAsSpanMethod);
+                            Call(s_spanIndexOfSpan);
+                            break;
                      }
                      Stloc(iterationLocal);
  
@@ -3008,7 +2872,7 @@ namespace System.Text.RegularExpressions
                      }
                      else
                      {
-                        if (IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch))
+                        if (IsCaseInsensitive(node))
                          {
                              CallToLower();
                          }
@@ -3095,7 +2959,7 @@ namespace System.Text.RegularExpressions
                  }
                  else
                  {
-                    if (IsCaseInsensitive(node) && RegexCharClass.ParticipatesInCaseConversion(node.Ch))
+                    if (IsCaseInsensitive(node))
                      {
                          CallToLower();
                      }
@@ -4185,7 +4049,7 @@ namespace System.Text.RegularExpressions
                      }
                      else
                      {
-                        if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)))
+                        if (IsCaseInsensitive())
                          {
                              CallToLower();
                          }
@@ -4231,7 +4095,7 @@ namespace System.Text.RegularExpressions
                                  Add();
                              }
                              Call(s_stringGetCharsMethod);
-                            if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(str[i]))
+                            if (IsCaseInsensitive())
                              {
                                  CallToLower();
                              }
@@ -4274,7 +4138,7 @@ namespace System.Text.RegularExpressions
                              Ldc(str.Length - i);
                              Sub();
                              Call(s_stringGetCharsMethod);
-                            if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(str[i]))
+                            if (IsCaseInsensitive())
                              {
                                  CallToLower();
                              }
@@ -4477,7 +4341,7 @@ namespace System.Text.RegularExpressions
                          }
                          else
                          {
-                            if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)))
+                            if (IsCaseInsensitive())
                              {
                                  CallToLower();
                              }
@@ -4579,14 +4443,14 @@ namespace System.Text.RegularExpressions
  
                          Label loopEnd = DefineLabel();
                          string? set = Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic ? _strings![Operand(0)] : null;
-                        Span<char> setChars = stackalloc char[3];
+                        Span<char> setChars = stackalloc char[5]; // max optimized by IndexOfAny today
                          int numSetChars;
  
                          // If this is a notoneloop{atomic} and we're left-to-right and case-sensitive,
                          // we can use the vectorized IndexOf to search for the target character.
                          if ((Code() == RegexCode.Notoneloop || Code() == RegexCode.Notoneloopatomic) &&
                              !IsRightToLeft() &&
-                            (!IsCaseInsensitive() || !RegexCharClass.ParticipatesInCaseConversion(Operand(0))))
+                            (!IsCaseInsensitive()))
                          {
                              // i = runtext.AsSpan(runtextpos, len).IndexOf(ch);
                              Ldloc(_runtextLocal!);
@@ -4633,29 +4497,39 @@ namespace System.Text.RegularExpressions
                          else if ((Code() == RegexCode.Setloop || Code() == RegexCode.Setloopatomic) &&
                              !IsRightToLeft() &&
                              !IsCaseInsensitive() &&
-                            (numSetChars = RegexCharClass.GetSetChars(set!, setChars)) > 1 &&
+                            (numSetChars = RegexCharClass.GetSetChars(set!, setChars)) != 0 &&
                              RegexCharClass.IsNegated(set!))
                          {
                              // Similarly, if this is a setloop{atomic} and we're left-to-right and case-sensitive,
-                            // and if the set contains only 2 or 3 negated chars, we can use the vectorized IndexOfAny
+                            // and if the set contains only a few negated chars, we can use the vectorized IndexOfAny
                              // to search for those chars.
+                            Debug.Assert(numSetChars > 1);
  
                              // i = runtext.AsSpan(runtextpos, len).IndexOfAny(ch1, ch2{, ch3});
                              Ldloc(_runtextLocal!);
                              Ldloc(_runtextposLocal!);
                              Ldloc(lenLocal);
                              Call(s_stringAsSpanIntIntMethod);
-                            Ldc(setChars[0]);
-                            Ldc(setChars[1]);
-                            if (numSetChars == 2)
-                            {
-                                Call(s_spanIndexOfAnyCharChar);
-                            }
-                            else
+                            switch (numSetChars)
                              {
-                                Debug.Assert(numSetChars == 3);
-                                Ldc(setChars[2]);
-                                Call(s_spanIndexOfAnyCharCharChar);
+                                case 2:
+                                    Ldc(setChars[0]);
+                                    Ldc(setChars[1]);
+                                    Call(s_spanIndexOfAnyCharChar);
+                                    break;
+
+                                case 3:
+                                    Ldc(setChars[0]);
+                                    Ldc(setChars[1]);
+                                    Ldc(setChars[2]);
+                                    Call(s_spanIndexOfAnyCharCharChar);
+                                    break;
+
+                                default:
+                                    Ldstr(setChars.Slice(0, numSetChars).ToString());
+                                    Call(s_stringAsSpanMethod);
+                                    Call(s_spanIndexOfSpan);
+                                    break;
                              }
                              Stloc(iLocal);
  
@@ -4754,7 +4628,7 @@ namespace System.Text.RegularExpressions
                              }
                              else
                              {
-                                if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)))
+                                if (IsCaseInsensitive())
                                  {
                                      CallToLower();
                                  }
@@ -4955,7 +4829,7 @@ namespace System.Text.RegularExpressions
                          }
                          else
                          {
-                            if (IsCaseInsensitive() && RegexCharClass.ParticipatesInCaseConversion(Operand(0)))
+                            if (IsCaseInsensitive())
                              {
                                  CallToLower();
                              }
@@ -5105,21 +4979,34 @@ namespace System.Text.RegularExpressions
  
              // Next, if there's only 2 or 3 chars in the set (fairly common due to the sets we create for prefixes),
              // it's cheaper and smaller to compare against each than it is to use a lookup table.
-            if (!invariant)
+            if (!invariant && !RegexCharClass.IsNegated(charClass))
              {
-                Span<char> setChars = stackalloc char[3];
+                Span<char> setChars = stackalloc char[4];
                  int numChars = RegexCharClass.GetSetChars(charClass, setChars);
-                if (numChars > 0 && !RegexCharClass.IsNegated(charClass))
+                if (numChars is 2 or 3)
                  {
-                    // (ch == setChars[0]) | (ch == setChars[1]) { | (ch == setChars[2]) }
-                    Debug.Assert(numChars == 2 || numChars == 3);
-                    Ldloc(tempLocal);
-                    Ldc(setChars[0]);
-                    Ceq();
-                    Ldloc(tempLocal);
-                    Ldc(setChars[1]);
-                    Ceq();
-                    Or();
+                    if ((setChars[0] | 0x20) == setChars[1]) // special-case common case of an upper and lowercase ASCII letter combination
+                    {
+                        // ((ch | 0x20) == setChars[1])
+                        Ldloc(tempLocal);
+                        Ldc(0x20);
+                        Or();
+                        Ldc(setChars[1]);
+                        Ceq();
+                    }
+                    else
+                    {
+                        // (ch == setChars[0]) | (ch == setChars[1])
+                        Ldloc(tempLocal);
+                        Ldc(setChars[0]);
+                        Ceq();
+                        Ldloc(tempLocal);
+                        Ldc(setChars[1]);
+                        Ceq();
+                        Or();
+                    }
+
+                    // | (ch == setChars[2])
                      if (numChars == 3)
                      {
                          Ldloc(tempLocal);
@@ -5130,6 +5017,27 @@ namespace System.Text.RegularExpressions
  
                      return;
                  }
+                else if (numChars == 4 &&
+                         (setChars[0] | 0x20) == setChars[1] &&
+                         (setChars[2] | 0x20) == setChars[3])
+                {
+                    // ((ch | 0x20) == setChars[1])
+                    Ldloc(tempLocal);
+                    Ldc(0x20);
+                    Or();
+                    Ldc(setChars[1]);
+                    Ceq();
+
+                    // ((ch | 0x20) == setChars[3])
+                    Ldloc(tempLocal);
+                    Ldc(0x20);
+                    Or();
+                    Ldc(setChars[3]);
+                    Ceq();
+
+                    Or();
+                    return;
+                }
              }
  
              using RentedLocalBuilder resultLocal = RentInt32Local();
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs

new file mode 100644 (file)

index 0000000..f1b2858
--- /dev/null
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs
@@ -0,0 +1,664 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Collections.Generic;
+using System.Diagnostics;
+using System.Globalization;
+
+namespace System.Text.RegularExpressions
+{
+    /// <summary>Contains state and provides operations related to finding the next location a match could possibly begin.</summary>
+    internal sealed class RegexFindOptimizations
+    {
+        /// <summary>The minimum required length an input need be to match the pattern.  May be 0.  Used to bail out early when too little input remains.</summary>
+        private readonly int _minRequiredLength;
+        /// <summary>True if the input should be processed right-to-left rather than left-to-right.</summary>
+        private readonly bool _rightToLeft;
+        /// <summary>Provides the ToLower routine for lowercasing characters in the case-insensitive search modes.</summary>
+        private readonly TextInfo _textInfo;
+        /// <summary>ASCII lookup tables for set queries: one initially-null slot per entry in FixedDistanceSets, passed by ref to RegexCharClass.CharInClass.  Null unless a set-based mode was selected.</summary>
+        private readonly uint[]?[]? _asciiLookups;
+
+        public RegexFindOptimizations(RegexTree tree, CultureInfo culture)
+        {
+            _rightToLeft = (tree.Options & RegexOptions.RightToLeft) != 0;
+            _minRequiredLength = tree.MinRequiredLength;
+            _textInfo = culture.TextInfo;
+
+            // Select the search strategy, highest priority first; each step that applies sets FindMode and returns.
+            // 1) Leading anchor: other than Bol, an anchor pins the match to a single location, so no search is needed.
+            LeadingAnchor = RegexPrefixAnalyzer.FindLeadingAnchor(tree);
+            if (_rightToLeft)
+            {
+                // Filter out Bol for RightToLeft, as we don't currently optimize for it.
+                LeadingAnchor &= ~RegexPrefixAnalyzer.Bol;
+            }
+            if ((LeadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End)) != 0)
+            {
+                FindMode = (LeadingAnchor, _rightToLeft) switch
+                {
+                    (RegexPrefixAnalyzer.Beginning, false) => FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning,
+                    (RegexPrefixAnalyzer.Beginning, true) => FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Beginning,
+                    (RegexPrefixAnalyzer.Start, false) => FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Start,
+                    (RegexPrefixAnalyzer.Start, true) => FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Start,
+                    (RegexPrefixAnalyzer.End, false) => FindNextStartingPositionMode.LeadingAnchor_LeftToRight_End,
+                    (RegexPrefixAnalyzer.End, true) => FindNextStartingPositionMode.LeadingAnchor_RightToLeft_End,
+                    (_, false) => FindNextStartingPositionMode.LeadingAnchor_LeftToRight_EndZ, // any other value admitted by the mask above; presumably EndZ
+                    (_, true) => FindNextStartingPositionMode.LeadingAnchor_RightToLeft_EndZ,
+                };
+                return;
+            }
+
+            // 2) Leading case-sensitive prefix of at least two chars: search for it with IndexOf/LastIndexOf and inherit all of its optimizations.
+            string caseSensitivePrefix = RegexPrefixAnalyzer.FindCaseSensitivePrefix(tree);
+            if (caseSensitivePrefix.Length > 1)
+            {
+                LeadingCaseSensitivePrefix = caseSensitivePrefix;
+                FindMode = _rightToLeft ?
+                    FindNextStartingPositionMode.LeadingPrefix_RightToLeft_CaseSensitive :
+                    FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive;
+                return;
+            }
+
+            // At this point there are no fast-searchable anchors or case-sensitive prefixes. We can now analyze the
+            // pattern for sets and then use any found sets to determine what kind of search to perform.
+
+            // If we're compiling, then the compilation process already handles sets that reduce to a single literal,
+            // so we can simplify and just always go for the sets.
+            bool dfa = (tree.Options & RegexOptions.NonBacktracking) != 0;
+            bool compiled = (tree.Options & RegexOptions.Compiled) != 0 && !dfa; // for now, we never generate code for NonBacktracking, so treat it as non-compiled
+            bool interpreter = !compiled && !dfa;
+
+            // 3) Right-to-left is used very rarely, so we don't invest in special-casing it: we only compute a set
+            // for the first character.  Only _rightToLeft selects this path; a left-to-right interpreter does not
+            // take it and instead passes thorough: false to the fixed-distance analysis below, presumably to keep
+            // construction cheap (someone who wants to pay to do more work can specify Compiled).
+            if (_rightToLeft)
+            {
+                // Determine a set for anything that can possibly start the expression.
+                if (RegexPrefixAnalyzer.FindFirstCharClass(tree, culture) is (string CharClass, bool CaseInsensitive) set)
+                {
+                    // See if the set is limited to holding only a few characters.
+                    Span<char> scratch = stackalloc char[5]; // max optimized by IndexOfAny today
+                    int scratchCount;
+                    char[]? chars = null;
+                    if (!RegexCharClass.IsNegated(set.CharClass) &&
+                        (scratchCount = RegexCharClass.GetSetChars(set.CharClass, scratch)) > 0)
+                    {
+                        chars = scratch.Slice(0, scratchCount).ToArray();
+                    }
+
+                    if (!compiled &&
+                        chars is { Length: 1 })
+                    {
+                        // The set contains one and only one character, meaning every match starts
+                        // with the same literal value (potentially case-insensitive). Search for that.
+                        FixedDistanceLiteral = (chars[0], 0);
+                        FindMode = (_rightToLeft, set.CaseInsensitive) switch // _rightToLeft is always true here, so the (false, ...) arms are unreachable
+                        {
+                            (false, false) => FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseSensitive,
+                            (false, true) => FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive,
+                            (true, false) => FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseSensitive,
+                            (true, true) => FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive,
+                        };
+                    }
+                    else
+                    {
+                        // The set may match multiple characters (chars stays null if it is negated or has no small char list).  Search for that.
+                        FixedDistanceSets = new() { (chars, set.CharClass, 0, set.CaseInsensitive) };
+                        FindMode = (_rightToLeft, set.CaseInsensitive) switch // _rightToLeft is always true here, so the (false, ...) arms are unreachable
+                        {
+                            (false, false) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive,
+                            (false, true) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive,
+                            (true, false) => FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive,
+                            (true, true) => FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive,
+                        };
+                        _asciiLookups = new uint[1][]; // one initially-null slot for the single set
+                    }
+                }
+                return;
+            }
+
+            // 4) We're now left-to-right only and looking for sets.
+
+            // Build up a list of all of the sets that are a fixed distance from the start of the expression.
+            List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? fixedDistanceSets = RegexPrefixAnalyzer.FindFixedDistanceSets(tree, culture, thorough: !interpreter);
+            if (fixedDistanceSets is not null)
+            {
+                Debug.Assert(fixedDistanceSets.Count != 0);
+
+                // Determine whether to do searching based on one or more sets or on a single literal. Compiled engines
+                // don't need to special-case literals as they already do codegen to create the optimal lookup based on
+                // the set's characteristics.
+                if (!compiled &&
+                    fixedDistanceSets.Count == 1 &&
+                    fixedDistanceSets[0].Chars is { Length: 1 })
+                {
+                    FixedDistanceLiteral = (fixedDistanceSets[0].Chars![0], fixedDistanceSets[0].Distance);
+                    FindMode = fixedDistanceSets[0].CaseInsensitive ?
+                        FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive :
+                        FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseSensitive;
+                }
+                else
+                {
+                    // Limit how many sets we use to avoid doing lots of unnecessary work.  The list was already
+                    // sorted from best to worst, so just keep the first ones up to our limit.
+                    const int MaxSetsToUse = 3; // arbitrary tuned limit
+                    if (fixedDistanceSets.Count > MaxSetsToUse)
+                    {
+                        fixedDistanceSets.RemoveRange(MaxSetsToUse, fixedDistanceSets.Count - MaxSetsToUse);
+                    }
+
+                    // Store the sets, and compute which mode to use: LeadingSet for a lone set at distance 0, FixedSets otherwise.
+                    FixedDistanceSets = fixedDistanceSets;
+                    FindMode = (fixedDistanceSets.Count == 1 && fixedDistanceSets[0].Distance == 0, fixedDistanceSets[0].CaseInsensitive) switch
+                    {
+                        (true, true) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive,
+                        (true, false) => FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive,
+                        (false, true) => FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive,
+                        (false, false) => FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive,
+                    };
+                    _asciiLookups = new uint[fixedDistanceSets.Count][]; // one initially-null slot per retained set
+                }
+                return;
+            } // otherwise no strategy applied and FindMode keeps its NoSearch default
+        }
+
+        /// <summary>Gets the mode the constructor selected for <see cref="TryFindNextStartingPosition"/>; remains NoSearch when no optimization applies.</summary>
+        public FindNextStartingPositionMode FindMode { get; } = FindNextStartingPositionMode.NoSearch;
+
+        /// <summary>Gets the leading anchor, if one exists (RegexPrefixAnalyzer.Bol, etc.).  Bol is filtered out for right-to-left patterns.</summary>
+        public int LeadingAnchor { get; }
+
+        /// <summary>Gets the leading case-sensitive prefix.  Empty unless one of the LeadingPrefix modes was selected, in which case it has at least two characters.</summary>
+        public string LeadingCaseSensitivePrefix { get; } = string.Empty;
+
+        /// <summary>When in a fixed-distance or leading literal mode, gets the literal and how far it is from the start of the pattern (0 for the right-to-left leading literal modes).</summary>
+        public (char Literal, int Distance) FixedDistanceLiteral { get; }
+
+        /// <summary>When in a leading-set or fixed-distance set mode, gets each set (plus its individual chars when known, else null) and how far it is from the start of the pattern.</summary>
+        /// <remarks>The case-insensitivity of the 0th entry will always match the mode selected, but subsequent entries may not.</remarks>
+        public List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? FixedDistanceSets { get; }
+
+        /// <summary>Try to advance to the next starting position that might be a location for a match.</summary>
+        /// <param name="text">The text to search.</param>
+        /// <param name="pos">The position in <paramref name="text"/>.  This is updated with the found position.</param>
+        /// <param name="beginning">The index in <paramref name="text"/> to consider the beginning for beginning anchor purposes.</param>
+        /// <param name="start">The index in <paramref name="text"/> to consider the start for start anchor purposes.</param>
+        /// <param name="end">The index in <paramref name="text"/> to consider the non-inclusive end of the string.</param>
+        /// <returns>true if a position to attempt a match was found; false if none was found.</returns>
+        public bool TryFindNextStartingPosition(string text, ref int pos, int beginning, int start, int end)
+        {
+            // Return early if we know there's not enough input left to match.
+            if (!_rightToLeft)
+            {
+                if (pos > end - _minRequiredLength)
+                {
+                    pos = end;
+                    return false;
+                }
+            }
+            else
+            {
+                if (pos - _minRequiredLength < beginning)
+                {
+                    pos = beginning;
+                    return false;
+                }
+            }
+
+            // Optimize the handling of a Beginning-Of-Line (BOL) anchor (only for left-to-right).  BOL is special, in that unlike
+            // other anchors like Beginning, there are potentially multiple places a BOL can match.  So unlike
+            // the other anchors, which all skip all subsequent processing if found, with BOL we just use it
+            // to boost our position to the next line, and then continue normally with any searches.
+            if (LeadingAnchor == RegexPrefixAnalyzer.Bol)
+            {
+                // If we're not currently positioned at the beginning of a line (either
+                // the beginning of the string or just after a line feed), find the next
+                // newline and position just after it.
+                Debug.Assert(!_rightToLeft);
+                if (pos > beginning && text[pos - 1] != '\n')
+                {
+                    int newline = text.IndexOf('\n', pos);
+                    if (newline == -1 || newline + 1 > end)
+                    {
+                        pos = end;
+                        return false;
+                    }
+
+                    pos = newline + 1;
+                }
+            }
+
+            switch (FindMode)
+            {
+                // There's an anchor.  For some, we can simply compare against the current position.
+                // For others, we can jump to the relevant location.
+
+                case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Beginning:
+                    if (pos > beginning)
+                    {
+                        pos = end;
+                        return false;
+                    }
+                    return true;
+
+                case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_Start:
+                    if (pos > start)
+                    {
+                        pos = end;
+                        return false;
+                    }
+                    return true;
+
+                case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_EndZ:
+                    if (pos < end - 1)
+                    {
+                        pos = end - 1;
+                    }
+                    return true;
+
+                case FindNextStartingPositionMode.LeadingAnchor_LeftToRight_End:
+                    if (pos < end)
+                    {
+                        pos = end;
+                    }
+                    return true;
+
+                case FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Beginning:
+                    if (pos > beginning)
+                    {
+                        pos = beginning;
+                    }
+                    return true;
+
+                case FindNextStartingPositionMode.LeadingAnchor_RightToLeft_Start:
+                    if (pos < start)
+                    {
+                        pos = beginning;
+                        return false;
+                    }
+                    return true;
+
+                case FindNextStartingPositionMode.LeadingAnchor_RightToLeft_EndZ:
+                    if (pos < end - 1 || (pos == end - 1 && text[pos] != '\n'))
+                    {
+                        pos = beginning;
+                        return false;
+                    }
+                    return true;
+
+                case FindNextStartingPositionMode.LeadingAnchor_RightToLeft_End:
+                    if (pos < end)
+                    {
+                        pos = beginning;
+                        return false;
+                    }
+                    return true;
+
+                // There's a case-sensitive prefix.  Search for it with ordinal IndexOf.
+
+                case FindNextStartingPositionMode.LeadingPrefix_LeftToRight_CaseSensitive:
+                    {
+                        int i = text.AsSpan(pos, end - pos).IndexOf(LeadingCaseSensitivePrefix.AsSpan());
+                        if (i >= 0)
+                        {
+                            pos += i;
+                            return true;
+                        }
+
+                        pos = end;
+                        return false;
+                    }
+
+                case FindNextStartingPositionMode.LeadingPrefix_RightToLeft_CaseSensitive:
+                    {
+                        int i = text.AsSpan(beginning, pos - beginning).LastIndexOf(LeadingCaseSensitivePrefix.AsSpan());
+                        if (i >= 0)
+                        {
+                            pos = beginning + i + LeadingCaseSensitivePrefix.Length;
+                            return true;
+                        }
+
+                        pos = beginning;
+                        return false;
+                    }
+
+                // There's a literal at the beginning of the pattern.  Search for it.
+
+                case FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseSensitive:
+                    {
+                        int i = text.AsSpan(beginning, pos - beginning).LastIndexOf(FixedDistanceLiteral.Literal);
+                        if (i >= 0)
+                        {
+                            pos = beginning + i + 1;
+                            return true;
+                        }
+
+                        pos = beginning;
+                        return false;
+                    }
+
+                case FindNextStartingPositionMode.LeadingLiteral_RightToLeft_CaseInsensitive:
+                    {
+                        char ch = FixedDistanceLiteral.Literal;
+                        TextInfo ti = _textInfo;
+
+                        ReadOnlySpan<char> span = text.AsSpan(beginning, pos - beginning);
+                        for (int i = span.Length - 1; i >= 0; i--)
+                        {
+                            if (ti.ToLower(span[i]) == ch)
+                            {
+                                pos = beginning + i + 1;
+                                return true;
+                            }
+                        }
+
+                        pos = beginning;
+                        return false;
+                    }
+
+                // There's a set at the beginning of the pattern.  Search for it.
+
+                case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseSensitive:
+                    {
+                        (char[]? chars, string set, _, _) = FixedDistanceSets![0];
+
+                        ReadOnlySpan<char> span = text.AsSpan(pos, end - pos);
+                        if (chars is not null)
+                        {
+                            int i = span.IndexOfAny(chars);
+                            if (i >= 0)
+                            {
+                                pos += i;
+                                return true;
+                            }
+                        }
+                        else
+                        {
+                            ref uint[]? startingAsciiLookup = ref _asciiLookups![0];
+                            for (int i = 0; i < span.Length; i++)
+                            {
+                                if (RegexCharClass.CharInClass(span[i], set, ref startingAsciiLookup))
+                                {
+                                    pos += i;
+                                    return true;
+                                }
+                            }
+                        }
+
+                        pos = end;
+                        return false;
+                    }
+
+                case FindNextStartingPositionMode.LeadingSet_LeftToRight_CaseInsensitive:
+                    {
+                        ref uint[]? startingAsciiLookup = ref _asciiLookups![0];
+                        string set = FixedDistanceSets![0].Set;
+                        TextInfo ti = _textInfo;
+
+                        ReadOnlySpan<char> span = text.AsSpan(pos, end - pos);
+                        for (int i = 0; i < span.Length; i++)
+                        {
+                            if (RegexCharClass.CharInClass(ti.ToLower(span[i]), set, ref startingAsciiLookup))
+                            {
+                                pos += i;
+                                return true;
+                            }
+                        }
+
+                        pos = end;
+                        return false;
+                    }
+
+                case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseSensitive:
+                    {
+                        ref uint[]? startingAsciiLookup = ref _asciiLookups![0];
+                        string set = FixedDistanceSets![0].Set;
+
+                        ReadOnlySpan<char> span = text.AsSpan(beginning, pos - beginning);
+                        for (int i = span.Length - 1; i >= 0; i--)
+                        {
+                            if (RegexCharClass.CharInClass(span[i], set, ref startingAsciiLookup))
+                            {
+                                pos = beginning + i + 1;
+                                return true;
+                            }
+                        }
+
+                        pos = beginning;
+                        return false;
+                    }
+
+                case FindNextStartingPositionMode.LeadingSet_RightToLeft_CaseInsensitive:
+                    {
+                        ref uint[]? startingAsciiLookup = ref _asciiLookups![0];
+                        string set = FixedDistanceSets![0].Set;
+                        TextInfo ti = _textInfo;
+
+                        ReadOnlySpan<char> span = text.AsSpan(beginning, pos - beginning);
+                        for (int i = span.Length - 1; i >= 0; i--)
+                        {
+                            if (RegexCharClass.CharInClass(ti.ToLower(span[i]), set, ref startingAsciiLookup))
+                            {
+                                pos = beginning + i + 1;
+                                return true;
+                            }
+                        }
+
+                        pos = beginning;
+                        return false;
+                    }
+
+                // There's a literal at a fixed offset from the beginning of the pattern.  Search for it.
+
+                case FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseSensitive:
+                    {
+                        Debug.Assert(FixedDistanceLiteral.Distance <= _minRequiredLength);
+
+                        int i = text.AsSpan(pos + FixedDistanceLiteral.Distance, end - pos - FixedDistanceLiteral.Distance).IndexOf(FixedDistanceLiteral.Literal);
+                        if (i >= 0)
+                        {
+                            pos += i;
+                            return true;
+                        }
+
+                        pos = end;
+                        return false;
+                    }
+
+                case FindNextStartingPositionMode.FixedLiteral_LeftToRight_CaseInsensitive:
+                    {
+                        Debug.Assert(FixedDistanceLiteral.Distance <= _minRequiredLength);
+
+                        char ch = FixedDistanceLiteral.Literal;
+                        TextInfo ti = _textInfo;
+
+                        ReadOnlySpan<char> span = text.AsSpan(pos + FixedDistanceLiteral.Distance, end - pos - FixedDistanceLiteral.Distance);
+                        for (int i = 0; i < span.Length; i++)
+                        {
+                            if (ti.ToLower(span[i]) == ch)
+                            {
+                                pos += i;
+                                return true;
+                            }
+                        }
+
+                        pos = end;
+                        return false;
+                    }
+
+                // There are one or more sets at fixed offsets from the start of the pattern.
+
+                case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseSensitive:
+                    {
+                        List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets = FixedDistanceSets!;
+                        (char[]? primaryChars, string primarySet, int primaryDistance, _) = sets[0];
+                        int endMinusRequiredLength = end - Math.Max(1, _minRequiredLength);
+
+                        if (primaryChars is not null)
+                        {
+                            for (int inputPosition = pos; inputPosition <= endMinusRequiredLength; inputPosition++)
+                            {
+                                int offset = inputPosition + primaryDistance;
+                                int index = text.IndexOfAny(primaryChars, offset, end - offset);
+                                if (index < 0)
+                                {
+                                    break;
+                                }
+
+                                inputPosition = index - primaryDistance;
+                                if (inputPosition > endMinusRequiredLength)
+                                {
+                                    break;
+                                }
+
+                                for (int i = 1; i < sets.Count; i++)
+                                {
+                                    (_, string nextSet, int nextDistance, bool nextCaseInsensitive) = sets[i];
+                                    char c = text[inputPosition + nextDistance];
+                                    if (!RegexCharClass.CharInClass(nextCaseInsensitive ? _textInfo.ToLower(c) : c, nextSet, ref _asciiLookups![i]))
+                                    {
+                                        goto Bumpalong;
+                                    }
+                                }
+
+                                pos = inputPosition;
+                                return true;
+
+                            Bumpalong:;
+                            }
+                        }
+                        else
+                        {
+                            ref uint[]? startingAsciiLookup = ref _asciiLookups![0];
+
+                            for (int inputPosition = pos; inputPosition <= endMinusRequiredLength; inputPosition++)
+                            {
+                                char c = text[inputPosition + primaryDistance];
+                                if (!RegexCharClass.CharInClass(c, primarySet, ref startingAsciiLookup))
+                                {
+                                    goto Bumpalong;
+                                }
+
+                                for (int i = 1; i < sets.Count; i++)
+                                {
+                                    (_, string nextSet, int nextDistance, bool nextCaseInsensitive) = sets[i];
+                                    c = text[inputPosition + nextDistance];
+                                    if (!RegexCharClass.CharInClass(nextCaseInsensitive ? _textInfo.ToLower(c) : c, nextSet, ref _asciiLookups![i]))
+                                    {
+                                        goto Bumpalong;
+                                    }
+                                }
+
+                                pos = inputPosition;
+                                return true;
+
+                            Bumpalong:;
+                            }
+                        }
+
+                        pos = end;
+                        return false;
+                    }
+
+                case FindNextStartingPositionMode.FixedSets_LeftToRight_CaseInsensitive:
+                    {
+                        List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> sets = FixedDistanceSets!;
+                        (_, string primarySet, int primaryDistance, _) = sets[0];
+
+                        int endMinusRequiredLength = end - Math.Max(1, _minRequiredLength);
+                        TextInfo ti = _textInfo;
+                        ref uint[]? startingAsciiLookup = ref _asciiLookups![0];
+
+                        for (int inputPosition = pos; inputPosition <= endMinusRequiredLength; inputPosition++)
+                        {
+                            char c = text[inputPosition + primaryDistance];
+                            if (!RegexCharClass.CharInClass(ti.ToLower(c), primarySet, ref startingAsciiLookup))
+                            {
+                                goto Bumpalong;
+                            }
+
+                            for (int i = 1; i < sets.Count; i++)
+                            {
+                                (_, string nextSet, int nextDistance, bool nextCaseInsensitive) = sets[i];
+                                c = text[inputPosition + nextDistance];
+                                if (!RegexCharClass.CharInClass(nextCaseInsensitive ? _textInfo.ToLower(c) : c, nextSet, ref _asciiLookups![i]))
+                                {
+                                    goto Bumpalong;
+                                }
+                            }
+
+                            pos = inputPosition;
+                            return true;
+
+                        Bumpalong:;
+                        }
+
+                        pos = end;
+                        return false;
+                    }
+
+                // Nothing special to look for.  Just return true indicating this is a valid position to try to match.
+
+                default:
+                    Debug.Assert(FindMode == FindNextStartingPositionMode.NoSearch);
+                    return true;
+            }
+        }
+    }
+
+    /// <summary>Identifies the strategy used to search for the next position at which a match could possibly start.</summary>
+    internal enum FindNextStartingPositionMode
+    {
+        /// <summary>A "beginning" anchor at the beginning of the pattern.</summary>
+        LeadingAnchor_LeftToRight_Beginning,
+        /// <summary>A "start" anchor at the beginning of the pattern.</summary>
+        LeadingAnchor_LeftToRight_Start,
+        /// <summary>An "endz" anchor at the beginning of the pattern.  This is rare.</summary>
+        LeadingAnchor_LeftToRight_EndZ,
+        /// <summary>An "end" anchor at the beginning of the pattern.  This is rare.</summary>
+        LeadingAnchor_LeftToRight_End,
+
+        /// <summary>A "beginning" anchor at the beginning of the right-to-left pattern.</summary>
+        LeadingAnchor_RightToLeft_Beginning,
+        /// <summary>A "start" anchor at the beginning of the right-to-left pattern.</summary>
+        LeadingAnchor_RightToLeft_Start,
+        /// <summary>An "endz" anchor at the beginning of the right-to-left pattern.  This is rare.</summary>
+        LeadingAnchor_RightToLeft_EndZ,
+        /// <summary>An "end" anchor at the beginning of the right-to-left pattern.  This is rare.</summary>
+        LeadingAnchor_RightToLeft_End,
+
+        /// <summary>A case-sensitive multi-character substring at the beginning of the pattern.</summary>
+        LeadingPrefix_LeftToRight_CaseSensitive,
+        /// <summary>A case-sensitive multi-character substring at the beginning of the right-to-left pattern.</summary>
+        LeadingPrefix_RightToLeft_CaseSensitive,
+
+        /// <summary>A case-sensitive set starting the pattern.</summary>
+        LeadingSet_LeftToRight_CaseSensitive,
+        /// <summary>A case-insensitive set starting the pattern.</summary>
+        LeadingSet_LeftToRight_CaseInsensitive,
+        /// <summary>A case-sensitive set starting the right-to-left pattern.</summary>
+        LeadingSet_RightToLeft_CaseSensitive,
+        /// <summary>A case-insensitive set starting the right-to-left pattern.</summary>
+        LeadingSet_RightToLeft_CaseInsensitive,
+
+        /// <summary>A case-sensitive single character starting the right-to-left pattern.</summary>
+        LeadingLiteral_RightToLeft_CaseSensitive,
+        /// <summary>A case-insensitive single character starting the right-to-left pattern.</summary>
+        LeadingLiteral_RightToLeft_CaseInsensitive,
+
+        /// <summary>A case-sensitive single character at a fixed distance from the start of the pattern.</summary>
+        FixedLiteral_LeftToRight_CaseSensitive,
+        /// <summary>A case-insensitive single character at a fixed distance from the start of the pattern.</summary>
+        FixedLiteral_LeftToRight_CaseInsensitive,
+
+        /// <summary>One or more sets at fixed distances from the start of the pattern.  The first set is case-sensitive; each additional set carries its own case-sensitivity.</summary>
+        FixedSets_LeftToRight_CaseSensitive,
+        /// <summary>One or more sets at fixed distances from the start of the pattern.  The first set is case-insensitive; each additional set carries its own case-sensitivity.</summary>
+        FixedSets_LeftToRight_CaseInsensitive,
+
+        /// <summary>Nothing to search for.  The current position is reported as a valid place to try to match.</summary>
+        NoSearch,
+    }
+}
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs

index c679c01..4351473 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
@@ -15,7 +15,6 @@ namespace System.Text.RegularExpressions
  
          private readonly RegexCode _code;
          private readonly TextInfo _textInfo;
-        private readonly FindFirstCharMode _findFirstCharMode;
  
          private int _operator;
          private int _codepos;
@@ -29,48 +28,6 @@ namespace System.Text.RegularExpressions
  
              _code = code;
              _textInfo = culture.TextInfo;
-
-            // Determine what searching mode FindFirstChar will employ.
-            if ((_code.LeadingAnchor & (RegexPrefixAnalyzer.Beginning | RegexPrefixAnalyzer.Start | RegexPrefixAnalyzer.EndZ | RegexPrefixAnalyzer.End)) != 0)
-            {
-                _findFirstCharMode = (_code.LeadingAnchor, code.RightToLeft) switch
-                {
-                    (RegexPrefixAnalyzer.Beginning, false) => FindFirstCharMode.LeadingAnchor_LeftToRight_Beginning,
-                    (RegexPrefixAnalyzer.Beginning, true) => FindFirstCharMode.LeadingAnchor_RightToLeft_Beginning,
-                    (RegexPrefixAnalyzer.Start, false) => FindFirstCharMode.LeadingAnchor_LeftToRight_Start,
-                    (RegexPrefixAnalyzer.Start, true) => FindFirstCharMode.LeadingAnchor_RightToLeft_Start,
-                    (RegexPrefixAnalyzer.End, false) => FindFirstCharMode.LeadingAnchor_LeftToRight_End,
-                    (RegexPrefixAnalyzer.End, true) => FindFirstCharMode.LeadingAnchor_RightToLeft_End,
-                    (_, false) => FindFirstCharMode.LeadingAnchor_LeftToRight_EndZ,
-                    (_, true) => FindFirstCharMode.LeadingAnchor_RightToLeft_EndZ,
-                };
-            }
-            else if (code.BoyerMoorePrefix is RegexBoyerMoore rbm)
-            {
-                _findFirstCharMode = rbm.PatternSupportsIndexOf ?
-                    FindFirstCharMode.IndexOf :
-                    FindFirstCharMode.BoyerMoore;
-            }
-            else if (code.LeadingCharClasses is not null)
-            {
-                (string charClass, bool caseInsensitive) = code.LeadingCharClasses[0];
-                bool isSet = !RegexCharClass.IsSingleton(charClass);
-                _findFirstCharMode = (code.RightToLeft, caseInsensitive, isSet) switch
-                {
-                    (false, false, false) => FindFirstCharMode.LeadingCharClass_LeftToRight_CaseSensitive_Singleton,
-                    (false, false, true) => FindFirstCharMode.LeadingCharClass_LeftToRight_CaseSensitive_Set,
-                    (false, true, false) => FindFirstCharMode.LeadingCharClass_LeftToRight_CaseInsensitive_Singleton,
-                    (false, true, true) => FindFirstCharMode.LeadingCharClass_LeftToRight_CaseInsensitive_Set,
-                    (true, false, false) => FindFirstCharMode.LeadingCharClass_RightToLeft_CaseSensitive_Singleton,
-                    (true, false, true) => FindFirstCharMode.LeadingCharClass_RightToLeft_CaseSensitive_Set,
-                    (true, true, false) => FindFirstCharMode.LeadingCharClass_RightToLeft_CaseInsensitive_Singleton,
-                    (true, true, true) => FindFirstCharMode.LeadingCharClass_RightToLeft_CaseInsensitive_Set,
-                };
-            }
-            else
-            {
-                _findFirstCharMode = FindFirstCharMode.NoSearch;
-            }
          }
  
          protected override void InitTrackCount() => runtrackcount = _code.TrackCount;
@@ -372,306 +329,8 @@ namespace System.Text.RegularExpressions
  
          private void Backwardnext() => runtextpos += _rightToLeft ? 1 : -1;
  
-        private enum FindFirstCharMode
-        {
-            LeadingAnchor_LeftToRight_Beginning,
-            LeadingAnchor_LeftToRight_Start,
-            LeadingAnchor_LeftToRight_EndZ,
-            LeadingAnchor_LeftToRight_End,
-
-            LeadingAnchor_RightToLeft_Beginning,
-            LeadingAnchor_RightToLeft_Start,
-            LeadingAnchor_RightToLeft_EndZ,
-            LeadingAnchor_RightToLeft_End,
-
-            IndexOf,
-            BoyerMoore,
-
-            LeadingCharClass_LeftToRight_CaseSensitive_Singleton,
-            LeadingCharClass_LeftToRight_CaseSensitive_Set,
-            LeadingCharClass_LeftToRight_CaseInsensitive_Singleton,
-            LeadingCharClass_LeftToRight_CaseInsensitive_Set,
-
-            LeadingCharClass_RightToLeft_CaseSensitive_Singleton,
-            LeadingCharClass_RightToLeft_CaseSensitive_Set,
-            LeadingCharClass_RightToLeft_CaseInsensitive_Singleton,
-            LeadingCharClass_RightToLeft_CaseInsensitive_Set,
-
-            NoSearch,
-        }
-
-        protected override bool FindFirstChar()
-        {
-            // Return early if we know there's not enough input left to match.
-            if (!_code.RightToLeft)
-            {
-                if (runtextpos > runtextend - _code.Tree.MinRequiredLength)
-                {
-                    runtextpos = runtextend;
-                    return false;
-                }
-            }
-            else
-            {
-                if (runtextpos - _code.Tree.MinRequiredLength < runtextbeg)
-                {
-                    runtextpos = runtextbeg;
-                    return false;
-                }
-            }
-
-            // Optimize the handling of a Beginning-Of-Line (BOL) anchor.  BOL is special, in that unlike
-            // other anchors like Beginning, there are potentially multiple places a BOL can match.  So unlike
-            // the other anchors, which all skip all subsequent processing if found, with BOL we just use it
-            // to boost our position to the next line, and then continue normally with any Boyer-Moore or
-            // leading char class searches.
-            if (_code.LeadingAnchor == RegexPrefixAnalyzer.Bol &&
-                !_code.RightToLeft) // don't bother customizing this optimization for the very niche RTL + Multiline case
-            {
-                // If we're not currently positioned at the beginning of a line (either
-                // the beginning of the string or just after a line feed), find the next
-                // newline and position just after it.
-                if (runtextpos > runtextbeg && runtext![runtextpos - 1] != '\n')
-                {
-                    int newline = runtext.IndexOf('\n', runtextpos);
-                    if (newline == -1 || newline + 1 > runtextend)
-                    {
-                        runtextpos = runtextend;
-                        return false;
-                    }
-
-                    runtextpos = newline + 1;
-                }
-            }
-
-            switch (_findFirstCharMode)
-            {
-                // If the pattern is anchored, we can update our position appropriately and return immediately.
-                // If there's a Boyer-Moore prefix, we can also validate it.
-
-                case FindFirstCharMode.LeadingAnchor_LeftToRight_Beginning:
-                    if (runtextpos > runtextbeg)
-                    {
-                        runtextpos = runtextend;
-                        return false;
-                    }
-                    return NoPrefixOrPrefixMatches();
-
-                case FindFirstCharMode.LeadingAnchor_LeftToRight_Start:
-                    if (runtextpos > runtextstart)
-                    {
-                        runtextpos = runtextend;
-                        return false;
-                    }
-                    return NoPrefixOrPrefixMatches();
-
-                case FindFirstCharMode.LeadingAnchor_LeftToRight_EndZ:
-                    if (runtextpos < runtextend - 1)
-                    {
-                        runtextpos = runtextend - 1;
-                    }
-                    return NoPrefixOrPrefixMatches();
-
-                case FindFirstCharMode.LeadingAnchor_LeftToRight_End:
-                    if (runtextpos < runtextend)
-                    {
-                        runtextpos = runtextend;
-                    }
-                    return NoPrefixOrPrefixMatches();
-
-                case FindFirstCharMode.LeadingAnchor_RightToLeft_Beginning:
-                    if (runtextpos > runtextbeg)
-                    {
-                        runtextpos = runtextbeg;
-                    }
-                    return NoPrefixOrPrefixMatches();
-
-                case FindFirstCharMode.LeadingAnchor_RightToLeft_Start:
-                    if (runtextpos < runtextstart)
-                    {
-                        runtextpos = runtextbeg;
-                        return false;
-                    }
-                    return NoPrefixOrPrefixMatches();
-
-                case FindFirstCharMode.LeadingAnchor_RightToLeft_EndZ:
-                    if (runtextpos < runtextend - 1 || (runtextpos == runtextend - 1 && runtext![runtextpos] != '\n'))
-                    {
-                        runtextpos = runtextbeg;
-                        return false;
-                    }
-                    return NoPrefixOrPrefixMatches();
-
-                case FindFirstCharMode.LeadingAnchor_RightToLeft_End:
-                    if (runtextpos < runtextend)
-                    {
-                        runtextpos = runtextbeg;
-                        return false;
-                    }
-                    return NoPrefixOrPrefixMatches();
-
-                // There was a prefix.  Scan for it.
-
-                case FindFirstCharMode.IndexOf:
-                    {
-                        int i = runtext.AsSpan(runtextpos, runtextend - runtextpos).IndexOf(_code.BoyerMoorePrefix!.Pattern);
-                        if (i >= 0)
-                        {
-                            runtextpos += i;
-                            return true;
-                        }
-                        runtextpos = runtextend;
-                        return false;
-                    }
-
-                case FindFirstCharMode.BoyerMoore:
-                    runtextpos = _code.BoyerMoorePrefix!.Scan(runtext!, runtextpos, runtextbeg, runtextend);
-                    if (runtextpos >= 0)
-                    {
-                        return true;
-                    }
-                    runtextpos = _code.RightToLeft ? runtextbeg : runtextend;
-                    return false;
-
-                // There's a leading character class. Search for it.
-
-                case FindFirstCharMode.LeadingCharClass_LeftToRight_CaseSensitive_Singleton:
-                    {
-                        ReadOnlySpan<char> span = runtext.AsSpan(runtextpos, runtextend - runtextpos);
-                        int i = span.IndexOf(RegexCharClass.SingletonChar(_code.LeadingCharClasses![0].CharClass));
-                        if (i >= 0)
-                        {
-                            runtextpos += i;
-                            return true;
-                        }
-                        runtextpos = runtextend;
-                        return false;
-                    }
-
-                case FindFirstCharMode.LeadingCharClass_LeftToRight_CaseSensitive_Set:
-                    {
-                        string set = _code.LeadingCharClasses![0].CharClass;
-                        ReadOnlySpan<char> span = runtext.AsSpan(runtextpos, runtextend - runtextpos);
-                        for (int i = 0; i < span.Length; i++)
-                        {
-                            if (RegexCharClass.CharInClass(span[i], set, ref _code.LeadingCharClassAsciiLookup))
-                            {
-                                runtextpos += i;
-                                return true;
-                            }
-                        }
-                        runtextpos = runtextend;
-                        return false;
-                    }
-
-                case FindFirstCharMode.LeadingCharClass_LeftToRight_CaseInsensitive_Singleton:
-                    {
-                        char ch = RegexCharClass.SingletonChar(_code.LeadingCharClasses![0].CharClass);
-                        TextInfo ti = _textInfo;
-                        ReadOnlySpan<char> span = runtext.AsSpan(runtextpos, runtextend - runtextpos);
-                        for (int i = 0; i < span.Length; i++)
-                        {
-                            if (ch == ti.ToLower(span[i]))
-                            {
-                                runtextpos += i;
-                                return true;
-                            }
-                        }
-                        runtextpos = runtextend;
-                        return false;
-                    }
-
-                case FindFirstCharMode.LeadingCharClass_LeftToRight_CaseInsensitive_Set:
-                    {
-                        string set = _code.LeadingCharClasses![0].CharClass;
-                        ReadOnlySpan<char> span = runtext.AsSpan(runtextpos, runtextend - runtextpos);
-                        TextInfo ti = _textInfo;
-                        for (int i = 0; i < span.Length; i++)
-                        {
-                            if (RegexCharClass.CharInClass(ti.ToLower(span[i]), set, ref _code.LeadingCharClassAsciiLookup))
-                            {
-                                runtextpos += i;
-                                return true;
-                            }
-                        }
-                        runtextpos = runtextend;
-                        return false;
-                    }
-
-                case FindFirstCharMode.LeadingCharClass_RightToLeft_CaseSensitive_Singleton:
-                    {
-                        ReadOnlySpan<char> span = runtext.AsSpan(runtextbeg, runtextpos - runtextbeg);
-                        int i = span.LastIndexOf(RegexCharClass.SingletonChar(_code.LeadingCharClasses![0].CharClass));
-                        if (i >= 0)
-                        {
-                            runtextpos = runtextbeg + i + 1;
-                            return true;
-                        }
-                        runtextpos = runtextbeg;
-                        return false;
-                    }
-
-                case FindFirstCharMode.LeadingCharClass_RightToLeft_CaseSensitive_Set:
-                    {
-                        string set = _code.LeadingCharClasses![0].CharClass;
-                        ReadOnlySpan<char> span = runtext.AsSpan(runtextbeg, runtextpos - runtextbeg);
-                        for (int i = span.Length - 1; i >= 0; i--)
-                        {
-                            if (RegexCharClass.CharInClass(span[i], set, ref _code.LeadingCharClassAsciiLookup))
-                            {
-                                runtextpos = runtextbeg + i + 1;
-                                return true;
-                            }
-                        }
-                        runtextpos = runtextbeg;
-                        return false;
-                    }
-
-                case FindFirstCharMode.LeadingCharClass_RightToLeft_CaseInsensitive_Singleton:
-                    {
-                        char ch = RegexCharClass.SingletonChar(_code.LeadingCharClasses![0].CharClass);
-                        TextInfo ti = _textInfo;
-                        ReadOnlySpan<char> span = runtext.AsSpan(runtextbeg, runtextpos - runtextbeg);
-                        for (int i = span.Length - 1; i >= 0; i--)
-                        {
-                            if (ch == ti.ToLower(span[i]))
-                            {
-                                runtextpos = runtextbeg + i + 1;
-                                return true;
-                            }
-                        }
-                        runtextpos = runtextbeg;
-                        return false;
-                    }
-
-                case FindFirstCharMode.LeadingCharClass_RightToLeft_CaseInsensitive_Set:
-                    {
-                        string set = _code.LeadingCharClasses![0].CharClass;
-                        ReadOnlySpan<char> span = runtext.AsSpan(runtextbeg, runtextpos - runtextbeg);
-                        TextInfo ti = _textInfo;
-                        for (int i = span.Length - 1; i >= 0; i--)
-                        {
-                            if (RegexCharClass.CharInClass(ti.ToLower(span[i]), set, ref _code.LeadingCharClassAsciiLookup))
-                            {
-                                runtextpos = runtextbeg + i + 1;
-                                return true;
-                            }
-                        }
-                        runtextpos = runtextbeg;
-                        return false;
-                    }
-
-                // Nothing special to look for.  Just return true indicating this is a valid position to try to match.
-
-                default:
-                    Debug.Assert(_findFirstCharMode == FindFirstCharMode.NoSearch);
-                    return true;
-            }
-
-            bool NoPrefixOrPrefixMatches() =>
-                _code.BoyerMoorePrefix is not RegexBoyerMoore rbm ||
-                rbm.IsMatch(runtext!, runtextpos, runtextbeg, runtextend);
-        }
+        protected override bool FindFirstChar() =>
+            _code.FindOptimizations.TryFindNextStartingPosition(runtext!, ref runtextpos, runtextbeg, runtextstart, runtextend);
  
          protected override void Go()
          {
@@ -1230,7 +889,7 @@ namespace System.Text.RegularExpressions
  
                              int operand0 = Operand(0);
                              string set = _code.Strings[operand0];
-                            ref int[]? setLookup = ref _code.StringsAsciiLookup[operand0];
+                            ref uint[]? setLookup = ref _code.StringsAsciiLookup[operand0];
  
                              while (c-- > 0)
                              {
@@ -1322,7 +981,7 @@ namespace System.Text.RegularExpressions
                              int len = Math.Min(Operand(1), Forwardchars());
                              int operand0 = Operand(0);
                              string set = _code.Strings[operand0];
-                            ref int[]? setLookup = ref _code.StringsAsciiLookup[operand0];
+                            ref uint[]? setLookup = ref _code.StringsAsciiLookup[operand0];
                              int i;
  
                              for (i = len; i > 0; i--)
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs

index 8ed30bb..53b78c5 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs
@@ -39,9 +39,6 @@ namespace System.Text.RegularExpressions
              _code = code;
              _codes = code.Codes;
              _strings = code.Strings;
-            _leadingCharClasses = code.LeadingCharClasses;
-            _boyerMoorePrefix = code.BoyerMoorePrefix;
-            _leadingAnchor = code.LeadingAnchor;
              _trackcount = code.TrackCount;
              _options = options;
              _hasTimeout = hasTimeout;
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs

index c23bb50..e7b0e71 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
@@ -41,6 +41,7 @@
  using System.Collections.Generic;
  using System.Diagnostics;
  using System.Diagnostics.CodeAnalysis;
+using System.Globalization;
  using System.Threading;
  
  namespace System.Text.RegularExpressions
@@ -151,11 +152,49 @@ namespace System.Text.RegularExpressions
              N = n;
          }
  
-        public bool UseOptionR() => (Options & RegexOptions.RightToLeft) != 0;
+        /// <summary>Builds the node that matches one literal character, honoring RegexOptions.IgnoreCase.</summary>
+        /// <param name="ch">The literal character to match.</param>
+        /// <param name="options">The options in effect for the node.</param>
+        /// <param name="culture">The culture used for case conversion; asserted non-null when IgnoreCase is set.</param>
+        /// <returns>A RegexNode.One for the character, or a RegexNode.Set covering its case-equivalent characters.</returns>
+        public static RegexNode CreateOneWithCaseConversion(char ch, RegexOptions options, CultureInfo? culture)
+        {
+            // Case-sensitive matching needs no conversion: emit the character exactly as given.
+            if ((options & RegexOptions.IgnoreCase) == 0)
+            {
+                return new RegexNode(One, options, ch);
+            }
+
+            Debug.Assert(culture is not null);
+            RegexOptions caseSensitiveOptions = options & ~RegexOptions.IgnoreCase;
+
+            // A character outside the Unicode categories that participate in case conversion can
+            // only match itself, so the node is created without the IgnoreCase option.
+            if (!RegexCharClass.ParticipatesInCaseConversion(ch))
+            {
+                return new RegexNode(One, caseSensitiveOptions, ch);
+            }
+
+            // Try to build a set holding every case-insensitive equivalent of the character.  When that
+            // succeeds the out flag is false, and the set is created without the IgnoreCase option.
+            string equivalents = RegexCharClass.OneToStringClass(ch, culture, out bool stillCaseInsensitive);
+            if (!stillCaseInsensitive)
+            {
+                return new RegexNode(Set, caseSensitiveOptions, equivalents);
+            }
+
+            // Otherwise, until ToLower usage at match time can be removed entirely
+            // (https://github.com/dotnet/runtime/issues/61048), lowercase the character and
+            // keep an IgnoreCase One node.
+            return new RegexNode(One, options, culture.TextInfo.ToLower(ch));
+        }
  
-        public RegexNode ReverseLeft()
+        /// <summary>Reverses all children of a concatenation when in RightToLeft mode.</summary>
+        public RegexNode ReverseConcatenationIfRightToLeft()
          {
-            if (UseOptionR() && Type == Concatenate && ChildCount() > 1)
+            if ((Options & RegexOptions.RightToLeft) != 0 &&
+                Type == Concatenate &&
+                ChildCount() > 1)
              {
                  ((List<RegexNode>)Children!).Reverse();
              }
@@ -203,13 +242,26 @@ namespace System.Text.RegularExpressions
              {
                  RegexNode node = toExamine.Pop();
  
+                // Add all children to be examined
+                int childCount = node.ChildCount();
+                for (int i = 0; i < childCount; i++)
+                {
+                    RegexNode child = node.Child(i);
+                    Debug.Assert(child.Next == node, $"{child.Description()} missing reference to parent {node.Description()}");
+
+                    toExamine.Push(child);
+                }
+
                  // Validate that we never see certain node types.
                  Debug.Assert(Type != Group, "All Group nodes should have been removed.");
  
-                // Validate expected child counts.
-                int childCount = node.ChildCount();
+                // Validate node types and expected child counts.
                  switch (node.Type)
                  {
+                    case Group:
+                        Debug.Fail("All Group nodes should have been removed.");
+                        break;
+
                      case Beginning:
                      case Bol:
                      case Boundary:
@@ -247,25 +299,20 @@ namespace System.Text.RegularExpressions
                      case Prevent:
                      case Require:
                          Debug.Assert(childCount == 1, $"Expected one and only one child for {node.TypeName}, got {childCount}.");
-                        toExamine.Push(node.Child(0));
                          break;
  
                      case Testref:
                      case Testgroup:
                          Debug.Assert(childCount >= 1, $"Expected at least one child for {node.TypeName}, got {childCount}.");
-                        for (int i = 0; i < childCount; i++)
-                        {
-                            toExamine.Push(node.Child(i));
-                        }
                          break;
  
                      case Concatenate:
                      case Alternate:
                          Debug.Assert(childCount >= 2, $"Expected at least two children for {node.TypeName}, got {childCount}.");
-                        for (int i = 0; i < childCount; i++)
-                        {
-                            toExamine.Push(node.Child(i));
-                        }
+                        break;
+
+                    default:
+                        Debug.Fail($"Unexpected node type: {node.Type}");
                          break;
                  }
  
@@ -273,6 +320,10 @@ namespace System.Text.RegularExpressions
                  switch (node.Type)
                  {
                      case Multi:
+                        Debug.Assert(node.Str is not null, "Expect non-null multi string");
+                        Debug.Assert(node.Str.Length >= 2, $"Expected {node.Str} to be at least two characters");
+                        break;
+
                      case Set:
                      case Setloop:
                      case Setloopatomic:
@@ -881,8 +932,10 @@ namespace System.Text.RegularExpressions
  
                  default:
                      ReduceSingleLetterAndNestedAlternations();
-                    RegexNode newThis = ReplaceNodeIfUnnecessary(Nothing);
-                    return newThis != this ? newThis : ExtractCommonPrefixes();
+                    RegexNode node = ReplaceNodeIfUnnecessary(Nothing);
+                    node = ExtractCommonPrefixText(node);
+                    node = ExtractCommonPrefixOneNotoneSet(node);
+                    return node;
              }
  
              // This function performs two optimizations:
@@ -952,7 +1005,6 @@ namespace System.Text.RegularExpressions
                                  break;
                              }
  
-
                              // The last node was a Set or a One, we're a Set or One and our options are the same.
                              // Merge the two nodes.
                              j--;
@@ -981,6 +1033,12 @@ namespace System.Text.RegularExpressions
  
                              prev.Type = Set;
                              prev.Str = prevCharClass.ToStringClass(Options);
+                            if ((prev.Options & RegexOptions.IgnoreCase) != 0 &&
+                                RegexCharClass.MakeCaseSensitiveIfPossible(prev.Str, RegexParser.GetTargetCulture(prev.Options)) is string newSetString)
+                            {
+                                prev.Str = newSetString;
+                                prev.Options &= ~RegexOptions.IgnoreCase;
+                            }
                          }
                          else if (at.Type == Nothing)
                          {
@@ -1001,6 +1059,106 @@ namespace System.Text.RegularExpressions
                  }
              }
  
+            // Factors out a leading node (a One, Notone, or Set, individual or loop) that is identical
+            // across contiguous alternation branches, placing it once in front of a nested alternation.
+            // e.g. \w12|\d34|\d56|\w78|\w90 => \w12|\d(?:34|56)|\w(?:78|90)
+            static RegexNode ExtractCommonPrefixOneNotoneSet(RegexNode alternation)
+            {
+                if (alternation.Type != Alternate)
+                {
+                    return alternation;
+                }
+
+                Debug.Assert(alternation.Children is List<RegexNode> { Count: >= 2 });
+                var children = (List<RegexNode>)alternation.Children;
+
+                // Only process left-to-right prefixes.
+                if ((alternation.Options & RegexOptions.RightToLeft) != 0)
+                {
+                    return alternation;
+                }
+
+                // Only handle the case where every branch is a concatenation of at least two nodes.
+                foreach (RegexNode child in children)
+                {
+                    if (child.Type != Concatenate || child.ChildCount() < 2)
+                    {
+                        return alternation;
+                    }
+                }
+
+                for (int startingIndex = 0; startingIndex < children.Count - 1; startingIndex++)
+                {
+                    Debug.Assert(children[startingIndex].Children is List<RegexNode> { Count: >= 2 });
+
+                    // Only handle the case where each branch begins with the same One, Notone, or Set (individual or loop).
+                    // Note that while we can do this for individual characters, fixed length loops, and atomic loops, doing
+                    // it for non-atomic variable length loops could change behavior as each branch could otherwise have a
+                    // different number of characters consumed by the loop based on what's after it.
+                    RegexNode required = children[startingIndex].Child(0);
+                    switch (required.Type)
+                    {
+                        case One or Notone or Set:
+                        case Oneloopatomic or Notoneloopatomic or Setloopatomic:
+                        case Oneloop or Notoneloop or Setloop or Onelazy or Notonelazy or Setlazy when required.M == required.N:
+                            break;
+
+                        default:
+                            continue;
+                    }
+
+                    // Extend the run while the following branches begin with the exact same node (type, options, M, N, Ch, Str).
+                    int endingIndex = startingIndex + 1;
+                    for (; endingIndex < children.Count; endingIndex++)
+                    {
+                        RegexNode other = children[endingIndex].Child(0);
+                        if (required.Type != other.Type ||
+                            required.Options != other.Options ||
+                            required.M != other.M ||
+                            required.N != other.N ||
+                            required.Ch != other.Ch ||
+                            required.Str != other.Str)
+                        {
+                            break;
+                        }
+                    }
+
+                    if (endingIndex - startingIndex <= 1)
+                    {
+                        // Nothing to extract from this starting index.
+                        continue;
+                    }
+
+                    // Remove the prefix node from every branch in the run, adding the rest of each branch to a new alternation.
+                    var newAlternate = new RegexNode(Alternate, alternation.Options);
+                    for (int i = startingIndex; i < endingIndex; i++)
+                    {
+                        ((List<RegexNode>)children[i].Children!).RemoveAt(0);
+                        newAlternate.AddChild(children[i]);
+                    }
+
+                    // If this alternation is wrapped as atomic, we need to do the same for the new alternation.
+                    if (alternation.Next is RegexNode parent && parent.Type == Atomic)
+                    {
+                        var atomic = new RegexNode(Atomic, alternation.Options);
+                        atomic.AddChild(newAlternate);
+                        newAlternate = atomic;
+                    }
+
+                    // Now create a concatenation of the prefix node with the new alternation for the combined
+                    // branches, and replace all of the branches in this alternation with that new concatenation.
+                    var newConcat = new RegexNode(Concatenate, alternation.Options);
+                    newConcat.AddChild(required);
+                    newConcat.AddChild(newAlternate);
+                    alternation.ReplaceChild(startingIndex, newConcat);
+                    children.RemoveRange(startingIndex + 1, endingIndex - startingIndex - 1);
+                }
+
+                // If we've reduced this alternation to just a single branch, return it.
+                // Otherwise, return the alternation.
+                return alternation.ChildCount() == 1 ? alternation.Child(0) : alternation;
+            }
+
              // Analyzes all the branches of the alternation for text that's identical at the beginning
              // of every branch.  That text is then pulled out into its own one or multi node in a
              // concatenation with the alternation (whose branches are updated to remove that prefix).
@@ -1010,22 +1168,25 @@ namespace System.Text.RegularExpressions
              // by sets that can be merged.  Third, it reduces the amount of duplicated comparisons required
              // if we end up backtracking into subsequent branches.
              // e.g. abc|ade => a(?bc|de)
-            RegexNode ExtractCommonPrefixes()
+            static RegexNode ExtractCommonPrefixText(RegexNode alternation)
              {
+                if (alternation.Type != Alternate)
+                {
+                    return alternation;
+                }
+
+                Debug.Assert(alternation.Children is List<RegexNode> { Count: >= 2 });
+                var children = (List<RegexNode>)alternation.Children;
+
                  // To keep things relatively simple, we currently only handle:
                  // - Left to right (e.g. we don't process alternations in lookbehinds)
                  // - Branches that are one or multi nodes, or that are concatenations beginning with one or multi nodes.
                  // - All branches having the same options.
-                // - Text, rather than also trying to combine identical sets that start each branch.
-
-                Debug.Assert(Children is List<RegexNode>);
-                var children = (List<RegexNode>)Children;
-                Debug.Assert(children.Count >= 2);
  
                  // Only extract left-to-right prefixes.
-                if ((Options & RegexOptions.RightToLeft) != 0)
+                if ((alternation.Options & RegexOptions.RightToLeft) != 0)
                  {
-                    return this;
+                    return alternation;
                  }
  
                  Span<char> scratchChar = stackalloc char[1];
@@ -1036,7 +1197,7 @@ namespace System.Text.RegularExpressions
                      RegexNode? startingNode = children[startingIndex].FindBranchOneOrMultiStart();
                      if (startingNode is null)
                      {
-                        return this;
+                        return alternation;
                      }
  
                      RegexOptions startingNodeOptions = startingNode.Options;
@@ -1159,7 +1320,7 @@ namespace System.Text.RegularExpressions
                          }
                      }
  
-                    if (Next is RegexNode parent && parent.Type == Atomic)
+                    if (alternation.Next is RegexNode parent && parent.Type == Atomic)
                      {
                          var atomic = new RegexNode(Atomic, startingNodeOptions);
                          atomic.AddChild(newAlternate);
@@ -1169,11 +1330,11 @@ namespace System.Text.RegularExpressions
                      var newConcat = new RegexNode(Concatenate, startingNodeOptions);
                      newConcat.AddChild(prefix);
                      newConcat.AddChild(newAlternate);
-                    ReplaceChild(startingIndex, newConcat);
+                    alternation.ReplaceChild(startingIndex, newConcat);
                      children.RemoveRange(startingIndex + 1, endingIndex - startingIndex - 1);
                  }
  
-                return ChildCount() == 1 ? Child(0) : this;
+                return alternation.ChildCount() == 1 ? alternation.Child(0) : alternation;
              }
          }
  
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs

index 1f4a05a..0bda8a2 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs
@@ -81,6 +81,10 @@ namespace System.Text.RegularExpressions
          {
          }
  
+        /// <summary>Selects the invariant culture when CultureInvariant is specified; otherwise, the current culture.</summary>
+        internal static CultureInfo GetTargetCulture(RegexOptions options) =>
+            (options & RegexOptions.CultureInvariant) == 0 ? CultureInfo.CurrentCulture : CultureInfo.InvariantCulture;
+
          public static RegexTree Parse(string pattern, RegexOptions options, CultureInfo culture)
          {
              var parser = new RegexParser(pattern, options, culture, stackalloc int[OptionStackDefaultSize]);
@@ -319,7 +323,12 @@ namespace System.Text.RegularExpressions
                          goto ContinueOuterScan;
  
                      case '[':
-                        AddUnitSet(ScanCharClass(UseOptionI(), scanOnly: false)!.ToStringClass(_options));
+                        {
+                            string setString = ScanCharClass(UseOptionI(), scanOnly: false)!.ToStringClass(_options);
+                            _unit = UseOptionI() && RegexCharClass.MakeCaseSensitiveIfPossible(setString, _culture) is string newSetString ?
+                                new RegexNode(RegexNode.Set, _options & ~RegexOptions.IgnoreCase, newSetString) :
+                                new RegexNode(RegexNode.Set, _options, setString);
+                        }
                          break;
  
                      case '(':
@@ -378,14 +387,9 @@ namespace System.Text.RegularExpressions
                          break;
  
                      case '.':
-                        if (UseOptionS())
-                        {
-                            AddUnitSet(RegexCharClass.AnyClass);
-                        }
-                        else
-                        {
-                            AddUnitNotone('\n');
-                        }
+                        _unit = UseOptionS() ?
+                            new RegexNode(RegexNode.Set, _options & ~RegexOptions.IgnoreCase, RegexCharClass.AnyClass) :
+                            new RegexNode(RegexNode.Notone, _options & ~RegexOptions.IgnoreCase, '\n');
                          break;
  
                      case '{':
@@ -734,21 +738,17 @@ namespace System.Text.RegularExpressions
                  {
                      // we aren't in a range, and now there is a subtraction.  Usually this happens
                      // only when a subtraction follows a range, like [a-z-[b]]
+                    MoveRight();
+                    RegexCharClass? rcc = ScanCharClass(caseInsensitive, scanOnly);
                      if (!scanOnly)
                      {
-                        MoveRight(1);
-                        charClass!.AddSubtraction(ScanCharClass(caseInsensitive, scanOnly)!);
+                        charClass!.AddSubtraction(rcc!);
  
                          if (CharsRight() > 0 && RightChar() != ']')
                          {
                              throw MakeException(RegexParseError.ExclusionGroupNotLast, SR.ExclusionGroupNotLast);
                          }
                      }
-                    else
-                    {
-                        MoveRight(1);
-                        ScanCharClass(caseInsensitive, scanOnly);
-                    }
                  }
                  else
                  {
@@ -1173,32 +1173,32 @@ namespace System.Text.RegularExpressions
                  case 'w':
                      MoveRight();
                      return scanOnly ? null :
-                        new RegexNode(RegexNode.Set, _options, UseOptionE() ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass);
+                        new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.ECMAWordClass : RegexCharClass.WordClass);
  
                  case 'W':
                      MoveRight();
                      return scanOnly ? null :
-                        new RegexNode(RegexNode.Set, _options, UseOptionE() ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass);
+                        new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.NotECMAWordClass : RegexCharClass.NotWordClass);
  
                  case 's':
                      MoveRight();
                      return scanOnly ? null :
-                        new RegexNode(RegexNode.Set, _options, UseOptionE() ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass);
+                        new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.ECMASpaceClass : RegexCharClass.SpaceClass);
  
                  case 'S':
                      MoveRight();
                      return scanOnly ? null :
-                        new RegexNode(RegexNode.Set, _options, UseOptionE() ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass);
+                        new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.NotECMASpaceClass : RegexCharClass.NotSpaceClass);
  
                  case 'd':
                      MoveRight();
                      return scanOnly ? null :
-                        new RegexNode(RegexNode.Set, _options, UseOptionE() ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass);
+                        new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.ECMADigitClass : RegexCharClass.DigitClass);
  
                  case 'D':
                      MoveRight();
                      return scanOnly ? null :
-                        new RegexNode(RegexNode.Set, _options, UseOptionE() ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass);
+                        new RegexNode(RegexNode.Set, RemoveIgnoreCaseIfNotEcma(_options), UseOptionE() ? RegexCharClass.NotECMADigitClass : RegexCharClass.NotDigitClass);
  
                  case 'p':
                  case 'P':
@@ -1220,6 +1220,22 @@ namespace System.Text.RegularExpressions
                  default:
                      return ScanBasicBackslash(scanOnly);
              }
+
+            static RegexOptions RemoveIgnoreCaseIfNotEcma(RegexOptions options)
+            {
+                // \w, \W, \d, \D, \s, and \S already include the notion of casing in their definitions,
+                // so IgnoreCase is stripped from the options used for them.
+                //
+                // The exception is RegexOptions.ECMAScript, where IgnoreCase is left in place for
+                // compatibility.  That should be revisited as part of
+                // https://github.com/dotnet/runtime/issues/61048: it seems wrong that ECMAScript
+                // (which implies non-Unicode) would still involve lowercasing potentially Unicode
+                // character inputs to match against these sets.
+                bool isEcma = (options & RegexOptions.ECMAScript) != 0;
+                return isEcma ?
+                    options :
+                    options & ~RegexOptions.IgnoreCase;
+            }
          }
  
          /// <summary>Scans \-style backreferences and character escapes</summary>
@@ -1354,12 +1370,9 @@ namespace System.Text.RegularExpressions
              Textto(backpos);
              ch = ScanCharEscape();
  
-            if (UseOptionI())
-            {
-                ch = _culture.TextInfo.ToLower(ch);
-            }
-
-            return scanOnly ? null : new RegexNode(RegexNode.One, _options, ch);
+            return !scanOnly ?
+                RegexNode.CreateOneWithCaseConversion(ch, _options, _culture) :
+                null;
          }
  
          /*
@@ -1369,7 +1382,7 @@ namespace System.Text.RegularExpressions
          {
              if (CharsRight() == 0)
              {
-                return new RegexNode(RegexNode.One, _options, '$');
+                return RegexNode.CreateOneWithCaseConversion('$', _options, _culture);
              }
  
              char ch = RightChar();
@@ -1469,7 +1482,7 @@ namespace System.Text.RegularExpressions
                  {
                      case '$':
                          MoveRight();
-                        return new RegexNode(RegexNode.One, _options, '$');
+                        return RegexNode.CreateOneWithCaseConversion('$', _options, _culture);
  
                      case '&':
                          capnum = 0;
@@ -1502,7 +1515,7 @@ namespace System.Text.RegularExpressions
              // unrecognized $: literalize
  
              Textto(backpos);
-            return new RegexNode(RegexNode.One, _options, '$');
+            return RegexNode.CreateOneWithCaseConversion('$', _options, _culture);
          }
  
          /// <summary>Throws on unsupported capture references for NonBacktracking in replacement patterns.</summary>
@@ -2149,50 +2162,26 @@ namespace System.Text.RegularExpressions
          /// <summary>Add a string to the last concatenate.</summary>
          private void AddConcatenate(int pos, int cch, bool isReplacement)
          {
-            if (cch == 0)
+            switch (cch)
              {
-                return;
-            }
+                case 0: // Nothing to add.
+                    return;
  
-            RegexNode node;
-            if (cch > 1)
-            {
-                string str = UseOptionI() && !isReplacement ?
-#if REGEXGENERATOR
-                    StringExtensions.Create
-#else
-                    string.Create
-#endif
-                        (cch, (_pattern, _culture, pos, cch), static (dest, state) =>
-                    {
-                        // We do the ToLower character-by character for consistency with the rest of the implementation.
-                        // With surrogate pairs, doing a ToLower on the entire string is more correct linguistically, but
-                        // Regex doesn't support surrogates, and not doing this character-by-character then causes differences
-                        // from matching where characters are lowercased individually.
-                        ReadOnlySpan<char> src = state._pattern.AsSpan(state.pos, state.cch);
-                        TextInfo ti = state._culture.TextInfo;
-                        for (int i = 0; i < dest.Length; i++)
-                        {
-                            dest[i] = ti.ToLower(src[i]);
-                        }
-                    }) :
-                    _pattern.Substring(pos, cch);
-
-                node = new RegexNode(RegexNode.Multi, _options, str);
-            }
-            else
-            {
-                char ch = _pattern[pos];
+                case 1: // Single character; for replacements IgnoreCase is masked off before the node is created.
+                    _concatenation!.AddChild(RegexNode.CreateOneWithCaseConversion(_pattern[pos], isReplacement ? _options & ~RegexOptions.IgnoreCase : _options, _culture));
+                    break;
  
-                if (UseOptionI() && !isReplacement)
-                {
-                    ch = _culture.TextInfo.ToLower(ch);
-                }
+                case > 1 when !UseOptionI() || isReplacement: // Case-sensitive or replacement text stays one Multi node with IgnoreCase masked off.
+                    _concatenation!.AddChild(new RegexNode(RegexNode.Multi, _options & ~RegexOptions.IgnoreCase, _pattern.Substring(pos, cch)));
+                    break;
  
-                node = new RegexNode(RegexNode.One, _options, ch);
+                default: // Multiple characters with IgnoreCase: one node per character via CreateOneWithCaseConversion.
+                    foreach (char c in _pattern.AsSpan(pos, cch))
+                    {
+                        _concatenation!.AddChild(RegexNode.CreateOneWithCaseConversion(c, _options, _culture));
+                    }
+                    break;
              }
-
-            _concatenation!.AddChild(node);
          }
  
          /// <summary>Push the parser state (in response to an open paren)</summary>
@@ -2243,11 +2232,11 @@ namespace System.Text.RegularExpressions
  
              if (_group!.Type == RegexNode.Testgroup || _group.Type == RegexNode.Testref)
              {
-                _group.AddChild(_concatenation!.ReverseLeft());
+                _group.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft());
              }
              else
              {
-                _alternation!.AddChild(_concatenation!.ReverseLeft());
+                _alternation!.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft());
              }
  
              _concatenation = new RegexNode(RegexNode.Concatenate, _options);
@@ -2273,29 +2262,7 @@ namespace System.Text.RegularExpressions
          private RegexNode? Unit() => _unit;
  
          /// <summary>Sets the current unit to a single char node</summary>
-        private void AddUnitOne(char ch)
-        {
-            if (UseOptionI())
-            {
-                ch = _culture.TextInfo.ToLower(ch);
-            }
-
-            _unit = new RegexNode(RegexNode.One, _options, ch);
-        }
-
-        /// <summary>Sets the current unit to a single inverse-char node</summary>
-        private void AddUnitNotone(char ch)
-        {
-            if (UseOptionI())
-            {
-                ch = _culture.TextInfo.ToLower(ch);
-            }
-
-            _unit = new RegexNode(RegexNode.Notone, _options, ch);
-        }
-
-        /// <summary>Sets the current unit to a single set node</summary>
-        private void AddUnitSet(string cc) => _unit = new RegexNode(RegexNode.Set, _options, cc);
+        private void AddUnitOne(char ch) => _unit = RegexNode.CreateOneWithCaseConversion(ch, _options, _culture); // any case conversion is left to RegexNode.CreateOneWithCaseConversion (given _options and _culture); not handled here
  
          /// <summary>Sets the current unit to a subtree</summary>
          private void AddUnitNode(RegexNode node) => _unit = node;
@@ -2308,7 +2275,7 @@ namespace System.Text.RegularExpressions
          {
              if (_group!.Type == RegexNode.Testgroup || _group.Type == RegexNode.Testref)
              {
-                _group.AddChild(_concatenation!.ReverseLeft());
+                _group.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft());
  
                  if (_group.Type == RegexNode.Testref && _group.ChildCount() > 2 || _group.ChildCount() > 3)
                  {
@@ -2317,7 +2284,7 @@ namespace System.Text.RegularExpressions
              }
              else
              {
-                _alternation!.AddChild(_concatenation!.ReverseLeft());
+                _alternation!.AddChild(_concatenation!.ReverseConcatenationIfRightToLeft());
                  _group.AddChild(_alternation);
              }
  
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs

index 96a709b..22a2abb 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs
@@ -5,6 +5,8 @@ using System.Collections.Generic;
  using System.Diagnostics;
  using System.Diagnostics.CodeAnalysis;
  using System.Globalization;
+using System.Runtime.CompilerServices;
+using System.Threading;
  
  namespace System.Text.RegularExpressions
  {
@@ -40,256 +42,518 @@ namespace System.Text.RegularExpressions
              _skipAllChildren = false;
          }
  
-        /// <summary>Computes the leading substring in <paramref name="tree"/>.</summary>
-        /// <remarks>It's quite trivial and gives up easily, in which case an empty string is returned.</remarks>
-        public static (string Prefix, bool CaseInsensitive) ComputeLeadingSubstring(RegexTree tree)
+        /// <summary>Computes the case-sensitive leading substring that every match of <paramref name="tree"/> must begin with; may be empty.</summary>
+        public static string FindCaseSensitivePrefix(RegexTree tree)
          {
-            RegexNode curNode = tree.Root;
-            RegexNode? concatNode = null;
-            int nextChild = 0;
+            var vsb = new ValueStringBuilder(stackalloc char[64]);
+            Process(tree.Root, ref vsb);
+            return vsb.ToString();
  
-            while (true)
+            // Processes the node, adding any prefix text to the builder.
+            // Returns whether processing should continue with subsequent nodes.
+            static bool Process(RegexNode node, ref ValueStringBuilder vsb)
              {
-                switch (curNode.Type)
+                if (!StackHelper.TryEnsureSufficientExecutionStack())
                  {
+                    // If we're too deep on the stack, just give up finding any more prefix.
+                    return false;
+                }
+
+                // We don't bother to handle reversed input, so process at most one node
+                // when handling RightToLeft.
+                bool rtl = (node.Options & RegexOptions.RightToLeft) != 0;
+
+                switch (node.Type)
+                {
+                    // Concatenation
                      case RegexNode.Concatenate:
-                        if (curNode.ChildCount() > 0)
                          {
-                            concatNode = curNode;
-                            nextChild = 0;
+                            int childCount = node.ChildCount();
+                            for (int i = 0; i < childCount; i++)
+                            {
+                                if (!Process(node.Child(i), ref vsb))
+                                {
+                                    return false;
+                                }
+                            }
+                            return !rtl;
                          }
-                        break;
  
-                    case RegexNode.Atomic:
-                    case RegexNode.Capture:
-                        curNode = curNode.Child(0);
-                        concatNode = null;
-                        continue;
+                    // Alternation: find a string that's a shared prefix of all branches
+                    case RegexNode.Alternate:
+                        {
+                            int childCount = node.ChildCount();
  
-                    case RegexNode.Oneloop:
-                    case RegexNode.Oneloopatomic:
-                    case RegexNode.Onelazy:
+                            // Store the initial branch into the target builder
+                            int initialLength = vsb.Length;
+                            bool keepExploring = Process(node.Child(0), ref vsb);
+                            int addedLength = vsb.Length - initialLength;
  
-                        // In release, cutoff at a length to which we can still reasonably construct a string and Boyer-Moore search.
-                        // In debug, use a smaller cutoff to exercise the cutoff path in tests
-                        const int Cutoff =
-#if DEBUG
-                            50;
-#else
-                            RegexBoyerMoore.MaxLimit;
-#endif
+                            // Then explore the rest of the branches, finding the length of
+                            // the prefix they all share in common with the initial branch.
+                            if (addedLength != 0)
+                            {
+                                var alternateSb = new ValueStringBuilder(64);
  
-                        if (curNode.M > 0 && curNode.M < Cutoff)
-                        {
-                            return (new string(curNode.Ch, curNode.M), (curNode.Options & RegexOptions.IgnoreCase) != 0);
-                        }
+                                // Process each branch.  If we reach a point where we've proven there's
+                                // no overlap, we can bail early.
+                                for (int i = 1; i < childCount && addedLength != 0; i++)
+                                {
+                                    alternateSb.Length = 0;
+
+                                    // Process the branch.  We want to keep exploring after this alternation,
+                                    // but we can't if either this branch doesn't allow for it or if the prefix
+                                    // supplied by this branch doesn't entirely match all the previous ones.
+                                    keepExploring &= Process(node.Child(i), ref alternateSb);
+                                    keepExploring &= alternateSb.Length == addedLength;
+
+                                    addedLength = Math.Min(addedLength, alternateSb.Length);
+                                    for (int j = 0; j < addedLength; j++)
+                                    {
+                                        if (vsb[initialLength + j] != alternateSb[j])
+                                        {
+                                            addedLength = j;
+                                            keepExploring = false;
+                                            break;
+                                        }
+                                    }
+                                }
  
-                        return (string.Empty, false);
+                                alternateSb.Dispose();
  
-                    case RegexNode.One:
-                        return (curNode.Ch.ToString(), (curNode.Options & RegexOptions.IgnoreCase) != 0);
+                                // Then cull back on what was added based on the other branches.
+                                vsb.Length = initialLength + addedLength;
+                            }
  
-                    case RegexNode.Multi:
-                        return (curNode.Str!, (curNode.Options & RegexOptions.IgnoreCase) != 0);
+                            return !rtl && keepExploring && addedLength != 0; // with an empty first-branch prefix the other branches were never examined, so we can't continue past the alternation
+                        }
  
+                    // One character
+                    case RegexNode.One when (node.Options & RegexOptions.IgnoreCase) == 0:
+                        vsb.Append(node.Ch);
+                        return !rtl;
+
+                    // Multiple characters
+                    case RegexNode.Multi when (node.Options & RegexOptions.IgnoreCase) == 0:
+                        vsb.Append(node.Str);
+                        return !rtl;
+
+                    // Loop of one character
+                    case RegexNode.Oneloop or RegexNode.Oneloopatomic or RegexNode.Onelazy when node.M > 0 && (node.Options & RegexOptions.IgnoreCase) == 0:
+                        const int SingleCharIterationLimit = 32; // arbitrary cut-off to avoid creating super long strings unnecessarily
+                        int count = Math.Min(node.M, SingleCharIterationLimit);
+                        vsb.Append(node.Ch, count);
+                        return count == node.N && !rtl;
+
+                    // Loop of a node
+                    case RegexNode.Loop or RegexNode.Lazyloop when node.M > 0:
+                        {
+                            const int NodeIterationLimit = 4; // arbitrary cut-off to avoid creating super long strings unnecessarily
+                            int limit = Math.Min(node.M, NodeIterationLimit);
+                            for (int i = 0; i < limit; i++)
+                            {
+                                if (!Process(node.Child(0), ref vsb))
+                                {
+                                    return false;
+                                }
+                            }
+                            return limit == node.N && !rtl;
+                        }
+
+                    // Grouping nodes for which we only care about their single child
+                    case RegexNode.Atomic:
+                    case RegexNode.Capture:
+                        return Process(node.Child(0), ref vsb);
+
+                    // Zero-width anchors and assertions
                      case RegexNode.Bol:
                      case RegexNode.Eol:
                      case RegexNode.Boundary:
                      case RegexNode.ECMABoundary:
+                    case RegexNode.NonBoundary:
+                    case RegexNode.NonECMABoundary:
                      case RegexNode.Beginning:
                      case RegexNode.Start:
                      case RegexNode.EndZ:
                      case RegexNode.End:
                      case RegexNode.Empty:
+                    case RegexNode.UpdateBumpalong:
                      case RegexNode.Require:
                      case RegexNode.Prevent:
-                        break;
+                        return true;
  
+                    // Give up for anything else
                      default:
-                        return (string.Empty, false);
+                        return false;
                  }
-
-                if (concatNode == null || nextChild >= concatNode.ChildCount())
-                {
-                    return (string.Empty, false);
-                }
-
-                curNode = concatNode.Child(nextChild++);
              }
          }
  
-        /// <summary>Computes a character class for the first character in <paramref name="tree"/>.</summary>
-        /// <remarks>true if a character class could be computed; otherwise, false.</remarks>
-        public static (string CharClass, bool CaseInsensitive)[]? ComputeFirstCharClass(RegexTree tree)
+        /// <summary>Finds sets at fixed offsets from the beginning of the pattern.</summary>
+        /// <param name="tree">The RegexNode tree.</param>
+        /// <param name="culture">The culture to use for any case conversions.</param>
+        /// <param name="thorough">true to spend more time finding sets (e.g. through alternations); false to do a faster analysis that's potentially more incomplete.</param>
+        /// <returns>The list of found sets, or null if there aren't any.</returns>
+        public static List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>? FindFixedDistanceSets(
+            RegexTree tree, CultureInfo culture, bool thorough)
          {
-            var s = new RegexPrefixAnalyzer(stackalloc int[StackBufferSize]);
-            RegexFC? fc = s.RegexFCFromRegexTree(tree);
-            s.Dispose();
+            const int MaxLoopExpansion = 20; // arbitrary cut-off to avoid loops adding significant overhead to processing
+            const int MaxFixedResults = 50; // arbitrary cut-off to avoid generating lots of sets unnecessarily
  
-            if (fc == null || fc._nullable)
+            // Find all fixed-distance sets.
+            var results = new List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>();
+            int distance = 0;
+            TryFindFixedSets(tree.Root, results, ref distance, culture, thorough);
+#if DEBUG
+            foreach ((char[]? Chars, string Set, int Distance, bool CaseInsensitive) result in results)
              {
-                return null;
+                Debug.Assert(result.Distance <= tree.MinRequiredLength, $"Min: {tree.MinRequiredLength}, Distance: {result.Distance}, Tree: {tree}");
              }
+#endif
  
-            if (fc.CaseInsensitive)
+            // Remove any sets that match everything; they're not helpful.  (This check exists primarily to weed
+            // out use of . in Singleline mode.)
+            bool hasAny = false;
+            for (int i = 0; i < results.Count; i++)
              {
-                fc.AddLowercase(((tree.Options & RegexOptions.CultureInvariant) != 0) ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture);
+                if (results[i].Set == RegexCharClass.AnyClass)
+                {
+                    hasAny = true;
+                    break;
+                }
              }
-
-            return new[] { (fc.GetFirstChars(), fc.CaseInsensitive) };
-        }
-
-        /// <summary>Computes character classes for the first <paramref name="maxChars"/> characters in <paramref name="tree"/>.</summary>
-        /// <remarks>
-        /// For example, given "hello|world" and a <paramref name="maxChars"/> of 3, this will compute the sets [hw], [eo], and [lr].
-        /// As with some of the other computations, it's quite trivial and gives up easily; for example, we could in
-        /// theory handle nodes in a concatenation after an alternation, but we look only at the branches of the
-        /// alternation itself.  As this computation is intended primarily to handle global alternations, it's currently
-        /// a reasonable tradeoff between simplicity, performance, and the fullness of potential optimizations.
-        /// </remarks>
-        public static (string CharClass, bool CaseInsensitive)[]? ComputeMultipleCharClasses(RegexTree tree, int maxChars)
-        {
-            Debug.Assert(maxChars > 1);
-
-            if ((tree.Options & RegexOptions.RightToLeft) != 0)
+            if (hasAny)
              {
-                // We don't bother for RightToLeft.  It's rare and adds non-trivial complication.
-                return null;
+                results.RemoveAll(s => s.Set == RegexCharClass.AnyClass);
              }
  
-            // The known minimum required length will have already factored in knowledge about alternations.
-            // If the known min length is less than the maximum number of chars requested, we can
-            // cut this short.  If it's zero, there's nothing to be found.  If it's one, we won't do
-            // any better than ComputeFirstCharClass (and likely worse).  Otherwise, don't bother looking for more
-            // the min of the min length and the max requested chars.
-            maxChars = Math.Min(tree.MinRequiredLength, maxChars);
-            if (maxChars <= 1)
+            // If we don't have any results, try harder to compute one for the starting character.
+            // This is a more involved computation that can find things the fixed-distance investigation
+            // doesn't.
+            if (results.Count == 0)
              {
-                return null;
+                (string CharClass, bool CaseInsensitive)? first = FindFirstCharClass(tree, culture);
+                if (first is not null)
+                {
+                    results.Add((null, first.Value.CharClass, 0, first.Value.CaseInsensitive));
+                }
+
+                if (results.Count == 0)
+                {
+                    return null;
+                }
              }
  
-            // Find an alternation on the path to the first node.  If we can't, bail.
-            RegexNode node = tree.Root;
-            while (node.Type != RegexNode.Alternate)
+            // For every entry, see if we can mark any that are case-insensitive as actually being case-sensitive
+            // based on not participating in case conversion.  And then for ones that are case-sensitive, try to
+            // get the chars that make up the set, if there are few enough.
+            Span<char> scratch = stackalloc char[5]; // max optimized by IndexOfAny today
+            for (int i = 0; i < results.Count; i++)
              {
-                switch (node.Type)
+                (char[]? Chars, string Set, int Distance, bool CaseInsensitive) result = results[i];
+                if (!RegexCharClass.IsNegated(result.Set))
                  {
-                    case RegexNode.Atomic:
-                    case RegexNode.Capture:
-                    case RegexNode.Concatenate:
-                        node = node.Child(0);
-                        break;
+                    int count = RegexCharClass.GetSetChars(result.Set, scratch);
+                    if (count != 0)
+                    {
+                        if (result.CaseInsensitive && !RegexCharClass.ParticipatesInCaseConversion(scratch.Slice(0, count)))
+                        {
+                            result.CaseInsensitive = false;
+                        }
  
-                    default:
-                        return null;
+                        if (!result.CaseInsensitive)
+                        {
+                            result.Chars = scratch.Slice(0, count).ToArray();
+                        }
+
+                        results[i] = result;
+                    }
                  }
              }
-            Debug.Assert(node.Type == RegexNode.Alternate);
  
-            // Create RegexCharClasses to store the built-up sets.  We may end up returning fewer
-            // than this if we find we can't easily fill this number of sets with 100% confidence.
-            var classes = new RegexCharClass?[maxChars];
-            bool caseInsensitive = false;
+            // Finally, try to move the "best" results to be earlier.  "best" here are ones we're able to search
+            // for the fastest and that have the best chance of matching as few false positives as possible.
+            results.Sort((s1, s2) =>
+            {
+                if (s1.CaseInsensitive != s2.CaseInsensitive)
+                {
+                    // If their case-sensitivities don't match, whichever is case-sensitive comes first / is considered lower.
+                    return s1.CaseInsensitive ? 1 : -1;
+                }
+
+                if (s1.Chars is not null && s2.Chars is not null)
+                {
+                    // When both have a known set of chars, prefer the one with less frequent values.  The frequency is
+                    // only an approximation, used as a tie-breaker when we'd otherwise effectively be picking randomly.  True
+                    // frequencies will vary widely based on the actual data being searched, the language of the data, etc.
+                    int c = SumFrequencies(s1.Chars).CompareTo(SumFrequencies(s2.Chars));
+                    if (c != 0)
+                    {
+                        return c;
+                    }
  
-            int branches = node.ChildCount();
-            Debug.Assert(branches >= 2);
-            for (int branchNum = 0; branchNum < branches; branchNum++)
+                    [MethodImpl(MethodImplOptions.AggressiveInlining)]
+                    static float SumFrequencies(char[] chars)
+                    {
+                        float sum = 0;
+                        foreach (char c in chars)
+                        {
+                            // Lookup each character in the table.  For values > 255, this will end up truncating
+                            // and thus we'll get skew in the data.  It's already a gross approximation, though,
+                            // and it is primarily meant for disambiguation of ASCII letters.
+                            sum += s_frequency[(byte)c];
+                        }
+                        return sum;
+                    }
+                }
+                else if (s1.Chars is not null)
+                {
+                    // If s1 has chars and s2 doesn't, then s1 has fewer chars.
+                    return -1;
+                }
+                else if (s2.Chars is not null)
+                {
+                    // If s2 has chars and s1 doesn't, then s2 has fewer chars.
+                    return 1;
+                }
+
+                return s1.Distance.CompareTo(s2.Distance);
+            });
+
+            return results;
+
+            // Starting from the specified root node, populates results with any characters at a fixed distance
+            // from the node's starting position.  The function returns true if the entire contents of the node
+            // is at a fixed distance, in which case distance will have been updated to include the full length
+            // of the node.  If it returns false, the node isn't entirely fixed, in which case subsequent nodes
+            // shouldn't be examined and distance should no longer be trusted.  However, regardless of whether it
+            // returns true or false, it may have populated results, and all populated results are valid.
+            static bool TryFindFixedSets(RegexNode node, List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)> results, ref int distance, CultureInfo culture, bool thorough)
              {
-                RegexNode alternateBranch = node.Child(branchNum);
-                caseInsensitive |= (alternateBranch.Options & RegexOptions.IgnoreCase) != 0;
+                if (!StackHelper.TryEnsureSufficientExecutionStack())
+                {
+                    return false;
+                }
  
-                switch (alternateBranch.Type)
+                if ((node.Options & RegexOptions.RightToLeft) != 0)
                  {
+                    return false;
+                }
+
+                bool caseInsensitive = (node.Options & RegexOptions.IgnoreCase) != 0;
+
+                switch (node.Type)
+                {
+                    case RegexNode.One:
+                        if (results.Count < MaxFixedResults)
+                        {
+                            string setString = RegexCharClass.OneToStringClass(node.Ch, caseInsensitive ? culture : null, out bool resultIsCaseInsensitive);
+                            results.Add((null, setString, distance++, resultIsCaseInsensitive));
+                            return true;
+                        }
+                        return false;
+
+                    case RegexNode.Onelazy or RegexNode.Oneloop or RegexNode.Oneloopatomic when node.M > 0:
+                        {
+                            string setString = RegexCharClass.OneToStringClass(node.Ch, caseInsensitive ? culture : null, out bool resultIsCaseInsensitive);
+                            int minIterations = Math.Min(node.M, MaxLoopExpansion);
+                            int i = 0;
+                            for (; i < minIterations && results.Count < MaxFixedResults; i++)
+                            {
+                                results.Add((null, setString, distance++, resultIsCaseInsensitive));
+                            }
+                            return i == node.M && i == node.N;
+                        }
+
                      case RegexNode.Multi:
-                        maxChars = Math.Min(maxChars, alternateBranch.Str!.Length);
-                        for (int i = 0; i < maxChars; i++)
                          {
-                            (classes[i] ??= new RegexCharClass()).AddChar(alternateBranch.Str[i]);
+                            string s = node.Str!;
+                            int i = 0;
+                            for (; i < s.Length && results.Count < MaxFixedResults; i++)
+                            {
+                                string setString = RegexCharClass.OneToStringClass(s[i], caseInsensitive ? culture : null, out bool resultIsCaseInsensitive);
+                                results.Add((null, setString, distance++, resultIsCaseInsensitive));
+                            }
+                            return i == s.Length;
                          }
-                        continue;
+
+                    case RegexNode.Set:
+                        if (results.Count < MaxFixedResults)
+                        {
+                            results.Add((null, node.Str!, distance++, caseInsensitive));
+                            return true;
+                        }
+                        return false;
+
+                    case RegexNode.Setlazy or RegexNode.Setloop or RegexNode.Setloopatomic when node.M > 0:
+                        {
+                            int minIterations = Math.Min(node.M, MaxLoopExpansion);
+                            int i = 0;
+                            for (; i < minIterations && results.Count < MaxFixedResults; i++)
+                            {
+                                results.Add((null, node.Str!, distance++, caseInsensitive));
+                            }
+                            return i == node.M && i == node.N;
+                        }
+
+                    case RegexNode.Notone:
+                        // We could create a set out of Notone, but it will be of little value in helping to improve
+                        // the speed of finding the first place to match, as almost every character will match it.
+                        distance++;
+                        return true;
+
+                    case RegexNode.Notonelazy or RegexNode.Notoneloop or RegexNode.Notoneloopatomic when node.M == node.N:
+                        distance += node.M;
+                        return true;
+
+                    case RegexNode.Beginning:
+                    case RegexNode.Bol:
+                    case RegexNode.Boundary:
+                    case RegexNode.ECMABoundary:
+                    case RegexNode.Empty:
+                    case RegexNode.End:
+                    case RegexNode.EndZ:
+                    case RegexNode.Eol:
+                    case RegexNode.NonBoundary:
+                    case RegexNode.NonECMABoundary:
+                    case RegexNode.UpdateBumpalong:
+                    case RegexNode.Start:
+                    case RegexNode.Prevent:
+                    case RegexNode.Require:
+                        // Zero-width anchors and assertions.  In theory for Prevent and Require we could also investigate
+                        // them and use the learned knowledge to impact the generated sets, at least for lookaheads.
+                        // For now, we don't bother.
+                        return true;
+
+                    case RegexNode.Atomic:
+                    case RegexNode.Group:
+                    case RegexNode.Capture:
+                        return TryFindFixedSets(node.Child(0), results, ref distance, culture, thorough);
+
+                    case RegexNode.Lazyloop or RegexNode.Loop when node.M > 0:
+                        // This effectively only iterates the loop once.  If deemed valuable,
+                        // it could be updated in the future to duplicate the found results
+                        // (updated to incorporate distance from previous iterations) and
+                        // summed distance for all node.M iterations.  If node.M == node.N,
+                        // this would then also allow continued evaluation of the rest of the
+                        // expression after the loop.
+                        TryFindFixedSets(node.Child(0), results, ref distance, culture, thorough);
+                        return false;
  
                      case RegexNode.Concatenate:
                          {
-                            int classPos = 0;
-                            int concatChildren = alternateBranch.ChildCount();
-                            for (int i = 0; i < concatChildren && classPos < classes.Length; i++)
+                            int childCount = node.ChildCount();
+                            for (int i = 0; i < childCount; i++)
                              {
-                                RegexNode concatChild = alternateBranch.Child(i);
-                                caseInsensitive |= (concatChild.Options & RegexOptions.IgnoreCase) != 0;
+                                if (!TryFindFixedSets(node.Child(i), results, ref distance, culture, thorough))
+                                {
+                                    return false;
+                                }
+                            }
+                            return true;
+                        }
  
-                                switch (concatChild.Type)
+                    case RegexNode.Alternate when thorough:
+                        {
+                            int childCount = node.ChildCount();
+                            bool allSameSize = true;
+                            int? sameDistance = null;
+                            var combined = new Dictionary<int, (RegexCharClass Set, bool CaseInsensitive, int Count)>();
+
+                            var localResults = new List<(char[]? Chars, string Set, int Distance, bool CaseInsensitive)>();
+                            for (int i = 0; i < childCount; i++)
+                            {
+                                localResults.Clear();
+                                int localDistance = 0;
+                                allSameSize &= TryFindFixedSets(node.Child(i), localResults, ref localDistance, culture, thorough);
+
+                                if (localResults.Count == 0)
                                  {
-                                    case RegexNode.One:
-                                        (classes[classPos++] ??= new RegexCharClass()).AddChar(concatChild.Ch);
-                                        break;
-                                    case RegexNode.Set:
-                                        if (!(classes[classPos++] ??= new RegexCharClass()).TryAddCharClass(RegexCharClass.Parse(concatChild.Str!)))
-                                        {
-                                            // If the classes can't be merged, give up.
-                                            return null;
-                                        }
-                                        break;
-                                    case RegexNode.Multi:
-                                        for (int c = 0; c < concatChild.Str!.Length && classPos < classes.Length; c++)
+                                    return false;
+                                }
+
+                                if (allSameSize)
+                                {
+                                    if (sameDistance is null)
+                                    {
+                                        sameDistance = localDistance;
+                                    }
+                                    else if (sameDistance.Value != localDistance)
+                                    {
+                                        allSameSize = false;
+                                    }
+                                }
+
+                                foreach ((char[]? Chars, string Set, int Distance, bool CaseInsensitive) fixedSet in localResults)
+                                {
+                                    if (combined.TryGetValue(fixedSet.Distance, out (RegexCharClass Set, bool CaseInsensitive, int Count) value))
+                                    {
+                                        if (fixedSet.CaseInsensitive == value.CaseInsensitive &&
+                                            value.Set.TryAddCharClass(RegexCharClass.Parse(fixedSet.Set)))
                                          {
-                                            (classes[classPos++] ??= new RegexCharClass()).AddChar(concatChild.Str[c]);
+                                            value.Count++;
+                                            combined[fixedSet.Distance] = value;
                                          }
-                                        break;
+                                    }
+                                    else
+                                    {
+                                        combined[fixedSet.Distance] = (RegexCharClass.Parse(fixedSet.Set), fixedSet.CaseInsensitive, 1);
+                                    }
+                                }
+                            }
+
+                            foreach (KeyValuePair<int, (RegexCharClass Set, bool CaseInsensitive, int Count)> pair in combined)
+                            {
+                                if (results.Count >= MaxFixedResults)
+                                {
+                                    allSameSize = false;
+                                    break;
+                                }
  
-                                    default: // nothing else supported
-                                        i = concatChildren; // stop looking at additional nodes
-                                        break;
+                                if (pair.Value.Count == childCount)
+                                {
+                                    results.Add((null, pair.Value.Set.ToStringClass(), pair.Key + distance, pair.Value.CaseInsensitive));
                                  }
                              }
  
-                            maxChars = Math.Min(maxChars, classPos);
+                            if (allSameSize)
+                            {
+                                Debug.Assert(sameDistance.HasValue);
+                                distance += sameDistance.Value;
+                                return true;
+                            }
+
+                            return false;
                          }
-                        continue;
  
                      default:
-                        // Any other node type as a branch in the alternation and we give up.  Note that we don't special-case One/Notone/Set
-                        // because that would mean the whole branch was a single char, in which case this computation provides
-                        // zero benefit over the ComputeFirstCharClass computation.
-                        return null;
+                        return false;
                  }
              }
+        }
  
-            // We've now examined all of the alternate branches and were able to successfully process them.
-            // Determine how many we can actually return.
-            for (int i = 0; i < maxChars; i++)
-            {
-                if (classes[i] is null)
-                {
-                    maxChars = i;
-                    break;
-                }
-            }
+        // Computes a character class for the first character in tree.  This uses a more robust algorithm
+        // than is used by TryFindFixedLiterals and thus can find starting sets it couldn't.  For example,
+        // fixed literals won't find the starting set for a*b, as the a isn't guaranteed and the b is at a
+        // variable position, but this will find [ab] as it's instead looking for anything that under any
+        // circumstance could possibly start a match.
+        public static (string CharClass, bool CaseInsensitive)? FindFirstCharClass(RegexTree tree, CultureInfo culture)
+        {
+            var s = new RegexPrefixAnalyzer(stackalloc int[StackBufferSize]);
+            RegexFC? fc = s.RegexFCFromRegexTree(tree);
+            s.Dispose();
  
-            // Make sure we got something.
-            if (maxChars == 0)
+            if (fc == null || fc._nullable)
              {
                  return null;
              }
  
-            // Create and return the RegexPrefix objects.
-            var prefixes = new (string CharClass, bool CaseInsensitive)[maxChars];
-
-            CultureInfo? ci = null;
-            if (caseInsensitive)
-            {
-                ci = (tree.Options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
-            }
-
-            for (int i = 0; i < prefixes.Length; i++)
+            if (fc.CaseInsensitive)
              {
-                if (caseInsensitive)
-                {
-                    classes[i]!.AddLowercase(ci!);
-                }
-                prefixes[i] = (classes[i]!.ToStringClass(), caseInsensitive);
+                fc.AddLowercase(culture);
              }
  
-            return prefixes;
+            return (fc.GetFirstChars(), fc.CaseInsensitive);
          }
  
          /// <summary>Takes a RegexTree and computes the leading anchor that it encounters.</summary>
@@ -619,6 +883,84 @@ namespace System.Text.RegularExpressions
                      throw new ArgumentException(SR.Format(SR.UnexpectedOpcode, NodeType.ToString(CultureInfo.CurrentCulture)));
              }
          }
+
+        /// <summary>Percent occurrences in source text (100 * char count / total count).</summary>
+        private static readonly float[] s_frequency = new float[]
+        {
+            0.000f /* '\x00' */, 0.000f /* '\x01' */, 0.000f /* '\x02' */, 0.000f /* '\x03' */, 0.000f /* '\x04' */, 0.000f /* '\x05' */, 0.000f /* '\x06' */, 0.000f /* '\x07' */,
+            0.000f /* '\x08' */, 0.001f /* '\x09' */, 0.000f /* '\x0A' */, 0.000f /* '\x0B' */, 0.000f /* '\x0C' */, 0.000f /* '\x0D' */, 0.000f /* '\x0E' */, 0.000f /* '\x0F' */,
+            0.000f /* '\x10' */, 0.000f /* '\x11' */, 0.000f /* '\x12' */, 0.000f /* '\x13' */, 0.003f /* '\x14' */, 0.000f /* '\x15' */, 0.000f /* '\x16' */, 0.000f /* '\x17' */,
+            0.000f /* '\x18' */, 0.004f /* '\x19' */, 0.000f /* '\x1A' */, 0.000f /* '\x1B' */, 0.006f /* '\x1C' */, 0.006f /* '\x1D' */, 0.000f /* '\x1E' */, 0.000f /* '\x1F' */,
+            8.952f /* '    ' */, 0.065f /* '   !' */, 0.420f /* '   "' */, 0.010f /* '   #' */, 0.011f /* '   $' */, 0.005f /* '   %' */, 0.070f /* '   &' */, 0.050f /* '   '' */,
+            3.911f /* '   (' */, 3.910f /* '   )' */, 0.356f /* '   *' */, 2.775f /* '   +' */, 1.411f /* '   ,' */, 0.173f /* '   -' */, 2.054f /* '   .' */, 0.677f /* '   /' */,
+            1.199f /* '   0' */, 0.870f /* '   1' */, 0.729f /* '   2' */, 0.491f /* '   3' */, 0.335f /* '   4' */, 0.269f /* '   5' */, 0.435f /* '   6' */, 0.240f /* '   7' */,
+            0.234f /* '   8' */, 0.196f /* '   9' */, 0.144f /* '   :' */, 0.983f /* '   ;' */, 0.357f /* '   <' */, 0.661f /* '   =' */, 0.371f /* '   >' */, 0.088f /* '   ?' */,
+            0.007f /* '   @' */, 0.763f /* '   A' */, 0.229f /* '   B' */, 0.551f /* '   C' */, 0.306f /* '   D' */, 0.449f /* '   E' */, 0.337f /* '   F' */, 0.162f /* '   G' */,
+            0.131f /* '   H' */, 0.489f /* '   I' */, 0.031f /* '   J' */, 0.035f /* '   K' */, 0.301f /* '   L' */, 0.205f /* '   M' */, 0.253f /* '   N' */, 0.228f /* '   O' */,
+            0.288f /* '   P' */, 0.034f /* '   Q' */, 0.380f /* '   R' */, 0.730f /* '   S' */, 0.675f /* '   T' */, 0.265f /* '   U' */, 0.309f /* '   V' */, 0.137f /* '   W' */,
+            0.084f /* '   X' */, 0.023f /* '   Y' */, 0.023f /* '   Z' */, 0.591f /* '   [' */, 0.085f /* '   \' */, 0.590f /* '   ]' */, 0.013f /* '   ^' */, 0.797f /* '   _' */,
+            0.001f /* '   `' */, 4.596f /* '   a' */, 1.296f /* '   b' */, 2.081f /* '   c' */, 2.005f /* '   d' */, 6.903f /* '   e' */, 1.494f /* '   f' */, 1.019f /* '   g' */,
+            1.024f /* '   h' */, 3.750f /* '   i' */, 0.286f /* '   j' */, 0.439f /* '   k' */, 2.913f /* '   l' */, 1.459f /* '   m' */, 3.908f /* '   n' */, 3.230f /* '   o' */,
+            1.444f /* '   p' */, 0.231f /* '   q' */, 4.220f /* '   r' */, 3.924f /* '   s' */, 5.312f /* '   t' */, 2.112f /* '   u' */, 0.737f /* '   v' */, 0.573f /* '   w' */,
+            0.992f /* '   x' */, 1.067f /* '   y' */, 0.181f /* '   z' */, 0.391f /* '   {' */, 0.056f /* '   |' */, 0.391f /* '   }' */, 0.002f /* '   ~' */, 0.000f /* '\x7F' */,
+            0.000f /* '\x80' */, 0.000f /* '\x81' */, 0.000f /* '\x82' */, 0.000f /* '\x83' */, 0.000f /* '\x84' */, 0.000f /* '\x85' */, 0.000f /* '\x86' */, 0.000f /* '\x87' */,
+            0.000f /* '\x88' */, 0.000f /* '\x89' */, 0.000f /* '\x8A' */, 0.000f /* '\x8B' */, 0.000f /* '\x8C' */, 0.000f /* '\x8D' */, 0.000f /* '\x8E' */, 0.000f /* '\x8F' */,
+            0.000f /* '\x90' */, 0.000f /* '\x91' */, 0.000f /* '\x92' */, 0.000f /* '\x93' */, 0.000f /* '\x94' */, 0.000f /* '\x95' */, 0.000f /* '\x96' */, 0.000f /* '\x97' */,
+            0.000f /* '\x98' */, 0.000f /* '\x99' */, 0.000f /* '\x9A' */, 0.000f /* '\x9B' */, 0.000f /* '\x9C' */, 0.000f /* '\x9D' */, 0.000f /* '\x9E' */, 0.000f /* '\x9F' */,
+            0.000f /* '\xA0' */, 0.000f /* '\xA1' */, 0.000f /* '\xA2' */, 0.000f /* '\xA3' */, 0.000f /* '\xA4' */, 0.000f /* '\xA5' */, 0.000f /* '\xA6' */, 0.000f /* '\xA7' */,
+            0.000f /* '\xA8' */, 0.000f /* '\xA9' */, 0.000f /* '\xAA' */, 0.000f /* '\xAB' */, 0.000f /* '\xAC' */, 0.000f /* '\xAD' */, 0.000f /* '\xAE' */, 0.000f /* '\xAF' */,
+            0.000f /* '\xB0' */, 0.000f /* '\xB1' */, 0.000f /* '\xB2' */, 0.000f /* '\xB3' */, 0.000f /* '\xB4' */, 0.000f /* '\xB5' */, 0.000f /* '\xB6' */, 0.000f /* '\xB7' */,
+            0.000f /* '\xB8' */, 0.000f /* '\xB9' */, 0.000f /* '\xBA' */, 0.000f /* '\xBB' */, 0.000f /* '\xBC' */, 0.000f /* '\xBD' */, 0.000f /* '\xBE' */, 0.000f /* '\xBF' */,
+            0.000f /* '\xC0' */, 0.000f /* '\xC1' */, 0.000f /* '\xC2' */, 0.000f /* '\xC3' */, 0.000f /* '\xC4' */, 0.000f /* '\xC5' */, 0.000f /* '\xC6' */, 0.000f /* '\xC7' */,
+            0.000f /* '\xC8' */, 0.000f /* '\xC9' */, 0.000f /* '\xCA' */, 0.000f /* '\xCB' */, 0.000f /* '\xCC' */, 0.000f /* '\xCD' */, 0.000f /* '\xCE' */, 0.000f /* '\xCF' */,
+            0.000f /* '\xD0' */, 0.000f /* '\xD1' */, 0.000f /* '\xD2' */, 0.000f /* '\xD3' */, 0.000f /* '\xD4' */, 0.000f /* '\xD5' */, 0.000f /* '\xD6' */, 0.000f /* '\xD7' */,
+            0.000f /* '\xD8' */, 0.000f /* '\xD9' */, 0.000f /* '\xDA' */, 0.000f /* '\xDB' */, 0.000f /* '\xDC' */, 0.000f /* '\xDD' */, 0.000f /* '\xDE' */, 0.000f /* '\xDF' */,
+            0.000f /* '\xE0' */, 0.000f /* '\xE1' */, 0.000f /* '\xE2' */, 0.000f /* '\xE3' */, 0.000f /* '\xE4' */, 0.000f /* '\xE5' */, 0.000f /* '\xE6' */, 0.000f /* '\xE7' */,
+            0.000f /* '\xE8' */, 0.000f /* '\xE9' */, 0.000f /* '\xEA' */, 0.000f /* '\xEB' */, 0.000f /* '\xEC' */, 0.000f /* '\xED' */, 0.000f /* '\xEE' */, 0.000f /* '\xEF' */,
+            0.000f /* '\xF0' */, 0.000f /* '\xF1' */, 0.000f /* '\xF2' */, 0.000f /* '\xF3' */, 0.000f /* '\xF4' */, 0.000f /* '\xF5' */, 0.000f /* '\xF6' */, 0.000f /* '\xF7' */,
+            0.000f /* '\xF8' */, 0.000f /* '\xF9' */, 0.000f /* '\xFA' */, 0.000f /* '\xFB' */, 0.000f /* '\xFC' */, 0.000f /* '\xFD' */, 0.000f /* '\xFE' */, 0.000f /* '\xFF' */,
+        };
+
+        // The above table was generated programmatically with the following.  This can be augmented to incorporate additional data sources,
+        // though it is only intended to be a rough approximation used when tie-breaking and we'd otherwise be picking randomly, so, it's something.
+        // The frequencies may be wildly inaccurate when used with data sources different in nature than the training set, in which case we shouldn't
+        // be much worse off than just picking randomly:
+        //
+        // using System.Runtime.InteropServices;
+        //
+        // var counts = new Dictionary<byte, long>();
+        //
+        // (string, string)[] rootsAndExtensions = new[]
+        // {
+        //     (@"d:\repos\runtime\src\", "*.cs"),   // C# files in dotnet/runtime
+        //     (@"d:\Top25GutenbergBooks", "*.txt"), // Top 25 most popular books on Project Gutenberg
+        // };
+        //
+        // foreach ((string root, string ext) in rootsAndExtensions)
+        //     foreach (string path in Directory.EnumerateFiles(root, ext, SearchOption.AllDirectories))
+        //         foreach (string line in File.ReadLines(path))
+        //             foreach (char c in line.AsSpan().Trim())
+        //                 CollectionsMarshal.GetValueRefOrAddDefault(counts, (byte)c, out _)++;
+        //
+        // long total = counts.Sum(i => i.Value);
+        //
+        // Console.WriteLine("/// <summary>Percent occurrences in source text (100 * char count / total count).</summary>");
+        // Console.WriteLine("private static readonly float[] s_frequency = new float[]");
+        // Console.WriteLine("{");
+        // int i = 0;
+        // for (int row = 0; row < 32; row++)
+        // {
+        //     Console.Write("   ");
+        //     for (int col = 0; col < 8; col++)
+        //     {
+        //         counts.TryGetValue((byte)i, out long charCount);
+        //         float frequency = (float)(charCount / (double)total) * 100;
+        //         Console.Write($" {frequency:N3}f /* '{(i >= 32 && i < 127 ? $"   {(char)i}" : $"\\x{i:X2}")}' */,");
+        //         i++;
+        //     }
+        //     Console.WriteLine();
+        // }
+        // Console.WriteLine("};");
      }
  
      internal sealed class RegexFC
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs

index 2154947..93420b2 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs
@@ -4,6 +4,7 @@
  using System.Collections;
  using System.Collections.Generic;
  using System.Globalization;
+using System.Runtime.InteropServices;
  
  namespace System.Text.RegularExpressions
  {
@@ -38,10 +39,10 @@ namespace System.Text.RegularExpressions
          /// This is the only function that should be called from outside.
          /// It takes a RegexTree and creates a corresponding RegexCode.
          /// </summary>
-        public static RegexCode Write(RegexTree tree)
+        public static RegexCode Write(RegexTree tree, CultureInfo culture)
          {
              var writer = new RegexWriter(stackalloc int[EmittedSize], stackalloc int[IntStackSize]);
-            RegexCode code = writer.RegexCodeFromRegexTree(tree);
+            RegexCode code = writer.RegexCodeFromRegexTree(tree, culture);
              writer.Dispose();
  
  #if DEBUG
@@ -71,7 +72,7 @@ namespace System.Text.RegularExpressions
          /// It also computes various information about the tree, such as
          /// prefix data to help with optimizations.
          /// </summary>
-        public RegexCode RegexCodeFromRegexTree(RegexTree tree)
+        public RegexCode RegexCodeFromRegexTree(RegexTree tree, CultureInfo culture)
          {
              // Construct sparse capnum mapping if some numbers are unused.
              int capsize;
@@ -131,46 +132,6 @@ namespace System.Text.RegularExpressions
              Emit(RegexCode.Stop);
              int[] emitted = _emitted.AsSpan().ToArray();
  
-            bool rtl = (tree.Options & RegexOptions.RightToLeft) != 0;
-            bool compiled = (tree.Options & RegexOptions.Compiled) != 0;
-
-            // Compute prefixes to help optimize FindFirstChar.
-            RegexBoyerMoore? boyerMoorePrefix = null;
-            (string CharClass, bool CaseInsensitive)[]? leadingCharClasses = null;
-            (string leadingSubstring, bool leadingSubstringCI) = RegexPrefixAnalyzer.ComputeLeadingSubstring(tree);
-            if (leadingSubstring.Length > 1 && // if it's <= 1, perf is better using leadingCharClasses
-                leadingSubstring.Length <= RegexBoyerMoore.MaxLimit)
-            {
-                // Compute a Boyer-Moore prefix if we find a single string of sufficient length that always begins the expression.
-                CultureInfo culture = (tree.Options & RegexOptions.CultureInvariant) != 0 ? CultureInfo.InvariantCulture : CultureInfo.CurrentCulture;
-                boyerMoorePrefix = new RegexBoyerMoore(leadingSubstring, leadingSubstringCI, rtl, culture);
-            }
-
-            // If we didn't find a single leading substring, or if we found one but we won't be able to use it for a Boyer-Moore
-            // search, try to compute the characters set that might begin the string.
-            if (boyerMoorePrefix is null ||
-                (boyerMoorePrefix.NegativeUnicode != null && compiled)) // compilation won't use Boyer-Moore if it has a negative Unicode table
-            {
-                boyerMoorePrefix = null;
-
-                // First we employ a less aggressive but more valuable computation to see if we can find sets for each of the first N
-                // characters in the string.  If that's unsuccessful, we employ a more aggressive check to compute a set for just
-                // the first character in the string.
-
-                if ((tree.Options & RegexOptions.Compiled) != 0) // currently not utilized by the interpreter
-                {
-                    leadingCharClasses = RegexPrefixAnalyzer.ComputeMultipleCharClasses(tree, maxChars: 5); // limit of 5 is based on experimentation and can be tweaked as needed
-                }
-
-                if (leadingCharClasses is null)
-                {
-                    leadingCharClasses = RegexPrefixAnalyzer.ComputeFirstCharClass(tree);
-                }
-            }
-
-            // Compute any anchors starting the expression.
-            int leadingAnchor = RegexPrefixAnalyzer.FindLeadingAnchor(tree);
-
              // Convert the string table into an ordered string array.
              var strings = new string[_stringTable.Count];
              foreach (KeyValuePair<string, int> stringEntry in _stringTable)
@@ -179,7 +140,7 @@ namespace System.Text.RegularExpressions
              }
  
              // Return all that in a RegexCode object.
-            return new RegexCode(tree, emitted, strings, _trackCount, _caps, capsize, boyerMoorePrefix, leadingCharClasses, leadingAnchor, rtl);
+            return new RegexCode(tree, culture, emitted, strings, _trackCount, _caps, capsize);
          }
  
          /// <summary>
@@ -233,16 +194,23 @@ namespace System.Text.RegularExpressions
  
          /// <summary>
          /// Returns an index in the string table for a string;
-        /// uses a hashtable to eliminate duplicates.
+        /// uses a dictionary to eliminate duplicates.
          /// </summary>
          private int StringCode(string str)
          {
+#if REGEXGENERATOR
              if (!_stringTable.TryGetValue(str, out int i))
              {
                  i = _stringTable.Count;
                  _stringTable.Add(str, i);
              }
-
+#else
+            ref int i = ref CollectionsMarshal.GetValueRefOrAddDefault(_stringTable, str, out bool exists);
+            if (!exists)
+            {
+                i = _stringTable.Count - 1;
+            }
+#endif
              return i;
          }
  
@@ -265,7 +233,7 @@ namespace System.Text.RegularExpressions
          private void EmitFragment(int nodetype, RegexNode node, int curIndex)
          {
              int bits = 0;
-            if (node.UseOptionR())
+            if ((node.Options & RegexOptions.RightToLeft) != 0)
              {
                  bits |= RegexCode.Rtl;
              }
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Algebras/BDD.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Algebras/BDD.cs

index a5f79f2..5ffe1da 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Algebras/BDD.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Algebras/BDD.cs
@@ -240,6 +240,7 @@ namespace System.Text.RegularExpressions.Symbolic
          /// Serializer uses more compacted representations when fewer bits are needed, which is reflected in the first
          /// two numbers of the return value. MTBDD terminals are represented by negated numbers as -id.
          /// </summary>
+        [ExcludeFromCodeCoverage]
          public long[] Serialize()
          {
              if (IsEmpty)
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs

index bf75d21..1fec095 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs
@@ -3,7 +3,6 @@
  
  using System.Collections.Generic;
  using System.Diagnostics;
-using System.Runtime.CompilerServices;
  using System.Threading;
  
  namespace System.Text.RegularExpressions.Symbolic
@@ -406,7 +405,6 @@ namespace System.Text.RegularExpressions.Symbolic
              lock (this)
              {
                  state.Id = _stateCache.Count;
-                int k = state.GetHashCode();
                  _stateCache.Add(state);
  
                  Debug.Assert(_statearray is not null);
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexInfo.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexInfo.cs

index 3bd803e..ba522d5 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexInfo.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexInfo.cs
@@ -19,28 +19,6 @@ namespace System.Text.RegularExpressions.Symbolic
  
          private SymbolicRegexInfo(uint i) => _info = i;
  
-        /// <summary>Optimized lookup array for most common combinations.</summary>
-        /// <remarks>Most common cases will be 0 (no anchors and not nullable) and 1 (no anchors and nullable)</remarks>
-        private static readonly SymbolicRegexInfo[] s_infos = CreateSymbolicRegexInfos();
-
-        private static SymbolicRegexInfo[] CreateSymbolicRegexInfos()
-        {
-            var infos = new SymbolicRegexInfo[128];
-            for (uint i = 0; i < infos.Length; i++)
-            {
-                infos[i] = new SymbolicRegexInfo(i);
-            }
-            return infos;
-        }
-
-        private static SymbolicRegexInfo Mk(uint i)
-        {
-            SymbolicRegexInfo[] infos = s_infos;
-            return i < infos.Length ?
-                infos[i] :
-                new SymbolicRegexInfo(i);
-        }
-
          internal static SymbolicRegexInfo Mk(bool isAlwaysNullable = false, bool canBeNullable = false, bool startsWithLineAnchor = false,
              bool startsWithBoundaryAnchor = false, bool containsSomeAnchor = false,
              bool containsLineAnchor = false, bool containsSomeCharacter = false, bool isLazy = true)
@@ -87,7 +65,7 @@ namespace System.Text.RegularExpressions.Symbolic
                  i |= IsLazyMask;
              }
  
-            return Mk(i);
+            return new SymbolicRegexInfo(i);
          }
  
          public bool IsNullable => (_info & IsAlwaysNullableMask) != 0;
@@ -121,7 +99,7 @@ namespace System.Text.RegularExpressions.Symbolic
              }
  
              i = (i & ~IsLazyMask) | isLazy;
-            return Mk(i);
+            return new SymbolicRegexInfo(i);
          }
  
          public static SymbolicRegexInfo And(params SymbolicRegexInfo[] infos)
@@ -140,7 +118,7 @@ namespace System.Text.RegularExpressions.Symbolic
  
              i = (i & ~IsLazyMask) | isLazy;
              i = (i & ~(IsAlwaysNullableMask | CanBeNullableMask)) | isNullable;
-            return Mk(i);
+            return new SymbolicRegexInfo(i);
          }
  
          public static SymbolicRegexInfo Concat(SymbolicRegexInfo left_info, SymbolicRegexInfo right_info)
@@ -164,7 +142,10 @@ namespace System.Text.RegularExpressions.Symbolic
              uint i = body_info._info;
  
              // The loop is nullable if either the body is nullable or if the lower boud is 0
-            i |= lowerBound == 0 ? (IsAlwaysNullableMask | CanBeNullableMask) : 0;
+            if (lowerBound == 0)
+            {
+                i |= IsAlwaysNullableMask | CanBeNullableMask;
+            }
  
              // The loop is lazy iff it is marked lazy
              if (isLazy)
@@ -176,7 +157,7 @@ namespace System.Text.RegularExpressions.Symbolic
                  i &= ~IsLazyMask;
              }
  
-            return Mk(i);
+            return new SymbolicRegexInfo(i);
          }
  
          public static SymbolicRegexInfo Not(SymbolicRegexInfo info) =>
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs

index a8cec12..21c0233 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs
@@ -11,15 +11,8 @@ using System.Threading;
  namespace System.Text.RegularExpressions.Symbolic
  {
      /// <summary>Represents a regex matching engine that performs regex matching using symbolic derivatives.</summary>
-    internal abstract class SymbolicRegexMatcher
+    internal interface ISymbolicRegexMatcher
      {
-        /// <summary>Returns the next match index and length in the input string.</summary>
-        /// <param name="isMatch">Whether to return once we know there's a match without determining where exactly it matched.</param>
-        /// <param name="input">The input string.</param>
-        /// <param name="startat">The start position in the input.</param>
-        /// <param name="endat">The end position in the input.</param>
-        public abstract SymbolicMatch FindMatch(bool isMatch, string input, int startat, int endat);
-
  #if DEBUG
          /// <summary>Unwind the regex of the matcher and save the resulting state graph in DGML</summary>
          /// <param name="bound">roughly the maximum number of states, 0 means no bound</param>
@@ -30,8 +23,7 @@ namespace System.Text.RegularExpressions.Symbolic
          /// <param name="writer">dgml output is written here</param>
          /// <param name="maxLabelLength">maximum length of labels in nodes anything over that length is indicated with .. </param>
          /// <param name="asNFA">if true creates NFA instead of DFA</param>
-        public abstract void SaveDGML(TextWriter writer, int bound, bool hideStateInfo, bool addDotStar, bool inReverse, bool onlyDFAinfo, int maxLabelLength, bool asNFA);
-
+        void SaveDGML(TextWriter writer, int bound, bool hideStateInfo, bool addDotStar, bool inReverse, bool onlyDFAinfo, int maxLabelLength, bool asNFA);
  
          /// <summary>
          /// Generates up to k random strings matched by the regex
@@ -40,13 +32,13 @@ namespace System.Text.RegularExpressions.Symbolic
          /// <param name="randomseed">random seed for the generator, 0 means no random seed</param>
          /// <param name="negative">if true then generate inputs that do not match</param>
          /// <returns></returns>
-        public abstract IEnumerable<string> GenerateRandomMembers(int k, int randomseed, bool negative);
+        IEnumerable<string> GenerateRandomMembers(int k, int randomseed, bool negative);
  #endif
      }
  
      /// <summary>Represents a regex matching engine that performs regex matching using symbolic derivatives.</summary>
      /// <typeparam name="TSetType">Character set type.</typeparam>
-    internal sealed class SymbolicRegexMatcher<TSetType> : SymbolicRegexMatcher where TSetType : notnull
+    internal sealed class SymbolicRegexMatcher<TSetType> : ISymbolicRegexMatcher where TSetType : notnull
      {
          /// <summary>Maximum number of states before switching over to Antimirov mode.</summary>
          /// <remarks>
@@ -127,54 +119,24 @@ namespace System.Text.RegularExpressions.Symbolic
          /// <summary>Timeout in milliseconds. This is only used if <see cref="_checkTimeout"/> is true.</summary>
          private readonly int _timeout;
  
-        /// <summary>Classifier used to say whether a particular character can start a match for <see cref="_pattern"/>.</summary>
-        internal readonly BooleanClassifier _startSetClassifier;
-
-        /// <summary>Predicate over characters that make some progress</summary>
-        private readonly TSetType _startSet;
-
-        /// <summary>Maximum allowed size of <see cref="_startSetArray"/>.</summary>
-        private const int StartSetArrayMaxSize = 5;
-
-        /// <summary>String of at most <see cref="StartSetArrayMaxSize"/> many characters</summary>
-        private readonly char[] _startSetArray;
-
-        /// <summary>Number of elements in <see cref="_startSetClassifier"/></summary>
-        private readonly int _startSetSize;
-
-        /// <summary>If nonempty then <see cref="_pattern"/> has that fixed prefix</summary>
-        private readonly string _prefix;
+        /// <summary>Data and routines for skipping ahead to the next place a match could potentially start.</summary>
+        private readonly RegexFindOptimizations? _findOpts;
  
-        /// <summary>Non-null when <see cref="_prefix"/> is nonempty</summary>
-        private readonly RegexBoyerMoore? _prefixBoyerMoore;
+        /// <summary>The initial states for the original pattern, keyed off of the previous character kind.</summary>
+        /// <remarks>If the pattern doesn't contain any anchors, there will only be a single initial state.</remarks>
+        private readonly DfaMatchingState<TSetType>[] _initialStates;
  
-        /// <summary>If true then the fixed prefix of <see cref="_pattern"/> is idependent of case</summary>
-        private readonly bool _isPrefixCaseInsensitive;
+        /// <summary>The initial states for the dot-star pattern, keyed off of the previous character kind.</summary>
+        /// <remarks>If the pattern doesn't contain any anchors, there will only be a single initial state.</remarks>
+        private readonly DfaMatchingState<TSetType>[] _dotstarredInitialStates;
  
-        /// <summary>Cached skip states from the initial state of <see cref="_dotStarredPattern"/> for the 5 possible previous character kinds.</summary>
-        private readonly DfaMatchingState<TSetType>?[] _prefixSkipStates = new DfaMatchingState<TSetType>[CharKind.CharKindCount];
-        /// <summary>Cached skip states from the initial state of Ar for the 5 possible previous character kinds.</summary>
-        private readonly DfaMatchingState<TSetType>?[] _reversePrefixSkipStates = new DfaMatchingState<TSetType>[CharKind.CharKindCount];
+        /// <summary>The initial states for the reverse pattern, keyed off of the previous character kind.</summary>
+        /// <remarks>If the pattern doesn't contain any anchors, there will only be a single initial state.</remarks>
+        private readonly DfaMatchingState<TSetType>[] _reverseInitialStates;
  
-        private readonly string _reversePrefix;
-
-        private readonly DfaMatchingState<TSetType>[] _initialStates = new DfaMatchingState<TSetType>[CharKind.CharKindCount];
-        private readonly DfaMatchingState<TSetType>[] _dotstarredInitialStates = new DfaMatchingState<TSetType>[CharKind.CharKindCount];
-        private readonly DfaMatchingState<TSetType>[] _reverseInitialStates = new DfaMatchingState<TSetType>[CharKind.CharKindCount];
-
-        private readonly uint[] _asciiCharKinds = new uint[128];
-
-        internal readonly CultureInfo _culture;
-
-        private DfaMatchingState<TSetType> GetSkipState(uint prevCharKind) =>
-            Volatile.Read(ref _prefixSkipStates[prevCharKind]) ??
-            Interlocked.CompareExchange(ref _prefixSkipStates[prevCharKind], DeltaPlus<BrzozowskiTransition>(_prefix, _dotstarredInitialStates[prevCharKind]), null) ??
-            _prefixSkipStates[prevCharKind]!;
-
-        private DfaMatchingState<TSetType> GetReverseSkipState(uint prevCharKind) =>
-            Volatile.Read(ref _reversePrefixSkipStates[prevCharKind]) ??
-            Interlocked.CompareExchange(ref _reversePrefixSkipStates[prevCharKind], DeltaPlus<BrzozowskiTransition>(_reversePrefix, _reverseInitialStates[prevCharKind]), null) ??
-            _reversePrefixSkipStates[prevCharKind]!;
+        /// <summary>Lookup table to quickly determine the character kind for ASCII characters.</summary>
+        /// <remarks>Non-null iff the pattern contains anchors; otherwise, it's unused.</remarks>
+        private readonly uint[]? _asciiCharKinds;
  
          /// <summary>Get the minterm of <paramref name="c"/>.</summary>
          /// <param name="c">character code</param>
@@ -186,16 +148,14 @@ namespace System.Text.RegularExpressions.Symbolic
          }
  
          /// <summary>Constructs matcher for given symbolic regex.</summary>
-        internal SymbolicRegexMatcher(SymbolicRegexNode<TSetType> sr, CharSetSolver css, BDD[] minterms, TimeSpan matchTimeout, CultureInfo culture)
+        internal SymbolicRegexMatcher(SymbolicRegexNode<TSetType> sr, RegexCode code, CharSetSolver css, BDD[] minterms, TimeSpan matchTimeout, CultureInfo culture)
          {
+            Debug.Assert(sr._builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {sr._builder._solver}");
+
              _pattern = sr;
              _builder = sr._builder;
-
              _checkTimeout = Regex.InfiniteMatchTimeout != matchTimeout;
              _timeout = (int)(matchTimeout.TotalMilliseconds + 0.5); // Round up, so it will be at least 1ms
-            _culture = culture;
-
-            Debug.Assert(_builder._solver is BV64Algebra or BVAlgebra or CharSetSolver, $"Unsupported algebra: {_builder._solver}");
              _partitions = _builder._solver switch
              {
                  BV64Algebra bv64 => bv64._classifier,
@@ -203,44 +163,57 @@ namespace System.Text.RegularExpressions.Symbolic
                  _ => new MintermClassifier((CharSetSolver)(object)_builder._solver, minterms),
              };
  
-            _dotStarredPattern = _builder.MkConcat(_builder._anyStar, _pattern);
-            _reversePattern = _pattern.Reverse();
-            ConfigureRegexes();
-
-            _startSet = _pattern.GetStartSet();
-            if (!_builder._solver.IsSatisfiable(_startSet) || _pattern.CanBeNullable)
+            if (code.FindOptimizations.FindMode != FindNextStartingPositionMode.NoSearch &&
+                code.FindOptimizations.LeadingAnchor == 0) // If there are any anchors, we're better off letting the DFA quickly do its job of determining whether there's a match.
              {
-                // If the startset is empty make it full instead by including all characters
-                // this is to ensure that startset is nonempty -- as an invariant assumed by operations using it
-                //
-                // Also, if A can be nullable then effectively disable use of startset by making it true
-                // because it may force search of next character in startset and fail to recognize an empty match
-                // because (by definition) an empty match has no start character.
-                //
-                // For example (this is also a unit test):
-                // for pattern "\B\W*?" or "\B\W*" or "\B\W?" and input "e.g:abc" there is an empty match in position 5
-                // but startset \W will force search beyond position 5 and fails to find that match
-                _startSet = _builder._solver.True;
+                _findOpts = code.FindOptimizations;
              }
  
-            _startSetSize = (int)_builder._solver.ComputeDomainSize(_startSet);
+            // Determine the number of initial states. If there's no anchor, only the default previous
+            // character kind 0 is ever going to be used for all initial states.
+            int statesCount = _pattern._info.ContainsSomeAnchor ? CharKind.CharKindCount : 1;
  
-            BDD startbdd = _builder._solver.ConvertToCharSet(css, _startSet);
-            _startSetClassifier = new BooleanClassifier(css, startbdd);
-
-            //store the start characters in the A_startset_array if there are not too many characters
-            _startSetArray = _startSetSize <= StartSetArrayMaxSize ?
-                new List<char>(css.GenerateAllCharacters(startbdd)).ToArray() :
-                Array.Empty<char>();
+            // Create the initial states for the original pattern.
+            var initialStates = new DfaMatchingState<TSetType>[statesCount];
+            for (uint i = 0; i < initialStates.Length; i++)
+            {
+                initialStates[i] = _builder.MkState(_pattern, i);
+            }
+            _initialStates = initialStates;
  
-            _prefix = _pattern.GetFixedPrefix(css, culture.Name, out _isPrefixCaseInsensitive);
-            _reversePrefix = _reversePattern.GetFixedPrefix(css, culture.Name, out _);
+            // Create the dot-star pattern (a concatenation of any* with the original pattern)
+            // and all of its initial states.
+            _dotStarredPattern = _builder.MkConcat(_builder._anyStar, _pattern);
+            var dotstarredInitialStates = new DfaMatchingState<TSetType>[statesCount];
+            for (uint i = 0; i < dotstarredInitialStates.Length; i++)
+            {
+                // Mark the state as initial so that re-entering the initial state can be detected.
+                // Note that the behavior from the state may ultimately depend on the previous
+                // input char, e.g. possibly causing nullability of \b or \B or of a start-of-line anchor;
+                // in that sense there can be several "versions" (at most statesCount) of the initial state.
+                DfaMatchingState<TSetType> state = _builder.MkState(_dotStarredPattern, i);
+                state.IsInitialState = true;
+                dotstarredInitialStates[i] = state;
+            }
+            _dotstarredInitialStates = dotstarredInitialStates;
  
-            _prefixBoyerMoore = InitializePrefixBoyerMoore();
+            // Create the reverse pattern (the original pattern in reverse order) and all of its
+            // initial states.
+            _reversePattern = _pattern.Reverse();
+            var reverseInitialStates = new DfaMatchingState<TSetType>[statesCount];
+            for (uint i = 0; i < reverseInitialStates.Length; i++)
+            {
+                reverseInitialStates[i] = _builder.MkState(_reversePattern, i);
+            }
+            _reverseInitialStates = reverseInitialStates;
  
+            // Initialize our fast-lookup for determining the character kind of ASCII characters.
+            // This is only required when the pattern contains anchors, as otherwise there's only
+            // ever a single kind used.
              if (_pattern._info.ContainsSomeAnchor)
              {
-                for (int i = 0; i < 128; i++)
+                var asciiCharKinds = new uint[128];
+                for (int i = 0; i < asciiCharKinds.Length; i++)
                  {
                      TSetType predicate2;
                      uint charKind;
@@ -256,68 +229,12 @@ namespace System.Text.RegularExpressions.Symbolic
                          charKind = CharKind.WordLetter;
                      }
  
-                    _asciiCharKinds[i] = _builder._solver.And(GetMinterm(i), predicate2).Equals(_builder._solver.False) ? 0 : charKind;
+                    asciiCharKinds[i] = _builder._solver.And(GetMinterm(i), predicate2).Equals(_builder._solver.False) ? 0 : charKind;
                  }
+                _asciiCharKinds = asciiCharKinds;
              }
          }
  
-        private RegexBoyerMoore? InitializePrefixBoyerMoore()
-        {
-            if (_prefix != string.Empty && _prefix.Length <= RegexBoyerMoore.MaxLimit && _prefix.Length > 1)
-            {
-                // RegexBoyerMoore expects the prefix to be lower case when case is ignored.
-                // Use the culture of the matcher.
-                string prefix = _isPrefixCaseInsensitive ? _prefix.ToLower(_culture) : _prefix;
-                return new RegexBoyerMoore(prefix, _isPrefixCaseInsensitive, rightToLeft: false, _culture);
-            }
-
-            return null;
-        }
-
-        private void ConfigureRegexes()
-        {
-            void Configure(uint i)
-            {
-                _initialStates[i] = _builder.MkState(_pattern, i);
-
-                // Used to detect if initial state was reentered, then startset can be triggered
-                // but observe that the behavior from the state may ultimately depend on the previous
-                // input char e.g. possibly causing nullability of \b or \B or of a start-of-line anchor,
-                // in that sense there can be several "versions" (not more than StateCount) of the initial state.
-                _dotstarredInitialStates[i] = _builder.MkState(_dotStarredPattern, i);
-                _dotstarredInitialStates[i].IsInitialState = true;
-
-                _reverseInitialStates[i] = _builder.MkState(_reversePattern, i);
-            }
-
-            // Create initial states for A, A1 and Ar.
-            if (!_pattern._info.ContainsSomeAnchor)
-            {
-                // Only the default previous character kind 0 is ever going to be used for all initial states.
-                // _A1q0[0] is recognized as special initial state.
-                // This information is used for search optimization based on start set and prefix of A.
-                Configure(0);
-            }
-            else
-            {
-                for (uint i = 0; i < CharKind.CharKindCount; i++)
-                {
-                    Configure(i);
-                }
-            }
-        }
-
-        /// <summary>Return the state after the given <paramref name="pattern"/> string from the given state <paramref name="state"/>.</summary>
-        private DfaMatchingState<TSetType> DeltaPlus<TTransition>(string pattern, DfaMatchingState<TSetType> state) where TTransition : struct, ITransition
-        {
-            for (int i = 0; i < pattern.Length; i++)
-            {
-                state = Delta<TTransition>(pattern, i, state);
-            }
-
-            return state;
-        }
-
          /// <summary>Interface for transitions used by the <see cref="Delta"/> method.</summary>
          private interface ITransition
          {
@@ -341,7 +258,7 @@ namespace System.Text.RegularExpressions.Symbolic
                  minterms.Length : // mintermId = minterms.Length represents \Z (last \n)
                  _partitions.GetMintermID(c);
  
-            TSetType minterm = (uint)mintermId < minterms.Length ?
+            TSetType minterm = (uint)mintermId < (uint)minterms.Length ?
                  minterms[mintermId] :
                  _builder._solver.False; // minterm=False represents \Z
  
@@ -428,30 +345,21 @@ namespace System.Text.RegularExpressions.Symbolic
  
          private void DoCheckTimeout(int timeoutOccursAt)
          {
-            // This code is identical to RegexRunner.DoCheckTimeout(),
-            // with the exception of check skipping. RegexRunner calls
-            // DoCheckTimeout potentially on every iteration of a loop,
-            // whereas this calls it only once per transition.
-
+            // This logic is identical to RegexRunner.DoCheckTimeout, with the exception of check skipping. RegexRunner calls
+            // DoCheckTimeout potentially on every iteration of a loop, whereas this calls it only once per transition.
              int currentMillis = Environment.TickCount;
-
-            if (currentMillis < timeoutOccursAt)
-                return;
-
-            if (0 > timeoutOccursAt && 0 < currentMillis)
-                return;
-
-            //regex pattern is in general not available in srm and
-            //the input is not available here but could be passed as argument to DoCheckTimeout
-            throw new RegexMatchTimeoutException(string.Empty, string.Empty, TimeSpan.FromMilliseconds(_timeout));
+            if (currentMillis >= timeoutOccursAt && (0 <= timeoutOccursAt || 0 >= currentMillis))
+            {
+                throw new RegexMatchTimeoutException(string.Empty, string.Empty, TimeSpan.FromMilliseconds(_timeout));
+            }
          }
  
          /// <summary>Find a match.</summary>
          /// <param name="isMatch">Whether to return once we know there's a match without determining where exactly it matched.</param>
-        /// <param name="input">input string</param>
-        /// <param name="startat">the position to start search in the input string</param>
-        /// <param name="k">the next position after the end position in the input</param>
-        public override SymbolicMatch FindMatch(bool isMatch, string input, int startat, int k)
+        /// <param name="input">The input string.</param>
+        /// <param name="startat">The position to start search in the input string.</param>
+        /// <param name="end">The non-inclusive position to end the search in the input string.</param>
+        public SymbolicMatch FindMatch(bool isMatch, string input, int startat, int end)
          {
              int timeoutOccursAt = 0;
              if (_checkTimeout)
@@ -460,18 +368,16 @@ namespace System.Text.RegularExpressions.Symbolic
                  timeoutOccursAt = Environment.TickCount + (int)(_timeout + 0.5);
              }
  
-            if (startat == k)
+            if (startat == end)
              {
-                //covers the special case when the remaining input suffix
-                //where a match is sought is empty (for example when the input is empty)
-                //in this case the only possible match is an empty match
+                // Covers the special case of an empty match at the end of the input.
                  uint prevKind = GetCharKind(input, startat - 1);
                  uint nextKind = GetCharKind(input, startat);
  
                  bool emptyMatchExists = _pattern.IsNullableFor(CharKind.Context(prevKind, nextKind));
-                return
-                    !emptyMatchExists ? SymbolicMatch.NoMatch :
-                    new SymbolicMatch(startat, 0);
+                return emptyMatchExists ?
+                    new SymbolicMatch(startat, 0) :
+                    SymbolicMatch.NoMatch;
              }
  
              // Find the first accepting state. Initial start position in the input is i == 0.
@@ -479,7 +385,7 @@ namespace System.Text.RegularExpressions.Symbolic
  
              // May return -1 as a legitimate value when the initial state is nullable and startat == 0.
              // Returns NoMatchExists when there is no match.
-            i = FindFinalStatePosition(input, k, i, timeoutOccursAt, out int i_q0_A1, out int watchdog);
+            i = FindFinalStatePosition(input, end, i, timeoutOccursAt, out int i_q0_A1, out int watchdog);
  
              if (i == NoMatchExists)
              {
@@ -502,24 +408,17 @@ namespace System.Text.RegularExpressions.Symbolic
              }
              else
              {
-                if (i < startat)
-                {
-                    Debug.Assert(i == startat - 1);
-                    i_start = startat;
-                }
-                else
-                {
-                    // Walk in reverse to locate the start position of the match
-                    i_start = FindStartPosition(input, i, i_q0_A1);
-                }
-
-                i_end = FindEndPosition(input, k, i_start);
+                Debug.Assert(i >= startat - 1);
+                i_start = i < startat ?
+                    startat :
+                    FindStartPosition(input, i, i_q0_A1); // Walk in reverse to locate the start position of the match
+                i_end = FindEndPosition(input, end, i_start);
              }
  
              return new SymbolicMatch(i_start, i_end + 1 - i_start);
          }
  
-        /// <summary>Find match end position using A, end position is known to exist.</summary>
+        /// <summary>Find match end position using the original pattern; the end position is known to exist.</summary>
          /// <param name="input">input array</param>
          /// <param name="i">inclusive start position</param>
          /// <param name="exclusiveEnd">exclusive end position</param>
@@ -561,7 +460,7 @@ namespace System.Text.RegularExpressions.Symbolic
              return i_end;
          }
  
-        // Inner loop for FindEndPosition parameterized by an ITransition type.
+        /// <summary>Inner loop for FindEndPosition parameterized by an ITransition type.</summary>
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          private bool FindEndPositionDeltas<TTransition>(string input, ref int i, int j, ref DfaMatchingState<TSetType> q, ref int i_end) where TTransition : struct, ITransition
          {
@@ -582,7 +481,7 @@ namespace System.Text.RegularExpressions.Symbolic
                  }
                  else if (q.IsDeadend)
                  {
-                    // Nonaccepting sink state (deadend) has been reached in A.
+                    // Non-accepting sink state (deadend) has been reached in the original pattern.
                      // So the match ended when the last i_end was updated.
                      return true;
                  }
@@ -594,26 +493,18 @@ namespace System.Text.RegularExpressions.Symbolic
              return false;
          }
  
-        /// <summary>Walk back in reverse using Ar to find the start position of match, start position is known to exist.</summary>
+        /// <summary>Walk backwards using the reverse pattern to find the start position of the match; the start position is known to exist.</summary>
          /// <param name="input">the input string</param>
          /// <param name="i">position to start walking back from, i points at the last character of the match</param>
          /// <param name="match_start_boundary">do not pass this boundary when walking back</param>
          /// <returns></returns>
          private int FindStartPosition(string input, int i, int match_start_boundary)
          {
-            // Fetch the correct start state for Ar.
+            // Fetch the correct start state for the reverse pattern.
              // This depends on previous character --- which, because going backwards, is character number i+1.
              uint prevKind = GetCharKind(input, i + 1);
              DfaMatchingState<TSetType> q = _reverseInitialStates[prevKind];
  
-            // Ar may have a fixed prefix sequence
-            if (_reversePrefix.Length > 0)
-            {
-                //skip past the prefix portion of Ar
-                q = GetReverseSkipState(prevKind);
-                i -= _reversePrefix.Length;
-            }
-
              if (i == -1)
              {
                  Debug.Assert(q.IsNullable(GetCharKind(input, i)), "we reached the beginning of the input, thus the state q must be accepting");
@@ -623,12 +514,12 @@ namespace System.Text.RegularExpressions.Symbolic
              int last_start = -1;
              if (q.IsNullable(GetCharKind(input, i)))
              {
-                // The whole prefix of Ar was in reverse a prefix of A,
-                // for example when the pattern of A is concrete word such as "abc"
+                // The whole prefix of the reverse pattern was, in reverse, a prefix of the original pattern,
+                // for example when the original pattern is a concrete word such as "abc".
                  last_start = i + 1;
              }
  
-            //walk back to the accepting state of Ar
+            // Walk back to the accepting state of the reverse pattern
              while (i >= match_start_boundary)
              {
                  int j = Math.Max(match_start_boundary, i - AntimirovThresholdLeeway);
@@ -663,7 +554,7 @@ namespace System.Text.RegularExpressions.Symbolic
                  if (q.IsNullable(GetCharKind(input, i - 1)))
                  {
                      // Earliest start point so far. This must happen at some point
-                    // or else A1 would not have reached a final state after match_start_boundary.
+                    // or else the dot-star pattern would not have reached a final state after match_start_boundary.
                      last_start = i;
                  }
  
@@ -683,7 +574,7 @@ namespace System.Text.RegularExpressions.Symbolic
          /// <param name="watchdog">length of match when positive</param>
          private int FindFinalStatePosition(string input, int k, int i, int timeoutOccursAt, out int initialStateIndex, out int watchdog)
          {
-            // Get the correct start state of A1, which in general depends on the previous character kind in the input.
+            // Get the correct start state of the dot-star pattern, which in general depends on the previous character kind in the input.
              uint prevCharKindId = GetCharKind(input, i - 1);
              DfaMatchingState<TSetType> q = _dotstarredInitialStates[prevCharKindId];
              initialStateIndex = i;
@@ -712,53 +603,13 @@ namespace System.Text.RegularExpressions.Symbolic
              {
                  if (q.IsInitialState)
                  {
-                    // i_q0_A1 is the most recent position in the input when A1 is in the initial state
+                    // initialStateIndex (i_q0_A1 in the caller) is the most recent position in the input when the dot-star pattern is in the initial state
                      initialStateIndex = i;
  
-                    if (_prefixBoyerMoore != null)
+                    if (_findOpts is RegexFindOptimizations findOpts)
                      {
-                        // Stay in the initial state if the prefix does not match.
-                        // Thus advance the current position to the first position where the prefix does match.
-                        i = _prefixBoyerMoore.Scan(input, i, 0, input.Length);
-
-                        if (i == -1) // Scan returns -1 when a matching position does not exist
-                        {
-                            watchdog = -1;
-                            return -2;
-                        }
-
-                        // Compute the end state for the A prefix.
-                        // Skip directly to the resulting state
-                        //  --- i.e. do the loop ---
-                        // for (int j = 0; j < prefix.Length; j++)
-                        //     q = Delta(prefix[j], q, out regex);
-                        //  ---
-                        q = GetSkipState(q.PrevCharKind);
-
-                        // skip the prefix
-                        i += _prefix.Length;
-
-                        // here i points at the next character (the character immediately following the prefix)
-                        if (q.IsNullable(GetCharKind(input, i)))
-                        {
-                            // Return the last position of the match
-                            watchdog = q.WatchDog;
-                            return i - 1;
-                        }
-
-                        if (i == k)
-                        {
-                            // no match was found
-                            return -2;
-                        }
-                    }
-                    else
-                    {
-                        // we are still in the initial state, when the prefix is empty
-                        // find the first position i that matches with some character in the start set
-                        i = IndexOfStartSet(input, i);
-
-                        if (i == -1)
+                        // Find the first position i that matches with some likely character.
+                        if (!findOpts.TryFindNextStartingPosition(input, ref i, 0, 0, k))
                          {
                              // no match was found
                              return NoMatchExists;
@@ -833,68 +684,45 @@ namespace System.Text.RegularExpressions.Symbolic
          [MethodImpl(MethodImplOptions.AggressiveInlining)]
          private uint GetCharKind(string input, int i)
          {
-            if (!_pattern._info.ContainsSomeAnchor)
-            {
-                // The previous character kind is irrelevant when anchors are not used.
-                return CharKind.General;
-            }
-
-            if (i == -1 || i == input.Length)
-            {
-                return CharKind.StartStop;
-            }
+            return !_pattern._info.ContainsSomeAnchor ?
+                CharKind.General : // The previous character kind is irrelevant when anchors are not used.
+                GetCharKindWithAnchor(input, i);
  
-            char nextChar = input[i];
-            if (nextChar == '\n')
+            uint GetCharKindWithAnchor(string input, int i)
              {
-                return
-                    _builder._newLinePredicate.Equals(_builder._solver.False) ? 0 : // ignore \n
-                    i == 0 || i == input.Length - 1 ? CharKind.NewLineS : // very first or very last \n. Detection of very first \n is needed for rev(\Z).
-                    CharKind.Newline;
-            }
-
-            uint[] asciiCharKinds = _asciiCharKinds;
-            return
-                nextChar < asciiCharKinds.Length ? asciiCharKinds[nextChar] :
-                _builder._solver.And(GetMinterm(nextChar), _builder._wordLetterPredicateForAnchors).Equals(_builder._solver.False) ? 0 : //apply the wordletter predicate to compute the kind of the next character
-                CharKind.WordLetter;
-        }
+                Debug.Assert(_asciiCharKinds is not null);
  
-        /// <summary>
-        /// Find first occurrence of startset element in input starting from index i.
-        /// Startset here is assumed to consist of a few characters.
-        /// </summary>
-        /// <param name="input">input string to search in</param>
-        /// <param name="i">the start index in input to search from</param>
-        /// <returns></returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        private int IndexOfStartSet(string input, int i)
-        {
-            if (_startSetSize <= StartSetArrayMaxSize)
-            {
-                return input.IndexOfAny(_startSetArray, i);
-            }
+                if ((uint)i >= (uint)input.Length)
+                {
+                    return CharKind.StartStop;
+                }
  
-            for (int j = i; j < input.Length; j++)
-            {
-                if (_startSetClassifier.IsTrue(input[j]))
+                char nextChar = input[i];
+                if (nextChar == '\n')
                  {
-                    return j;
+                    return
+                        _builder._newLinePredicate.Equals(_builder._solver.False) ? 0 : // ignore \n
+                        i == 0 || i == input.Length - 1 ? CharKind.NewLineS : // very first or very last \n. Detection of very first \n is needed for rev(\Z).
+                        CharKind.Newline;
                  }
-            }
  
-            return -1;
+                uint[] asciiCharKinds = _asciiCharKinds;
+                return
+                    nextChar < (uint)asciiCharKinds.Length ? asciiCharKinds[nextChar] :
+                    _builder._solver.And(GetMinterm(nextChar), _builder._wordLetterPredicateForAnchors).Equals(_builder._solver.False) ? 0 : //apply the wordletter predicate to compute the kind of the next character
+                    CharKind.WordLetter;
+            }
          }
  
  #if DEBUG
-        public override void SaveDGML(TextWriter writer, int bound, bool hideStateInfo, bool addDotStar, bool inReverse, bool onlyDFAinfo, int maxLabelLength, bool asNFA)
+        public void SaveDGML(TextWriter writer, int bound, bool hideStateInfo, bool addDotStar, bool inReverse, bool onlyDFAinfo, int maxLabelLength, bool asNFA)
          {
              var graph = new DGML.RegexAutomaton<TSetType>(this, bound, addDotStar, inReverse, asNFA);
              var dgml = new DGML.DgmlWriter(writer, hideStateInfo, maxLabelLength, onlyDFAinfo);
              dgml.Write(graph);
          }
  
-        public override IEnumerable<string> GenerateRandomMembers(int k, int randomseed, bool negative) =>
+        public IEnumerable<string> GenerateRandomMembers(int k, int randomseed, bool negative) =>
              new SymbolicRegexSampler<TSetType>(_pattern, randomseed, negative).GenerateRandomMembers(k);
  #endif
      }
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs

index 5ecadca..f7992bf 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs
@@ -1424,152 +1424,6 @@ namespace System.Text.RegularExpressions.Symbolic
              };
          }
  
-        /// <summary>
-        /// Gets the string prefix that the regex must match or the empty string if such a prefix does not exist.
-        /// Sets ignoreCase = true when the prefix works under case-insensitivity.
-        /// For example if the input prefix is "---" it sets ignoreCase=false,
-        /// if the prefix is "---[aA][bB]" it returns "---AB" and sets ignoreCase=true
-        /// </summary>
-        internal string GetFixedPrefix(CharSetSolver css, string culture, out bool ignoreCase)
-        {
-            ignoreCase = false;
-            StringBuilder prefix = new();
-            bool doneWithoutIgnoreCase = false;
-            bool doneWithIgnoreCase = false;
-            foreach (S x in GetPrefixSequence())
-            {
-                BDD bdd = _builder._solver.ConvertToCharSet(css, x);
-                char character = (char)bdd.GetMin();
-                // Check if the prefix extends without ignore case: the set is a single character
-                if (!doneWithoutIgnoreCase && !css.IsSingleton(bdd))
-                {
-                    doneWithoutIgnoreCase = true;
-                }
-                if (!doneWithIgnoreCase)
-                {
-                    // Check if the prefix extends with ignore case: ignoring case doesn't change the set
-                    if (css.ApplyIgnoreCase(css.CharConstraint(character), culture).Equals(bdd))
-                    {
-                        // Turn ignoreCase on when the prefix extends only under ignore case
-                        if (doneWithoutIgnoreCase)
-                        {
-                            ignoreCase = true;
-                        }
-                    }
-                    else
-                    {
-                        doneWithIgnoreCase = true;
-                    }
-                }
-                // Append the character when the prefix extends in either of the ways
-                if (!doneWithoutIgnoreCase || !doneWithIgnoreCase)
-                    prefix.Append(character);
-                else
-                    break;
-            }
-            return prefix.ToString();
-        }
-
-        private IEnumerable<S> GetPrefixSequence()
-        {
-            List<SymbolicRegexNode<S>> paths = new();
-            HashSet<SymbolicRegexNode<S>> nextPaths = new();
-
-            paths.Add(this);
-            while (true)
-            {
-                bool done = false;
-                Debug.Assert(paths.Count > 0, "The generator should have ended when any path fails to extend.");
-                // Generate the next set from one path
-                S next;
-                if (!GetNextPrefixSet(ref paths, ref nextPaths, ref done, out next))
-                {
-                    // A path didn't have a next set as supported by this algorithm
-                    yield break;
-                }
-                if (!_builder._solver.IsSatisfiable(next))
-                {
-                    yield break;
-                }
-                while (paths.Count > 0)
-                {
-                    // For all other paths check that they produce the same set
-                    S newSet;
-                    if (!GetNextPrefixSet(ref paths, ref nextPaths, ref done, out newSet) || !newSet.Equals(next))
-                    {
-                        // Either a path didn't have a next set as supported by this algorithm, or the next set was not equal
-                        yield break;
-                    }
-                }
-                // At this point all paths generated equal next sets
-                yield return next;
-                if (done)
-                {
-                    // Some path had no continuation, end the prefix
-                    yield break;
-                }
-                else
-                {
-                    Debug.Assert(paths.Count == 0, "Not all paths were considered for next set.");
-                    paths.AddRange(nextPaths);
-                    nextPaths.Clear();
-                }
-            }
-        }
-
-        private bool GetNextPrefixSet(ref List<SymbolicRegexNode<S>> paths, ref HashSet<SymbolicRegexNode<S>> nextPaths, ref bool done, out S set)
-        {
-            while (paths.Count > 0)
-            {
-                SymbolicRegexNode<S> node = paths[paths.Count - 1];
-                paths.RemoveAt(paths.Count - 1);
-                switch (node._kind)
-                {
-                    case SymbolicRegexKind.Singleton:
-                        Debug.Assert(node._set is not null);
-                        set = node._set;
-                        done = true; // No continuation, done after the next set
-                        return true;
-                    case SymbolicRegexKind.Concat:
-                        Debug.Assert(node._left is not null && node._right is not null);
-                        if (!node._left.CanBeNullable)
-                        {
-                            if (node._left.GetFixedLength() == 1)
-                            {
-                                set = node._left.GetStartSet();
-                                // Left side had just one character, can use just right side as path
-                                nextPaths.Add(node._right);
-                                return true;
-                            }
-                            else
-                            {
-                                // Left side may need multiple steps to get through. However, it is safe
-                                // (though not complete) to forget the right side and just expand the path
-                                // for the left side.
-                                paths.Add(node._left);
-                                break;
-                            }
-                        }
-                        else
-                        {
-                            // Left side may be nullable, can't extend the prefix
-                            set = _builder._solver.False; // Not going to be used
-                            return false;
-                        }
-                    case SymbolicRegexKind.Or:
-                    case SymbolicRegexKind.And:
-                        Debug.Assert(node._alts is not null);
-                        // Handle alternatives as separate paths
-                        paths.AddRange(node._alts);
-                        break;
-                    default:
-                        set = _builder._solver.False; // Not going to be used
-                        return false; // Cut prefix immediately for unhandled node
-                }
-            }
-            set = _builder._solver.False; // Not going to be used
-            return false;
-        }
  
          /// <summary>Get the predicate that covers all elements that make some progress.</summary>
          internal S GetStartSet() => _startSet;
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs

index 83906ff..b0d9de7 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs
@@ -12,10 +12,8 @@ namespace System.Text.RegularExpressions.Symbolic
          /// <summary>The unicode component, including the BDD algebra.</summary>
          internal static readonly UnicodeCategoryTheory<BDD> s_unicode = new UnicodeCategoryTheory<BDD>(new CharSetSolver());
  
-        /// <summary>The matching engine.</summary>
-        internal readonly SymbolicRegexMatcher _matcher;
-        /// <summary>Minimum length computed</summary>
-        private readonly int _minRequiredLength;
+        /// <summary>The matching engine: a SymbolicRegexMatcher of ulong (64 or fewer minterms) or of BV (more than 64 minterms).</summary>
+        internal readonly ISymbolicRegexMatcher _matcher;
  
          /// <summary>Initializes the factory.</summary>
          public SymbolicRegexRunnerFactory(RegexCode code, RegexOptions options, TimeSpan matchTimeout, CultureInfo culture)
@@ -32,23 +30,22 @@ namespace System.Text.RegularExpressions.Symbolic
              var solver = (CharSetSolver)s_unicode._solver;
              SymbolicRegexNode<BDD> root = converter.Convert(code.Tree.Root, topLevel: true);
  
-            _minRequiredLength = code.Tree.MinRequiredLength;
-
              BDD[] minterms = root.ComputeMinterms();
              if (minterms.Length > 64)
              {
                  // Use BV to represent a predicate
                  var algBV = new BVAlgebra(solver, minterms);
-                var builderBV = new SymbolicRegexBuilder<BV>(algBV);
-
-                // The default constructor sets the following predicates to False; this update happens after the fact.
-                // It depends on whether anchors where used in the regex whether the predicates are actually different from False.
-                builderBV._wordLetterPredicateForAnchors = algBV.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors);
-                builderBV._newLinePredicate = algBV.ConvertFromCharSet(solver, converter._builder._newLinePredicate);
+                var builderBV = new SymbolicRegexBuilder<BV>(algBV)
+                {
+                    // The default constructor sets the following predicates to False; this update happens after the fact.
+                    // Whether the predicates actually differ from False depends on whether anchors were used in the regex.
+                    _wordLetterPredicateForAnchors = algBV.ConvertFromCharSet(solver, converter._builder._wordLetterPredicateForAnchors),
+                    _newLinePredicate = algBV.ConvertFromCharSet(solver, converter._builder._newLinePredicate)
+                };
  
-                //Convert the BDD based AST to BV based AST
+                // Convert the BDD-based AST to BV-based AST
                  SymbolicRegexNode<BV> rootBV = converter._builder.Transform(root, builderBV, bdd => builderBV._solver.ConvertFromCharSet(solver, bdd));
-                _matcher = new SymbolicRegexMatcher<BV>(rootBV, solver, minterms, matchTimeout, culture);
+                _matcher = new SymbolicRegexMatcher<BV>(rootBV, code, solver, minterms, matchTimeout, culture);
              }
              else
              {
@@ -64,37 +61,31 @@ namespace System.Text.RegularExpressions.Symbolic
  
                  // Convert the BDD-based AST to ulong-based AST
                  SymbolicRegexNode<ulong> root64 = converter._builder.Transform(root, builder64, bdd => builder64._solver.ConvertFromCharSet(solver, bdd));
-                _matcher = new SymbolicRegexMatcher<ulong>(root64, solver, minterms, matchTimeout, culture);
+                _matcher = new SymbolicRegexMatcher<ulong>(root64, code, solver, minterms, matchTimeout, culture);
              }
          }
  
          /// <summary>Creates a <see cref="RegexRunner"/> object.</summary>
-        protected internal override RegexRunner CreateInstance() => new Runner(_matcher, _minRequiredLength);
+        protected internal override RegexRunner CreateInstance() => _matcher is SymbolicRegexMatcher<ulong> srmUInt64 ?
+            new Runner<ulong>(srmUInt64) :
+            new Runner<BV>((SymbolicRegexMatcher<BV>)_matcher);
  
          /// <summary>Runner type produced by this factory.</summary>
          /// <remarks>
-        /// The wrapped <see cref="SymbolicRegexMatcher"/> is itself thread-safe and can be shared across
+        /// The wrapped <see cref="ISymbolicRegexMatcher"/> is itself thread-safe and can be shared across
          /// all runner instances, but the runner itself has state (e.g. for captures, positions, etc.)
          /// and must not be shared between concurrent uses.
          /// </remarks>
-        private sealed class Runner : RegexRunner
+        private sealed class Runner<TSetType> : RegexRunner where TSetType : notnull
          {
              /// <summary>The matching engine.</summary>
-            private readonly SymbolicRegexMatcher _matcher;
-            /// <summary>Minimum length computed.</summary>
-            private readonly int _minRequiredLength;
+            private readonly SymbolicRegexMatcher<TSetType> _matcher;
  
-            internal Runner(SymbolicRegexMatcher matcher, int minRequiredLength)
-            {
-                _matcher = matcher;
-                _minRequiredLength = minRequiredLength;
-            }
+            internal Runner(SymbolicRegexMatcher<TSetType> matcher) => _matcher = matcher;
  
              protected override void InitTrackCount() { } // nop, no backtracking
  
-            protected override bool FindFirstChar() =>
-                // The real logic is all in Go.  Here we simply validate if there's enough text remaining to possibly match.
-                runtextpos <= runtextend - _minRequiredLength;
+            protected override bool FindFirstChar() => true; // The logic is all in Go.
  
              protected override void Go()
              {
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexSampler.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexSampler.cs

index 3f965ce..8269e12 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexSampler.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexSampler.cs
@@ -178,13 +178,11 @@ namespace System.Text.RegularExpressions.Symbolic
          }
  
          private BDD ToBDD(S pred) => _solver.ConvertToCharSet(SymbolicRegexRunnerFactory.s_unicode._solver, pred);
+
          private T Choose<T>(IList<T> elems) => elems[_random.Next(elems.Count)];
-        private T Choose<T>(IEnumerable<T> elems)
-        {
-            List<T> list = new List<T>(elems);
-            return list[_random.Next(list.Count)];
-        }
+
          private char ChooseChar((uint, uint) pair) => (char)_random.Next((int)pair.Item1, (int)pair.Item2 + 1);
+
          private char ChooseChar(BDD bdd)
          {
              Debug.Assert(!bdd.IsEmpty);
@@ -192,8 +190,10 @@ namespace System.Text.RegularExpressions.Symbolic
              BDD bdd1 = SymbolicRegexRunnerFactory.s_unicode._solver.And(bdd, _ascii);
              return ChooseChar(Choose(((CharSetSolver)SymbolicRegexRunnerFactory.s_unicode._solver).ToRanges(bdd1.IsEmpty ? bdd : bdd1)));
          }
+
          private bool ChooseRandomlyTrueOrFalse() => _random.Next(100) < 50;
+
          /// <summary>Returns true if some state is unconditionally final</summary>
          private bool IsFinal(IEnumerable<SymbolicRegexNode<S>> states)
          {
              foreach (SymbolicRegexNode<S> state in states)
@@ -205,6 +205,7 @@ namespace System.Text.RegularExpressions.Symbolic
              }
              return false;
          }
+
          /// <summary>Returns true if some state can be final</summary>
          private bool CanBeFinal(IEnumerable<SymbolicRegexNode<S>> states)
          {
@@ -217,6 +218,7 @@ namespace System.Text.RegularExpressions.Symbolic
              }
              return false;
          }
+
          /// <summary>Returns true if some state is final in the given context</summary>
          private bool IsFinal(IEnumerable<SymbolicRegexNode<S>> states, uint context)
          {
@@ -229,7 +231,9 @@ namespace System.Text.RegularExpressions.Symbolic
              }
              return false;
          }
+
          private bool IsWordchar(S pred) => _solver.IsSatisfiable(_solver.And(pred, _root._builder._wordLetterPredicateForAnchors));
+
          private bool IsNewline(S pred) => _solver.IsSatisfiable(_solver.And(pred, _root._builder._newLinePredicate));
      }
  }
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/GeneratorHelper.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/GeneratorHelper.cs

index 73b7249..5900e44 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/GeneratorHelper.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/GeneratorHelper.cs
@@ -1,11 +1,13 @@
  // Licensed to the .NET Foundation under one or more agreements.
  // The .NET Foundation licenses this file to you under the MIT license.
  
+using System.Diagnostics.CodeAnalysis;
  using System.IO;
  
  namespace System.Text.RegularExpressions.Symbolic.Unicode
  {
  #if DEBUG
+    [ExcludeFromCodeCoverage]
      internal static class GeneratorHelper
      {
          public static void WriteInt64ArrayInitSyntax(StreamWriter sw, long[] values)
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/IgnoreCaseRelationGenerator.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/IgnoreCaseRelationGenerator.cs

index 00098b7..24d4ae4 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/IgnoreCaseRelationGenerator.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/IgnoreCaseRelationGenerator.cs
@@ -3,12 +3,14 @@
  
  using System.Collections.Generic;
  using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
  using System.Globalization;
  using System.IO;
  
  namespace System.Text.RegularExpressions.Symbolic.Unicode
  {
  #if DEBUG
+    [ExcludeFromCodeCoverage]
      internal static class IgnoreCaseRelationGenerator
      {
          private const string DefaultCultureName = "en-US";
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/UnicodeCategoryRangesGenerator.cs b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/UnicodeCategoryRangesGenerator.cs

index 32b09bf..c3ced75 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/UnicodeCategoryRangesGenerator.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/UnicodeCategoryRangesGenerator.cs
@@ -3,6 +3,7 @@
  
  using System.Collections.Generic;
  using System.Diagnostics;
+using System.Diagnostics.CodeAnalysis;
  using System.Globalization;
  using System.IO;
  
@@ -10,6 +11,7 @@ namespace System.Text.RegularExpressions.Symbolic.Unicode
  {
  #if DEBUG
      /// <summary>Utility for generating unicode category ranges and corresponing binary decision diagrams.</summary>
+    [ExcludeFromCodeCoverage]
      internal static class UnicodeCategoryRangesGenerator
      {
          /// <summary>Generator for BDD Unicode category definitions.</summary>
@@ -88,6 +90,7 @@ namespace {namespacename}
      }
  
      /// <summary>Used internally for creating a collection of ranges for serialization.</summary>
+    [ExcludeFromCodeCoverage]
      internal sealed class Ranges
      {
          public readonly List<int[]> ranges = new List<int[]>();
diff --git a/src/libraries/System.Text.RegularExpressions/src/System/Threading/StackHelper.cs b/src/libraries/System.Text.RegularExpressions/src/System/Threading/StackHelper.cs

index 247c60f..e15d49c 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/src/System/Threading/StackHelper.cs
+++ b/src/libraries/System.Text.RegularExpressions/src/System/Threading/StackHelper.cs
@@ -114,5 +114,21 @@ namespace System.Threading
              Task.Run(() => func(arg1, arg2, arg3))
                  .ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
                  .GetAwaiter().GetResult();
+
+        /// <summary>Calls the provided function on the stack of a different thread pool thread.</summary>
+        /// <typeparam name="TArg1">The type of the first argument to pass to the function.</typeparam>
+        /// <typeparam name="TArg2">The type of the second argument to pass to the function.</typeparam>
+        /// <typeparam name="TArg3">The type of the third argument to pass to the function.</typeparam>
+        /// <typeparam name="TArg4">The type of the fourth argument to pass to the function.</typeparam>
+        /// <typeparam name="TResult">The return type of the function.</typeparam>
+        /// <param name="func">The function to invoke.</param>
+        /// <param name="arg1">The first argument to pass to the function.</param>
+        /// <param name="arg2">The second argument to pass to the function.</param>
+        /// <param name="arg3">The third argument to pass to the function.</param>
+        /// <param name="arg4">The fourth argument to pass to the function.</param>
+        public static TResult CallOnEmptyStack<TArg1, TArg2, TArg3, TArg4, TResult>(Func<TArg1, TArg2, TArg3, TArg4, TResult> func, TArg1 arg1, TArg2 arg2, TArg3 arg3, TArg4 arg4) =>
+            Task.Run(() => func(arg1, arg2, arg3, arg4))
+                .ContinueWith(t => t.GetAwaiter().GetResult(), CancellationToken.None, TaskContinuationOptions.ExecuteSynchronously, TaskScheduler.Default)
+                .GetAwaiter().GetResult();
      }
  }
diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs

index 5b43fab..3204d7a 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs
@@ -13,119 +13,121 @@ namespace System.Text.RegularExpressions.Tests
      {
          public static IEnumerable<object[]> Groups_Basic_TestData()
          {
-            // (A - B) B is a subset of A(ie B only contains chars that are in A)
-            yield return new object[] { null, "[abcd-[d]]+", "dddaabbccddd", RegexOptions.None, new string[] { "aabbcc" } };
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
+            {
+                // (A - B) B is a subset of A(ie B only contains chars that are in A)
+                yield return new object[] { engine, null, "[abcd-[d]]+", "dddaabbccddd", RegexOptions.None, new string[] { "aabbcc" } };
  
-            yield return new object[] { null, @"[\d-[357]]+", "33312468955", RegexOptions.None, new string[] { "124689" } };
-            yield return new object[] { null, @"[\d-[357]]+", "51246897", RegexOptions.None, new string[] { "124689" } };
-            yield return new object[] { null, @"[\d-[357]]+", "3312468977", RegexOptions.None, new string[] { "124689" } };
+                yield return new object[] { engine, null, @"[\d-[357]]+", "33312468955", RegexOptions.None, new string[] { "124689" } };
+                yield return new object[] { engine, null, @"[\d-[357]]+", "51246897", RegexOptions.None, new string[] { "124689" } };
+                yield return new object[] { engine, null, @"[\d-[357]]+", "3312468977", RegexOptions.None, new string[] { "124689" } };
  
-            yield return new object[] { null, @"[\w-[b-y]]+", "bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
+                yield return new object[] { engine, null, @"[\w-[b-y]]+", "bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
  
-            yield return new object[] { null, @"[\w-[\d]]+", "0AZaz9", RegexOptions.None, new string[] { "AZaz" } };
-            yield return new object[] { null, @"[\w-[\p{Ll}]]+", "a09AZz", RegexOptions.None, new string[] { "09AZ" } };
+                yield return new object[] { engine, null, @"[\w-[\d]]+", "0AZaz9", RegexOptions.None, new string[] { "AZaz" } };
+                yield return new object[] { engine, null, @"[\w-[\p{Ll}]]+", "a09AZz", RegexOptions.None, new string[] { "09AZ" } };
  
-            yield return new object[] { null, @"[\d-[13579]]+", "1024689", RegexOptions.ECMAScript, new string[] { "02468" } };
-            yield return new object[] { null, @"[\d-[13579]]+", "\x066102468\x0660", RegexOptions.ECMAScript, new string[] { "02468" } };
-            yield return new object[] { null, @"[\d-[13579]]+", "\x066102468\x0660", RegexOptions.None, new string[] { "\x066102468\x0660" } };
+                yield return new object[] { engine, null, @"[\d-[13579]]+", "1024689", RegexOptions.ECMAScript, new string[] { "02468" } };
+                yield return new object[] { engine, null, @"[\d-[13579]]+", "\x066102468\x0660", RegexOptions.ECMAScript, new string[] { "02468" } };
+                yield return new object[] { engine, null, @"[\d-[13579]]+", "\x066102468\x0660", RegexOptions.None, new string[] { "\x066102468\x0660" } };
  
-            yield return new object[] { null, @"[\p{Ll}-[ae-z]]+", "aaabbbcccdddeee", RegexOptions.None, new string[] { "bbbcccddd" } };
-            yield return new object[] { null, @"[\p{Nd}-[2468]]+", "20135798", RegexOptions.None, new string[] { "013579" } };
+                yield return new object[] { engine, null, @"[\p{Ll}-[ae-z]]+", "aaabbbcccdddeee", RegexOptions.None, new string[] { "bbbcccddd" } };
+                yield return new object[] { engine, null, @"[\p{Nd}-[2468]]+", "20135798", RegexOptions.None, new string[] { "013579" } };
  
-            yield return new object[] { null, @"[\P{Lu}-[ae-z]]+", "aaabbbcccdddeee", RegexOptions.None, new string[] { "bbbcccddd" } };
-            yield return new object[] { null, @"[\P{Nd}-[\p{Ll}]]+", "az09AZ'[]", RegexOptions.None, new string[] { "AZ'[]" } };
+                yield return new object[] { engine, null, @"[\P{Lu}-[ae-z]]+", "aaabbbcccdddeee", RegexOptions.None, new string[] { "bbbcccddd" } };
+                yield return new object[] { engine, null, @"[\P{Nd}-[\p{Ll}]]+", "az09AZ'[]", RegexOptions.None, new string[] { "AZ'[]" } };
  
-            // (A - B) B is a superset of A (ie B contains chars that are in A plus other chars that are not in A)
-            yield return new object[] { null, "[abcd-[def]]+", "fedddaabbccddd", RegexOptions.None, new string[] { "aabbcc" } };
+                // (A - B) B is a superset of A (ie B contains chars that are in A plus other chars that are not in A)
+                yield return new object[] { engine, null, "[abcd-[def]]+", "fedddaabbccddd", RegexOptions.None, new string[] { "aabbcc" } };
  
-            yield return new object[] { null, @"[\d-[357a-z]]+", "az33312468955", RegexOptions.None, new string[] { "124689" } };
-            yield return new object[] { null, @"[\d-[de357fgA-Z]]+", "AZ51246897", RegexOptions.None, new string[] { "124689" } };
-            yield return new object[] { null, @"[\d-[357\p{Ll}]]+", "az3312468977", RegexOptions.None, new string[] { "124689" } };
+                yield return new object[] { engine, null, @"[\d-[357a-z]]+", "az33312468955", RegexOptions.None, new string[] { "124689" } };
+                yield return new object[] { engine, null, @"[\d-[de357fgA-Z]]+", "AZ51246897", RegexOptions.None, new string[] { "124689" } };
+                yield return new object[] { engine, null, @"[\d-[357\p{Ll}]]+", "az3312468977", RegexOptions.None, new string[] { "124689" } };
  
-            yield return new object[] { null, @"[\w-[b-y\s]]+", " \tbbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
+                yield return new object[] { engine, null, @"[\w-[b-y\s]]+", " \tbbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
  
-            yield return new object[] { null, @"[\w-[\d\p{Po}]]+", "!#0AZaz9", RegexOptions.None, new string[] { "AZaz" } };
-            yield return new object[] { null, @"[\w-[\p{Ll}\s]]+", "a09AZz", RegexOptions.None, new string[] { "09AZ" } };
+                yield return new object[] { engine, null, @"[\w-[\d\p{Po}]]+", "!#0AZaz9", RegexOptions.None, new string[] { "AZaz" } };
+                yield return new object[] { engine, null, @"[\w-[\p{Ll}\s]]+", "a09AZz", RegexOptions.None, new string[] { "09AZ" } };
  
-            yield return new object[] { null, @"[\d-[13579a-zA-Z]]+", "AZ1024689", RegexOptions.ECMAScript, new string[] { "02468" } };
-            yield return new object[] { null, @"[\d-[13579abcd]]+", "abcd\x066102468\x0660", RegexOptions.ECMAScript, new string[] { "02468" } };
-            yield return new object[] { null, @"[\d-[13579\s]]+", " \t\x066102468\x0660", RegexOptions.None, new string[] { "\x066102468\x0660" } };
+                yield return new object[] { engine, null, @"[\d-[13579a-zA-Z]]+", "AZ1024689", RegexOptions.ECMAScript, new string[] { "02468" } };
+                yield return new object[] { engine, null, @"[\d-[13579abcd]]+", "abcd\x066102468\x0660", RegexOptions.ECMAScript, new string[] { "02468" } };
+                yield return new object[] { engine, null, @"[\d-[13579\s]]+", " \t\x066102468\x0660", RegexOptions.None, new string[] { "\x066102468\x0660" } };
  
-            yield return new object[] { null, @"[\w-[b-y\p{Po}]]+", "!#bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
+                yield return new object[] { engine, null, @"[\w-[b-y\p{Po}]]+", "!#bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
  
-            yield return new object[] { null, @"[\w-[b-y!.,]]+", "!.,bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
-            yield return new object[] { null, "[\\w-[b-y\x00-\x0F]]+", "\0bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
+                yield return new object[] { engine, null, @"[\w-[b-y!.,]]+", "!.,bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
+                yield return new object[] { engine, null, "[\\w-[b-y\x00-\x0F]]+", "\0bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "aaaABCD09zzz" } };
  
-            yield return new object[] { null, @"[\p{Ll}-[ae-z0-9]]+", "09aaabbbcccdddeee", RegexOptions.None, new string[] { "bbbcccddd" } };
-            yield return new object[] { null, @"[\p{Nd}-[2468az]]+", "az20135798", RegexOptions.None, new string[] { "013579" } };
+                yield return new object[] { engine, null, @"[\p{Ll}-[ae-z0-9]]+", "09aaabbbcccdddeee", RegexOptions.None, new string[] { "bbbcccddd" } };
+                yield return new object[] { engine, null, @"[\p{Nd}-[2468az]]+", "az20135798", RegexOptions.None, new string[] { "013579" } };
  
-            yield return new object[] { null, @"[\P{Lu}-[ae-zA-Z]]+", "AZaaabbbcccdddeee", RegexOptions.None, new string[] { "bbbcccddd" } };
-            yield return new object[] { null, @"[\P{Nd}-[\p{Ll}0123456789]]+", "09az09AZ'[]", RegexOptions.None, new string[] { "AZ'[]" } };
+                yield return new object[] { engine, null, @"[\P{Lu}-[ae-zA-Z]]+", "AZaaabbbcccdddeee", RegexOptions.None, new string[] { "bbbcccddd" } };
+                yield return new object[] { engine, null, @"[\P{Nd}-[\p{Ll}0123456789]]+", "09az09AZ'[]", RegexOptions.None, new string[] { "AZ'[]" } };
  
-            // (A - B) B only contains chars that are not in A
-            yield return new object[] { null, "[abc-[defg]]+", "dddaabbccddd", RegexOptions.None, new string[] { "aabbcc" } };
+                // (A - B) B only contains chars that are not in A
+                yield return new object[] { engine, null, "[abc-[defg]]+", "dddaabbccddd", RegexOptions.None, new string[] { "aabbcc" } };
  
-            yield return new object[] { null, @"[\d-[abc]]+", "abc09abc", RegexOptions.None, new string[] { "09" } };
-            yield return new object[] { null, @"[\d-[a-zA-Z]]+", "az09AZ", RegexOptions.None, new string[] { "09" } };
-            yield return new object[] { null, @"[\d-[\p{Ll}]]+", "az09az", RegexOptions.None, new string[] { "09" } };
+                yield return new object[] { engine, null, @"[\d-[abc]]+", "abc09abc", RegexOptions.None, new string[] { "09" } };
+                yield return new object[] { engine, null, @"[\d-[a-zA-Z]]+", "az09AZ", RegexOptions.None, new string[] { "09" } };
+                yield return new object[] { engine, null, @"[\d-[\p{Ll}]]+", "az09az", RegexOptions.None, new string[] { "09" } };
  
-            yield return new object[] { null, @"[\w-[\x00-\x0F]]+", "bbbaaaABYZ09zzzyyy", RegexOptions.None, new string[] { "bbbaaaABYZ09zzzyyy" } };
+                yield return new object[] { engine, null, @"[\w-[\x00-\x0F]]+", "bbbaaaABYZ09zzzyyy", RegexOptions.None, new string[] { "bbbaaaABYZ09zzzyyy" } };
  
-            yield return new object[] { null, @"[\w-[\s]]+", "0AZaz9", RegexOptions.None, new string[] { "0AZaz9" } };
-            yield return new object[] { null, @"[\w-[\W]]+", "0AZaz9", RegexOptions.None, new string[] { "0AZaz9" } };
-            yield return new object[] { null, @"[\w-[\p{Po}]]+", "#a09AZz!", RegexOptions.None, new string[] { "a09AZz" } };
+                yield return new object[] { engine, null, @"[\w-[\s]]+", "0AZaz9", RegexOptions.None, new string[] { "0AZaz9" } };
+                yield return new object[] { engine, null, @"[\w-[\W]]+", "0AZaz9", RegexOptions.None, new string[] { "0AZaz9" } };
+                yield return new object[] { engine, null, @"[\w-[\p{Po}]]+", "#a09AZz!", RegexOptions.None, new string[] { "a09AZz" } };
  
-            yield return new object[] { null, @"[\d-[\D]]+", "azAZ1024689", RegexOptions.ECMAScript, new string[] { "1024689" } };
-            yield return new object[] { null, @"[\d-[a-zA-Z]]+", "azAZ\x066102468\x0660", RegexOptions.ECMAScript, new string[] { "02468" } };
-            yield return new object[] { null, @"[\d-[\p{Ll}]]+", "\x066102468\x0660", RegexOptions.None, new string[] { "\x066102468\x0660" } };
+                yield return new object[] { engine, null, @"[\d-[\D]]+", "azAZ1024689", RegexOptions.ECMAScript, new string[] { "1024689" } };
+                yield return new object[] { engine, null, @"[\d-[a-zA-Z]]+", "azAZ\x066102468\x0660", RegexOptions.ECMAScript, new string[] { "02468" } };
+                yield return new object[] { engine, null, @"[\d-[\p{Ll}]]+", "\x066102468\x0660", RegexOptions.None, new string[] { "\x066102468\x0660" } };
  
-            yield return new object[] { null, @"[a-zA-Z0-9-[\s]]+", " \tazAZ09", RegexOptions.None, new string[] { "azAZ09" } };
+                yield return new object[] { engine, null, @"[a-zA-Z0-9-[\s]]+", " \tazAZ09", RegexOptions.None, new string[] { "azAZ09" } };
  
-            yield return new object[] { null, @"[a-zA-Z0-9-[\W]]+", "bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "bbbaaaABCD09zzzyyy" } };
-            yield return new object[] { null, @"[a-zA-Z0-9-[^a-zA-Z0-9]]+", "bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "bbbaaaABCD09zzzyyy" } };
+                yield return new object[] { engine, null, @"[a-zA-Z0-9-[\W]]+", "bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "bbbaaaABCD09zzzyyy" } };
+                yield return new object[] { engine, null, @"[a-zA-Z0-9-[^a-zA-Z0-9]]+", "bbbaaaABCD09zzzyyy", RegexOptions.None, new string[] { "bbbaaaABCD09zzzyyy" } };
  
-            yield return new object[] { null, @"[\p{Ll}-[A-Z]]+", "AZaz09", RegexOptions.None, new string[] { "az" } };
-            yield return new object[] { null, @"[\p{Nd}-[a-z]]+", "az09", RegexOptions.None, new string[] { "09" } };
+                yield return new object[] { engine, null, @"[\p{Ll}-[A-Z]]+", "AZaz09", RegexOptions.None, new string[] { "az" } };
+                yield return new object[] { engine, null, @"[\p{Nd}-[a-z]]+", "az09", RegexOptions.None, new string[] { "09" } };
  
-            yield return new object[] { null, @"[\P{Lu}-[\p{Lu}]]+", "AZazAZ", RegexOptions.None, new string[] { "az" } };
-            yield return new object[] { null, @"[\P{Lu}-[A-Z]]+", "AZazAZ", RegexOptions.None, new string[] { "az" } };
-            yield return new object[] { null, @"[\P{Nd}-[\p{Nd}]]+", "azAZ09", RegexOptions.None, new string[] { "azAZ" } };
-            yield return new object[] { null, @"[\P{Nd}-[2-8]]+", "1234567890azAZ1234567890", RegexOptions.None, new string[] { "azAZ" } };
+                yield return new object[] { engine, null, @"[\P{Lu}-[\p{Lu}]]+", "AZazAZ", RegexOptions.None, new string[] { "az" } };
+                yield return new object[] { engine, null, @"[\P{Lu}-[A-Z]]+", "AZazAZ", RegexOptions.None, new string[] { "az" } };
+                yield return new object[] { engine, null, @"[\P{Nd}-[\p{Nd}]]+", "azAZ09", RegexOptions.None, new string[] { "azAZ" } };
+                yield return new object[] { engine, null, @"[\P{Nd}-[2-8]]+", "1234567890azAZ1234567890", RegexOptions.None, new string[] { "azAZ" } };
  
-            // Alternating construct
-            yield return new object[] { null, @"([ ]|[\w-[0-9]])+", "09az AZ90", RegexOptions.None, new string[] { "az AZ", "Z" } };
-            yield return new object[] { null, @"([0-9-[02468]]|[0-9-[13579]])+", "az1234567890za", RegexOptions.None, new string[] { "1234567890", "0" } };
-            yield return new object[] { null, @"([^0-9-[a-zAE-Z]]|[\w-[a-zAF-Z]])+", "azBCDE1234567890BCDEFza", RegexOptions.None, new string[] { "BCDE1234567890BCDE", "E" } };
-            yield return new object[] { null, @"([\p{Ll}-[aeiou]]|[^\w-[\s]])+", "aeiobcdxyz!@#aeio", RegexOptions.None, new string[] { "bcdxyz!@#", "#" } };
-            yield return new object[] { null, @"(?:hello|hi){1,3}", "hello", RegexOptions.None, new string[] { "hello" } };
-            yield return new object[] { null, @"(hello|hi){1,3}", "hellohihey", RegexOptions.None, new string[] { "hellohi", "hi" } };
-            yield return new object[] { null, @"(?:hello|hi){1,3}", "hellohihey", RegexOptions.None, new string[] { "hellohi" } };
-            yield return new object[] { null, @"(?:hello|hi){2,2}", "hellohihey", RegexOptions.None, new string[] { "hellohi" } };
-            yield return new object[] { null, @"(?:hello|hi){2,2}?", "hellohihihello", RegexOptions.None, new string[] { "hellohi" } };
-            yield return new object[] { null, @"(?:abc|def|ghi|hij|klm|no){1,4}", "this is a test nonoabcxyz this is only a test", RegexOptions.None, new string[] { "nonoabc" } };
-            yield return new object[] { null, @"xyz(abc|def)xyz", "abcxyzdefxyzabc", RegexOptions.None, new string[] { "xyzdefxyz", "def" } };
-            yield return new object[] { null, @"abc|(?:def|ghi)", "ghi", RegexOptions.None, new string[] { "ghi" } };
-            yield return new object[] { null, @"abc|(def|ghi)", "def", RegexOptions.None, new string[] { "def", "def" } };
+                // Alternating construct
+                yield return new object[] { engine, null, @"([ ]|[\w-[0-9]])+", "09az AZ90", RegexOptions.None, new string[] { "az AZ", "Z" } };
+                yield return new object[] { engine, null, @"([0-9-[02468]]|[0-9-[13579]])+", "az1234567890za", RegexOptions.None, new string[] { "1234567890", "0" } };
+                yield return new object[] { engine, null, @"([^0-9-[a-zAE-Z]]|[\w-[a-zAF-Z]])+", "azBCDE1234567890BCDEFza", RegexOptions.None, new string[] { "BCDE1234567890BCDE", "E" } };
+                yield return new object[] { engine, null, @"([\p{Ll}-[aeiou]]|[^\w-[\s]])+", "aeiobcdxyz!@#aeio", RegexOptions.None, new string[] { "bcdxyz!@#", "#" } };
+                yield return new object[] { engine, null, @"(?:hello|hi){1,3}", "hello", RegexOptions.None, new string[] { "hello" } };
+                yield return new object[] { engine, null, @"(hello|hi){1,3}", "hellohihey", RegexOptions.None, new string[] { "hellohi", "hi" } };
+                yield return new object[] { engine, null, @"(?:hello|hi){1,3}", "hellohihey", RegexOptions.None, new string[] { "hellohi" } };
+                yield return new object[] { engine, null, @"(?:hello|hi){2,2}", "hellohihey", RegexOptions.None, new string[] { "hellohi" } };
+                yield return new object[] { engine, null, @"(?:hello|hi){2,2}?", "hellohihihello", RegexOptions.None, new string[] { "hellohi" } };
+                yield return new object[] { engine, null, @"(?:abc|def|ghi|hij|klm|no){1,4}", "this is a test nonoabcxyz this is only a test", RegexOptions.None, new string[] { "nonoabc" } };
+                yield return new object[] { engine, null, @"xyz(abc|def)xyz", "abcxyzdefxyzabc", RegexOptions.None, new string[] { "xyzdefxyz", "def" } };
+                yield return new object[] { engine, null, @"abc|(?:def|ghi)", "ghi", RegexOptions.None, new string[] { "ghi" } };
+                yield return new object[] { engine, null, @"abc|(def|ghi)", "def", RegexOptions.None, new string[] { "def", "def" } };
  
-            // Multiple character classes using character class subtraction
-            yield return new object[] { null, @"98[\d-[9]][\d-[8]][\d-[0]]", "98911 98881 98870 98871", RegexOptions.None, new string[] { "98871" } };
-            yield return new object[] { null, @"m[\w-[^aeiou]][\w-[^aeiou]]t", "mbbt mect meet", RegexOptions.None, new string[] { "meet" } };
+                // Multiple character classes using character class subtraction
+                yield return new object[] { engine, null, @"98[\d-[9]][\d-[8]][\d-[0]]", "98911 98881 98870 98871", RegexOptions.None, new string[] { "98871" } };
+                yield return new object[] { engine, null, @"m[\w-[^aeiou]][\w-[^aeiou]]t", "mbbt mect meet", RegexOptions.None, new string[] { "meet" } };
  
-            // Negation with character class subtraction
-            yield return new object[] { null, "[abcdef-[^bce]]+", "adfbcefda", RegexOptions.None, new string[] { "bce" } };
-            yield return new object[] { null, "[^cde-[ag]]+", "agbfxyzga", RegexOptions.None, new string[] { "bfxyz" } };
+                // Negation with character class subtraction
+                yield return new object[] { engine, null, "[abcdef-[^bce]]+", "adfbcefda", RegexOptions.None, new string[] { "bce" } };
+                yield return new object[] { engine, null, "[^cde-[ag]]+", "agbfxyzga", RegexOptions.None, new string[] { "bfxyz" } };
  
-            // Misc The idea here is come up with real world examples of char class subtraction. Things that
-            // would be difficult to define without it
-            yield return new object[] { null, @"[\p{L}-[^\p{Lu}]]+", "09',.abcxyzABCXYZ", RegexOptions.None, new string[] { "ABCXYZ" } };
+                // Misc The idea here is come up with real world examples of char class subtraction. Things that
+                // would be difficult to define without it
+                yield return new object[] { engine, null, @"[\p{L}-[^\p{Lu}]]+", "09',.abcxyzABCXYZ", RegexOptions.None, new string[] { "ABCXYZ" } };
  
-            yield return new object[] { null, @"[\p{IsGreek}-[\P{Lu}]]+", "\u0390\u03FE\u0386\u0388\u03EC\u03EE\u0400", RegexOptions.None, new string[] { "\u03FE\u0386\u0388\u03EC\u03EE" } };
-            yield return new object[] { null, @"[\p{IsBasicLatin}-[G-L]]+", "GAFMZL", RegexOptions.None, new string[] { "AFMZ" } };
+                yield return new object[] { engine, null, @"[\p{IsGreek}-[\P{Lu}]]+", "\u0390\u03FE\u0386\u0388\u03EC\u03EE\u0400", RegexOptions.None, new string[] { "\u03FE\u0386\u0388\u03EC\u03EE" } };
+                yield return new object[] { engine, null, @"[\p{IsBasicLatin}-[G-L]]+", "GAFMZL", RegexOptions.None, new string[] { "AFMZ" } };
  
-            yield return new object[] { null, "[a-zA-Z-[aeiouAEIOU]]+", "aeiouAEIOUbcdfghjklmnpqrstvwxyz", RegexOptions.None, new string[] { "bcdfghjklmnpqrstvwxyz" } };
+                yield return new object[] { engine, null, "[a-zA-Z-[aeiouAEIOU]]+", "aeiouAEIOUbcdfghjklmnpqrstvwxyz", RegexOptions.None, new string[] { "bcdfghjklmnpqrstvwxyz" } };
  
-            // The following is an overly complex way of matching an ip address using char class subtraction
-            yield return new object[] { null, @"^
+                // The following is an overly complex way of matching an ip address using char class subtraction
+                yield return new object[] { engine, null, @"^
              (?<octet>^
                  (
                      (
@@ -157,370 +159,370 @@ namespace System.Text.RegularExpressions.Tests
              )$"
              , "255", RegexOptions.IgnorePatternWhitespace, new string[] { "255", "255", "2", "5", "5", "", "255", "2", "5" } };
  
-            // Character Class Substraction
-            yield return new object[] { null, @"[abcd\-d-[bc]]+", "bbbaaa---dddccc", RegexOptions.None, new string[] { "aaa---ddd" } };
-            yield return new object[] { null, @"[^a-f-[\x00-\x60\u007B-\uFFFF]]+", "aaafffgggzzz{{{", RegexOptions.None, new string[] { "gggzzz" } };
-            yield return new object[] { null, @"[\[\]a-f-[[]]+", "gggaaafff]]][[[", RegexOptions.None, new string[] { "aaafff]]]" } };
-            yield return new object[] { null, @"[\[\]a-f-[]]]+", "gggaaafff[[[]]]", RegexOptions.None, new string[] { "aaafff[[[" } };
-
-            yield return new object[] { null, @"[ab\-\[cd-[-[]]]]", "a]]", RegexOptions.None, new string[] { "a]]" } };
-            yield return new object[] { null, @"[ab\-\[cd-[-[]]]]", "b]]", RegexOptions.None, new string[] { "b]]" } };
-            yield return new object[] { null, @"[ab\-\[cd-[-[]]]]", "c]]", RegexOptions.None, new string[] { "c]]" } };
-            yield return new object[] { null, @"[ab\-\[cd-[-[]]]]", "d]]", RegexOptions.None, new string[] { "d]]" } };
-
-            yield return new object[] { null, @"[ab\-\[cd-[[]]]]", "a]]", RegexOptions.None, new string[] { "a]]" } };
-            yield return new object[] { null, @"[ab\-\[cd-[[]]]]", "b]]", RegexOptions.None, new string[] { "b]]" } };
-            yield return new object[] { null, @"[ab\-\[cd-[[]]]]", "c]]", RegexOptions.None, new string[] { "c]]" } };
-            yield return new object[] { null, @"[ab\-\[cd-[[]]]]", "d]]", RegexOptions.None, new string[] { "d]]" } };
-            yield return new object[] { null, @"[ab\-\[cd-[[]]]]", "-]]", RegexOptions.None, new string[] { "-]]" } };
-
-            yield return new object[] { null, @"[a-[c-e]]+", "bbbaaaccc", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"[a-[c-e]]+", "```aaaccc", RegexOptions.None, new string[] { "aaa" } };
-
-            yield return new object[] { null, @"[a-d\--[bc]]+", "cccaaa--dddbbb", RegexOptions.None, new string[] { "aaa--ddd" } };
-
-            // Not Character class substraction
-            yield return new object[] { null, @"[\0- [bc]+", "!!!\0\0\t\t  [[[[bbbcccaaa", RegexOptions.None, new string[] { "\0\0\t\t  [[[[bbbccc" } };
-            yield return new object[] { null, "[[abcd]-[bc]]+", "a-b]", RegexOptions.None, new string[] { "a-b]" } };
-            yield return new object[] { null, "[-[e-g]+", "ddd[[[---eeefffggghhh", RegexOptions.None, new string[] { "[[[---eeefffggg" } };
-            yield return new object[] { null, "[-e-g]+", "ddd---eeefffggghhh", RegexOptions.None, new string[] { "---eeefffggg" } };
-            yield return new object[] { null, "[a-e - m-p]+", "---a b c d e m n o p---", RegexOptions.None, new string[] { "a b c d e m n o p" } };
-            yield return new object[] { null, "[^-[bc]]", "b] c] -] aaaddd]", RegexOptions.None, new string[] { "d]" } };
-            yield return new object[] { null, "[^-[bc]]", "b] c] -] aaa]ddd]", RegexOptions.None, new string[] { "a]" } };
-
-            // Make sure we correctly handle \-
-            yield return new object[] { null, @"[a\-[bc]+", "```bbbaaa---[[[cccddd", RegexOptions.None, new string[] { "bbbaaa---[[[ccc" } };
-            yield return new object[] { null, @"[a\-[\-\-bc]+", "```bbbaaa---[[[cccddd", RegexOptions.None, new string[] { "bbbaaa---[[[ccc" } };
-            yield return new object[] { null, @"[a\-\[\-\[\-bc]+", "```bbbaaa---[[[cccddd", RegexOptions.None, new string[] { "bbbaaa---[[[ccc" } };
-            yield return new object[] { null, @"[abc\--[b]]+", "[[[```bbbaaa---cccddd", RegexOptions.None, new string[] { "aaa---ccc" } };
-            yield return new object[] { null, @"[abc\-z-[b]]+", "```aaaccc---zzzbbb", RegexOptions.None, new string[] { "aaaccc---zzz" } };
-            yield return new object[] { null, @"[a-d\-[b]+", "```aaabbbcccddd----[[[[]]]", RegexOptions.None, new string[] { "aaabbbcccddd----[[[[" } };
-            yield return new object[] { null, @"[abcd\-d\-[bc]+", "bbbaaa---[[[dddccc", RegexOptions.None, new string[] { "bbbaaa---[[[dddccc" } };
-
-            // Everything works correctly with option RegexOptions.IgnorePatternWhitespace
-            yield return new object[] { null, "[a - c - [ b ] ]+", "dddaaa   ccc [[[[ bbb ]]]", RegexOptions.IgnorePatternWhitespace, new string[] { " ]]]" } };
-            yield return new object[] { null, "[a - c - [ b ] +", "dddaaa   ccc [[[[ bbb ]]]", RegexOptions.IgnorePatternWhitespace, new string[] { "aaa   ccc [[[[ bbb " } };
-
-            // Unicode Char Classes
-            yield return new object[] { null, @"(\p{Lu}\w*)\s(\p{Lu}\w*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
-            yield return new object[] { null, @"(\p{Lu}\p{Ll}*)\s(\p{Lu}\p{Ll}*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
-            yield return new object[] { null, @"(\P{Ll}\p{Ll}*)\s(\P{Ll}\p{Ll}*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
-            yield return new object[] { null, @"(\P{Lu}+\p{Lu})\s(\P{Lu}+\p{Lu})", "hellO worlD", RegexOptions.None, new string[] { "hellO worlD", "hellO", "worlD" } };
-            yield return new object[] { null, @"(\p{Lt}\w*)\s(\p{Lt}*\w*)", "\u01C5ello \u01C5orld", RegexOptions.None, new string[] { "\u01C5ello \u01C5orld", "\u01C5ello", "\u01C5orld" } };
-            yield return new object[] { null, @"(\P{Lt}\w*)\s(\P{Lt}*\w*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
-
-            // Character ranges IgnoreCase
-            yield return new object[] { null, @"[@-D]+", "eE?@ABCDabcdeE", RegexOptions.IgnoreCase, new string[] { "@ABCDabcd" } };
-            yield return new object[] { null, @"[>-D]+", "eE=>?@ABCDabcdeE", RegexOptions.IgnoreCase, new string[] { ">?@ABCDabcd" } };
-            yield return new object[] { null, @"[\u0554-\u0557]+", "\u0583\u0553\u0554\u0555\u0556\u0584\u0585\u0586\u0557\u0558", RegexOptions.IgnoreCase, new string[] { "\u0554\u0555\u0556\u0584\u0585\u0586\u0557" } };
-            yield return new object[] { null, @"[X-\]]+", "wWXYZxyz[\\]^", RegexOptions.IgnoreCase, new string[] { "XYZxyz[\\]" } };
-            yield return new object[] { null, @"[X-\u0533]+", "\u0551\u0554\u0560AXYZaxyz\u0531\u0532\u0533\u0561\u0562\u0563\u0564", RegexOptions.IgnoreCase, new string[] { "AXYZaxyz\u0531\u0532\u0533\u0561\u0562\u0563" } };
-            yield return new object[] { null, @"[X-a]+", "wWAXYZaxyz", RegexOptions.IgnoreCase, new string[] { "AXYZaxyz" } };
-            yield return new object[] { null, @"[X-c]+", "wWABCXYZabcxyz", RegexOptions.IgnoreCase, new string[] { "ABCXYZabcxyz" } };
-            yield return new object[] { null, @"[X-\u00C0]+", "\u00C1\u00E1\u00C0\u00E0wWABCXYZabcxyz", RegexOptions.IgnoreCase, new string[] { "\u00C0\u00E0wWABCXYZabcxyz" } };
-            yield return new object[] { null, @"[\u0100\u0102\u0104]+", "\u00FF \u0100\u0102\u0104\u0101\u0103\u0105\u0106", RegexOptions.IgnoreCase, new string[] { "\u0100\u0102\u0104\u0101\u0103\u0105" } };
-            yield return new object[] { null, @"[B-D\u0130]+", "aAeE\u0129\u0131\u0068 BCDbcD\u0130\u0069\u0070", RegexOptions.IgnoreCase, new string[] { "BCDbcD\u0130\u0069" } };
-            yield return new object[] { null, @"[\u013B\u013D\u013F]+", "\u013A\u013B\u013D\u013F\u013C\u013E\u0140\u0141", RegexOptions.IgnoreCase, new string[] { "\u013B\u013D\u013F\u013C\u013E\u0140" } };
-
-            // Escape Chars
-            yield return new object[] { null, "(Cat)\r(Dog)", "Cat\rDog", RegexOptions.None, new string[] { "Cat\rDog", "Cat", "Dog" } };
-            yield return new object[] { null, "(Cat)\t(Dog)", "Cat\tDog", RegexOptions.None, new string[] { "Cat\tDog", "Cat", "Dog" } };
-            yield return new object[] { null, "(Cat)\f(Dog)", "Cat\fDog", RegexOptions.None, new string[] { "Cat\fDog", "Cat", "Dog" } };
-
-            // Miscellaneous { witout matching }
-            yield return new object[] { null, @"{5", "hello {5 world", RegexOptions.None, new string[] { "{5" } };
-            yield return new object[] { null, @"{5,", "hello {5, world", RegexOptions.None, new string[] { "{5," } };
-            yield return new object[] { null, @"{5,6", "hello {5,6 world", RegexOptions.None, new string[] { "{5,6" } };
-
-            // Miscellaneous inline options
-            yield return new object[] { null, @"(?n:(?<cat>cat)(\s+)(?<dog>dog))", "cat   dog", RegexOptions.None, new string[] { "cat   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(?n:(cat)(\s+)(dog))", "cat   dog", RegexOptions.None, new string[] { "cat   dog" } };
-            yield return new object[] { null, @"(?n:(cat)(?<SpaceChars>\s+)(dog))", "cat   dog", RegexOptions.None, new string[] { "cat   dog", "   " } };
-            yield return new object[] { null, @"(?x:
+                // Character Class Subtraction
+                yield return new object[] { engine, null, @"[abcd\-d-[bc]]+", "bbbaaa---dddccc", RegexOptions.None, new string[] { "aaa---ddd" } };
+                yield return new object[] { engine, null, @"[^a-f-[\x00-\x60\u007B-\uFFFF]]+", "aaafffgggzzz{{{", RegexOptions.None, new string[] { "gggzzz" } };
+                yield return new object[] { engine, null, @"[\[\]a-f-[[]]+", "gggaaafff]]][[[", RegexOptions.None, new string[] { "aaafff]]]" } };
+                yield return new object[] { engine, null, @"[\[\]a-f-[]]]+", "gggaaafff[[[]]]", RegexOptions.None, new string[] { "aaafff[[[" } };
+
+                yield return new object[] { engine, null, @"[ab\-\[cd-[-[]]]]", "a]]", RegexOptions.None, new string[] { "a]]" } };
+                yield return new object[] { engine, null, @"[ab\-\[cd-[-[]]]]", "b]]", RegexOptions.None, new string[] { "b]]" } };
+                yield return new object[] { engine, null, @"[ab\-\[cd-[-[]]]]", "c]]", RegexOptions.None, new string[] { "c]]" } };
+                yield return new object[] { engine, null, @"[ab\-\[cd-[-[]]]]", "d]]", RegexOptions.None, new string[] { "d]]" } };
+
+                yield return new object[] { engine, null, @"[ab\-\[cd-[[]]]]", "a]]", RegexOptions.None, new string[] { "a]]" } };
+                yield return new object[] { engine, null, @"[ab\-\[cd-[[]]]]", "b]]", RegexOptions.None, new string[] { "b]]" } };
+                yield return new object[] { engine, null, @"[ab\-\[cd-[[]]]]", "c]]", RegexOptions.None, new string[] { "c]]" } };
+                yield return new object[] { engine, null, @"[ab\-\[cd-[[]]]]", "d]]", RegexOptions.None, new string[] { "d]]" } };
+                yield return new object[] { engine, null, @"[ab\-\[cd-[[]]]]", "-]]", RegexOptions.None, new string[] { "-]]" } };
+
+                yield return new object[] { engine, null, @"[a-[c-e]]+", "bbbaaaccc", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"[a-[c-e]]+", "```aaaccc", RegexOptions.None, new string[] { "aaa" } };
+
+                yield return new object[] { engine, null, @"[a-d\--[bc]]+", "cccaaa--dddbbb", RegexOptions.None, new string[] { "aaa--ddd" } };
+
+                // Not Character class subtraction
+                yield return new object[] { engine, null, @"[\0- [bc]+", "!!!\0\0\t\t  [[[[bbbcccaaa", RegexOptions.None, new string[] { "\0\0\t\t  [[[[bbbccc" } };
+                yield return new object[] { engine, null, "[[abcd]-[bc]]+", "a-b]", RegexOptions.None, new string[] { "a-b]" } };
+                yield return new object[] { engine, null, "[-[e-g]+", "ddd[[[---eeefffggghhh", RegexOptions.None, new string[] { "[[[---eeefffggg" } };
+                yield return new object[] { engine, null, "[-e-g]+", "ddd---eeefffggghhh", RegexOptions.None, new string[] { "---eeefffggg" } };
+                yield return new object[] { engine, null, "[a-e - m-p]+", "---a b c d e m n o p---", RegexOptions.None, new string[] { "a b c d e m n o p" } };
+                yield return new object[] { engine, null, "[^-[bc]]", "b] c] -] aaaddd]", RegexOptions.None, new string[] { "d]" } };
+                yield return new object[] { engine, null, "[^-[bc]]", "b] c] -] aaa]ddd]", RegexOptions.None, new string[] { "a]" } };
+
+                // Make sure we correctly handle \-
+                yield return new object[] { engine, null, @"[a\-[bc]+", "```bbbaaa---[[[cccddd", RegexOptions.None, new string[] { "bbbaaa---[[[ccc" } };
+                yield return new object[] { engine, null, @"[a\-[\-\-bc]+", "```bbbaaa---[[[cccddd", RegexOptions.None, new string[] { "bbbaaa---[[[ccc" } };
+                yield return new object[] { engine, null, @"[a\-\[\-\[\-bc]+", "```bbbaaa---[[[cccddd", RegexOptions.None, new string[] { "bbbaaa---[[[ccc" } };
+                yield return new object[] { engine, null, @"[abc\--[b]]+", "[[[```bbbaaa---cccddd", RegexOptions.None, new string[] { "aaa---ccc" } };
+                yield return new object[] { engine, null, @"[abc\-z-[b]]+", "```aaaccc---zzzbbb", RegexOptions.None, new string[] { "aaaccc---zzz" } };
+                yield return new object[] { engine, null, @"[a-d\-[b]+", "```aaabbbcccddd----[[[[]]]", RegexOptions.None, new string[] { "aaabbbcccddd----[[[[" } };
+                yield return new object[] { engine, null, @"[abcd\-d\-[bc]+", "bbbaaa---[[[dddccc", RegexOptions.None, new string[] { "bbbaaa---[[[dddccc" } };
+
+                // Everything works correctly with option RegexOptions.IgnorePatternWhitespace
+                yield return new object[] { engine, null, "[a - c - [ b ] ]+", "dddaaa   ccc [[[[ bbb ]]]", RegexOptions.IgnorePatternWhitespace, new string[] { " ]]]" } };
+                yield return new object[] { engine, null, "[a - c - [ b ] +", "dddaaa   ccc [[[[ bbb ]]]", RegexOptions.IgnorePatternWhitespace, new string[] { "aaa   ccc [[[[ bbb " } };
+
+                // Unicode Char Classes
+                yield return new object[] { engine, null, @"(\p{Lu}\w*)\s(\p{Lu}\w*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
+                yield return new object[] { engine, null, @"(\p{Lu}\p{Ll}*)\s(\p{Lu}\p{Ll}*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
+                yield return new object[] { engine, null, @"(\P{Ll}\p{Ll}*)\s(\P{Ll}\p{Ll}*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
+                yield return new object[] { engine, null, @"(\P{Lu}+\p{Lu})\s(\P{Lu}+\p{Lu})", "hellO worlD", RegexOptions.None, new string[] { "hellO worlD", "hellO", "worlD" } };
+                yield return new object[] { engine, null, @"(\p{Lt}\w*)\s(\p{Lt}*\w*)", "\u01C5ello \u01C5orld", RegexOptions.None, new string[] { "\u01C5ello \u01C5orld", "\u01C5ello", "\u01C5orld" } };
+                yield return new object[] { engine, null, @"(\P{Lt}\w*)\s(\P{Lt}*\w*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
+
+                // Character ranges IgnoreCase
+                yield return new object[] { engine, null, @"[@-D]+", "eE?@ABCDabcdeE", RegexOptions.IgnoreCase, new string[] { "@ABCDabcd" } };
+                yield return new object[] { engine, null, @"[>-D]+", "eE=>?@ABCDabcdeE", RegexOptions.IgnoreCase, new string[] { ">?@ABCDabcd" } };
+                yield return new object[] { engine, null, @"[\u0554-\u0557]+", "\u0583\u0553\u0554\u0555\u0556\u0584\u0585\u0586\u0557\u0558", RegexOptions.IgnoreCase, new string[] { "\u0554\u0555\u0556\u0584\u0585\u0586\u0557" } };
+                yield return new object[] { engine, null, @"[X-\]]+", "wWXYZxyz[\\]^", RegexOptions.IgnoreCase, new string[] { "XYZxyz[\\]" } };
+                yield return new object[] { engine, null, @"[X-\u0533]+", "\u0551\u0554\u0560AXYZaxyz\u0531\u0532\u0533\u0561\u0562\u0563\u0564", RegexOptions.IgnoreCase, new string[] { "AXYZaxyz\u0531\u0532\u0533\u0561\u0562\u0563" } };
+                yield return new object[] { engine, null, @"[X-a]+", "wWAXYZaxyz", RegexOptions.IgnoreCase, new string[] { "AXYZaxyz" } };
+                yield return new object[] { engine, null, @"[X-c]+", "wWABCXYZabcxyz", RegexOptions.IgnoreCase, new string[] { "ABCXYZabcxyz" } };
+                yield return new object[] { engine, null, @"[X-\u00C0]+", "\u00C1\u00E1\u00C0\u00E0wWABCXYZabcxyz", RegexOptions.IgnoreCase, new string[] { "\u00C0\u00E0wWABCXYZabcxyz" } };
+                yield return new object[] { engine, null, @"[\u0100\u0102\u0104]+", "\u00FF \u0100\u0102\u0104\u0101\u0103\u0105\u0106", RegexOptions.IgnoreCase, new string[] { "\u0100\u0102\u0104\u0101\u0103\u0105" } };
+                yield return new object[] { engine, null, @"[B-D\u0130]+", "aAeE\u0129\u0131\u0068 BCDbcD\u0130\u0069\u0070", RegexOptions.IgnoreCase, new string[] { "BCDbcD\u0130\u0069" } };
+                yield return new object[] { engine, null, @"[\u013B\u013D\u013F]+", "\u013A\u013B\u013D\u013F\u013C\u013E\u0140\u0141", RegexOptions.IgnoreCase, new string[] { "\u013B\u013D\u013F\u013C\u013E\u0140" } };
+
+                // Escape Chars
+                yield return new object[] { engine, null, "(Cat)\r(Dog)", "Cat\rDog", RegexOptions.None, new string[] { "Cat\rDog", "Cat", "Dog" } };
+                yield return new object[] { engine, null, "(Cat)\t(Dog)", "Cat\tDog", RegexOptions.None, new string[] { "Cat\tDog", "Cat", "Dog" } };
+                yield return new object[] { engine, null, "(Cat)\f(Dog)", "Cat\fDog", RegexOptions.None, new string[] { "Cat\fDog", "Cat", "Dog" } };
+
+                // Miscellaneous { without matching }
+                yield return new object[] { engine, null, @"{5", "hello {5 world", RegexOptions.None, new string[] { "{5" } };
+                yield return new object[] { engine, null, @"{5,", "hello {5, world", RegexOptions.None, new string[] { "{5," } };
+                yield return new object[] { engine, null, @"{5,6", "hello {5,6 world", RegexOptions.None, new string[] { "{5,6" } };
+
+                // Miscellaneous inline options
+                yield return new object[] { engine, null, @"(?n:(?<cat>cat)(\s+)(?<dog>dog))", "cat   dog", RegexOptions.None, new string[] { "cat   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?n:(cat)(\s+)(dog))", "cat   dog", RegexOptions.None, new string[] { "cat   dog" } };
+                yield return new object[] { engine, null, @"(?n:(cat)(?<SpaceChars>\s+)(dog))", "cat   dog", RegexOptions.None, new string[] { "cat   dog", "   " } };
+                yield return new object[] { engine, null, @"(?x:
                              (?<cat>cat) # Cat statement
                              (\s+) # Whitespace chars
                              (?<dog>dog # Dog statement
                              ))", "cat   dog", RegexOptions.None, new string[] { "cat   dog", "   ", "cat", "dog" } };
-            yield return new object[] { null, @"(?+i:cat)", "CAT", RegexOptions.None, new string[] { "CAT" } };
-
-            // \d, \D, \s, \S, \w, \W, \P, \p inside character range
-            yield return new object[] { null, @"cat([\d]*)dog", "hello123cat230927dog1412d", RegexOptions.None, new string[] { "cat230927dog", "230927" } };
-            yield return new object[] { null, @"([\D]*)dog", "65498catdog58719", RegexOptions.None, new string[] { "catdog", "cat" } };
-            yield return new object[] { null, @"cat([\s]*)dog", "wiocat   dog3270", RegexOptions.None, new string[] { "cat   dog", "   " } };
-            yield return new object[] { null, @"cat([\S]*)", "sfdcatdog    3270", RegexOptions.None, new string[] { "catdog", "dog" } };
-            yield return new object[] { null, @"cat([\w]*)", "sfdcatdog    3270", RegexOptions.None, new string[] { "catdog", "dog" } };
-            yield return new object[] { null, @"cat([\W]*)dog", "wiocat   dog3270", RegexOptions.None, new string[] { "cat   dog", "   " } };
-            yield return new object[] { null, @"([\p{Lu}]\w*)\s([\p{Lu}]\w*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
-            yield return new object[] { null, @"([\P{Ll}][\p{Ll}]*)\s([\P{Ll}][\p{Ll}]*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
-
-            // \x, \u, \a, \b, \e, \f, \n, \r, \t, \v, \c, inside character range
-            yield return new object[] { null, @"(cat)([\x41]*)(dog)", "catAAAdog", RegexOptions.None, new string[] { "catAAAdog", "cat", "AAA", "dog" } };
-            yield return new object[] { null, @"(cat)([\u0041]*)(dog)", "catAAAdog", RegexOptions.None, new string[] { "catAAAdog", "cat", "AAA", "dog" } };
-            yield return new object[] { null, @"(cat)([\a]*)(dog)", "cat\a\a\adog", RegexOptions.None, new string[] { "cat\a\a\adog", "cat", "\a\a\a", "dog" } };
-            yield return new object[] { null, @"(cat)([\b]*)(dog)", "cat\b\b\bdog", RegexOptions.None, new string[] { "cat\b\b\bdog", "cat", "\b\b\b", "dog" } };
-            yield return new object[] { null, @"(cat)([\e]*)(dog)", "cat\u001B\u001B\u001Bdog", RegexOptions.None, new string[] { "cat\u001B\u001B\u001Bdog", "cat", "\u001B\u001B\u001B", "dog" } };
-            yield return new object[] { null, @"(cat)([\f]*)(dog)", "cat\f\f\fdog", RegexOptions.None, new string[] { "cat\f\f\fdog", "cat", "\f\f\f", "dog" } };
-            yield return new object[] { null, @"(cat)([\r]*)(dog)", "cat\r\r\rdog", RegexOptions.None, new string[] { "cat\r\r\rdog", "cat", "\r\r\r", "dog" } };
-            yield return new object[] { null, @"(cat)([\v]*)(dog)", "cat\v\v\vdog", RegexOptions.None, new string[] { "cat\v\v\vdog", "cat", "\v\v\v", "dog" } };
-
-            // \d, \D, \s, \S, \w, \W, \P, \p inside character range ([0-5]) with ECMA Option
-            yield return new object[] { null, @"cat([\d]*)dog", "hello123cat230927dog1412d", RegexOptions.ECMAScript, new string[] { "cat230927dog", "230927" } };
-            yield return new object[] { null, @"([\D]*)dog", "65498catdog58719", RegexOptions.ECMAScript, new string[] { "catdog", "cat" } };
-            yield return new object[] { null, @"cat([\s]*)dog", "wiocat   dog3270", RegexOptions.ECMAScript, new string[] { "cat   dog", "   " } };
-            yield return new object[] { null, @"cat([\S]*)", "sfdcatdog    3270", RegexOptions.ECMAScript, new string[] { "catdog", "dog" } };
-            yield return new object[] { null, @"cat([\w]*)", "sfdcatdog    3270", RegexOptions.ECMAScript, new string[] { "catdog", "dog" } };
-            yield return new object[] { null, @"cat([\W]*)dog", "wiocat   dog3270", RegexOptions.ECMAScript, new string[] { "cat   dog", "   " } };
-            yield return new object[] { null, @"([\p{Lu}]\w*)\s([\p{Lu}]\w*)", "Hello World", RegexOptions.ECMAScript, new string[] { "Hello World", "Hello", "World" } };
-            yield return new object[] { null, @"([\P{Ll}][\p{Ll}]*)\s([\P{Ll}][\p{Ll}]*)", "Hello World", RegexOptions.ECMAScript, new string[] { "Hello World", "Hello", "World" } };
-
-            // \d, \D, \s, \S, \w, \W, \P, \p outside character range ([0-5]) with ECMA Option
-            yield return new object[] { null, @"(cat)\d*dog", "hello123cat230927dog1412d", RegexOptions.ECMAScript, new string[] { "cat230927dog", "cat" } };
-            yield return new object[] { null, @"\D*(dog)", "65498catdog58719", RegexOptions.ECMAScript, new string[] { "catdog", "dog" } };
-            yield return new object[] { null, @"(cat)\s*(dog)", "wiocat   dog3270", RegexOptions.ECMAScript, new string[] { "cat   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat)\S*", "sfdcatdog    3270", RegexOptions.ECMAScript, new string[] { "catdog", "cat" } };
-            yield return new object[] { null, @"(cat)\w*", "sfdcatdog    3270", RegexOptions.ECMAScript, new string[] { "catdog", "cat" } };
-            yield return new object[] { null, @"(cat)\W*(dog)", "wiocat   dog3270", RegexOptions.ECMAScript, new string[] { "cat   dog", "cat", "dog" } };
-            yield return new object[] { null, @"\p{Lu}(\w*)\s\p{Lu}(\w*)", "Hello World", RegexOptions.ECMAScript, new string[] { "Hello World", "ello", "orld" } };
-            yield return new object[] { null, @"\P{Ll}\p{Ll}*\s\P{Ll}\p{Ll}*", "Hello World", RegexOptions.ECMAScript, new string[] { "Hello World" } };
-
-            // Use < in a group
-            yield return new object[] { null, @"cat(?<dog121>dog)", "catcatdogdogcat", RegexOptions.None, new string[] { "catdog", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s*(?<cat>dog)", "catcat    dogdogcat", RegexOptions.None, new string[] { "cat    dog", "dog" } };
-            yield return new object[] { null, @"(?<1>cat)\s*(?<1>dog)", "catcat    dogdogcat", RegexOptions.None, new string[] { "cat    dog", "dog" } };
-            yield return new object[] { null, @"(?<2048>cat)\s*(?<2048>dog)", "catcat    dogdogcat", RegexOptions.None, new string[] { "cat    dog", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\w+(?<dog-cat>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "", "_Hello_World_" } };
-            yield return new object[] { null, @"(?<cat>cat)\w+(?<-cat>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "" } };
-            yield return new object[] { null, @"(?<cat>cat)\w+(?<cat-cat>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "_Hello_World_" } };
-            yield return new object[] { null, @"(?<1>cat)\w+(?<dog-1>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "", "_Hello_World_" } };
-            yield return new object[] { null, @"(?<cat>cat)\w+(?<2-cat>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "", "_Hello_World_" } };
-            yield return new object[] { null, @"(?<1>cat)\w+(?<2-1>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "", "_Hello_World_" } };
-
-            // Quantifiers
-            yield return new object[] { null, @"(?<cat>cat){", "STARTcat{", RegexOptions.None, new string[] { "cat{", "cat" } };
-            yield return new object[] { null, @"(?<cat>cat){fdsa", "STARTcat{fdsa", RegexOptions.None, new string[] { "cat{fdsa", "cat" } };
-            yield return new object[] { null, @"(?<cat>cat){1", "STARTcat{1", RegexOptions.None, new string[] { "cat{1", "cat" } };
-            yield return new object[] { null, @"(?<cat>cat){1END", "STARTcat{1END", RegexOptions.None, new string[] { "cat{1END", "cat" } };
-            yield return new object[] { null, @"(?<cat>cat){1,", "STARTcat{1,", RegexOptions.None, new string[] { "cat{1,", "cat" } };
-            yield return new object[] { null, @"(?<cat>cat){1,END", "STARTcat{1,END", RegexOptions.None, new string[] { "cat{1,END", "cat" } };
-            yield return new object[] { null, @"(?<cat>cat){1,2", "STARTcat{1,2", RegexOptions.None, new string[] { "cat{1,2", "cat" } };
-            yield return new object[] { null, @"(?<cat>cat){1,2END", "STARTcat{1,2END", RegexOptions.None, new string[] { "cat{1,2END", "cat" } };
-
-            // Use IgnorePatternWhitespace
-            yield return new object[] { null, @"(cat) #cat
+                yield return new object[] { engine, null, @"(?+i:cat)", "CAT", RegexOptions.None, new string[] { "CAT" } };
+
+                // \d, \D, \s, \S, \w, \W, \P, \p inside character range
+                yield return new object[] { engine, null, @"cat([\d]*)dog", "hello123cat230927dog1412d", RegexOptions.None, new string[] { "cat230927dog", "230927" } };
+                yield return new object[] { engine, null, @"([\D]*)dog", "65498catdog58719", RegexOptions.None, new string[] { "catdog", "cat" } };
+                yield return new object[] { engine, null, @"cat([\s]*)dog", "wiocat   dog3270", RegexOptions.None, new string[] { "cat   dog", "   " } };
+                yield return new object[] { engine, null, @"cat([\S]*)", "sfdcatdog    3270", RegexOptions.None, new string[] { "catdog", "dog" } };
+                yield return new object[] { engine, null, @"cat([\w]*)", "sfdcatdog    3270", RegexOptions.None, new string[] { "catdog", "dog" } };
+                yield return new object[] { engine, null, @"cat([\W]*)dog", "wiocat   dog3270", RegexOptions.None, new string[] { "cat   dog", "   " } };
+                yield return new object[] { engine, null, @"([\p{Lu}]\w*)\s([\p{Lu}]\w*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
+                yield return new object[] { engine, null, @"([\P{Ll}][\p{Ll}]*)\s([\P{Ll}][\p{Ll}]*)", "Hello World", RegexOptions.None, new string[] { "Hello World", "Hello", "World" } };
+
+                // \x, \u, \a, \b, \e, \f, \n, \r, \t, \v, \c, inside character range
+                yield return new object[] { engine, null, @"(cat)([\x41]*)(dog)", "catAAAdog", RegexOptions.None, new string[] { "catAAAdog", "cat", "AAA", "dog" } };
+                yield return new object[] { engine, null, @"(cat)([\u0041]*)(dog)", "catAAAdog", RegexOptions.None, new string[] { "catAAAdog", "cat", "AAA", "dog" } };
+                yield return new object[] { engine, null, @"(cat)([\a]*)(dog)", "cat\a\a\adog", RegexOptions.None, new string[] { "cat\a\a\adog", "cat", "\a\a\a", "dog" } };
+                yield return new object[] { engine, null, @"(cat)([\b]*)(dog)", "cat\b\b\bdog", RegexOptions.None, new string[] { "cat\b\b\bdog", "cat", "\b\b\b", "dog" } };
+                yield return new object[] { engine, null, @"(cat)([\e]*)(dog)", "cat\u001B\u001B\u001Bdog", RegexOptions.None, new string[] { "cat\u001B\u001B\u001Bdog", "cat", "\u001B\u001B\u001B", "dog" } };
+                yield return new object[] { engine, null, @"(cat)([\f]*)(dog)", "cat\f\f\fdog", RegexOptions.None, new string[] { "cat\f\f\fdog", "cat", "\f\f\f", "dog" } };
+                yield return new object[] { engine, null, @"(cat)([\r]*)(dog)", "cat\r\r\rdog", RegexOptions.None, new string[] { "cat\r\r\rdog", "cat", "\r\r\r", "dog" } };
+                yield return new object[] { engine, null, @"(cat)([\v]*)(dog)", "cat\v\v\vdog", RegexOptions.None, new string[] { "cat\v\v\vdog", "cat", "\v\v\v", "dog" } };
+
+                // \d, \D, \s, \S, \w, \W, \P, \p inside character range ([0-5]) with ECMA Option
+                yield return new object[] { engine, null, @"cat([\d]*)dog", "hello123cat230927dog1412d", RegexOptions.ECMAScript, new string[] { "cat230927dog", "230927" } };
+                yield return new object[] { engine, null, @"([\D]*)dog", "65498catdog58719", RegexOptions.ECMAScript, new string[] { "catdog", "cat" } };
+                yield return new object[] { engine, null, @"cat([\s]*)dog", "wiocat   dog3270", RegexOptions.ECMAScript, new string[] { "cat   dog", "   " } };
+                yield return new object[] { engine, null, @"cat([\S]*)", "sfdcatdog    3270", RegexOptions.ECMAScript, new string[] { "catdog", "dog" } };
+                yield return new object[] { engine, null, @"cat([\w]*)", "sfdcatdog    3270", RegexOptions.ECMAScript, new string[] { "catdog", "dog" } };
+                yield return new object[] { engine, null, @"cat([\W]*)dog", "wiocat   dog3270", RegexOptions.ECMAScript, new string[] { "cat   dog", "   " } };
+                yield return new object[] { engine, null, @"([\p{Lu}]\w*)\s([\p{Lu}]\w*)", "Hello World", RegexOptions.ECMAScript, new string[] { "Hello World", "Hello", "World" } };
+                yield return new object[] { engine, null, @"([\P{Ll}][\p{Ll}]*)\s([\P{Ll}][\p{Ll}]*)", "Hello World", RegexOptions.ECMAScript, new string[] { "Hello World", "Hello", "World" } };
+
+                // \d, \D, \s, \S, \w, \W, \P, \p outside character range ([0-5]) with ECMA Option
+                yield return new object[] { engine, null, @"(cat)\d*dog", "hello123cat230927dog1412d", RegexOptions.ECMAScript, new string[] { "cat230927dog", "cat" } };
+                yield return new object[] { engine, null, @"\D*(dog)", "65498catdog58719", RegexOptions.ECMAScript, new string[] { "catdog", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\s*(dog)", "wiocat   dog3270", RegexOptions.ECMAScript, new string[] { "cat   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\S*", "sfdcatdog    3270", RegexOptions.ECMAScript, new string[] { "catdog", "cat" } };
+                yield return new object[] { engine, null, @"(cat)\w*", "sfdcatdog    3270", RegexOptions.ECMAScript, new string[] { "catdog", "cat" } };
+                yield return new object[] { engine, null, @"(cat)\W*(dog)", "wiocat   dog3270", RegexOptions.ECMAScript, new string[] { "cat   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"\p{Lu}(\w*)\s\p{Lu}(\w*)", "Hello World", RegexOptions.ECMAScript, new string[] { "Hello World", "ello", "orld" } };
+                yield return new object[] { engine, null, @"\P{Ll}\p{Ll}*\s\P{Ll}\p{Ll}*", "Hello World", RegexOptions.ECMAScript, new string[] { "Hello World" } };
+
+                // Use < in a group
+                yield return new object[] { engine, null, @"cat(?<dog121>dog)", "catcatdogdogcat", RegexOptions.None, new string[] { "catdog", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s*(?<cat>dog)", "catcat    dogdogcat", RegexOptions.None, new string[] { "cat    dog", "dog" } };
+                yield return new object[] { engine, null, @"(?<1>cat)\s*(?<1>dog)", "catcat    dogdogcat", RegexOptions.None, new string[] { "cat    dog", "dog" } };
+                yield return new object[] { engine, null, @"(?<2048>cat)\s*(?<2048>dog)", "catcat    dogdogcat", RegexOptions.None, new string[] { "cat    dog", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\w+(?<dog-cat>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "", "_Hello_World_" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\w+(?<-cat>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\w+(?<cat-cat>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "_Hello_World_" } };
+                yield return new object[] { engine, null, @"(?<1>cat)\w+(?<dog-1>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "", "_Hello_World_" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\w+(?<2-cat>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "", "_Hello_World_" } };
+                yield return new object[] { engine, null, @"(?<1>cat)\w+(?<2-1>dog)", "cat_Hello_World_dog", RegexOptions.None, new string[] { "cat_Hello_World_dog", "", "_Hello_World_" } };
+
+                // Quantifiers
+                yield return new object[] { engine, null, @"(?<cat>cat){", "STARTcat{", RegexOptions.None, new string[] { "cat{", "cat" } };
+                yield return new object[] { engine, null, @"(?<cat>cat){fdsa", "STARTcat{fdsa", RegexOptions.None, new string[] { "cat{fdsa", "cat" } };
+                yield return new object[] { engine, null, @"(?<cat>cat){1", "STARTcat{1", RegexOptions.None, new string[] { "cat{1", "cat" } };
+                yield return new object[] { engine, null, @"(?<cat>cat){1END", "STARTcat{1END", RegexOptions.None, new string[] { "cat{1END", "cat" } };
+                yield return new object[] { engine, null, @"(?<cat>cat){1,", "STARTcat{1,", RegexOptions.None, new string[] { "cat{1,", "cat" } };
+                yield return new object[] { engine, null, @"(?<cat>cat){1,END", "STARTcat{1,END", RegexOptions.None, new string[] { "cat{1,END", "cat" } };
+                yield return new object[] { engine, null, @"(?<cat>cat){1,2", "STARTcat{1,2", RegexOptions.None, new string[] { "cat{1,2", "cat" } };
+                yield return new object[] { engine, null, @"(?<cat>cat){1,2END", "STARTcat{1,2END", RegexOptions.None, new string[] { "cat{1,2END", "cat" } };
+
+                // Use IgnorePatternWhitespace
+                yield return new object[] { engine, null, @"(cat) #cat
                              \s+ #followed by 1 or more whitespace
                              (dog)  #followed by dog
                              ", "cat    dog", RegexOptions.IgnorePatternWhitespace, new string[] { "cat    dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat) #cat
+                yield return new object[] { engine, null, @"(cat) #cat
                              \s+ #followed by 1 or more whitespace
                              (dog)  #followed by dog", "cat    dog", RegexOptions.IgnorePatternWhitespace, new string[] { "cat    dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat) (?#cat)    \s+ (?#followed by 1 or more whitespace) (dog)  (?#followed by dog)", "cat    dog", RegexOptions.IgnorePatternWhitespace, new string[] { "cat    dog", "cat", "dog" } };
-
-            // Back Reference
-            yield return new object[] { null, @"(?<cat>cat)(?<dog>dog)\k<cat>", "asdfcatdogcatdog", RegexOptions.None, new string[] { "catdogcat", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\k<cat>", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\k'cat'", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\<cat>", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\'cat'", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
-
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\k<1>", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\k'1'", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\<1>", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\'1'", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\1", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\1", "asdfcat   dogcat   dog", RegexOptions.ECMAScript, new string[] { "cat   dogcat", "cat", "dog" } };
-
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\k<dog>", "asdfcat   dogdog   dog", RegexOptions.None, new string[] { "cat   dogdog", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\2", "asdfcat   dogdog   dog", RegexOptions.None, new string[] { "cat   dogdog", "cat", "dog" } };
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\2", "asdfcat   dogdog   dog", RegexOptions.ECMAScript, new string[] { "cat   dogdog", "cat", "dog" } };
-
-            // Octal
-            yield return new object[] { null, @"(cat)(\077)", "hellocat?dogworld", RegexOptions.None, new string[] { "cat?", "cat", "?" } };
-            yield return new object[] { null, @"(cat)(\77)", "hellocat?dogworld", RegexOptions.None, new string[] { "cat?", "cat", "?" } };
-            yield return new object[] { null, @"(cat)(\176)", "hellocat~dogworld", RegexOptions.None, new string[] { "cat~", "cat", "~" } };
-            yield return new object[] { null, @"(cat)(\400)", "hellocat\0dogworld", RegexOptions.None, new string[] { "cat\0", "cat", "\0" } };
-            yield return new object[] { null, @"(cat)(\300)", "hellocat\u00C0dogworld", RegexOptions.None, new string[] { "cat\u00C0", "cat", "\u00C0" } };
-            yield return new object[] { null, @"(cat)(\477)", "hellocat\u003Fdogworld", RegexOptions.None, new string[] { "cat\u003F", "cat", "\u003F" } };
-            yield return new object[] { null, @"(cat)(\777)", "hellocat\u00FFdogworld", RegexOptions.None, new string[] { "cat\u00FF", "cat", "\u00FF" } };
-            yield return new object[] { null, @"(cat)(\7770)", "hellocat\u00FF0dogworld", RegexOptions.None, new string[] { "cat\u00FF0", "cat", "\u00FF0" } };
-
-            yield return new object[] { null, @"(cat)(\077)", "hellocat?dogworld", RegexOptions.ECMAScript, new string[] { "cat?", "cat", "?" } };
-            yield return new object[] { null, @"(cat)(\77)", "hellocat?dogworld", RegexOptions.ECMAScript, new string[] { "cat?", "cat", "?" } };
-            yield return new object[] { null, @"(cat)(\7)", "hellocat\adogworld", RegexOptions.ECMAScript, new string[] { "cat\a", "cat", "\a" } };
-            yield return new object[] { null, @"(cat)(\40)", "hellocat dogworld", RegexOptions.ECMAScript, new string[] { "cat ", "cat", " " } };
-            yield return new object[] { null, @"(cat)(\040)", "hellocat dogworld", RegexOptions.ECMAScript, new string[] { "cat ", "cat", " " } };
-            yield return new object[] { null, @"(cat)(\176)", "hellocatcat76dogworld", RegexOptions.ECMAScript, new string[] { "catcat76", "cat", "cat76" } };
-            yield return new object[] { null, @"(cat)(\377)", "hellocat\u00FFdogworld", RegexOptions.ECMAScript, new string[] { "cat\u00FF", "cat", "\u00FF" } };
-            yield return new object[] { null, @"(cat)(\400)", "hellocat 0Fdogworld", RegexOptions.ECMAScript, new string[] { "cat 0", "cat", " 0" } };
-
-            // Decimal
-            yield return new object[] { null, @"(cat)\s+(?<2147483646>dog)", "asdlkcat  dogiwod", RegexOptions.None, new string[] { "cat  dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat)\s+(?<2147483647>dog)", "asdlkcat  dogiwod", RegexOptions.None, new string[] { "cat  dog", "cat", "dog" } };
-
-            // Hex
-            yield return new object[] { null, @"(cat)(\x2a*)(dog)", "asdlkcat***dogiwod", RegexOptions.None, new string[] { "cat***dog", "cat", "***", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2b*)(dog)", "asdlkcat+++dogiwod", RegexOptions.None, new string[] { "cat+++dog", "cat", "+++", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2c*)(dog)", "asdlkcat,,,dogiwod", RegexOptions.None, new string[] { "cat,,,dog", "cat", ",,,", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2d*)(dog)", "asdlkcat---dogiwod", RegexOptions.None, new string[] { "cat---dog", "cat", "---", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2e*)(dog)", "asdlkcat...dogiwod", RegexOptions.None, new string[] { "cat...dog", "cat", "...", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2f*)(dog)", "asdlkcat///dogiwod", RegexOptions.None, new string[] { "cat///dog", "cat", "///", "dog" } };
-
-            yield return new object[] { null, @"(cat)(\x2A*)(dog)", "asdlkcat***dogiwod", RegexOptions.None, new string[] { "cat***dog", "cat", "***", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2B*)(dog)", "asdlkcat+++dogiwod", RegexOptions.None, new string[] { "cat+++dog", "cat", "+++", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2C*)(dog)", "asdlkcat,,,dogiwod", RegexOptions.None, new string[] { "cat,,,dog", "cat", ",,,", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2D*)(dog)", "asdlkcat---dogiwod", RegexOptions.None, new string[] { "cat---dog", "cat", "---", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2E*)(dog)", "asdlkcat...dogiwod", RegexOptions.None, new string[] { "cat...dog", "cat", "...", "dog" } };
-            yield return new object[] { null, @"(cat)(\x2F*)(dog)", "asdlkcat///dogiwod", RegexOptions.None, new string[] { "cat///dog", "cat", "///", "dog" } };
-
-            // ScanControl
-            yield return new object[] { null, @"(cat)(\c@*)(dog)", "asdlkcat\0\0dogiwod", RegexOptions.None, new string[] { "cat\0\0dog", "cat", "\0\0", "dog" } };
-            yield return new object[] { null, @"(cat)(\cA*)(dog)", "asdlkcat\u0001dogiwod", RegexOptions.None, new string[] { "cat\u0001dog", "cat", "\u0001", "dog" } };
-            yield return new object[] { null, @"(cat)(\ca*)(dog)", "asdlkcat\u0001dogiwod", RegexOptions.None, new string[] { "cat\u0001dog", "cat", "\u0001", "dog" } };
-
-            yield return new object[] { null, @"(cat)(\cC*)(dog)", "asdlkcat\u0003dogiwod", RegexOptions.None, new string[] { "cat\u0003dog", "cat", "\u0003", "dog" } };
-            yield return new object[] { null, @"(cat)(\cc*)(dog)", "asdlkcat\u0003dogiwod", RegexOptions.None, new string[] { "cat\u0003dog", "cat", "\u0003", "dog" } };
-
-            yield return new object[] { null, @"(cat)(\cD*)(dog)", "asdlkcat\u0004dogiwod", RegexOptions.None, new string[] { "cat\u0004dog", "cat", "\u0004", "dog" } };
-            yield return new object[] { null, @"(cat)(\cd*)(dog)", "asdlkcat\u0004dogiwod", RegexOptions.None, new string[] { "cat\u0004dog", "cat", "\u0004", "dog" } };
-
-            yield return new object[] { null, @"(cat)(\cX*)(dog)", "asdlkcat\u0018dogiwod", RegexOptions.None, new string[] { "cat\u0018dog", "cat", "\u0018", "dog" } };
-            yield return new object[] { null, @"(cat)(\cx*)(dog)", "asdlkcat\u0018dogiwod", RegexOptions.None, new string[] { "cat\u0018dog", "cat", "\u0018", "dog" } };
-
-            yield return new object[] { null, @"(cat)(\cZ*)(dog)", "asdlkcat\u001adogiwod", RegexOptions.None, new string[] { "cat\u001adog", "cat", "\u001a", "dog" } };
-            yield return new object[] { null, @"(cat)(\cz*)(dog)", "asdlkcat\u001adogiwod", RegexOptions.None, new string[] { "cat\u001adog", "cat", "\u001a", "dog" } };
-
-            if (!PlatformDetection.IsNetFramework) // missing fix for https://github.com/dotnet/runtime/issues/24759
-            {
-                yield return new object[] { null, @"(cat)(\c[*)(dog)", "asdlkcat\u001bdogiwod", RegexOptions.None, new string[] { "cat\u001bdog", "cat", "\u001b", "dog" } };
-            }
+                yield return new object[] { engine, null, @"(cat) (?#cat)    \s+ (?#followed by 1 or more whitespace) (dog)  (?#followed by dog)", "cat    dog", RegexOptions.IgnorePatternWhitespace, new string[] { "cat    dog", "cat", "dog" } };
+
+                // Back Reference
+                yield return new object[] { engine, null, @"(?<cat>cat)(?<dog>dog)\k<cat>", "asdfcatdogcatdog", RegexOptions.None, new string[] { "catdogcat", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\k<cat>", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\k'cat'", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\<cat>", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\'cat'", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
+
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\k<1>", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\k'1'", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\<1>", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\'1'", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\1", "asdfcat   dogcat   dog", RegexOptions.None, new string[] { "cat   dogcat", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\1", "asdfcat   dogcat   dog", RegexOptions.ECMAScript, new string[] { "cat   dogcat", "cat", "dog" } };
+
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\k<dog>", "asdfcat   dogdog   dog", RegexOptions.None, new string[] { "cat   dogdog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\2", "asdfcat   dogdog   dog", RegexOptions.None, new string[] { "cat   dogdog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\2", "asdfcat   dogdog   dog", RegexOptions.ECMAScript, new string[] { "cat   dogdog", "cat", "dog" } };
+
+                // Octal
+                yield return new object[] { engine, null, @"(cat)(\077)", "hellocat?dogworld", RegexOptions.None, new string[] { "cat?", "cat", "?" } };
+                yield return new object[] { engine, null, @"(cat)(\77)", "hellocat?dogworld", RegexOptions.None, new string[] { "cat?", "cat", "?" } };
+                yield return new object[] { engine, null, @"(cat)(\176)", "hellocat~dogworld", RegexOptions.None, new string[] { "cat~", "cat", "~" } };
+                yield return new object[] { engine, null, @"(cat)(\400)", "hellocat\0dogworld", RegexOptions.None, new string[] { "cat\0", "cat", "\0" } };
+                yield return new object[] { engine, null, @"(cat)(\300)", "hellocat\u00C0dogworld", RegexOptions.None, new string[] { "cat\u00C0", "cat", "\u00C0" } };
+                yield return new object[] { engine, null, @"(cat)(\477)", "hellocat\u003Fdogworld", RegexOptions.None, new string[] { "cat\u003F", "cat", "\u003F" } };
+                yield return new object[] { engine, null, @"(cat)(\777)", "hellocat\u00FFdogworld", RegexOptions.None, new string[] { "cat\u00FF", "cat", "\u00FF" } };
+                yield return new object[] { engine, null, @"(cat)(\7770)", "hellocat\u00FF0dogworld", RegexOptions.None, new string[] { "cat\u00FF0", "cat", "\u00FF0" } };
+
+                yield return new object[] { engine, null, @"(cat)(\077)", "hellocat?dogworld", RegexOptions.ECMAScript, new string[] { "cat?", "cat", "?" } };
+                yield return new object[] { engine, null, @"(cat)(\77)", "hellocat?dogworld", RegexOptions.ECMAScript, new string[] { "cat?", "cat", "?" } };
+                yield return new object[] { engine, null, @"(cat)(\7)", "hellocat\adogworld", RegexOptions.ECMAScript, new string[] { "cat\a", "cat", "\a" } };
+                yield return new object[] { engine, null, @"(cat)(\40)", "hellocat dogworld", RegexOptions.ECMAScript, new string[] { "cat ", "cat", " " } };
+                yield return new object[] { engine, null, @"(cat)(\040)", "hellocat dogworld", RegexOptions.ECMAScript, new string[] { "cat ", "cat", " " } };
+                yield return new object[] { engine, null, @"(cat)(\176)", "hellocatcat76dogworld", RegexOptions.ECMAScript, new string[] { "catcat76", "cat", "cat76" } };
+                yield return new object[] { engine, null, @"(cat)(\377)", "hellocat\u00FFdogworld", RegexOptions.ECMAScript, new string[] { "cat\u00FF", "cat", "\u00FF" } };
+                yield return new object[] { engine, null, @"(cat)(\400)", "hellocat 0Fdogworld", RegexOptions.ECMAScript, new string[] { "cat 0", "cat", " 0" } };
+
+                // Decimal
+                yield return new object[] { engine, null, @"(cat)\s+(?<2147483646>dog)", "asdlkcat  dogiwod", RegexOptions.None, new string[] { "cat  dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(?<2147483647>dog)", "asdlkcat  dogiwod", RegexOptions.None, new string[] { "cat  dog", "cat", "dog" } };
+
+                // Hex
+                yield return new object[] { engine, null, @"(cat)(\x2a*)(dog)", "asdlkcat***dogiwod", RegexOptions.None, new string[] { "cat***dog", "cat", "***", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2b*)(dog)", "asdlkcat+++dogiwod", RegexOptions.None, new string[] { "cat+++dog", "cat", "+++", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2c*)(dog)", "asdlkcat,,,dogiwod", RegexOptions.None, new string[] { "cat,,,dog", "cat", ",,,", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2d*)(dog)", "asdlkcat---dogiwod", RegexOptions.None, new string[] { "cat---dog", "cat", "---", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2e*)(dog)", "asdlkcat...dogiwod", RegexOptions.None, new string[] { "cat...dog", "cat", "...", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2f*)(dog)", "asdlkcat///dogiwod", RegexOptions.None, new string[] { "cat///dog", "cat", "///", "dog" } };
+
+                yield return new object[] { engine, null, @"(cat)(\x2A*)(dog)", "asdlkcat***dogiwod", RegexOptions.None, new string[] { "cat***dog", "cat", "***", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2B*)(dog)", "asdlkcat+++dogiwod", RegexOptions.None, new string[] { "cat+++dog", "cat", "+++", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2C*)(dog)", "asdlkcat,,,dogiwod", RegexOptions.None, new string[] { "cat,,,dog", "cat", ",,,", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2D*)(dog)", "asdlkcat---dogiwod", RegexOptions.None, new string[] { "cat---dog", "cat", "---", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2E*)(dog)", "asdlkcat...dogiwod", RegexOptions.None, new string[] { "cat...dog", "cat", "...", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\x2F*)(dog)", "asdlkcat///dogiwod", RegexOptions.None, new string[] { "cat///dog", "cat", "///", "dog" } };
+
+                // ScanControl
+                yield return new object[] { engine, null, @"(cat)(\c@*)(dog)", "asdlkcat\0\0dogiwod", RegexOptions.None, new string[] { "cat\0\0dog", "cat", "\0\0", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\cA*)(dog)", "asdlkcat\u0001dogiwod", RegexOptions.None, new string[] { "cat\u0001dog", "cat", "\u0001", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\ca*)(dog)", "asdlkcat\u0001dogiwod", RegexOptions.None, new string[] { "cat\u0001dog", "cat", "\u0001", "dog" } };
+
+                yield return new object[] { engine, null, @"(cat)(\cC*)(dog)", "asdlkcat\u0003dogiwod", RegexOptions.None, new string[] { "cat\u0003dog", "cat", "\u0003", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\cc*)(dog)", "asdlkcat\u0003dogiwod", RegexOptions.None, new string[] { "cat\u0003dog", "cat", "\u0003", "dog" } };
+
+                yield return new object[] { engine, null, @"(cat)(\cD*)(dog)", "asdlkcat\u0004dogiwod", RegexOptions.None, new string[] { "cat\u0004dog", "cat", "\u0004", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\cd*)(dog)", "asdlkcat\u0004dogiwod", RegexOptions.None, new string[] { "cat\u0004dog", "cat", "\u0004", "dog" } };
+
+                yield return new object[] { engine, null, @"(cat)(\cX*)(dog)", "asdlkcat\u0018dogiwod", RegexOptions.None, new string[] { "cat\u0018dog", "cat", "\u0018", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\cx*)(dog)", "asdlkcat\u0018dogiwod", RegexOptions.None, new string[] { "cat\u0018dog", "cat", "\u0018", "dog" } };
+
+                yield return new object[] { engine, null, @"(cat)(\cZ*)(dog)", "asdlkcat\u001adogiwod", RegexOptions.None, new string[] { "cat\u001adog", "cat", "\u001a", "dog" } };
+                yield return new object[] { engine, null, @"(cat)(\cz*)(dog)", "asdlkcat\u001adogiwod", RegexOptions.None, new string[] { "cat\u001adog", "cat", "\u001a", "dog" } };
+
+                if (!PlatformDetection.IsNetFramework) // missing fix for https://github.com/dotnet/runtime/issues/24759
+                {
+                    yield return new object[] { engine, null, @"(cat)(\c[*)(dog)", "asdlkcat\u001bdogiwod", RegexOptions.None, new string[] { "cat\u001bdog", "cat", "\u001b", "dog" } };
+                }
  
-            // Atomic Zero-Width Assertions \A \G ^ \Z \z \b \B
-            //\A
-            yield return new object[] { null, @"\Acat\s+dog", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"\Acat\s+dog", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"\A(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"\A(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-
-            //\G
-            yield return new object[] { null, @"\Gcat\s+dog", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"\Gcat\s+dog", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"\Gcat\s+dog", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"\G(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"\G(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"\G(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-
-            //^
-            yield return new object[] { null, @"^cat\s+dog", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"^cat\s+dog", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"mouse\s\n^cat\s+dog", "mouse\n\ncat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat   \n\n\n   dog" } };
-            yield return new object[] { null, @"^cat\s+dog", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"^(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"^(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(mouse)\s\n^(cat)\s+(dog)", "mouse\n\ncat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat   \n\n\n   dog", "mouse", "cat", "dog" } };
-            yield return new object[] { null, @"^(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-
-            //\Z
-            yield return new object[] { null, @"cat\s+dog\Z", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"cat\s+dog\Z", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"cat\s+dog\Z", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"cat\s+dog\Z", "cat   \n\n\n   dog\n", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"cat\s+dog\Z", "cat   \n\n\n   dog\n", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"cat\s+dog\Z", "cat   \n\n\n   dog\n", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog\n", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog\n", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog\n", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-
-            //\z
-            yield return new object[] { null, @"cat\s+dog\z", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"cat\s+dog\z", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"cat\s+dog\z", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
-            yield return new object[] { null, @"(cat)\s+(dog)\z", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat)\s+(dog)\z", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-            yield return new object[] { null, @"(cat)\s+(dog)\z", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
-
-            //\b
-            yield return new object[] { null, @"\bcat\b", "cat", RegexOptions.None, new string[] { "cat" } };
-            yield return new object[] { null, @"\bcat\b", "dog cat mouse", RegexOptions.None, new string[] { "cat" } };
-            yield return new object[] { null, @"\bcat\b", "cat", RegexOptions.ECMAScript, new string[] { "cat" } };
-            yield return new object[] { null, @"\bcat\b", "dog cat mouse", RegexOptions.ECMAScript, new string[] { "cat" } };
-            yield return new object[] { null, @".*\bcat\b", "cat", RegexOptions.None, new string[] { "cat" } };
-            yield return new object[] { null, @".*\bcat\b", "dog cat mouse", RegexOptions.None, new string[] { "dog cat" } };
-            yield return new object[] { null, @".*\bcat\b", "cat", RegexOptions.ECMAScript, new string[] { "cat" } };
-            yield return new object[] { null, @".*\bcat\b", "dog cat mouse", RegexOptions.ECMAScript, new string[] { "dog cat" } };
-            yield return new object[] { null, @"\b@cat", "123START123@catEND", RegexOptions.None, new string[] { "@cat" } };
-            yield return new object[] { null, @"\b\<cat", "123START123<catEND", RegexOptions.None, new string[] { "<cat" } };
-            yield return new object[] { null, @"\b,cat", "satwe,,,START,catEND", RegexOptions.None, new string[] { ",cat" } };
-            yield return new object[] { null, @"\b\[cat", "`12START123[catEND", RegexOptions.None, new string[] { "[cat" } };
-
-            //\B
-            yield return new object[] { null, @"\Bcat\B", "dogcatmouse", RegexOptions.None, new string[] { "cat" } };
-            yield return new object[] { null, @"dog\Bcat\B", "dogcatmouse", RegexOptions.None, new string[] { "dogcat" } };
-            yield return new object[] { null, @".*\Bcat\B", "dogcatmouse", RegexOptions.None, new string[] { "dogcat" } };
-            yield return new object[] { null, @"\Bcat\B", "dogcatmouse", RegexOptions.ECMAScript, new string[] { "cat" } };
-            yield return new object[] { null, @"dog\Bcat\B", "dogcatmouse", RegexOptions.ECMAScript, new string[] { "dogcat" } };
-            yield return new object[] { null, @".*\Bcat\B", "dogcatmouse", RegexOptions.ECMAScript, new string[] { "dogcat" } };
-            yield return new object[] { null, @"\B@cat", "123START123;@catEND", RegexOptions.None, new string[] { "@cat" } };
-            yield return new object[] { null, @"\B\<cat", "123START123'<catEND", RegexOptions.None, new string[] { "<cat" } };
-            yield return new object[] { null, @"\B,cat", "satwe,,,START',catEND", RegexOptions.None, new string[] { ",cat" } };
-            yield return new object[] { null, @"\B\[cat", "`12START123'[catEND", RegexOptions.None, new string[] { "[cat" } };
-
-            // \w matching \p{Lm} (Letter, Modifier)
-            yield return new object[] { null, @"\w+\s+\w+", "cat\u02b0 dog\u02b1", RegexOptions.None, new string[] { "cat\u02b0 dog\u02b1" } };
-            yield return new object[] { null, @"cat\w+\s+dog\w+", "STARTcat\u30FC dog\u3005END", RegexOptions.None, new string[] { "cat\u30FC dog\u3005END" } };
-            yield return new object[] { null, @"cat\w+\s+dog\w+", "STARTcat\uff9e dog\uff9fEND", RegexOptions.None, new string[] { "cat\uff9e dog\uff9fEND" } };
-            yield return new object[] { null, @"(\w+)\s+(\w+)", "cat\u02b0 dog\u02b1", RegexOptions.None, new string[] { "cat\u02b0 dog\u02b1", "cat\u02b0", "dog\u02b1" } };
-            yield return new object[] { null, @"(cat\w+)\s+(dog\w+)", "STARTcat\u30FC dog\u3005END", RegexOptions.None, new string[] { "cat\u30FC dog\u3005END", "cat\u30FC", "dog\u3005END" } };
-            yield return new object[] { null, @"(cat\w+)\s+(dog\w+)", "STARTcat\uff9e dog\uff9fEND", RegexOptions.None, new string[] { "cat\uff9e dog\uff9fEND", "cat\uff9e", "dog\uff9fEND" } };
-
-            // Positive and negative character classes [a-c]|[^b-c]
-            yield return new object[] { null, @"[^a]|d", "d", RegexOptions.None, new string[] { "d" } };
-            yield return new object[] { null, @"([^a]|[d])*", "Hello Worlddf", RegexOptions.None, new string[] { "Hello Worlddf", "f" } };
-            yield return new object[] { null, @"([^{}]|\n)+", "{{{{Hello\n World \n}END", RegexOptions.None, new string[] { "Hello\n World \n", "\n" } };
-            yield return new object[] { null, @"([a-d]|[^abcd])+", "\tonce\n upon\0 a- ()*&^%#time?", RegexOptions.None, new string[] { "\tonce\n upon\0 a- ()*&^%#time?", "?" } };
-            yield return new object[] { null, @"([^a]|[a])*", "once upon a time", RegexOptions.None, new string[] { "once upon a time", "e" } };
-            yield return new object[] { null, @"([a-d]|[^abcd]|[x-z]|^wxyz])+", "\tonce\n upon\0 a- ()*&^%#time?", RegexOptions.None, new string[] { "\tonce\n upon\0 a- ()*&^%#time?", "?" } };
-            yield return new object[] { null, @"([a-d]|[e-i]|[^e]|wxyz])+", "\tonce\n upon\0 a- ()*&^%#time?", RegexOptions.None, new string[] { "\tonce\n upon\0 a- ()*&^%#time?", "?" } };
-
-            // Canonical and noncanonical char class, where one group is in it's
-            // simplest form [a-e] and another is more complex.
-            yield return new object[] { null, @"^(([^b]+ )|(.* ))$", "aaa ", RegexOptions.None, new string[] { "aaa ", "aaa ", "aaa ", "" } };
-            yield return new object[] { null, @"^(([^b]+ )|(.*))$", "aaa", RegexOptions.None, new string[] { "aaa", "aaa", "", "aaa" } };
-            yield return new object[] { null, @"^(([^b]+ )|(.* ))$", "bbb ", RegexOptions.None, new string[] { "bbb ", "bbb ", "", "bbb " } };
-            yield return new object[] { null, @"^(([^b]+ )|(.*))$", "bbb", RegexOptions.None, new string[] { "bbb", "bbb", "", "bbb" } };
-            yield return new object[] { null, @"^((a*)|(.*))$", "aaa", RegexOptions.None, new string[] { "aaa", "aaa", "aaa", "" } };
-            yield return new object[] { null, @"^((a*)|(.*))$", "aaabbb", RegexOptions.None, new string[] { "aaabbb", "aaabbb", "", "aaabbb" } };
-
-            yield return new object[] { null, @"(([0-9])|([a-z])|([A-Z]))*", "{hello 1234567890 world}", RegexOptions.None, new string[] { "", "", "", "", "" } };
-            yield return new object[] { null, @"(([0-9])|([a-z])|([A-Z]))+", "{hello 1234567890 world}", RegexOptions.None, new string[] { "hello", "o", "", "o", "" } };
-            yield return new object[] { null, @"(([0-9])|([a-z])|([A-Z]))*", "{HELLO 1234567890 world}", RegexOptions.None, new string[] { "", "", "", "", "" } };
-            yield return new object[] { null, @"(([0-9])|([a-z])|([A-Z]))+", "{HELLO 1234567890 world}", RegexOptions.None, new string[] { "HELLO", "O", "", "", "O" } };
-            yield return new object[] { null, @"(([0-9])|([a-z])|([A-Z]))*", "{1234567890 hello  world}", RegexOptions.None, new string[] { "", "", "", "", "" } };
-            yield return new object[] { null, @"(([0-9])|([a-z])|([A-Z]))+", "{1234567890 hello world}", RegexOptions.None, new string[] { "1234567890", "0", "0", "", "" } };
-
-            yield return new object[] { null, @"^(([a-d]*)|([a-z]*))$", "aaabbbcccdddeeefff", RegexOptions.None, new string[] { "aaabbbcccdddeeefff", "aaabbbcccdddeeefff", "", "aaabbbcccdddeeefff" } };
-            yield return new object[] { null, @"^(([d-f]*)|([c-e]*))$", "dddeeeccceee", RegexOptions.None, new string[] { "dddeeeccceee", "dddeeeccceee", "", "dddeeeccceee" } };
-            yield return new object[] { null, @"^(([c-e]*)|([d-f]*))$", "dddeeeccceee", RegexOptions.None, new string[] { "dddeeeccceee", "dddeeeccceee", "dddeeeccceee", "" } };
-
-            // Different match in NonBackTracking when order of alternations does not matter
-            yield return new object[] { null, @"(([a-d]*)|([a-z]*))", "aaabbbcccdddeeefff", RegexOptions.None, new string[] { "aaabbbcccddd", "aaabbbcccddd", "aaabbbcccddd", "" }, "aaabbbcccdddeeefff" }; // <-- Nonbacktracking match same as for "(([a-z]*)|([a-d]*))"
-            yield return new object[] { null, @"(([d-f]*)|([c-e]*))", "dddeeeccceee", RegexOptions.None, new string[] { "dddeee", "dddeee", "dddeee", "" }, "dddeeeccceee" }; // <-- Nonbacktracking match same as for "(([c-e]*)|([d-f]*))"
-            yield return new object[] { null, @"(([c-e]*)|([d-f]*))", "dddeeeccceee", RegexOptions.None, new string[] { "dddeeeccceee", "dddeeeccceee", "dddeeeccceee", "" } };
-
-            yield return new object[] { null, @"(([a-d]*)|(.*))", "aaabbbcccdddeeefff", RegexOptions.None, new string[] { "aaabbbcccddd", "aaabbbcccddd", "aaabbbcccddd", "" }, "aaabbbcccdddeeefff" }; // <-- Nonbacktracking match same as for ".*"
-            yield return new object[] { null, @"(([d-f]*)|(.*))", "dddeeeccceee", RegexOptions.None, new string[] { "dddeee", "dddeee", "dddeee", "" }, "dddeeeccceee" }; // <-- Nonbacktracking match same as for ".*"
-            yield return new object[] { null, @"(([c-e]*)|(.*))", "dddeeeccceee", RegexOptions.None, new string[] { "dddeeeccceee", "dddeeeccceee", "dddeeeccceee", "" } };
-
-            // \p{Pi} (Punctuation Initial quote) \p{Pf} (Punctuation Final quote)
-            yield return new object[] { null, @"\p{Pi}(\w*)\p{Pf}", "\u00ABCat\u00BB   \u00BBDog\u00AB'", RegexOptions.None, new string[] { "\u00ABCat\u00BB", "Cat" } };
-            yield return new object[] { null, @"\p{Pi}(\w*)\p{Pf}", "\u2018Cat\u2019   \u2019Dog\u2018'", RegexOptions.None, new string[] { "\u2018Cat\u2019", "Cat" } };
-
-            // ECMAScript
-            yield return new object[] { null, @"(?<cat>cat)\s+(?<dog>dog)\s+\123\s+\234", "asdfcat   dog     cat23    dog34eia", RegexOptions.ECMAScript, new string[] { "cat   dog     cat23    dog34", "cat", "dog" } };
-
-            // Balanced Matching
-            yield return new object[] { null, @"<div>
+                // Atomic Zero-Width Assertions \A \G ^ \Z \z \b \B
+                //\A
+                yield return new object[] { engine, null, @"\Acat\s+dog", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"\Acat\s+dog", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"\A(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"\A(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+
+                //\G
+                yield return new object[] { engine, null, @"\Gcat\s+dog", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"\Gcat\s+dog", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"\Gcat\s+dog", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"\G(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"\G(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"\G(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+
+                //^
+                yield return new object[] { engine, null, @"^cat\s+dog", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"^cat\s+dog", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"mouse\s\n^cat\s+dog", "mouse\n\ncat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"^cat\s+dog", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"^(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"^(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(mouse)\s\n^(cat)\s+(dog)", "mouse\n\ncat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "mouse\n\ncat   \n\n\n   dog", "mouse", "cat", "dog" } };
+                yield return new object[] { engine, null, @"^(cat)\s+(dog)", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+
+                //\Z
+                yield return new object[] { engine, null, @"cat\s+dog\Z", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"cat\s+dog\Z", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"cat\s+dog\Z", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"cat\s+dog\Z", "cat   \n\n\n   dog\n", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"cat\s+dog\Z", "cat   \n\n\n   dog\n", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"cat\s+dog\Z", "cat   \n\n\n   dog\n", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog\n", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog\n", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(dog)\Z", "cat   \n\n\n   dog\n", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+
+                //\z
+                yield return new object[] { engine, null, @"cat\s+dog\z", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"cat\s+dog\z", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"cat\s+dog\z", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(dog)\z", "cat   \n\n\n   dog", RegexOptions.None, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(dog)\z", "cat   \n\n\n   dog", RegexOptions.Multiline, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+                yield return new object[] { engine, null, @"(cat)\s+(dog)\z", "cat   \n\n\n   dog", RegexOptions.ECMAScript, new string[] { "cat   \n\n\n   dog", "cat", "dog" } };
+
+                //\b
+                yield return new object[] { engine, null, @"\bcat\b", "cat", RegexOptions.None, new string[] { "cat" } };
+                yield return new object[] { engine, null, @"\bcat\b", "dog cat mouse", RegexOptions.None, new string[] { "cat" } };
+                yield return new object[] { engine, null, @"\bcat\b", "cat", RegexOptions.ECMAScript, new string[] { "cat" } };
+                yield return new object[] { engine, null, @"\bcat\b", "dog cat mouse", RegexOptions.ECMAScript, new string[] { "cat" } };
+                yield return new object[] { engine, null, @".*\bcat\b", "cat", RegexOptions.None, new string[] { "cat" } };
+                yield return new object[] { engine, null, @".*\bcat\b", "dog cat mouse", RegexOptions.None, new string[] { "dog cat" } };
+                yield return new object[] { engine, null, @".*\bcat\b", "cat", RegexOptions.ECMAScript, new string[] { "cat" } };
+                yield return new object[] { engine, null, @".*\bcat\b", "dog cat mouse", RegexOptions.ECMAScript, new string[] { "dog cat" } };
+                yield return new object[] { engine, null, @"\b@cat", "123START123@catEND", RegexOptions.None, new string[] { "@cat" } };
+                yield return new object[] { engine, null, @"\b\<cat", "123START123<catEND", RegexOptions.None, new string[] { "<cat" } };
+                yield return new object[] { engine, null, @"\b,cat", "satwe,,,START,catEND", RegexOptions.None, new string[] { ",cat" } };
+                yield return new object[] { engine, null, @"\b\[cat", "`12START123[catEND", RegexOptions.None, new string[] { "[cat" } };
+
+                //\B
+                yield return new object[] { engine, null, @"\Bcat\B", "dogcatmouse", RegexOptions.None, new string[] { "cat" } };
+                yield return new object[] { engine, null, @"dog\Bcat\B", "dogcatmouse", RegexOptions.None, new string[] { "dogcat" } };
+                yield return new object[] { engine, null, @".*\Bcat\B", "dogcatmouse", RegexOptions.None, new string[] { "dogcat" } };
+                yield return new object[] { engine, null, @"\Bcat\B", "dogcatmouse", RegexOptions.ECMAScript, new string[] { "cat" } };
+                yield return new object[] { engine, null, @"dog\Bcat\B", "dogcatmouse", RegexOptions.ECMAScript, new string[] { "dogcat" } };
+                yield return new object[] { engine, null, @".*\Bcat\B", "dogcatmouse", RegexOptions.ECMAScript, new string[] { "dogcat" } };
+                yield return new object[] { engine, null, @"\B@cat", "123START123;@catEND", RegexOptions.None, new string[] { "@cat" } };
+                yield return new object[] { engine, null, @"\B\<cat", "123START123'<catEND", RegexOptions.None, new string[] { "<cat" } };
+                yield return new object[] { engine, null, @"\B,cat", "satwe,,,START',catEND", RegexOptions.None, new string[] { ",cat" } };
+                yield return new object[] { engine, null, @"\B\[cat", "`12START123'[catEND", RegexOptions.None, new string[] { "[cat" } };
+
+                // \w matching \p{Lm} (Letter, Modifier)
+                yield return new object[] { engine, null, @"\w+\s+\w+", "cat\u02b0 dog\u02b1", RegexOptions.None, new string[] { "cat\u02b0 dog\u02b1" } };
+                yield return new object[] { engine, null, @"cat\w+\s+dog\w+", "STARTcat\u30FC dog\u3005END", RegexOptions.None, new string[] { "cat\u30FC dog\u3005END" } };
+                yield return new object[] { engine, null, @"cat\w+\s+dog\w+", "STARTcat\uff9e dog\uff9fEND", RegexOptions.None, new string[] { "cat\uff9e dog\uff9fEND" } };
+                yield return new object[] { engine, null, @"(\w+)\s+(\w+)", "cat\u02b0 dog\u02b1", RegexOptions.None, new string[] { "cat\u02b0 dog\u02b1", "cat\u02b0", "dog\u02b1" } };
+                yield return new object[] { engine, null, @"(cat\w+)\s+(dog\w+)", "STARTcat\u30FC dog\u3005END", RegexOptions.None, new string[] { "cat\u30FC dog\u3005END", "cat\u30FC", "dog\u3005END" } };
+                yield return new object[] { engine, null, @"(cat\w+)\s+(dog\w+)", "STARTcat\uff9e dog\uff9fEND", RegexOptions.None, new string[] { "cat\uff9e dog\uff9fEND", "cat\uff9e", "dog\uff9fEND" } };
+
+                // Positive and negative character classes [a-c]|[^b-c]
+                yield return new object[] { engine, null, @"[^a]|d", "d", RegexOptions.None, new string[] { "d" } };
+                yield return new object[] { engine, null, @"([^a]|[d])*", "Hello Worlddf", RegexOptions.None, new string[] { "Hello Worlddf", "f" } };
+                yield return new object[] { engine, null, @"([^{}]|\n)+", "{{{{Hello\n World \n}END", RegexOptions.None, new string[] { "Hello\n World \n", "\n" } };
+                yield return new object[] { engine, null, @"([a-d]|[^abcd])+", "\tonce\n upon\0 a- ()*&^%#time?", RegexOptions.None, new string[] { "\tonce\n upon\0 a- ()*&^%#time?", "?" } };
+                yield return new object[] { engine, null, @"([^a]|[a])*", "once upon a time", RegexOptions.None, new string[] { "once upon a time", "e" } };
+                yield return new object[] { engine, null, @"([a-d]|[^abcd]|[x-z]|^wxyz])+", "\tonce\n upon\0 a- ()*&^%#time?", RegexOptions.None, new string[] { "\tonce\n upon\0 a- ()*&^%#time?", "?" } };
+                yield return new object[] { engine, null, @"([a-d]|[e-i]|[^e]|wxyz])+", "\tonce\n upon\0 a- ()*&^%#time?", RegexOptions.None, new string[] { "\tonce\n upon\0 a- ()*&^%#time?", "?" } };
+
+                // Canonical and noncanonical char class, where one group is in its
+                // simplest form [a-e] and another is more complex.
+                yield return new object[] { engine, null, @"^(([^b]+ )|(.* ))$", "aaa ", RegexOptions.None, new string[] { "aaa ", "aaa ", "aaa ", "" } };
+                yield return new object[] { engine, null, @"^(([^b]+ )|(.*))$", "aaa", RegexOptions.None, new string[] { "aaa", "aaa", "", "aaa" } };
+                yield return new object[] { engine, null, @"^(([^b]+ )|(.* ))$", "bbb ", RegexOptions.None, new string[] { "bbb ", "bbb ", "", "bbb " } };
+                yield return new object[] { engine, null, @"^(([^b]+ )|(.*))$", "bbb", RegexOptions.None, new string[] { "bbb", "bbb", "", "bbb" } };
+                yield return new object[] { engine, null, @"^((a*)|(.*))$", "aaa", RegexOptions.None, new string[] { "aaa", "aaa", "aaa", "" } };
+                yield return new object[] { engine, null, @"^((a*)|(.*))$", "aaabbb", RegexOptions.None, new string[] { "aaabbb", "aaabbb", "", "aaabbb" } };
+
+                yield return new object[] { engine, null, @"(([0-9])|([a-z])|([A-Z]))*", "{hello 1234567890 world}", RegexOptions.None, new string[] { "", "", "", "", "" } };
+                yield return new object[] { engine, null, @"(([0-9])|([a-z])|([A-Z]))+", "{hello 1234567890 world}", RegexOptions.None, new string[] { "hello", "o", "", "o", "" } };
+                yield return new object[] { engine, null, @"(([0-9])|([a-z])|([A-Z]))*", "{HELLO 1234567890 world}", RegexOptions.None, new string[] { "", "", "", "", "" } };
+                yield return new object[] { engine, null, @"(([0-9])|([a-z])|([A-Z]))+", "{HELLO 1234567890 world}", RegexOptions.None, new string[] { "HELLO", "O", "", "", "O" } };
+                yield return new object[] { engine, null, @"(([0-9])|([a-z])|([A-Z]))*", "{1234567890 hello  world}", RegexOptions.None, new string[] { "", "", "", "", "" } };
+                yield return new object[] { engine, null, @"(([0-9])|([a-z])|([A-Z]))+", "{1234567890 hello world}", RegexOptions.None, new string[] { "1234567890", "0", "0", "", "" } };
+
+                yield return new object[] { engine, null, @"^(([a-d]*)|([a-z]*))$", "aaabbbcccdddeeefff", RegexOptions.None, new string[] { "aaabbbcccdddeeefff", "aaabbbcccdddeeefff", "", "aaabbbcccdddeeefff" } };
+                yield return new object[] { engine, null, @"^(([d-f]*)|([c-e]*))$", "dddeeeccceee", RegexOptions.None, new string[] { "dddeeeccceee", "dddeeeccceee", "", "dddeeeccceee" } };
+                yield return new object[] { engine, null, @"^(([c-e]*)|([d-f]*))$", "dddeeeccceee", RegexOptions.None, new string[] { "dddeeeccceee", "dddeeeccceee", "dddeeeccceee", "" } };
+
+                // Different match in NonBackTracking when order of alternations does not matter
+                yield return new object[] { engine, null, @"(([a-d]*)|([a-z]*))", "aaabbbcccdddeeefff", RegexOptions.None, new string[] { "aaabbbcccddd", "aaabbbcccddd", "aaabbbcccddd", "" }, "aaabbbcccdddeeefff" }; // <-- Nonbacktracking match same as for "(([a-z]*)|([a-d]*))"
+                yield return new object[] { engine, null, @"(([d-f]*)|([c-e]*))", "dddeeeccceee", RegexOptions.None, new string[] { "dddeee", "dddeee", "dddeee", "" }, "dddeeeccceee" }; // <-- Nonbacktracking match same as for "(([c-e]*)|([d-f]*))"
+                yield return new object[] { engine, null, @"(([c-e]*)|([d-f]*))", "dddeeeccceee", RegexOptions.None, new string[] { "dddeeeccceee", "dddeeeccceee", "dddeeeccceee", "" } };
+
+                yield return new object[] { engine, null, @"(([a-d]*)|(.*))", "aaabbbcccdddeeefff", RegexOptions.None, new string[] { "aaabbbcccddd", "aaabbbcccddd", "aaabbbcccddd", "" }, "aaabbbcccdddeeefff" }; // <-- Nonbacktracking match same as for ".*"
+                yield return new object[] { engine, null, @"(([d-f]*)|(.*))", "dddeeeccceee", RegexOptions.None, new string[] { "dddeee", "dddeee", "dddeee", "" }, "dddeeeccceee" }; // <-- Nonbacktracking match same as for ".*"
+                yield return new object[] { engine, null, @"(([c-e]*)|(.*))", "dddeeeccceee", RegexOptions.None, new string[] { "dddeeeccceee", "dddeeeccceee", "dddeeeccceee", "" } };
+
+                // \p{Pi} (Punctuation Initial quote) \p{Pf} (Punctuation Final quote)
+                yield return new object[] { engine, null, @"\p{Pi}(\w*)\p{Pf}", "\u00ABCat\u00BB   \u00BBDog\u00AB'", RegexOptions.None, new string[] { "\u00ABCat\u00BB", "Cat" } };
+                yield return new object[] { engine, null, @"\p{Pi}(\w*)\p{Pf}", "\u2018Cat\u2019   \u2019Dog\u2018'", RegexOptions.None, new string[] { "\u2018Cat\u2019", "Cat" } };
+
+                // ECMAScript
+                yield return new object[] { engine, null, @"(?<cat>cat)\s+(?<dog>dog)\s+\123\s+\234", "asdfcat   dog     cat23    dog34eia", RegexOptions.ECMAScript, new string[] { "cat   dog     cat23    dog34", "cat", "dog" } };
+
+                // Balanced Matching
+                yield return new object[] { engine, null, @"<div>
              (?>
                  <div>(?<DEPTH>) |
                  </div> (?<-DEPTH>) |
@@ -529,374 +531,390 @@ namespace System.Text.RegularExpressions.Tests
              (?(DEPTH)(?!))
              </div>", "<div>this is some <div>red</div> text</div></div></div>", RegexOptions.IgnorePatternWhitespace, new string[] { "<div>this is some <div>red</div> text</div>", "" } };
  
-            yield return new object[] { null, @"(
+                yield return new object[] { engine, null, @"(
              ((?'open'<+)[^<>]*)+
              ((?'close-open'>+)[^<>]*)+
              )+", "<01deep_01<02deep_01<03deep_01>><02deep_02><02deep_03<03deep_03>>>", RegexOptions.IgnorePatternWhitespace, new string[] { "<01deep_01<02deep_01<03deep_01>><02deep_02><02deep_03<03deep_03>>>", "<02deep_03<03deep_03>>>", "<03deep_03", ">>>", "<", "03deep_03" } };
  
-            yield return new object[] { null, @"(
+                yield return new object[] { engine, null, @"(
              (?<start><)?
              [^<>]?
              (?<end-start>>)?
              )*", "<01deep_01<02deep_01<03deep_01>><02deep_02><02deep_03<03deep_03>>>", RegexOptions.IgnorePatternWhitespace, new string[] { "<01deep_01<02deep_01<03deep_01>><02deep_02><02deep_03<03deep_03>>>", "", "", "01deep_01<02deep_01<03deep_01>><02deep_02><02deep_03<03deep_03>>" } };
  
-            yield return new object[] { null, @"(
+                yield return new object[] { engine, null, @"(
              (?<start><[^/<>]*>)?
              [^<>]?
              (?<end-start></[^/<>]*>)?
              )*", "<b><a>Cat</a></b>", RegexOptions.IgnorePatternWhitespace, new string[] { "<b><a>Cat</a></b>", "", "", "<a>Cat</a>" } };
  
-            yield return new object[] { null, @"(
+                yield return new object[] { engine, null, @"(
              (?<start><(?<TagName>[^/<>]*)>)?
              [^<>]?
              (?<end-start></\k<TagName>>)?
              )*", "<b>cat</b><a>dog</a>", RegexOptions.IgnorePatternWhitespace, new string[] { "<b>cat</b><a>dog</a>", "", "", "a", "dog" } };
  
-            // Balanced Matching With Backtracking
-            yield return new object[] { null, @"(
+                // Balanced Matching With Backtracking
+                yield return new object[] { engine, null, @"(
              (?<start><[^/<>]*>)?
              .?
              (?<end-start></[^/<>]*>)?
              )*
              (?(start)(?!)) ", "<b><a>Cat</a></b><<<<c>>>><<d><e<f>><g><<<>>>>", RegexOptions.IgnorePatternWhitespace, new string[] { "<b><a>Cat</a></b><<<<c>>>><<d><e<f>><g><<<>>>>", "", "", "<a>Cat" } };
  
-            // Character Classes and Lazy quantifier
-            yield return new object[] { null, @"([0-9]+?)([\w]+?)", "55488aheiaheiad", RegexOptions.ECMAScript, new string[] { "55", "5", "5" } };
-            yield return new object[] { null, @"([0-9]+?)([a-z]+?)", "55488aheiaheiad", RegexOptions.ECMAScript, new string[] { "55488a", "55488", "a" } };
+                // Character Classes and Lazy quantifier
+                yield return new object[] { engine, null, @"([0-9]+?)([\w]+?)", "55488aheiaheiad", RegexOptions.ECMAScript, new string[] { "55", "5", "5" } };
+                yield return new object[] { engine, null, @"([0-9]+?)([a-z]+?)", "55488aheiaheiad", RegexOptions.ECMAScript, new string[] { "55488a", "55488", "a" } };
  
-            // Miscellaneous/Regression scenarios
-            yield return new object[] { null, @"(?<openingtag>1)(?<content>.*?)(?=2)", "1" + Environment.NewLine + "<Projecaa DefaultTargets=\"x\"/>" + Environment.NewLine + "2", RegexOptions.Singleline | RegexOptions.ExplicitCapture,
+                // Miscellaneous/Regression scenarios
+                yield return new object[] { engine, null, @"(?<openingtag>1)(?<content>.*?)(?=2)", "1" + Environment.NewLine + "<Projecaa DefaultTargets=\"x\"/>" + Environment.NewLine + "2", RegexOptions.Singleline | RegexOptions.ExplicitCapture,
              new string[] { "1" + Environment.NewLine + "<Projecaa DefaultTargets=\"x\"/>" + Environment.NewLine, "1", Environment.NewLine + "<Projecaa DefaultTargets=\"x\"/>"+ Environment.NewLine } };
  
-            yield return new object[] { null, @"\G<%#(?<code>.*?)?%>", @"<%# DataBinder.Eval(this, ""MyNumber"") %>", RegexOptions.Singleline, new string[] { @"<%# DataBinder.Eval(this, ""MyNumber"") %>", @" DataBinder.Eval(this, ""MyNumber"") " } };
-
-            // Nested Quantifiers
-            yield return new object[] { null, @"^[abcd]{0,0x10}*$", "a{0,0x10}}}", RegexOptions.None, new string[] { "a{0,0x10}}}" } };
-
-            // Lazy operator Backtracking
-            yield return new object[] { null, @"http://([a-zA-z0-9\-]*\.?)*?(:[0-9]*)??/", "http://www.msn.com/", RegexOptions.IgnoreCase, new string[] { "http://www.msn.com/", "com", string.Empty } };
-            yield return new object[] { null, @"http://([a-zA-Z0-9\-]*\.?)*?/", @"http://www.google.com/", RegexOptions.IgnoreCase, new string[] { "http://www.google.com/", "com" } };
-
-            yield return new object[] { null, @"([a-z]*?)([\w])", "cat", RegexOptions.IgnoreCase, new string[] { "c", string.Empty, "c" } };
-            yield return new object[] { null, @"^([a-z]*?)([\w])$", "cat", RegexOptions.IgnoreCase, new string[] { "cat", "ca", "t" } };
-
-            // Backtracking
-            yield return new object[] { null, @"([a-z]*)([\w])", "cat", RegexOptions.IgnoreCase, new string[] { "cat", "ca", "t" } };
-            yield return new object[] { null, @"^([a-z]*)([\w])$", "cat", RegexOptions.IgnoreCase, new string[] { "cat", "ca", "t" } };
-
-            // Backtracking with multiple (.*) groups -- important ASP.NET scenario
-            yield return new object[] { null, @"(.*)/(.*).aspx", "/.aspx", RegexOptions.None, new string[] { "/.aspx", string.Empty, string.Empty } };
-            yield return new object[] { null, @"(.*)/(.*).aspx", "/homepage.aspx", RegexOptions.None, new string[] { "/homepage.aspx", string.Empty, "homepage" } };
-            yield return new object[] { null, @"(.*)/(.*).aspx", "pages/.aspx", RegexOptions.None, new string[] { "pages/.aspx", "pages", string.Empty } };
-            yield return new object[] { null, @"(.*)/(.*).aspx", "pages/homepage.aspx", RegexOptions.None, new string[] { "pages/homepage.aspx", "pages", "homepage" } };
-            yield return new object[] { null, @"(.*)/(.*).aspx", "/pages/homepage.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx", "/pages", "homepage" } };
-            yield return new object[] { null, @"(.*)/(.*).aspx", "/pages/homepage/index.aspx", RegexOptions.None, new string[] { "/pages/homepage/index.aspx", "/pages/homepage", "index" } };
-            yield return new object[] { null, @"(.*)/(.*).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages/homepage.aspx", "index" } };
-            yield return new object[] { null, @"(.*)/(.*)/(.*).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages", "homepage.aspx", "index" } };
-
-            // Backtracking with multiple (.+) groups
-            yield return new object[] { null, @"(.+)/(.+).aspx", "pages/homepage.aspx", RegexOptions.None, new string[] { "pages/homepage.aspx", "pages", "homepage" } };
-            yield return new object[] { null, @"(.+)/(.+).aspx", "/pages/homepage.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx", "/pages", "homepage" } };
-            yield return new object[] { null, @"(.+)/(.+).aspx", "/pages/homepage/index.aspx", RegexOptions.None, new string[] { "/pages/homepage/index.aspx", "/pages/homepage", "index" } };
-            yield return new object[] { null, @"(.+)/(.+).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages/homepage.aspx", "index" } };
-            yield return new object[] { null, @"(.+)/(.+)/(.+).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages", "homepage.aspx", "index" } };
-
-            // Backtracking with (.+) group followed by (.*)
-            yield return new object[] { null, @"(.+)/(.*).aspx", "pages/.aspx", RegexOptions.None, new string[] { "pages/.aspx", "pages", string.Empty } };
-            yield return new object[] { null, @"(.+)/(.*).aspx", "pages/homepage.aspx", RegexOptions.None, new string[] { "pages/homepage.aspx", "pages", "homepage" } };
-            yield return new object[] { null, @"(.+)/(.*).aspx", "/pages/homepage.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx", "/pages", "homepage" } };
-            yield return new object[] { null, @"(.+)/(.*).aspx", "/pages/homepage/index.aspx", RegexOptions.None, new string[] { "/pages/homepage/index.aspx", "/pages/homepage", "index" } };
-            yield return new object[] { null, @"(.+)/(.*).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages/homepage.aspx", "index" } };
-            yield return new object[] { null, @"(.+)/(.*)/(.*).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages", "homepage.aspx", "index" } };
-
-            // Backtracking with (.*) group followed by (.+)
-            yield return new object[] { null, @"(.*)/(.+).aspx", "/homepage.aspx", RegexOptions.None, new string[] { "/homepage.aspx", string.Empty, "homepage" } };
-            yield return new object[] { null, @"(.*)/(.+).aspx", "pages/homepage.aspx", RegexOptions.None, new string[] { "pages/homepage.aspx", "pages", "homepage" } };
-            yield return new object[] { null, @"(.*)/(.+).aspx", "/pages/homepage.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx", "/pages", "homepage" } };
-            yield return new object[] { null, @"(.*)/(.+).aspx", "/pages/homepage/index.aspx", RegexOptions.None, new string[] { "/pages/homepage/index.aspx", "/pages/homepage", "index" } };
-            yield return new object[] { null, @"(.*)/(.+).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages/homepage.aspx", "index" } };
-            yield return new object[] { null, @"(.*)/(.+)/(.+).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages", "homepage.aspx", "index" } };
-
-            // Quantifiers
-            yield return new object[] { null, @"a*", "", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"a*", "a", RegexOptions.None, new string[] { "a" } };
-            yield return new object[] { null, @"a*", "aa", RegexOptions.None, new string[] { "aa" } };
-            yield return new object[] { null, @"a*", "aaa", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"a*?", "", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"a*?", "a", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"a*?", "aa", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"a+?", "aa", RegexOptions.None, new string[] { "a" } };
-            yield return new object[] { null, @"a{1,", "a{1,", RegexOptions.None, new string[] { "a{1," } };
-            yield return new object[] { null, @"a{1,3}", "aaaaa", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"a{1,3}?", "aaaaa", RegexOptions.None, new string[] { "a" } };
-            yield return new object[] { null, @"a{2,2}", "aaaaa", RegexOptions.None, new string[] { "aa" } };
-            yield return new object[] { null, @"a{2,2}?", "aaaaa", RegexOptions.None, new string[] { "aa" } };
-            yield return new object[] { null, @".{1,3}", "bb\nba", RegexOptions.None, new string[] { "bb" } };
-            yield return new object[] { null, @".{1,3}?", "bb\nba", RegexOptions.None, new string[] { "b" } };
-            yield return new object[] { null, @".{2,2}", "bbb\nba", RegexOptions.None, new string[] { "bb" } };
-            yield return new object[] { null, @".{2,2}?", "bbb\nba", RegexOptions.None, new string[] { "bb" } };
-            yield return new object[] { null, @"[abc]{1,3}", "ccaba", RegexOptions.None, new string[] { "cca" } };
-            yield return new object[] { null, @"[abc]{1,3}?", "ccaba", RegexOptions.None, new string[] { "c" } };
-            yield return new object[] { null, @"[abc]{2,2}", "ccaba", RegexOptions.None, new string[] { "cc" } };
-            yield return new object[] { null, @"[abc]{2,2}?", "ccaba", RegexOptions.None, new string[] { "cc" } };
-            yield return new object[] { null, @"(?:[abc]def){1,3}xyz", "cdefxyz", RegexOptions.None, new string[] { "cdefxyz" } };
-            yield return new object[] { null, @"(?:[abc]def){1,3}xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "adefbdefcdefxyz" } };
-            yield return new object[] { null, @"(?:[abc]def){1,3}?xyz", "cdefxyz", RegexOptions.None, new string[] { "cdefxyz" } };
-            yield return new object[] { null, @"(?:[abc]def){1,3}?xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "adefbdefcdefxyz" } };
-            yield return new object[] { null, @"(?:[abc]def){2,2}xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "bdefcdefxyz" } };
-            yield return new object[] { null, @"(?:[abc]def){2,2}?xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "bdefcdefxyz" } };
-            foreach (string prefix in new[] { "", "xyz" })
-            {
-                yield return new object[] { null, prefix + @"(?:[abc]def){1,3}", prefix + "cdef", RegexOptions.None, new string[] { prefix + "cdef" } };
-                yield return new object[] { null, prefix + @"(?:[abc]def){1,3}", prefix + "cdefadefbdef", RegexOptions.None, new string[] { prefix + "cdefadefbdef" } };
-                yield return new object[] { null, prefix + @"(?:[abc]def){1,3}", prefix + "cdefadefbdefadef", RegexOptions.None, new string[] { prefix + "cdefadefbdef" } };
-                yield return new object[] { null, prefix + @"(?:[abc]def){1,3}?", prefix + "cdef", RegexOptions.None, new string[] { prefix + "cdef" } };
-                yield return new object[] { null, prefix + @"(?:[abc]def){1,3}?", prefix + "cdefadefbdef", RegexOptions.None, new string[] { prefix + "cdef" } };
-                yield return new object[] { null, prefix + @"(?:[abc]def){2,2}", prefix + "cdefadefbdefadef", RegexOptions.None, new string[] { prefix + "cdefadef" } };
-                yield return new object[] { null, prefix + @"(?:[abc]def){2,2}?", prefix + "cdefadefbdefadef", RegexOptions.None, new string[] { prefix + "cdefadef" } };
-            }
-            yield return new object[] { null, @"(cat){", "cat{", RegexOptions.None, new string[] { "cat{", "cat" } };
-            yield return new object[] { null, @"(cat){}", "cat{}", RegexOptions.None, new string[] { "cat{}", "cat" } };
-            yield return new object[] { null, @"(cat){,", "cat{,", RegexOptions.None, new string[] { "cat{,", "cat" } };
-            yield return new object[] { null, @"(cat){,}", "cat{,}", RegexOptions.None, new string[] { "cat{,}", "cat" } };
-            yield return new object[] { null, @"(cat){cat}", "cat{cat}", RegexOptions.None, new string[] { "cat{cat}", "cat" } };
-            yield return new object[] { null, @"(cat){cat,5}", "cat{cat,5}", RegexOptions.None, new string[] { "cat{cat,5}", "cat" } };
-            yield return new object[] { null, @"(cat){5,dog}", "cat{5,dog}", RegexOptions.None, new string[] { "cat{5,dog}", "cat" } };
-            yield return new object[] { null, @"(cat){cat,dog}", "cat{cat,dog}", RegexOptions.None, new string[] { "cat{cat,dog}", "cat" } };
-            yield return new object[] { null, @"(cat){,}?", "cat{,}?", RegexOptions.None, new string[] { "cat{,}", "cat" } };
-            yield return new object[] { null, @"(cat){cat}?", "cat{cat}?", RegexOptions.None, new string[] { "cat{cat}", "cat" } };
-            yield return new object[] { null, @"(cat){cat,5}?", "cat{cat,5}?", RegexOptions.None, new string[] { "cat{cat,5}", "cat" } };
-            yield return new object[] { null, @"(cat){5,dog}?", "cat{5,dog}?", RegexOptions.None, new string[] { "cat{5,dog}", "cat" } };
-            yield return new object[] { null, @"(cat){cat,dog}?", "cat{cat,dog}?", RegexOptions.None, new string[] { "cat{cat,dog}", "cat" } };
-
-            // Atomic subexpressions
-            // Implicitly upgrading (or not) oneloop to be atomic
-            yield return new object[] { null, @"a*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*[^a]", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*[^a]+", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*[^a]+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*(?>[^a]+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*bcd", "aaabcd", RegexOptions.None, new string[] { "aaabcd" } };
-            yield return new object[] { null, @"a*[bcd]", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*[bcd]+", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*[bcd]+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*(?>[bcd]+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*[bcd]{1,3}", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"a*([bcd]ab|[bef]cd){1,3}", "aaababecdcac", RegexOptions.ExplicitCapture, new string[] { "aaababecd" } };
-            yield return new object[] { null, @"a*([bcd]|[aef]){1,3}", "befb", RegexOptions.ExplicitCapture, new string[] { "bef" } }; // can't upgrade
-            yield return new object[] { null, @"a*$", "aaa", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"a*$", "aaa", RegexOptions.Multiline, new string[] { "aaa" } };
-            yield return new object[] { null, @"a*\b", "aaa bbb", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"a*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
-            yield return new object[] { null, @"@*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
-            yield return new object[] { null, @"@*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
-            yield return new object[] { null, @"(?:abcd*|efgh)i", "efghi", RegexOptions.None, new string[] { "efghi" } };
-            yield return new object[] { null, @"(?:abcd|efgh*)i", "efgi", RegexOptions.None, new string[] { "efgi" } };
-            yield return new object[] { null, @"(?:abcd|efghj{2,}|j[klm]o+)i", "efghjjjjji", RegexOptions.None, new string[] { "efghjjjjji" } };
-            yield return new object[] { null, @"(?:abcd|efghi{2,}|j[klm]o+)i", "efghiii", RegexOptions.None, new string[] { "efghiii" } };
-            yield return new object[] { null, @"(?:abcd|efghi{2,}|j[klm]o+)i", "efghiiiiiiii", RegexOptions.None, new string[] { "efghiiiiiiii" } };
-            yield return new object[] { null, @"a?ba?ba?ba?b", "abbabab", RegexOptions.None, new string[] { "abbabab" } };
-            yield return new object[] { null, @"a?ba?ba?ba?b", "abBAbab", RegexOptions.IgnoreCase, new string[] { "abBAbab" } };
-            // Implicitly upgrading (or not) notoneloop to be atomic
-            yield return new object[] { null, @"[^b]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[^b]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[^b]*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[^b]*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[^b]*bac", "aaabac", RegexOptions.None, new string[] { "aaabac" } };
-            yield return new object[] { null, @"[^b]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"(?:abc[^b]*|efgh)i", "efghi", RegexOptions.None, new string[] { "efghi" } }; // can't upgrade
-            yield return new object[] { null, @"(?:abcd|efg[^b]*)b", "efgb", RegexOptions.None, new string[] { "efgb" } };
-            yield return new object[] { null, @"(?:abcd|efg[^b]*)i", "efgi", RegexOptions.None, new string[] { "efgi" } }; // can't upgrade
-            yield return new object[] { null, @"[^a]?a[^a]?a[^a]?a[^a]?a", "baababa", RegexOptions.None, new string[] { "baababa" } };
-            yield return new object[] { null, @"[^a]?a[^a]?a[^a]?a[^a]?a", "BAababa", RegexOptions.IgnoreCase, new string[] { "BAababa" } };
-            // Implicitly upgrading (or not) setloop to be atomic
-            yield return new object[] { null, @"[ac]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"[ac]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*[^a]", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*[^a]+", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*[^a]+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*(?>[^a]+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*bcd", "aaabcd", RegexOptions.None, new string[] { "aaabcd" } };
-            yield return new object[] { null, @"[ac]*[bd]", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*[bd]+", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*[bd]+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*(?>[bd]+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*[bd]{1,3}", "aaab", RegexOptions.None, new string[] { "aaab" } };
-            yield return new object[] { null, @"[ac]*$", "aaa", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"[ac]*$", "aaa", RegexOptions.Multiline, new string[] { "aaa" } };
-            yield return new object[] { null, @"[ac]*\b", "aaa bbb", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"[ac]*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
-            yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
-            yield return new object[] { null, @"[@']*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
-            yield return new object[] { null, @".*.", "@@@", RegexOptions.Singleline, new string[] { "@@@" } };
-            yield return new object[] { null, @"(?:abcd|efg[hij]*)h", "efgh", RegexOptions.None, new string[] { "efgh" } }; // can't upgrade
-            yield return new object[] { null, @"(?:abcd|efg[hij]*)ih", "efgjih", RegexOptions.None, new string[] { "efgjih" } }; // can't upgrade
-            yield return new object[] { null, @"(?:abcd|efg[hij]*)k", "efgjk", RegexOptions.None, new string[] { "efgjk" } };
-            yield return new object[] { null, @"[ace]?b[ace]?b[ace]?b[ace]?b", "cbbabeb", RegexOptions.None, new string[] { "cbbabeb" } };
-            yield return new object[] { null, @"[ace]?b[ace]?b[ace]?b[ace]?b", "cBbAbEb", RegexOptions.IgnoreCase, new string[] { "cBbAbEb" } };
-            yield return new object[] { null, @"a[^wz]*w", "abcdcdcdwz", RegexOptions.None, new string[] { "abcdcdcdw" } };
-            yield return new object[] { null, @"a[^wyz]*w", "abcdcdcdwz", RegexOptions.None, new string[] { "abcdcdcdw" } };
-            yield return new object[] { null, @"a[^wyz]*W", "abcdcdcdWz", RegexOptions.IgnoreCase, new string[] { "abcdcdcdW" } };
-            // Implicitly upgrading (or not) concat loops to be atomic
-            yield return new object[] { null, @"(?:[ab]c[de]f)*", "", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"(?:[ab]c[de]f)*", "acdf", RegexOptions.None, new string[] { "acdf" } };
-            yield return new object[] { null, @"(?:[ab]c[de]f)*", "acdfbcef", RegexOptions.None, new string[] { "acdfbcef" } };
-            yield return new object[] { null, @"(?:[ab]c[de]f)*", "cdfbcef", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"(?:[ab]c[de]f)+", "cdfbcef", RegexOptions.None, new string[] { "bcef" } };
-            yield return new object[] { null, @"(?:[ab]c[de]f)*", "bcefbcdfacfe", RegexOptions.None, new string[] { "bcefbcdf" } };
-            // Implicitly upgrading (or not) nested loops to be atomic
-            yield return new object[] { null, @"(?:a){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"(?:a){3}?", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
-            yield return new object[] { null, @"(?:a{2}){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaaaaa" } };
-            yield return new object[] { null, @"(?:a{2}?){3}?", "aaaaaaaaa", RegexOptions.None, new string[] { "aaaaaa" } };
-            yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}){2}", "acdfbcdfacefbcefbcefbcdfacdef", RegexOptions.None, new string[] { "acdfbcdfacefbcefbcefbcdf" } };
-            yield return new object[] { null, @"(?:(?:[ab]c[de]f){3}hello){2}", "aaaaaacdfbcdfacefhellobcefbcefbcdfhellooooo", RegexOptions.None, new string[] { "acdfbcdfacefhellobcefbcefbcdfhello" } };
-            yield return new object[] { null, @"CN=(.*[^,]+).*", "CN=localhost", RegexOptions.Singleline, new string[] { "CN=localhost", "localhost" } };
-            // Nested atomic
-            yield return new object[] { null, @"(?>abc[def]gh(i*))", "123abceghiii456", RegexOptions.None, new string[] { "abceghiii", "iii" } };
-            yield return new object[] { null, @"(?>(?:abc)*)", "abcabcabc", RegexOptions.None, new string[] { "abcabcabc" } };
-
-            // Anchoring loops beginning with .* / .+
-            yield return new object[] { null, @".*", "", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @".*", "\n\n\n\n", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @".*", "\n\n\n\n", RegexOptions.Singleline, new string[] { "\n\n\n\n" } };
-            yield return new object[] { null, @".*[1a]", "\n\n\n\n1", RegexOptions.None, new string[] { "1" } };
-            yield return new object[] { null, @"(?s).*(?-s)[1a]", "1\n\n\n\n", RegexOptions.None, new string[] { "1" } };
-            yield return new object[] { null, @"(?s).*(?-s)[1a]", "\n\n\n\n1", RegexOptions.None, new string[] { "\n\n\n\n1" } };
-            yield return new object[] { null, @".*|.*|.*", "", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @".*123|abc", "abc\n123", RegexOptions.None, new string[] { "abc" } };
-            yield return new object[] { null, @".*123|abc", "abc\n123", RegexOptions.Singleline, new string[] { "abc\n123" }, "abc" }; // <-- Nonbacktracking match same as for "abc|.*123"
-            yield return new object[] { null, @"abc|.*123", "abc\n123", RegexOptions.Singleline, new string[] { "abc" } };
-            yield return new object[] { null, @".*", "\n", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @".*\n", "\n", RegexOptions.None, new string[] { "\n" } };
-            yield return new object[] { null, @".*", "\n", RegexOptions.Singleline, new string[] { "\n" } };
-            yield return new object[] { null, @".*\n", "\n", RegexOptions.Singleline, new string[] { "\n" } };
-            yield return new object[] { null, @".*", "abc", RegexOptions.None, new string[] { "abc" } };
-            yield return new object[] { null, @".*abc", "abc", RegexOptions.None, new string[] { "abc" } };
-            yield return new object[] { null, @".*abc|ghi", "ghi", RegexOptions.None, new string[] { "ghi" } };
-            yield return new object[] { null, @".*abc|.*ghi", "abcghi", RegexOptions.None, new string[] { "abc" }, "abcghi" }; // <-- Nonbacktracking match same as for ".*ghi|.*abc"
-            yield return new object[] { null, @".*ghi|.*abc", "abcghi", RegexOptions.None, new string[] { "abcghi" } };
-            yield return new object[] { null, @".*abc|.*ghi", "bcghi", RegexOptions.None, new string[] { "bcghi" } };
-            yield return new object[] { null, @".*abc|.+c", " \n   \n   bc", RegexOptions.None, new string[] { "   bc" } };
-            yield return new object[] { null, @".*abc", "12345 abc", RegexOptions.None, new string[] { "12345 abc" } };
-            yield return new object[] { null, @".*abc", "12345\n abc", RegexOptions.None, new string[] { " abc" } };
-            yield return new object[] { null, @".*abc", "12345\n abc", RegexOptions.Singleline, new string[] { "12345\n abc" } };
-            yield return new object[] { null, @"(.*)abc\1", "\n12345abc12345", RegexOptions.Singleline, new string[] { "12345abc12345", "12345" } };
-            yield return new object[] { null, @".*\nabc", "\n123\nabc", RegexOptions.None, new string[] { "123\nabc" } };
-            yield return new object[] { null, @".*\nabc", "\n123\nabc", RegexOptions.Singleline, new string[] { "\n123\nabc" } };
-            yield return new object[] { null, @".*abc", "abc abc abc \nabc", RegexOptions.None, new string[] { "abc abc abc" } };
-            yield return new object[] { null, @".*abc", "abc abc abc \nabc", RegexOptions.Singleline, new string[] { "abc abc abc \nabc" } };
-            yield return new object[] { null, @".*?abc", "abc abc abc \nabc", RegexOptions.None, new string[] { "abc" } };
-            yield return new object[] { null, @"[^\n]*abc", "123abc\n456abc\n789abc", RegexOptions.None, new string[] { "123abc" } };
-            yield return new object[] { null, @"[^\n]*abc", "123abc\n456abc\n789abc", RegexOptions.Singleline, new string[] { "123abc" } };
-            yield return new object[] { null, @"[^\n]*abc", "123ab\n456abc\n789abc", RegexOptions.None, new string[] { "456abc" } };
-            yield return new object[] { null, @"[^\n]*abc", "123ab\n456abc\n789abc", RegexOptions.Singleline, new string[] { "456abc" } };
-            yield return new object[] { null, @".+", "a", RegexOptions.None, new string[] { "a" } };
-            yield return new object[] { null, @".+", "\nabc", RegexOptions.None, new string[] { "abc" } };
-            yield return new object[] { null, @".+", "\n", RegexOptions.Singleline, new string[] { "\n" } };
-            yield return new object[] { null, @".+", "\nabc", RegexOptions.Singleline, new string[] { "\nabc" } };
-            yield return new object[] { null, @".+abc", "aaaabc", RegexOptions.None, new string[] { "aaaabc" } };
-            yield return new object[] { null, @".+abc", "12345 abc", RegexOptions.None, new string[] { "12345 abc" } };
-            yield return new object[] { null, @".+abc", "12345\n abc", RegexOptions.None, new string[] { " abc" } };
-            yield return new object[] { null, @".+abc", "12345\n abc", RegexOptions.Singleline, new string[] { "12345\n abc" } };
-            yield return new object[] { null, @"(.+)abc\1", "\n12345abc12345", RegexOptions.Singleline, new string[] { "12345abc12345", "12345" } };
-
-            // Unanchored .*
-            yield return new object[] { null, @"\A\s*(?<name>\w+)(\s*\((?<arguments>.*)\))?\s*\Z", "Match(Name)", RegexOptions.None, new string[] { "Match(Name)", "(Name)", "Match", "Name" } };
-            yield return new object[] { null, @"\A\s*(?<name>\w+)(\s*\((?<arguments>.*)\))?\s*\Z", "Match(Na\nme)", RegexOptions.Singleline, new string[] { "Match(Na\nme)", "(Na\nme)", "Match", "Na\nme" } };
-            foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Singleline })
-            {
-                yield return new object[] { null, @"abcd.*", @"abcabcd", options, new string[] { "abcd" } };
-                yield return new object[] { null, @"abcd.*", @"abcabcde", options, new string[] { "abcde" } };
-                yield return new object[] { null, @"abcd.*", @"abcabcdefg", options, new string[] { "abcdefg" } };
-                yield return new object[] { null, @"abcd(.*)", @"ababcd", options, new string[] { "abcd", "" } };
-                yield return new object[] { null, @"abcd(.*)", @"aabcde", options, new string[] { "abcde", "e" } };
-                yield return new object[] { null, @"abcd(.*)", @"abcabcdefg", options, new string[] { "abcdefg", "efg" } };
-                yield return new object[] { null, @"abcd(.*)e", @"abcabcdefg", options, new string[] { "abcde", "" } };
-                yield return new object[] { null, @"abcd(.*)f", @"abcabcdefg", options, new string[] { "abcdef", "e" } };
-            }
+                yield return new object[] { engine, null, @"\G<%#(?<code>.*?)?%>", @"<%# DataBinder.Eval(this, ""MyNumber"") %>", RegexOptions.Singleline, new string[] { @"<%# DataBinder.Eval(this, ""MyNumber"") %>", @" DataBinder.Eval(this, ""MyNumber"") " } };
+
+                // Nested Quantifiers
+                yield return new object[] { engine, null, @"^[abcd]{0,0x10}*$", "a{0,0x10}}}", RegexOptions.None, new string[] { "a{0,0x10}}}" } };
+
+                // Lazy operator Backtracking
+                yield return new object[] { engine, null, @"http://([a-zA-z0-9\-]*\.?)*?(:[0-9]*)??/", "http://www.msn.com/", RegexOptions.IgnoreCase, new string[] { "http://www.msn.com/", "com", string.Empty } };
+                yield return new object[] { engine, null, @"http://([a-zA-Z0-9\-]*\.?)*?/", @"http://www.google.com/", RegexOptions.IgnoreCase, new string[] { "http://www.google.com/", "com" } };
+
+                yield return new object[] { engine, null, @"([a-z]*?)([\w])", "cat", RegexOptions.IgnoreCase, new string[] { "c", string.Empty, "c" } };
+                yield return new object[] { engine, null, @"^([a-z]*?)([\w])$", "cat", RegexOptions.IgnoreCase, new string[] { "cat", "ca", "t" } };
+
+                // Backtracking
+                yield return new object[] { engine, null, @"([a-z]*)([\w])", "cat", RegexOptions.IgnoreCase, new string[] { "cat", "ca", "t" } };
+                yield return new object[] { engine, null, @"^([a-z]*)([\w])$", "cat", RegexOptions.IgnoreCase, new string[] { "cat", "ca", "t" } };
+
+                // Backtracking with multiple (.*) groups -- important ASP.NET scenario
+                yield return new object[] { engine, null, @"(.*)/(.*).aspx", "/.aspx", RegexOptions.None, new string[] { "/.aspx", string.Empty, string.Empty } };
+                yield return new object[] { engine, null, @"(.*)/(.*).aspx", "/homepage.aspx", RegexOptions.None, new string[] { "/homepage.aspx", string.Empty, "homepage" } };
+                yield return new object[] { engine, null, @"(.*)/(.*).aspx", "pages/.aspx", RegexOptions.None, new string[] { "pages/.aspx", "pages", string.Empty } };
+                yield return new object[] { engine, null, @"(.*)/(.*).aspx", "pages/homepage.aspx", RegexOptions.None, new string[] { "pages/homepage.aspx", "pages", "homepage" } };
+                yield return new object[] { engine, null, @"(.*)/(.*).aspx", "/pages/homepage.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx", "/pages", "homepage" } };
+                yield return new object[] { engine, null, @"(.*)/(.*).aspx", "/pages/homepage/index.aspx", RegexOptions.None, new string[] { "/pages/homepage/index.aspx", "/pages/homepage", "index" } };
+                yield return new object[] { engine, null, @"(.*)/(.*).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages/homepage.aspx", "index" } };
+                yield return new object[] { engine, null, @"(.*)/(.*)/(.*).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages", "homepage.aspx", "index" } };
+
+                // Backtracking with multiple (.+) groups
+                yield return new object[] { engine, null, @"(.+)/(.+).aspx", "pages/homepage.aspx", RegexOptions.None, new string[] { "pages/homepage.aspx", "pages", "homepage" } };
+                yield return new object[] { engine, null, @"(.+)/(.+).aspx", "/pages/homepage.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx", "/pages", "homepage" } };
+                yield return new object[] { engine, null, @"(.+)/(.+).aspx", "/pages/homepage/index.aspx", RegexOptions.None, new string[] { "/pages/homepage/index.aspx", "/pages/homepage", "index" } };
+                yield return new object[] { engine, null, @"(.+)/(.+).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages/homepage.aspx", "index" } };
+                yield return new object[] { engine, null, @"(.+)/(.+)/(.+).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages", "homepage.aspx", "index" } };
+
+                // Backtracking with (.+) group followed by (.*)
+                yield return new object[] { engine, null, @"(.+)/(.*).aspx", "pages/.aspx", RegexOptions.None, new string[] { "pages/.aspx", "pages", string.Empty } };
+                yield return new object[] { engine, null, @"(.+)/(.*).aspx", "pages/homepage.aspx", RegexOptions.None, new string[] { "pages/homepage.aspx", "pages", "homepage" } };
+                yield return new object[] { engine, null, @"(.+)/(.*).aspx", "/pages/homepage.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx", "/pages", "homepage" } };
+                yield return new object[] { engine, null, @"(.+)/(.*).aspx", "/pages/homepage/index.aspx", RegexOptions.None, new string[] { "/pages/homepage/index.aspx", "/pages/homepage", "index" } };
+                yield return new object[] { engine, null, @"(.+)/(.*).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages/homepage.aspx", "index" } };
+                yield return new object[] { engine, null, @"(.+)/(.*)/(.*).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages", "homepage.aspx", "index" } };
+
+                // Backtracking with (.*) group followed by (.+)
+                yield return new object[] { engine, null, @"(.*)/(.+).aspx", "/homepage.aspx", RegexOptions.None, new string[] { "/homepage.aspx", string.Empty, "homepage" } };
+                yield return new object[] { engine, null, @"(.*)/(.+).aspx", "pages/homepage.aspx", RegexOptions.None, new string[] { "pages/homepage.aspx", "pages", "homepage" } };
+                yield return new object[] { engine, null, @"(.*)/(.+).aspx", "/pages/homepage.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx", "/pages", "homepage" } };
+                yield return new object[] { engine, null, @"(.*)/(.+).aspx", "/pages/homepage/index.aspx", RegexOptions.None, new string[] { "/pages/homepage/index.aspx", "/pages/homepage", "index" } };
+                yield return new object[] { engine, null, @"(.*)/(.+).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages/homepage.aspx", "index" } };
+                yield return new object[] { engine, null, @"(.*)/(.+)/(.+).aspx", "/pages/homepage.aspx/index.aspx", RegexOptions.None, new string[] { "/pages/homepage.aspx/index.aspx", "/pages", "homepage.aspx", "index" } };
+
+                // Quantifiers
+                yield return new object[] { engine, null, @"a*", "", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"a*", "a", RegexOptions.None, new string[] { "a" } };
+                yield return new object[] { engine, null, @"a*", "aa", RegexOptions.None, new string[] { "aa" } };
+                yield return new object[] { engine, null, @"a*", "aaa", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"a*?", "", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"a*?", "a", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"a*?", "aa", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"a+?", "aa", RegexOptions.None, new string[] { "a" } };
+                yield return new object[] { engine, null, @"a{1,", "a{1,", RegexOptions.None, new string[] { "a{1," } };
+                yield return new object[] { engine, null, @"a{1,3}", "aaaaa", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"a{1,3}?", "aaaaa", RegexOptions.None, new string[] { "a" } };
+                yield return new object[] { engine, null, @"a{2,2}", "aaaaa", RegexOptions.None, new string[] { "aa" } };
+                yield return new object[] { engine, null, @"a{2,2}?", "aaaaa", RegexOptions.None, new string[] { "aa" } };
+                yield return new object[] { engine, null, @".{1,3}", "bb\nba", RegexOptions.None, new string[] { "bb" } };
+                yield return new object[] { engine, null, @".{1,3}?", "bb\nba", RegexOptions.None, new string[] { "b" } };
+                yield return new object[] { engine, null, @".{2,2}", "bbb\nba", RegexOptions.None, new string[] { "bb" } };
+                yield return new object[] { engine, null, @".{2,2}?", "bbb\nba", RegexOptions.None, new string[] { "bb" } };
+                yield return new object[] { engine, null, @"[abc]{1,3}", "ccaba", RegexOptions.None, new string[] { "cca" } };
+                yield return new object[] { engine, null, @"[abc]{1,3}?", "ccaba", RegexOptions.None, new string[] { "c" } };
+                yield return new object[] { engine, null, @"[abc]{2,2}", "ccaba", RegexOptions.None, new string[] { "cc" } };
+                yield return new object[] { engine, null, @"[abc]{2,2}?", "ccaba", RegexOptions.None, new string[] { "cc" } };
+                yield return new object[] { engine, null, @"(?:[abc]def){1,3}xyz", "cdefxyz", RegexOptions.None, new string[] { "cdefxyz" } };
+                yield return new object[] { engine, null, @"(?:[abc]def){1,3}xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "adefbdefcdefxyz" } };
+                yield return new object[] { engine, null, @"(?:[abc]def){1,3}?xyz", "cdefxyz", RegexOptions.None, new string[] { "cdefxyz" } };
+                yield return new object[] { engine, null, @"(?:[abc]def){1,3}?xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "adefbdefcdefxyz" } };
+                yield return new object[] { engine, null, @"(?:[abc]def){2,2}xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "bdefcdefxyz" } };
+                yield return new object[] { engine, null, @"(?:[abc]def){2,2}?xyz", "adefbdefcdefxyz", RegexOptions.None, new string[] { "bdefcdefxyz" } };
+                foreach (string prefix in new[] { "", "xyz" })
+                {
+                    yield return new object[] { engine, null, prefix + @"(?:[abc]def){1,3}", prefix + "cdef", RegexOptions.None, new string[] { prefix + "cdef" } };
+                    yield return new object[] { engine, null, prefix + @"(?:[abc]def){1,3}", prefix + "cdefadefbdef", RegexOptions.None, new string[] { prefix + "cdefadefbdef" } };
+                    yield return new object[] { engine, null, prefix + @"(?:[abc]def){1,3}", prefix + "cdefadefbdefadef", RegexOptions.None, new string[] { prefix + "cdefadefbdef" } };
+                    yield return new object[] { engine, null, prefix + @"(?:[abc]def){1,3}?", prefix + "cdef", RegexOptions.None, new string[] { prefix + "cdef" } };
+                    yield return new object[] { engine, null, prefix + @"(?:[abc]def){1,3}?", prefix + "cdefadefbdef", RegexOptions.None, new string[] { prefix + "cdef" } };
+                    yield return new object[] { engine, null, prefix + @"(?:[abc]def){2,2}", prefix + "cdefadefbdefadef", RegexOptions.None, new string[] { prefix + "cdefadef" } };
+                    yield return new object[] { engine, null, prefix + @"(?:[abc]def){2,2}?", prefix + "cdefadefbdefadef", RegexOptions.None, new string[] { prefix + "cdefadef" } };
+                }
+                yield return new object[] { engine, null, @"(cat){", "cat{", RegexOptions.None, new string[] { "cat{", "cat" } };
+                yield return new object[] { engine, null, @"(cat){}", "cat{}", RegexOptions.None, new string[] { "cat{}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){,", "cat{,", RegexOptions.None, new string[] { "cat{,", "cat" } };
+                yield return new object[] { engine, null, @"(cat){,}", "cat{,}", RegexOptions.None, new string[] { "cat{,}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){cat}", "cat{cat}", RegexOptions.None, new string[] { "cat{cat}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){cat,5}", "cat{cat,5}", RegexOptions.None, new string[] { "cat{cat,5}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){5,dog}", "cat{5,dog}", RegexOptions.None, new string[] { "cat{5,dog}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){cat,dog}", "cat{cat,dog}", RegexOptions.None, new string[] { "cat{cat,dog}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){,}?", "cat{,}?", RegexOptions.None, new string[] { "cat{,}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){cat}?", "cat{cat}?", RegexOptions.None, new string[] { "cat{cat}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){cat,5}?", "cat{cat,5}?", RegexOptions.None, new string[] { "cat{cat,5}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){5,dog}?", "cat{5,dog}?", RegexOptions.None, new string[] { "cat{5,dog}", "cat" } };
+                yield return new object[] { engine, null, @"(cat){cat,dog}?", "cat{cat,dog}?", RegexOptions.None, new string[] { "cat{cat,dog}", "cat" } };
+
+                // Atomic subexpressions
+                // Implicitly upgrading (or not) oneloop to be atomic
+                yield return new object[] { engine, null, @"a*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*[^a]", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*[^a]+", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*[^a]+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*(?>[^a]+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*bcd", "aaabcd", RegexOptions.None, new string[] { "aaabcd" } };
+                yield return new object[] { engine, null, @"a*[bcd]", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*[bcd]+", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*[bcd]+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*(?>[bcd]+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*[bcd]{1,3}", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"a*([bcd]ab|[bef]cd){1,3}", "aaababecdcac", RegexOptions.ExplicitCapture, new string[] { "aaababecd" } };
+                yield return new object[] { engine, null, @"a*([bcd]|[aef]){1,3}", "befb", RegexOptions.ExplicitCapture, new string[] { "bef" } }; // can't upgrade
+                yield return new object[] { engine, null, @"a*$", "aaa", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"a*$", "aaa", RegexOptions.Multiline, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"a*\b", "aaa bbb", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"a*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"@*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
+                yield return new object[] { engine, null, @"@*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
+                yield return new object[] { engine, null, @"(?:abcd*|efgh)i", "efghi", RegexOptions.None, new string[] { "efghi" } };
+                yield return new object[] { engine, null, @"(?:abcd|efgh*)i", "efgi", RegexOptions.None, new string[] { "efgi" } };
+                yield return new object[] { engine, null, @"(?:abcd|efghj{2,}|j[klm]o+)i", "efghjjjjji", RegexOptions.None, new string[] { "efghjjjjji" } };
+                yield return new object[] { engine, null, @"(?:abcd|efghi{2,}|j[klm]o+)i", "efghiii", RegexOptions.None, new string[] { "efghiii" } };
+                yield return new object[] { engine, null, @"(?:abcd|efghi{2,}|j[klm]o+)i", "efghiiiiiiii", RegexOptions.None, new string[] { "efghiiiiiiii" } };
+                yield return new object[] { engine, null, @"a?ba?ba?ba?b", "abbabab", RegexOptions.None, new string[] { "abbabab" } };
+                yield return new object[] { engine, null, @"a?ba?ba?ba?b", "abBAbab", RegexOptions.IgnoreCase, new string[] { "abBAbab" } };
+                // Implicitly upgrading (or not) notoneloop to be atomic
+                yield return new object[] { engine, null, @"[^b]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[^b]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[^b]*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[^b]*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[^b]*bac", "aaabac", RegexOptions.None, new string[] { "aaabac" } };
+                yield return new object[] { engine, null, @"[^b]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"(?:abc[^b]*|efgh)i", "efghi", RegexOptions.None, new string[] { "efghi" } }; // can't upgrade
+                yield return new object[] { engine, null, @"(?:abcd|efg[^b]*)b", "efgb", RegexOptions.None, new string[] { "efgb" } };
+                yield return new object[] { engine, null, @"(?:abcd|efg[^b]*)i", "efgi", RegexOptions.None, new string[] { "efgi" } }; // can't upgrade
+                yield return new object[] { engine, null, @"[^a]?a[^a]?a[^a]?a[^a]?a", "baababa", RegexOptions.None, new string[] { "baababa" } };
+                yield return new object[] { engine, null, @"[^a]?a[^a]?a[^a]?a[^a]?a", "BAababa", RegexOptions.IgnoreCase, new string[] { "BAababa" } };
+                // Implicitly upgrading (or not) setloop to be atomic
+                yield return new object[] { engine, null, @"[ac]*", "aaa", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"[ac]*b", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*b+", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*b+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*(?>b+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*[^a]", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*[^a]+", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*[^a]+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*(?>[^a]+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*bcd", "aaabcd", RegexOptions.None, new string[] { "aaabcd" } };
+                yield return new object[] { engine, null, @"[ac]*[bd]", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*[bd]+", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*[bd]+?", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*(?>[bd]+)", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*[bd]{1,3}", "aaab", RegexOptions.None, new string[] { "aaab" } };
+                yield return new object[] { engine, null, @"[ac]*$", "aaa", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"[ac]*$", "aaa", RegexOptions.Multiline, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"[ac]*\b", "aaa bbb", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"[ac]*\b", "aaa bbb", RegexOptions.ECMAScript, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"[@']*\B", "@@@", RegexOptions.None, new string[] { "@@@" } };
+                yield return new object[] { engine, null, @"[@']*\B", "@@@", RegexOptions.ECMAScript, new string[] { "@@@" } };
+                yield return new object[] { engine, null, @".*.", "@@@", RegexOptions.Singleline, new string[] { "@@@" } };
+                yield return new object[] { engine, null, @"(?:abcd|efg[hij]*)h", "efgh", RegexOptions.None, new string[] { "efgh" } }; // can't upgrade
+                yield return new object[] { engine, null, @"(?:abcd|efg[hij]*)ih", "efgjih", RegexOptions.None, new string[] { "efgjih" } }; // can't upgrade
+                yield return new object[] { engine, null, @"(?:abcd|efg[hij]*)k", "efgjk", RegexOptions.None, new string[] { "efgjk" } };
+                yield return new object[] { engine, null, @"[ace]?b[ace]?b[ace]?b[ace]?b", "cbbabeb", RegexOptions.None, new string[] { "cbbabeb" } };
+                yield return new object[] { engine, null, @"[ace]?b[ace]?b[ace]?b[ace]?b", "cBbAbEb", RegexOptions.IgnoreCase, new string[] { "cBbAbEb" } };
+                yield return new object[] { engine, null, @"a[^wz]*w", "abcdcdcdwz", RegexOptions.None, new string[] { "abcdcdcdw" } };
+                yield return new object[] { engine, null, @"a[^wyz]*w", "abcdcdcdwz", RegexOptions.None, new string[] { "abcdcdcdw" } };
+                yield return new object[] { engine, null, @"a[^wyz]*W", "abcdcdcdWz", RegexOptions.IgnoreCase, new string[] { "abcdcdcdW" } };
+                // Implicitly upgrading (or not) concat loops to be atomic
+                yield return new object[] { engine, null, @"(?:[ab]c[de]f)*", "", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"(?:[ab]c[de]f)*", "acdf", RegexOptions.None, new string[] { "acdf" } };
+                yield return new object[] { engine, null, @"(?:[ab]c[de]f)*", "acdfbcef", RegexOptions.None, new string[] { "acdfbcef" } };
+                yield return new object[] { engine, null, @"(?:[ab]c[de]f)*", "cdfbcef", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"(?:[ab]c[de]f)+", "cdfbcef", RegexOptions.None, new string[] { "bcef" } };
+                yield return new object[] { engine, null, @"(?:[ab]c[de]f)*", "bcefbcdfacfe", RegexOptions.None, new string[] { "bcefbcdf" } };
+                // Implicitly upgrading (or not) nested loops to be atomic
+                yield return new object[] { engine, null, @"(?:a){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"(?:a){3}?", "aaaaaaaaa", RegexOptions.None, new string[] { "aaa" } };
+                yield return new object[] { engine, null, @"(?:a{2}){3}", "aaaaaaaaa", RegexOptions.None, new string[] { "aaaaaa" } };
+                yield return new object[] { engine, null, @"(?:a{2}?){3}?", "aaaaaaaaa", RegexOptions.None, new string[] { "aaaaaa" } };
+                yield return new object[] { engine, null, @"(?:(?:[ab]c[de]f){3}){2}", "acdfbcdfacefbcefbcefbcdfacdef", RegexOptions.None, new string[] { "acdfbcdfacefbcefbcefbcdf" } };
+                yield return new object[] { engine, null, @"(?:(?:[ab]c[de]f){3}hello){2}", "aaaaaacdfbcdfacefhellobcefbcefbcdfhellooooo", RegexOptions.None, new string[] { "acdfbcdfacefhellobcefbcefbcdfhello" } };
+                yield return new object[] { engine, null, @"CN=(.*[^,]+).*", "CN=localhost", RegexOptions.Singleline, new string[] { "CN=localhost", "localhost" } };
+                // Nested atomic
+                yield return new object[] { engine, null, @"(?>abc[def]gh(i*))", "123abceghiii456", RegexOptions.None, new string[] { "abceghiii", "iii" } };
+                yield return new object[] { engine, null, @"(?>(?:abc)*)", "abcabcabc", RegexOptions.None, new string[] { "abcabcabc" } };
+
+                // Anchoring loops beginning with .* / .+
+                yield return new object[] { engine, null, @".*", "", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @".*", "\n\n\n\n", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @".*", "\n\n\n\n", RegexOptions.Singleline, new string[] { "\n\n\n\n" } };
+                yield return new object[] { engine, null, @".*[1a]", "\n\n\n\n1", RegexOptions.None, new string[] { "1" } };
+                yield return new object[] { engine, null, @"(?s).*(?-s)[1a]", "1\n\n\n\n", RegexOptions.None, new string[] { "1" } };
+                yield return new object[] { engine, null, @"(?s).*(?-s)[1a]", "\n\n\n\n1", RegexOptions.None, new string[] { "\n\n\n\n1" } };
+                yield return new object[] { engine, null, @".*|.*|.*", "", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @".*123|abc", "abc\n123", RegexOptions.None, new string[] { "abc" } };
+                yield return new object[] { engine, null, @".*123|abc", "abc\n123", RegexOptions.Singleline, new string[] { "abc\n123" }, "abc" }; // <-- Nonbacktracking match same as for "abc|.*123"
+                yield return new object[] { engine, null, @"abc|.*123", "abc\n123", RegexOptions.Singleline, new string[] { "abc" } };
+                yield return new object[] { engine, null, @".*", "\n", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @".*\n", "\n", RegexOptions.None, new string[] { "\n" } };
+                yield return new object[] { engine, null, @".*", "\n", RegexOptions.Singleline, new string[] { "\n" } };
+                yield return new object[] { engine, null, @".*\n", "\n", RegexOptions.Singleline, new string[] { "\n" } };
+                yield return new object[] { engine, null, @".*", "abc", RegexOptions.None, new string[] { "abc" } };
+                yield return new object[] { engine, null, @".*abc", "abc", RegexOptions.None, new string[] { "abc" } };
+                yield return new object[] { engine, null, @".*abc|ghi", "ghi", RegexOptions.None, new string[] { "ghi" } };
+                yield return new object[] { engine, null, @".*abc|.*ghi", "abcghi", RegexOptions.None, new string[] { "abc" }, "abcghi" }; // <-- Nonbacktracking match same as for ".*ghi|.*abc"
+                yield return new object[] { engine, null, @".*ghi|.*abc", "abcghi", RegexOptions.None, new string[] { "abcghi" } };
+                yield return new object[] { engine, null, @".*abc|.*ghi", "bcghi", RegexOptions.None, new string[] { "bcghi" } };
+                yield return new object[] { engine, null, @".*abc|.+c", " \n   \n   bc", RegexOptions.None, new string[] { "   bc" } };
+                yield return new object[] { engine, null, @".*abc", "12345 abc", RegexOptions.None, new string[] { "12345 abc" } };
+                yield return new object[] { engine, null, @".*abc", "12345\n abc", RegexOptions.None, new string[] { " abc" } };
+                yield return new object[] { engine, null, @".*abc", "12345\n abc", RegexOptions.Singleline, new string[] { "12345\n abc" } };
+                yield return new object[] { engine, null, @"(.*)abc\1", "\n12345abc12345", RegexOptions.Singleline, new string[] { "12345abc12345", "12345" } };
+                yield return new object[] { engine, null, @".*\nabc", "\n123\nabc", RegexOptions.None, new string[] { "123\nabc" } };
+                yield return new object[] { engine, null, @".*\nabc", "\n123\nabc", RegexOptions.Singleline, new string[] { "\n123\nabc" } };
+                yield return new object[] { engine, null, @".*abc", "abc abc abc \nabc", RegexOptions.None, new string[] { "abc abc abc" } };
+                yield return new object[] { engine, null, @".*abc", "abc abc abc \nabc", RegexOptions.Singleline, new string[] { "abc abc abc \nabc" } };
+                yield return new object[] { engine, null, @".*?abc", "abc abc abc \nabc", RegexOptions.None, new string[] { "abc" } };
+                yield return new object[] { engine, null, @"[^\n]*abc", "123abc\n456abc\n789abc", RegexOptions.None, new string[] { "123abc" } };
+                yield return new object[] { engine, null, @"[^\n]*abc", "123abc\n456abc\n789abc", RegexOptions.Singleline, new string[] { "123abc" } };
+                yield return new object[] { engine, null, @"[^\n]*abc", "123ab\n456abc\n789abc", RegexOptions.None, new string[] { "456abc" } };
+                yield return new object[] { engine, null, @"[^\n]*abc", "123ab\n456abc\n789abc", RegexOptions.Singleline, new string[] { "456abc" } };
+                yield return new object[] { engine, null, @".+", "a", RegexOptions.None, new string[] { "a" } };
+                yield return new object[] { engine, null, @".+", "\nabc", RegexOptions.None, new string[] { "abc" } };
+                yield return new object[] { engine, null, @".+", "\n", RegexOptions.Singleline, new string[] { "\n" } };
+                yield return new object[] { engine, null, @".+", "\nabc", RegexOptions.Singleline, new string[] { "\nabc" } };
+                yield return new object[] { engine, null, @".+abc", "aaaabc", RegexOptions.None, new string[] { "aaaabc" } };
+                yield return new object[] { engine, null, @".+abc", "12345 abc", RegexOptions.None, new string[] { "12345 abc" } };
+                yield return new object[] { engine, null, @".+abc", "12345\n abc", RegexOptions.None, new string[] { " abc" } };
+                yield return new object[] { engine, null, @".+abc", "12345\n abc", RegexOptions.Singleline, new string[] { "12345\n abc" } };
+                yield return new object[] { engine, null, @"(.+)abc\1", "\n12345abc12345", RegexOptions.Singleline, new string[] { "12345abc12345", "12345" } };
+
+                // Unanchored .*
+                yield return new object[] { engine, null, @"\A\s*(?<name>\w+)(\s*\((?<arguments>.*)\))?\s*\Z", "Match(Name)", RegexOptions.None, new string[] { "Match(Name)", "(Name)", "Match", "Name" } };
+                yield return new object[] { engine, null, @"\A\s*(?<name>\w+)(\s*\((?<arguments>.*)\))?\s*\Z", "Match(Na\nme)", RegexOptions.Singleline, new string[] { "Match(Na\nme)", "(Na\nme)", "Match", "Na\nme" } };
+                foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Singleline })
+                {
+                    yield return new object[] { engine, null, @"abcd.*", @"abcabcd", options, new string[] { "abcd" } };
+                    yield return new object[] { engine, null, @"abcd.*", @"abcabcde", options, new string[] { "abcde" } };
+                    yield return new object[] { engine, null, @"abcd.*", @"abcabcdefg", options, new string[] { "abcdefg" } };
+                    yield return new object[] { engine, null, @"abcd(.*)", @"ababcd", options, new string[] { "abcd", "" } };
+                    yield return new object[] { engine, null, @"abcd(.*)", @"aabcde", options, new string[] { "abcde", "e" } };
+                    yield return new object[] { engine, null, @"abcd(.*)", @"abcabcdefg", options, new string[] { "abcdefg", "efg" } };
+                    yield return new object[] { engine, null, @"abcd(.*)e", @"abcabcdefg", options, new string[] { "abcde", "" } };
+                    yield return new object[] { engine, null, @"abcd(.*)f", @"abcabcdefg", options, new string[] { "abcdef", "e" } };
+                }
  
-            // Grouping Constructs Invalid Regular Expressions
-            yield return new object[] { null, @"()", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } };
-            yield return new object[] { null, @"(?<cat>)", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } };
-            yield return new object[] { null, @"(?'cat')", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } };
-            yield return new object[] { null, @"(?:)", "cat", RegexOptions.None, new string[] { string.Empty } };
-            yield return new object[] { null, @"(?imn)", "cat", RegexOptions.None, new string[] { string.Empty } };
-            yield return new object[] { null, @"(?imn)cat", "(?imn)cat", RegexOptions.None, new string[] { "cat" } };
-            yield return new object[] { null, @"(?=)", "cat", RegexOptions.None, new string[] { string.Empty } };
-            yield return new object[] { null, @"(?<=)", "cat", RegexOptions.None, new string[] { string.Empty } };
-            yield return new object[] { null, @"(?>)", "cat", RegexOptions.None, new string[] { string.Empty } };
-
-            // Alternation construct Invalid Regular Expressions
-            yield return new object[] { null, @"(?()|)", "(?()|)", RegexOptions.None, new string[] { "" } };
-
-            yield return new object[] { null, @"(?(cat)|)", "cat", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"(?(cat)|)", "dog", RegexOptions.None, new string[] { "" } };
-
-            yield return new object[] { null, @"(?(cat)catdog|)", "catdog", RegexOptions.None, new string[] { "catdog" } };
-            yield return new object[] { null, @"(?(cat)catdog|)", "dog", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"(?(cat)dog|)", "dog", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"(?(cat)dog|)", "cat", RegexOptions.None, new string[] { "" } };
-
-            yield return new object[] { null, @"(?(cat)|catdog)", "cat", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"(?(cat)|catdog)", "catdog", RegexOptions.None, new string[] { "" } };
-            yield return new object[] { null, @"(?(cat)|dog)", "dog", RegexOptions.None, new string[] { "dog" } };
-
-            // Invalid unicode
-            yield return new object[] { null, "([\u0000-\uFFFF-[azAZ09]]|[\u0000-\uFFFF-[^azAZ09]])+", "azAZBCDE1234567890BCDEFAZza", RegexOptions.None, new string[] { "azAZBCDE1234567890BCDEFAZza", "a" } };
-            yield return new object[] { null, "[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[a]]]]]]+", "abcxyzABCXYZ123890", RegexOptions.None, new string[] { "bcxyzABCXYZ123890" } };
-            yield return new object[] { null, "[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[a]]]]]]]+", "bcxyzABCXYZ123890a", RegexOptions.None, new string[] { "a" } };
-            yield return new object[] { null, "[\u0000-\uFFFF-[\\p{P}\\p{S}\\p{C}]]+", "!@`';.,$+<>=\x0001\x001FazAZ09", RegexOptions.None, new string[] { "azAZ09" } };
-
-            yield return new object[] { null, @"[\uFFFD-\uFFFF]+", "\uFFFC\uFFFD\uFFFE\uFFFF", RegexOptions.IgnoreCase, new string[] { "\uFFFD\uFFFE\uFFFF" } };
-            yield return new object[] { null, @"[\uFFFC-\uFFFE]+", "\uFFFB\uFFFC\uFFFD\uFFFE\uFFFF", RegexOptions.IgnoreCase, new string[] { "\uFFFC\uFFFD\uFFFE" } };
-
-            // Empty Match
-            yield return new object[] { null, @"([a*]*)+?$", "ab", RegexOptions.None, new string[] { "", "" } };
-            yield return new object[] { null, @"(a*)+?$", "b", RegexOptions.None, new string[] { "", "" } };
+                // Grouping Constructs Invalid Regular Expressions
+                yield return new object[] { engine, null, @"()", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } };
+                yield return new object[] { engine, null, @"(?<cat>)", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } };
+                yield return new object[] { engine, null, @"(?'cat')", "cat", RegexOptions.None, new string[] { string.Empty, string.Empty } };
+                yield return new object[] { engine, null, @"(?:)", "cat", RegexOptions.None, new string[] { string.Empty } };
+                yield return new object[] { engine, null, @"(?imn)", "cat", RegexOptions.None, new string[] { string.Empty } };
+                yield return new object[] { engine, null, @"(?imn)cat", "(?imn)cat", RegexOptions.None, new string[] { "cat" } };
+                yield return new object[] { engine, null, @"(?=)", "cat", RegexOptions.None, new string[] { string.Empty } };
+                yield return new object[] { engine, null, @"(?<=)", "cat", RegexOptions.None, new string[] { string.Empty } };
+                yield return new object[] { engine, null, @"(?>)", "cat", RegexOptions.None, new string[] { string.Empty } };
+
+                // Alternation construct Invalid Regular Expressions
+                yield return new object[] { engine, null, @"(?()|)", "(?()|)", RegexOptions.None, new string[] { "" } };
+
+                yield return new object[] { engine, null, @"(?(cat)|)", "cat", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"(?(cat)|)", "dog", RegexOptions.None, new string[] { "" } };
+
+                yield return new object[] { engine, null, @"(?(cat)catdog|)", "catdog", RegexOptions.None, new string[] { "catdog" } };
+                yield return new object[] { engine, null, @"(?(cat)catdog|)", "dog", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"(?(cat)dog|)", "dog", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"(?(cat)dog|)", "cat", RegexOptions.None, new string[] { "" } };
+
+                yield return new object[] { engine, null, @"(?(cat)|catdog)", "cat", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"(?(cat)|catdog)", "catdog", RegexOptions.None, new string[] { "" } };
+                yield return new object[] { engine, null, @"(?(cat)|dog)", "dog", RegexOptions.None, new string[] { "dog" } };
+
+                // Invalid unicode
+                yield return new object[] { engine, null, "([\u0000-\uFFFF-[azAZ09]]|[\u0000-\uFFFF-[^azAZ09]])+", "azAZBCDE1234567890BCDEFAZza", RegexOptions.None, new string[] { "azAZBCDE1234567890BCDEFAZza", "a" } };
+                yield return new object[] { engine, null, "[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[a]]]]]]+", "abcxyzABCXYZ123890", RegexOptions.None, new string[] { "bcxyzABCXYZ123890" } };
+                yield return new object[] { engine, null, "[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[\u0000-\uFFFF-[a]]]]]]]+", "bcxyzABCXYZ123890a", RegexOptions.None, new string[] { "a" } };
+                yield return new object[] { engine, null, "[\u0000-\uFFFF-[\\p{P}\\p{S}\\p{C}]]+", "!@`';.,$+<>=\x0001\x001FazAZ09", RegexOptions.None, new string[] { "azAZ09" } };
+
+                yield return new object[] { engine, null, @"[\uFFFD-\uFFFF]+", "\uFFFC\uFFFD\uFFFE\uFFFF", RegexOptions.IgnoreCase, new string[] { "\uFFFD\uFFFE\uFFFF" } };
+                yield return new object[] { engine, null, @"[\uFFFC-\uFFFE]+", "\uFFFB\uFFFC\uFFFD\uFFFE\uFFFF", RegexOptions.IgnoreCase, new string[] { "\uFFFC\uFFFD\uFFFE" } };
+
+                // Empty Match
+                yield return new object[] { engine, null, @"([a*]*)+?$", "ab", RegexOptions.None, new string[] { "", "" } };
+                yield return new object[] { engine, null, @"(a*)+?$", "b", RegexOptions.None, new string[] { "", "" } };
+            }
          }
  
          public static IEnumerable<object[]> Groups_CustomCulture_TestData_enUS()
          {
-            yield return new object[] { "en-US", "CH", "Ch", RegexOptions.IgnoreCase, new string[] { "Ch" } };
-            yield return new object[] { "en-US", "cH", "Ch", RegexOptions.IgnoreCase, new string[] { "Ch" } };
-            yield return new object[] { "en-US", "AA", "Aa", RegexOptions.IgnoreCase, new string[] { "Aa" } };
-            yield return new object[] { "en-US", "aA", "Aa", RegexOptions.IgnoreCase, new string[] { "Aa" } };
-            yield return new object[] { "en-US", "\u0130", "\u0049", RegexOptions.IgnoreCase, new string[] { "\u0049" } };
-            yield return new object[] { "en-US", "\u0130", "\u0069", RegexOptions.IgnoreCase, new string[] { "\u0069" } };
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
+            {
+                yield return new object[] { engine, "en-US", "CH", "Ch", RegexOptions.IgnoreCase, new string[] { "Ch" } };
+                yield return new object[] { engine, "en-US", "cH", "Ch", RegexOptions.IgnoreCase, new string[] { "Ch" } };
+                yield return new object[] { engine, "en-US", "AA", "Aa", RegexOptions.IgnoreCase, new string[] { "Aa" } };
+                yield return new object[] { engine, "en-US", "aA", "Aa", RegexOptions.IgnoreCase, new string[] { "Aa" } };
+                yield return new object[] { engine, "en-US", "\u0130", "\u0049", RegexOptions.IgnoreCase, new string[] { "\u0049" } };
+                yield return new object[] { engine, "en-US", "\u0130", "\u0069", RegexOptions.IgnoreCase, new string[] { "\u0069" } };
+            }
          }
  
          public static IEnumerable<object[]> Groups_CustomCulture_TestData_Czech()
          {
-            yield return new object[] { "cs-CZ", "CH", "Ch", RegexOptions.IgnoreCase, new string[] { "Ch" } };
-            yield return new object[] { "cs-CZ", "cH", "Ch", RegexOptions.IgnoreCase, new string[] { "Ch" } };
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
+            {
+                yield return new object[] { engine, "cs-CZ", "CH", "Ch", RegexOptions.IgnoreCase, new string[] { "Ch" } };
+                yield return new object[] { engine, "cs-CZ", "cH", "Ch", RegexOptions.IgnoreCase, new string[] { "Ch" } };
+            }
          }
  
  
          public static IEnumerable<object[]> Groups_CustomCulture_TestData_Danish()
          {
-            yield return new object[] { "da-DK", "AA", "Aa", RegexOptions.IgnoreCase, new string[] { "Aa" } };
-            yield return new object[] { "da-DK", "aA", "Aa", RegexOptions.IgnoreCase, new string[] { "Aa" } };
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
+            {
+                yield return new object[] { engine, "da-DK", "AA", "Aa", RegexOptions.IgnoreCase, new string[] { "Aa" } };
+                yield return new object[] { engine, "da-DK", "aA", "Aa", RegexOptions.IgnoreCase, new string[] { "Aa" } };
+            }
          }
  
          public static IEnumerable<object[]> Groups_CustomCulture_TestData_Turkish()
          {
-            yield return new object[] { "tr-TR", "\u0131", "\u0049", RegexOptions.IgnoreCase, new string[] { "\u0049" } };
-            yield return new object[] { "tr-TR", "\u0130", "\u0069", RegexOptions.IgnoreCase, new string[] { "\u0069" } };
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
+            {
+                yield return new object[] { engine, "tr-TR", "\u0131", "\u0049", RegexOptions.IgnoreCase, new string[] { "\u0049" } };
+                yield return new object[] { engine, "tr-TR", "\u0130", "\u0069", RegexOptions.IgnoreCase, new string[] { "\u0069" } };
+            }
          }
  
          public static IEnumerable<object[]> Groups_CustomCulture_TestData_AzeriLatin()
          {
-            if (PlatformDetection.IsNotBrowser)
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
              {
-                yield return new object[] { "az-Latn-AZ", "\u0131", "\u0049", RegexOptions.IgnoreCase, new string[] { "\u0049" } };
-                yield return new object[] { "az-Latn-AZ", "\u0130", "\u0069", RegexOptions.IgnoreCase, new string[] { "\u0069" } };
+                if (PlatformDetection.IsNotBrowser)
+                {
+                    yield return new object[] { engine, "az-Latn-AZ", "\u0131", "\u0049", RegexOptions.IgnoreCase, new string[] { "\u0049" } };
+                    yield return new object[] { engine, "az-Latn-AZ", "\u0130", "\u0069", RegexOptions.IgnoreCase, new string[] { "\u0069" } };
+                }
              }
          }
  
@@ -909,7 +927,7 @@ namespace System.Text.RegularExpressions.Tests
          [MemberData(nameof(Groups_CustomCulture_TestData_AzeriLatin))]
          [ActiveIssue("https://github.com/dotnet/runtime/issues/56407", TestPlatforms.Android)]
          [ActiveIssue("https://github.com/dotnet/runtime/issues/36900", TestPlatforms.iOS | TestPlatforms.tvOS | TestPlatforms.MacCatalyst)]
-        public async Task Groups(string cultureName, string pattern, string input, RegexOptions options, string[] expectedGroups, string altMatch = null)
+        public async Task Groups(RegexEngine engine, string cultureName, string pattern, string input, RegexOptions options, string[] expectedGroups, string altMatch = null)
          {
              if (cultureName is null)
              {
@@ -917,59 +935,50 @@ namespace System.Text.RegularExpressions.Tests
                  cultureName = culture.Equals(CultureInfo.InvariantCulture) ? "en-US" : culture.Name;
              }
  
-            using (new ThreadCultureChange(cultureName))
-            {
-                foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
-                {
-                    // Alternative altMatch when order of alternations matters in backtracking but order does not matter in NonBacktracking mode
-                    // Also in NonBacktracking there is only a single top-level match, which is expectedGroups[0] when altMatch is null
-                    string[] expected = engine == RegexEngine.NonBacktracking ?
-                        new string[] { altMatch ?? expectedGroups[0] } :
-                        expectedGroups;
+            // Alternative altMatch when order of alternations matters in backtracking but order does not matter in NonBacktracking mode
+            // Also in NonBacktracking there is only a single top-level match, which is expectedGroups[0] when altMatch is null
+            expectedGroups = engine == RegexEngine.NonBacktracking ?
+                new string[] { altMatch ?? expectedGroups[0] } :
+                expectedGroups;
  
-                    await GroupsAsync(engine, pattern, input, options, expected);
-                }
+            if (engine == RegexEngine.NonBacktracking && pattern.Contains("?(cat)"))
+            {
+                // General if-then-else construct is not supported and uses the ?(cat) condition in the tests
+                // TODO-NONBACKTRACKING: The constructor will throw NotSupportedException so this check will become obsolete
+                return;
              }
  
-            static async Task GroupsAsync(RegexEngine engine, string pattern, string input, RegexOptions options, string[] expectedGroups)
+            using var _ = new ThreadCultureChange(cultureName);
+
+            Regex regex;
+            try
              {
-                if (engine == RegexEngine.NonBacktracking && pattern.Contains("?(cat)"))
-                {
-                    // General if-then-else construct is not supported and uses the ?(cat) condition in the tests
-                    // TODO-NONBACKTRACKING: The constructor will throw NotSupportedException so this check will become obsolete
-                    return;
-                }
+                regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);
+            }
+            catch (NotSupportedException) when (RegexHelpers.IsNonBacktracking(engine))
+            {
+                // Some constructs are not supported in NonBacktracking mode, such as: if-then-else, lookaround, and backreferences
+                return;
+            }
  
-                Regex regex;
-                try
-                {
-                    regex = await RegexHelpers.GetRegexAsync(engine, pattern, options);
-                }
-                catch (NotSupportedException) when (RegexHelpers.IsNonBacktracking(engine))
-                {
-                    // Some constructs are not supported in NonBacktracking mode, such as: if-then-else, lookaround, and backreferences
-                    return;
-                }
+            Match match = regex.Match(input);
  
-                Match match = regex.Match(input);
+            Assert.True(match.Success);
+            Assert.Equal(expectedGroups[0], match.Value);
  
-                Assert.True(match.Success);
-                Assert.Equal(expectedGroups[0], match.Value);
+            if (!RegexHelpers.IsNonBacktracking(engine))
+            {
+                Assert.Equal(expectedGroups.Length, match.Groups.Count);
  
-                if (!RegexHelpers.IsNonBacktracking(engine))
+                int[] groupNumbers = regex.GetGroupNumbers();
+                string[] groupNames = regex.GetGroupNames();
+                for (int i = 0; i < expectedGroups.Length; i++)
                  {
-                    Assert.Equal(expectedGroups.Length, match.Groups.Count);
-
-                    int[] groupNumbers = regex.GetGroupNumbers();
-                    string[] groupNames = regex.GetGroupNames();
-                    for (int i = 0; i < expectedGroups.Length; i++)
-                    {
-                        Assert.Equal(expectedGroups[i], match.Groups[groupNumbers[i]].Value);
-                        Assert.Equal(match.Groups[groupNumbers[i]], match.Groups[groupNames[i]]);
-
-                        Assert.Equal(groupNumbers[i], regex.GroupNumberFromName(groupNames[i]));
-                        Assert.Equal(groupNames[i], regex.GroupNameFromNumber(groupNumbers[i]));
-                    }
+                    Assert.Equal(expectedGroups[i], match.Groups[groupNumbers[i]].Value);
+                    Assert.Equal(match.Groups[groupNumbers[i]], match.Groups[groupNames[i]]);
+
+                    Assert.Equal(groupNumbers[i], regex.GroupNumberFromName(groupNames[i]));
+                    Assert.Equal(groupNames[i], regex.GroupNameFromNumber(groupNumbers[i]));
                  }
              }
          }
diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs

index e7f0b4d..5da3f4f 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs
@@ -239,6 +239,8 @@ namespace System.Text.RegularExpressions.Tests
                  {
                      yield return ("aaa(?i:match this)bbb", "aaaMaTcH ThIsbbb", RegexOptions.None, 0, 16, true, "aaaMaTcH ThIsbbb");
                  }
+                yield return ("(?i:a)b(?i:c)d", "aaaaAbCdddd", RegexOptions.None, 0, 11, true, "AbCd");
+                yield return ("(?i:[\u0000-\u1000])[Bb]", "aaaaAbCdddd", RegexOptions.None, 0, 11, true, "Ab");
  
                  // Turning off case insensitive option in mid-pattern : Actual - "aaa(?-i:match this)bbb", "i"
                  yield return ("aAa(?-i:match this)bbb", "AaAmatch thisBBb", RegexOptions.IgnoreCase, 0, 16, true, "AaAmatch thisBBb");
@@ -274,6 +276,8 @@ namespace System.Text.RegularExpressions.Tests
                  yield return (@"\p{Ll}", "1bc", RegexOptions.IgnoreCase, 0, 3, true, "b");
                  yield return (@"\p{Lt}", "1bc", RegexOptions.IgnoreCase, 0, 3, true, "b");
                  yield return (@"\p{Lo}", "1bc", RegexOptions.IgnoreCase, 0, 3, false, string.Empty);
+                yield return (".[abc]", "xYZAbC", RegexOptions.IgnoreCase, 0, 6, true, "ZA");
+                yield return (".[abc]", "xYzXyZx", RegexOptions.IgnoreCase, 0, 6, false, "");
  
                  // "\D+"
                  yield return (@"\D+", "12321", RegexOptions.None, 0, 5, false, string.Empty);
@@ -360,7 +364,6 @@ namespace System.Text.RegularExpressions.Tests
                      yield return ("(?>(?:a|ab|abc|abcd))d", "abcd", RegexOptions.RightToLeft, 0, 4, true, "abcd");
                  }
                  yield return ("[^a-z0-9]etag|[^a-z0-9]digest", "this string has .digest as a substring", RegexOptions.None, 16, 7, true, ".digest");
-                yield return (@"a\w*a|def", "aaaaa", RegexOptions.None, 0, 5, true, "aaaaa");
  
                  // No Negation
                  yield return ("[abcd-[abcd]]+", "abcxyzABCXYZ`!@#$%^&*()_-+= \t\n", RegexOptions.None, 0, 30, false, string.Empty);
@@ -516,6 +519,8 @@ namespace System.Text.RegularExpressions.Tests
                      yield return (@".*\dFoo", "This1foo should 2FoO match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 0, 26, true, "This1foo should 2FoO");
                      yield return (@".*\dFoo", "This1Foo should 2fOo match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 0, 26, true, "This1Foo should 2fOo");
                      yield return (@".*\dfoo", "1fooThis2FOO should 1foo match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 8, 4, true, "2FOO");
+                    yield return (@"[\w\s].*", "1fooThis2FOO should 1foo match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 0, 30, true, "1fooThis2FOO should 1foo match");
+                    yield return (@"i.*", "1fooThis2FOO should 1foo match", RegexOptions.IgnoreCase | RegexOptions.RightToLeft, 0, 30, true, "is2FOO should 1foo match");
                  }
  
                  // [ActiveIssue("https://github.com/dotnet/runtime/issues/36149")]
@@ -537,6 +542,29 @@ namespace System.Text.RegularExpressions.Tests
                  //    yield return (@"^(?i:[\u24B6-\u24D0])$", ((char)('\u24CF' + 26)).ToString(), RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, 0, 1, true, ((char)('\u24CF' + 26)).ToString());
                  //}
  
+                // Long inputs
+                string longCharacterRange = string.Concat(Enumerable.Range(1, 0x2000).Select(c => (char)c));
+                foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.IgnoreCase })
+                {
+                    yield return ("\u1000", longCharacterRange, options, 0, 0x2000, true, "\u1000");
+                    yield return ("[\u1000-\u1001]", longCharacterRange, options, 0, 0x2000, true, "\u1000");
+                    yield return ("[\u0FF0-\u0FFF][\u1000-\u1001]", longCharacterRange, options, 0, 0x2000, true, "\u0FFF\u1000");
+
+                    yield return ("\uA640", longCharacterRange, options, 0, 0x2000, false, "");
+                    yield return ("[\u3000-\u3001]", longCharacterRange, options, 0, 0x2000, false, "");
+                    yield return ("[\uA640-\uA641][\u3000-\u3010]", longCharacterRange, options, 0, 0x2000, false, "");
+
+                    if (!RegexHelpers.IsNonBacktracking(engine))
+                    {
+                        yield return ("\u1000", longCharacterRange, options | RegexOptions.RightToLeft, 0, 0x2000, true, "\u1000");
+                        yield return ("[\u1000-\u1001]", longCharacterRange, options | RegexOptions.RightToLeft, 0, 0x2000, true, "\u1001");
+                        yield return ("[\u1000][\u1001-\u1010]", longCharacterRange, options, 0, 0x2000, true, "\u1000\u1001");
+
+                        yield return ("\uA640", longCharacterRange, options | RegexOptions.RightToLeft, 0, 0x2000, false, "");
+                        yield return ("[\u3000-\u3001][\uA640-\uA641]", longCharacterRange, options | RegexOptions.RightToLeft, 0, 0x2000, false, "");
+                    }
+                }
+
                  foreach (RegexOptions options in new[] { RegexOptions.None, RegexOptions.Singleline })
                  {
                      yield return (@"\W.*?\D", "seq 012 of 3 digits", options, 0, 19, true, " 012 ");
@@ -1283,13 +1311,11 @@ namespace System.Text.RegularExpressions.Tests
  
                  // Repeaters
                  Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{2147483647,}")).IsMatch("a"));
-                Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{50,}")).IsMatch("a")); // cutoff for Boyer-Moore prefix in debug
-                Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{51,}")).IsMatch("a"));
+                Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{50,}")).IsMatch("a"));
                  Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{50_000,}")).IsMatch("a")); // cutoff for Boyer-Moore prefix in release
-                Assert.False((await RegexHelpers.GetRegexAsync(engine, @"a{50_001,}")).IsMatch("a"));
  
                  // Multis
-                foreach (int length in new[] { 50, 51, 50_000, 50_001, char.MaxValue + 1 }) // based on knowledge of cut-offs used in Boyer-Moore
+                foreach (int length in new[] { 50, 50_000, char.MaxValue + 1 })
                  {
                      // The large counters are too slow for counting a's in NonBacktracking engine
                      // They will incur a constant of size length because in .*a{k} after reading n a's the
diff --git a/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs b/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs

index b325f1c..e1792c6 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs
@@ -48,11 +48,11 @@ namespace System.Text.RegularExpressions.Tests
              return start == 0;
          }
  
-        public static Regex CreateRegexInCulture(string pattern, RegexOptions options, Globalization.CultureInfo culture)
+        public static async Task<Regex> GetRegexAsync(RegexEngine engine, string pattern, RegexOptions options, Globalization.CultureInfo culture)
          {
              using (new System.Tests.ThreadCultureChange(culture))
              {
-                return new Regex(pattern, options);
+                return await GetRegexAsync(engine, pattern, options);
              }
          }
  
@@ -116,7 +116,7 @@ namespace System.Text.RegularExpressions.Tests
              // - Handle NonBacktrackingSourceGenerated
  
              return
-                options is null ? new Regex(pattern, RegexOptions.Compiled | OptionsFromEngine(engine)) :
+                options is null ? new Regex(pattern, OptionsFromEngine(engine)) :
                  matchTimeout is null ? new Regex(pattern, options.Value | OptionsFromEngine(engine)) :
                  new Regex(pattern, options.Value | OptionsFromEngine(engine), matchTimeout.Value);
          }
@@ -136,7 +136,7 @@ namespace System.Text.RegularExpressions.Tests
              {
                  (string pattern, RegexOptions? options, TimeSpan? matchTimeout) = regexes[i];
                  results[i] =
-                    options is null ? new Regex(pattern, RegexOptions.Compiled | OptionsFromEngine(engine)) :
+                    options is null ? new Regex(pattern, OptionsFromEngine(engine)) :
                      matchTimeout is null ? new Regex(pattern, options.Value | OptionsFromEngine(engine)) :
                      new Regex(pattern, options.Value | OptionsFromEngine(engine), matchTimeout.Value);
              }
diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexCultureTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexCultureTests.cs

index adcde90..028afab 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/tests/RegexCultureTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/RegexCultureTests.cs
@@ -12,62 +12,43 @@ namespace System.Text.RegularExpressions.Tests
  {
      public class RegexCultureTests
      {
-        // TODO: Validate source generator after figuring out what to do with culture
-
-        public static IEnumerable<RegexOptions> RegexOptionsExtended()
-        {
-            yield return RegexOptions.None;
-            yield return RegexOptions.Compiled;
-            if (PlatformDetection.IsNetCore)
-            {
-                yield return RegexHelpers.RegexOptionNonBacktracking;
-            }
-        }
-
-        public static IEnumerable<object[]> RegexOptionsExtended_MemberData() =>
-            from options in RegexOptionsExtended()
-            select new object[] { options };
-
          public static IEnumerable<object[]> CharactersComparedOneByOne_AnchoredPattern_TestData()
          {
-            foreach (RegexOptions options in RegexOptionsExtended())
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
              {
-                yield return new object[] { "^aa$", "aA", "da-DK", options, false };
-                yield return new object[] { "^aA$", "aA", "da-DK", options, true };
-                yield return new object[] { "^aa$", "aA", "da-DK", options | RegexOptions.IgnoreCase, true };
-                yield return new object[] { "^aA$", "aA", "da-DK", options | RegexOptions.IgnoreCase, true };
+                yield return new object[] { engine, "^aa$", "aA", "da-DK", RegexOptions.None, false };
+                yield return new object[] { engine, "^aA$", "aA", "da-DK", RegexOptions.None, true };
+                yield return new object[] { engine, "^aa$", "aA", "da-DK", RegexOptions.IgnoreCase, true };
+                yield return new object[] { engine, "^aA$", "aA", "da-DK", RegexOptions.IgnoreCase, true };
              }
          }
  
          [Theory]
          [MemberData(nameof(CharactersComparedOneByOne_AnchoredPattern_TestData))]
-        public void CharactersComparedOneByOne_AnchoredPattern(string pattern, string input, string culture, RegexOptions options, bool expected)
+        public async Task CharactersComparedOneByOne_AnchoredPattern(RegexEngine engine, string pattern, string input, string culture, RegexOptions options, bool expected)
          {
              // Regex compares characters one by one.  If that changes, it could impact the behavior of
              // a case like this, where these characters are not the same, but the strings compare
              // as equal with the invariant culture (and some other cultures as well).
              using (new ThreadCultureChange(culture))
              {
-                foreach (RegexOptions compiled in new[] { RegexOptions.None, RegexOptions.Compiled })
-                {
-                    Assert.Equal(expected, new Regex(pattern, options | compiled).IsMatch(input));
-                }
+                Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, options);
+                Assert.Equal(expected, r.IsMatch(input));
              }
          }
  
-
          public static IEnumerable<object[]> CharactersComparedOneByOne_Invariant_TestData()
          {
-            foreach (RegexOptions options in RegexOptionsExtended())
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
              {
-                yield return new object[] { options };
-                yield return new object[] { options | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant };
+                yield return new object[] { engine, RegexOptions.None };
+                yield return new object[] { engine, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant };
              }
          }
  
          [Theory]
          [MemberData(nameof(CharactersComparedOneByOne_Invariant_TestData))]
-        public void CharactersComparedOneByOne_Invariant(RegexOptions options)
+        public async Task CharactersComparedOneByOne_Invariant(RegexEngine engine, RegexOptions options)
          {
              // Regex compares characters one by one.  If that changes, it could impact the behavior of
              // a case like this, where these characters are not the same, but the strings compare
@@ -88,35 +69,20 @@ namespace System.Text.RegularExpressions.Tests
                  string input = string.Concat(Enumerable.Repeat(S2, multiple));
                  Regex r;
  
-                // Validate when the string is at the beginning of the pattern, as it impacts Boyer-Moore prefix matching.
-                r = new Regex(pattern, options);
+                // Validate when the string is at the beginning of the pattern, as it impacts prefix matching.
+                r = await RegexHelpers.GetRegexAsync(engine, pattern, options);
                  Assert.False(r.IsMatch(input));
                  Assert.True(r.IsMatch(pattern));
  
                  // Validate when it's not at the beginning of the pattern, as it impacts "multi" matching.
-                r = new Regex("[abc]" + pattern, options);
+                r = await RegexHelpers.GetRegexAsync(engine, "[abc]" + pattern, options);
                  Assert.False(r.IsMatch("a" + input));
                  Assert.True(r.IsMatch("a" + pattern));
              }
          }
  
-        public static IEnumerable<object[]> CharactersLowercasedOneByOne_MemberData()
-        {
-            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
-            {
-                switch (engine)
-                {
-                    case RegexEngine.SourceGenerated:
-                    case RegexEngine.NonBacktrackingSourceGenerated:
-                        continue;
-                }
-
-                yield return new object[] { engine };
-            }
-        }
-
          [Theory]
-        [MemberData(nameof(CharactersLowercasedOneByOne_MemberData))]
+        [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
          public async Task CharactersLowercasedOneByOne(RegexEngine engine)
          {
              using (new ThreadCultureChange("en-US"))
@@ -191,15 +157,15 @@ namespace System.Text.RegularExpressions.Tests
          [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Doesn't support NonBacktracking")]
          [Fact]
          [ActiveIssue("https://github.com/dotnet/runtime/issues/60568", TestPlatforms.Android)]
-        public void TurkishI_Is_Differently_LowerUpperCased_In_Turkish_Culture_NonBacktracking()
+        public async Task TurkishI_Is_Differently_LowerUpperCased_In_Turkish_Culture_NonBacktracking()
          {
              var turkish = new CultureInfo("tr-TR");
              string input = "I\u0131\u0130i";
  
              // Use the input as the regex also
              // Ignore the Compiled option here because it is a noop in combination with NonBacktracking 
-            Regex cultInvariantRegex = RegexHelpers.CreateRegexInCulture(input, RegexHelpers.RegexOptionNonBacktracking | RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, CultureInfo.InvariantCulture);
-            Regex turkishRegex = RegexHelpers.CreateRegexInCulture(input, RegexHelpers.RegexOptionNonBacktracking | RegexOptions.IgnoreCase, turkish);
+            Regex cultInvariantRegex = await RegexHelpers.GetRegexAsync(RegexEngine.NonBacktracking, input, RegexOptions.IgnoreCase | RegexOptions.CultureInvariant, CultureInfo.InvariantCulture);
+            Regex turkishRegex = await RegexHelpers.GetRegexAsync(RegexEngine.NonBacktracking, input, RegexOptions.IgnoreCase, turkish);
  
              Assert.True(cultInvariantRegex.IsMatch(input));
              Assert.True(turkishRegex.IsMatch(input));    // <---------- This result differs from the result in the previous test!!!
@@ -220,60 +186,70 @@ namespace System.Text.RegularExpressions.Tests
              Assert.True(turkishRegex.IsMatch(input.ToUpper(turkish)));
          }
  
-        [ActiveIssue("Incorrect handling of IgnoreCase over intervals in Turkish Culture, https://github.com/dotnet/runtime/issues/58958")]
-        [Fact]
-        public void TurkishCulture_Handling_Of_IgnoreCase()
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/58958")]
+        [Theory]
+        [MemberData(nameof(RegexHelpers.AvailableEngines_MemberData), MemberType = typeof(RegexHelpers))]
+        public async Task TurkishCulture_Handling_Of_IgnoreCase(RegexEngine engine)
          {
              var turkish = new CultureInfo("tr-TR");
              string input = "I\u0131\u0130i";
              string pattern = "[H-J][\u0131-\u0140][\u0120-\u0130][h-j]";
  
-            Regex regex = RegexHelpers.CreateRegexInCulture(pattern, RegexOptions.IgnoreCase, turkish);
+            Regex regex = await RegexHelpers.GetRegexAsync(engine, pattern, RegexOptions.IgnoreCase, turkish);
  
              // The pattern must trivially match the input because all of the letters fall in the given intervals
              // Ignoring case can only add more letters here -- not REMOVE letters
              Assert.True(regex.IsMatch(input));
          }
  
-        [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework, "Doesn't support NonBacktracking")]
-        [Fact]
-        public void TurkishCulture_Handling_Of_IgnoreCase_NonBacktracking()
+        public static IEnumerable<object[]> TurkishCulture_MatchesWordChar_MemberData()
          {
-            var turkish = new CultureInfo("tr-TR");
-            string input = "I\u0131\u0130i";
-            string pattern = "[H-J][\u0131-\u0140][\u0120-\u0130][h-j]";
-
-            Regex regex = RegexHelpers.CreateRegexInCulture(pattern, RegexOptions.IgnoreCase | RegexHelpers.RegexOptionNonBacktracking, turkish);
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
+            {
+                yield return new object[] { engine, "I\u0131\u0130i", RegexOptions.None, "I\u0131\u0130i" };
+                yield return new object[] { engine, "I\u0131\u0130i", RegexOptions.IgnoreCase, "I\u0131\u0130i" };
+                if (!RegexHelpers.IsNonBacktracking(engine))
+                {
+                    yield return new object[] { engine, "I\u0131\u0130i", RegexOptions.IgnoreCase | RegexOptions.ECMAScript, "" };
+                }
+            }
+        }
  
-            // The pattern must trivially match the input because all of the letters fall in the given intervals
-            // Ignoring case can only add more letters here -- not REMOVE letters
-            Assert.True(regex.IsMatch(input));
+        [Theory]
+        [MemberData(nameof(TurkishCulture_MatchesWordChar_MemberData))]
+        public async Task TurkishCulture_MatchesWordChar(RegexEngine engine, string input, RegexOptions options, string expectedResult)
+        {
+            using (new ThreadCultureChange(new CultureInfo("tr-TR")))
+            {
+                Regex regex = await RegexHelpers.GetRegexAsync(engine, @"\w*", options);
+                Assert.Equal(expectedResult, regex.Match(input).Value);
+            }
          }
  
          public static IEnumerable<object[]> Match_In_Different_Cultures_TestData()
          {
              CultureInfo invariant = CultureInfo.InvariantCulture;
-            CultureInfo current = CultureInfo.CurrentCulture;
+            CultureInfo enUS = new CultureInfo("en-US");
              CultureInfo turkish = new CultureInfo("tr-TR");
  
-            foreach (RegexOptions options in RegexOptionsExtended())
+            foreach (RegexEngine engine in RegexHelpers.AvailableEngines)
              {
                  // \u0130 (Turkish I with dot) and \u0131 (Turkish i without dot) are unrelated characters in general
  
                  // Expected answers in the default en-US culture
-                yield return new object[] { "(?i:I)", options, current, "xy\u0131ab", "" };
-                yield return new object[] { "(?i:iI+)", options, current, "abcIIIxyz", "III" };
-                yield return new object[] { "(?i:iI+)", options, current, "abcIi\u0130xyz", "Ii\u0130" };
-                yield return new object[] { "(?i:iI+)", options, current, "abcI\u0130ixyz", "I\u0130i" };
-                yield return new object[] { "(?i:iI+)", options, current, "abc\u0130IIxyz", "\u0130II" };
-                yield return new object[] { "(?i:iI+)", options, current, "abc\u0130\u0131Ixyz", "" };
-                yield return new object[] { "(?i:iI+)", options, current, "abc\u0130Iixyz", "\u0130Ii" };
-                yield return new object[] { "(?i:[^IJKLM]I)", options, current, "ii\u0130i\u0131ab", "" };
+                yield return new object[] { "(?i:I)", engine, enUS, "xy\u0131ab", "" };
+                yield return new object[] { "(?i:iI+)", engine, enUS, "abcIIIxyz", "III" };
+                yield return new object[] { "(?i:iI+)", engine, enUS, "abcIi\u0130xyz", "Ii\u0130" };
+                yield return new object[] { "(?i:iI+)", engine, enUS, "abcI\u0130ixyz", "I\u0130i" };
+                yield return new object[] { "(?i:iI+)", engine, enUS, "abc\u0130IIxyz", "\u0130II" };
+                yield return new object[] { "(?i:iI+)", engine, enUS, "abc\u0130\u0131Ixyz", "" };
+                yield return new object[] { "(?i:iI+)", engine, enUS, "abc\u0130Iixyz", "\u0130Ii" };
+                yield return new object[] { "(?i:[^IJKLM]I)", engine, enUS, "ii\u0130i\u0131ab", "" };
  
                  // Expected answers in the invariant culture
-                yield return new object[] { "(?i:I)", options, invariant, "xy\u0131ab", "" };
-                yield return new object[] { "(?i:iI+)", options, invariant, "abcIIIxyz", "III" };
-                yield return new object[] { "(?i:iI+)", options, invariant, "abc\u0130\u0131Ixyz", "" };
+                yield return new object[] { "(?i:I)", engine, invariant, "xy\u0131ab", "" };
+                yield return new object[] { "(?i:iI+)", engine, invariant, "abcIIIxyz", "III" };
+                yield return new object[] { "(?i:iI+)", engine, invariant, "abc\u0130\u0131Ixyz", "" };
  
                  // Expected answers in the Turkish culture
                  //
@@ -281,17 +257,17 @@ namespace System.Text.RegularExpressions.Tests
                  // https://github.com/dotnet/runtime/issues/60568
                  if (!PlatformDetection.IsAndroid)
                  {
-                    yield return new object[] { "(?i:I)", options, turkish, "xy\u0131ab", "\u0131" };
-                    yield return new object[] { "(?i:iI+)", options, turkish, "abcIIIxyz", "" };
-                    yield return new object[] { "(?i:iI+)", options, turkish, "abcIi\u0130xyz", "" };
-                    yield return new object[] { "(?i:iI+)", options, turkish, "abcI\u0130ixyz", "" };
-                    yield return new object[] { "(?i:[^IJKLM]I)", options, turkish, "ii\u0130i\u0131ab", "i\u0131" };
+                    yield return new object[] { "(?i:I)", engine, turkish, "xy\u0131ab", "\u0131" };
+                    yield return new object[] { "(?i:iI+)", engine, turkish, "abcIIIxyz", "" };
+                    yield return new object[] { "(?i:iI+)", engine, turkish, "abcIi\u0130xyz", "" };
+                    yield return new object[] { "(?i:iI+)", engine, turkish, "abcI\u0130ixyz", "" };
+                    yield return new object[] { "(?i:[^IJKLM]I)", engine, turkish, "ii\u0130i\u0131ab", "i\u0131" };
                  }
  
                  // None and Compiled are separated into the Match_In_Different_Cultures_CriticalCases test
-                if (options == RegexHelpers.RegexOptionNonBacktracking)
+                if (RegexHelpers.IsNonBacktracking(engine))
                  {
-                    foreach (object[] data in Match_In_Different_Cultures_CriticalCases_TestData_For(options))
+                    foreach (object[] data in Match_In_Different_Cultures_CriticalCases_TestData_For(engine))
                      {
                          yield return data;
                      }
@@ -299,39 +275,39 @@ namespace System.Text.RegularExpressions.Tests
              }
          }
  
-        public static IEnumerable<object[]> Match_In_Different_Cultures_CriticalCases_TestData_For(RegexOptions options)
+        public static IEnumerable<object[]> Match_In_Different_Cultures_CriticalCases_TestData_For(RegexEngine engine)
          {
              CultureInfo invariant = CultureInfo.InvariantCulture;
              CultureInfo turkish = new CultureInfo("tr-TR");
  
              // Expected answers in the invariant culture
-            yield return new object[] { "(?i:iI+)", options, invariant, "abcIi\u0130xyz", "Ii" };               // <-- failing for None, Compiled
-            yield return new object[] { "(?i:iI+)", options, invariant, "abcI\u0130ixyz", "" };                 // <-- failing for Compiled
-            yield return new object[] { "(?i:iI+)", options, invariant, "abc\u0130IIxyz", "II" };               // <-- failing for Compiled
-            yield return new object[] { "(?i:iI+)", options, invariant, "abc\u0130Iixyz", "Ii" };               // <-- failing for Compiled
-            yield return new object[] { "(?i:[^IJKLM]I)", options, invariant, "ii\u0130i\u0131ab", "\u0130i" }; // <-- failing for None, Compiled
+            yield return new object[] { "(?i:iI+)", engine, invariant, "abcIi\u0130xyz", "Ii" };               // <-- failing for None, Compiled
+            yield return new object[] { "(?i:iI+)", engine, invariant, "abcI\u0130ixyz", "" };                 // <-- failing for Compiled
+            yield return new object[] { "(?i:iI+)", engine, invariant, "abc\u0130IIxyz", "II" };               // <-- failing for Compiled
+            yield return new object[] { "(?i:iI+)", engine, invariant, "abc\u0130Iixyz", "Ii" };               // <-- failing for Compiled
+            yield return new object[] { "(?i:[^IJKLM]I)", engine, invariant, "ii\u0130i\u0131ab", "\u0130i" }; // <-- failing for None, Compiled
  
              // Expected answers in the Turkish culture
              // Android produces unexpected results for tr-TR
              // https://github.com/dotnet/runtime/issues/60568
              if (!PlatformDetection.IsAndroid)
              {
-                yield return new object[] { "(?i:iI+)", options, turkish, "abc\u0130IIxyz", "\u0130II" };           // <-- failing for None, Compiled
-                yield return new object[] { "(?i:iI+)", options, turkish, "abc\u0130\u0131Ixyz", "\u0130\u0131I" }; // <-- failing for None, Compiled
-                yield return new object[] { "(?i:iI+)", options, turkish, "abc\u0130Iixyz", "\u0130I" };            // <-- failing for None, Compiled
+                yield return new object[] { "(?i:iI+)", engine, turkish, "abc\u0130IIxyz", "\u0130II" };           // <-- failing for None, Compiled
+                yield return new object[] { "(?i:iI+)", engine, turkish, "abc\u0130\u0131Ixyz", "\u0130\u0131I" }; // <-- failing for None, Compiled
+                yield return new object[] { "(?i:iI+)", engine, turkish, "abc\u0130Iixyz", "\u0130I" };            // <-- failing for None, Compiled
              }
          }
  
          public static IEnumerable<object[]> Match_In_Different_Cultures_CriticalCases_TestData() =>
-            Match_In_Different_Cultures_CriticalCases_TestData_For(RegexOptions.None).Union(Match_In_Different_Cultures_CriticalCases_TestData_For(RegexOptions.Compiled));
+            Match_In_Different_Cultures_CriticalCases_TestData_For(RegexEngine.Interpreter).Union(Match_In_Different_Cultures_CriticalCases_TestData_For(RegexEngine.Compiled));
  
          [ActiveIssue("https://github.com/dotnet/runtime/issues/60899", TestPlatforms.Browser)]
+        [ActiveIssue("https://github.com/dotnet/runtime/issues/60697", TestPlatforms.iOS | TestPlatforms.tvOS)]
          [Theory]
          [MemberData(nameof(Match_In_Different_Cultures_TestData))]
-        [ActiveIssue("https://github.com/dotnet/runtime/issues/60697", TestPlatforms.iOS | TestPlatforms.tvOS)]
-        public void Match_In_Different_Cultures(string pattern, RegexOptions options, CultureInfo culture, string input, string match_expected)
+        public async Task Match_In_Different_Cultures(string pattern, RegexEngine engine, CultureInfo culture, string input, string match_expected)
          {
-            Regex r = RegexHelpers.CreateRegexInCulture(pattern, options, culture);
+            Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, RegexOptions.None, culture);
              Match match = r.Match(input);
              Assert.Equal(match_expected, match.Value);
          }
@@ -339,9 +315,9 @@ namespace System.Text.RegularExpressions.Tests
          [ActiveIssue("Incorrect treatment of IgnoreCase in Turkish and Invariant cultures, https://github.com/dotnet/runtime/issues/58956, https://github.com/dotnet/runtime/issues/58958 ")]
          [Theory]
          [MemberData(nameof(Match_In_Different_Cultures_CriticalCases_TestData))]
-        public void Match_In_Different_Cultures_CriticalCases(string pattern, RegexOptions options, CultureInfo culture, string input, string match_expected)
+        public async Task Match_In_Different_Cultures_CriticalCases(string pattern, RegexEngine engine, CultureInfo culture, string input, string match_expected)
          {
-            Regex r = RegexHelpers.CreateRegexInCulture(pattern, options, culture);
+            Regex r = await RegexHelpers.GetRegexAsync(engine, pattern, RegexOptions.None, culture);
              Match match = r.Match(input);
              Assert.Equal(match_expected, match.Value);
          }
@@ -367,9 +343,8 @@ namespace System.Text.RegularExpressions.Tests
          /// </summary>
          [OuterLoop("May take several seconds due to large number of cultures tested")]
          [SkipOnTargetFramework(TargetFrameworkMonikers.NetFramework)]
-        [Theory]
-        [MemberData(nameof(RegexOptionsExtended_MemberData))]
-        public void TestIgnoreCaseRelation(RegexOptions options)
+        [Fact]
+        public void TestIgnoreCaseRelation()
          {
              // these 22 characters are considered case-insensitive by regex, while they are case-sensitive outside regex
              // but they are only case-sensitive in an asymmetrical way: tolower(c)=c, tolower(toupper(c)) != c
@@ -380,10 +355,10 @@ namespace System.Text.RegularExpressions.Tests
              {
                  char cU = char.ToUpper(c);
                  Assert.NotEqual(c, cU);
-                Assert.False(Regex.IsMatch(c.ToString(), cU.ToString(), options | RegexOptions.IgnoreCase));
+                Assert.False(Regex.IsMatch(c.ToString(), cU.ToString(), RegexOptions.IgnoreCase));
              }
  
-            Assert.False(Regex.IsMatch(Turkish_i_withoutDot.ToString(), "i", options | RegexOptions.IgnoreCase));
+            Assert.False(Regex.IsMatch(Turkish_i_withoutDot.ToString(), "i", RegexOptions.IgnoreCase));
  
              // as baseline it is assumed that the invariant culture does not change
              HashSet<char>[] inv_table = ComputeIgnoreCaseTable(CultureInfo.InvariantCulture, treatedAsCaseInsensitive);
diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexExperiment.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexExperiment.cs

index ee63395..3f27aab 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/tests/RegexExperiment.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/RegexExperiment.cs
@@ -33,11 +33,14 @@ namespace System.Text.RegularExpressions.Tests
          /// <summary>Output directory for generated dgml files.</summary>
          private static string DgmlOutputDirectoryPath => Path.Combine(s_tmpWorkingDir, "dgml");
  
-        private static string ExperimentDirectoryPath => Path.Combine(s_tmpWorkingDir, "experiments");
-
-        [ConditionalFact(nameof(Enabled))]
+        [Fact]
          public void RegenerateUnicodeTables()
          {
+            if (!Enabled)
+            {
+                return;
+            }
+
              MethodInfo? genUnicode = typeof(Regex).GetMethod("GenerateUnicodeTables", BindingFlags.NonPublic | BindingFlags.Static);
              // GenerateUnicodeTables is not available in Release build
              if (genUnicode is not null)
@@ -46,9 +49,6 @@ namespace System.Text.RegularExpressions.Tests
              }
          }
  
-        private static void WriteOutput(string message) =>
-            File.AppendAllText(OutputFilePath, message);
-
          /// <summary>Save the regex as a DFA in DGML format in the textwriter.</summary>
          private static bool TrySaveDGML(Regex regex, TextWriter writer, int bound = -1, bool hideStateInfo = false, bool addDotStar = false, bool inReverse = false, bool onlyDFAinfo = false, int maxLabelLength = -1, bool asNFA = false)
          {
@@ -85,60 +85,6 @@ namespace System.Text.RegularExpressions.Tests
              }
          }
  
-        /// <summary>
-        /// The intent is that this method is run in realease build for lightweight performance testing.
-        /// One can e.g. open the outputfile in emacs with AUTO-REVERT-ON in order to follow the progress in real time.
-        /// It will print timing info and match info for both DFA, Compiled option and None.
-        /// Place sample regexes in the regexesfile (one per line) and sample input in inputfile.
-        /// It will essentially produce a csv file with the info:
-        /// regexnr, matchtime_DFA, result_DFA, matchtime_Compiled, result_Compiled, matchtime_None, result_None,
-        /// where result_.. is one of
-        ///   Yes(index,length)
-        ///   No
-        ///   TIMEOUT
-        ///   ERROR 
-        ///  and in the case of TIMEOUT or ERROR time is 10000 (the timeout limit of 10sec)
-        /// </summary>
-        [ConditionalFact(nameof(Enabled))]
-        public void TestRunPerformance()
-        {
-            if (!Directory.Exists(ExperimentDirectoryPath))
-            {
-                Directory.CreateDirectory(ExperimentDirectoryPath);
-            }
-
-            string[] dirs = Directory.GetDirectories(ExperimentDirectoryPath);
-            if (dirs.Length == 0)
-            {
-                WriteOutput("\nExperiments directory is empty");
-                return;
-            }
-
-            DirectoryInfo experimentDI = Directory.GetParent(dirs[0]);
-            DirectoryInfo[] experiments =
-                Array.FindAll(experimentDI.GetDirectories(),
-                             di => ((di.Attributes & FileAttributes.Hidden) != (FileAttributes.Hidden)) &&
-                                   Array.Exists(di.GetFiles(), f => f.Name.Equals("regexes.txt")) &&
-                                   Array.Exists(di.GetFiles(), f => f.Name.Equals("input.txt")));
-            if (experiments.Length == 0)
-            {
-                WriteOutput("\nExperiments directory has no indiviual experiment subdirectories containing files 'regexes.txt' and 'input.txt'.");
-                return;
-            }
-
-            for (int i = 0; i < experiments.Length; i++)
-            {
-                string input = File.ReadAllText(Path.Combine(experiments[i].FullName, "input.txt"));
-                string[] rawRegexes = File.ReadAllLines(Path.Combine(experiments[i].FullName, "regexes.txt"));
-
-                WriteOutput($"\n---------- {experiments[i].Name} ----------");
-                for (int r = 0; r < rawRegexes.Length; r++)
-                {
-                    TestRunRegex((r + 1).ToString(), rawRegexes[r], input);
-                }
-            }
-        }
-
          private static long MeasureMatchTime(Regex re, string input, out Match match)
          {
              try
@@ -178,9 +124,14 @@ namespace System.Text.RegularExpressions.Tests
          /// </summary>
          private static string Not(string regex) => $"(?({regex})[0-[0]]|.*)";
  
-        [ConditionalFact(nameof(Enabled))]
+        [Fact]
          public void ViewSampleRegexInDGML()
          {
+            if (!Enabled)
+            {
+                return;
+            }
+
              try
              {
                  //string rawregex = @"\bis\w*\b";
@@ -233,45 +184,6 @@ namespace System.Text.RegularExpressions.Tests
              }
          }
  
-        private void TestRunRegex(string name, string rawregex, string input, bool viewDGML = false, bool dotStar = false)
-        {
-            var reNone = new Regex(rawregex, RegexOptions.None, new TimeSpan(0, 0, 10));
-            var reCompiled = new Regex(rawregex, RegexOptions.Compiled, new TimeSpan(0, 0, 10));
-            var reNonBacktracking = new Regex(rawregex, RegexOptions.NonBacktracking);
-
-            if (viewDGML)
-                ViewDGML(reNonBacktracking, addDotStar: dotStar);
-            WriteOutput($"\n{name}");
-
-            // First call in each case is a warmup
-
-            // None
-            MeasureMatchTime(reNone, input, out _);
-            long tN = MeasureMatchTime(reNone, input, out Match mN);
-            WriteMatchOutput(tN, mN);
-
-            // Compiled
-            MeasureMatchTime(reCompiled, input, out _);
-            long tC = MeasureMatchTime(reCompiled, input, out Match mC);
-            WriteMatchOutput(tC, mC);
-
-            // Non-Backtracking
-            MeasureMatchTime(reNonBacktracking, input, out _);
-            long tD = MeasureMatchTime(reNonBacktracking, input, out Match mD);
-            WriteMatchOutput(tD, mD);
-
-            void WriteMatchOutput(long t, Match m)
-            {
-                WriteOutput(t switch
-                {
-                    -1 => ",10000,TIMEOUT",
-                    -2 => ",10000,ERROR",
-                    _ when m.Success => $",{t},Yes({m.Index}:{m.Length})",
-                    _ => $",{t},No"
-                });
-            }
-        }
-
          #region Tests involving Intersection and Complement
          // Currently only run in DEBUG mode in the NonBacktracking engine
          [ConditionalFact(typeof(PlatformDetection), nameof(PlatformDetection.IsNetCore))]
diff --git a/src/libraries/System.Text.RegularExpressions/tests/RegexReductionTests.cs b/src/libraries/System.Text.RegularExpressions/tests/RegexReductionTests.cs

index 810cbe3..fc62a3d 100644 (file)
--- a/src/libraries/System.Text.RegularExpressions/tests/RegexReductionTests.cs
+++ b/src/libraries/System.Text.RegularExpressions/tests/RegexReductionTests.cs
@@ -370,6 +370,7 @@ namespace System.Text.RegularExpressions.Tests
          [InlineData("(?i:abcde)|(?i:abcdf)", "(?i:abcd[ef])")]
          [InlineData("xyz(?:(?i:abcde)|(?i:abcdf))", "xyz(?i:abcd[ef])")]
          [InlineData("bonjour|hej|ciao|shalom|zdravo|pozdrav|hallo|hola|hello|hey|witam|tere|bonjou|salam|helo|sawubona", "(?>bonjou(?>r|)|h(?>e(?>j|(?>l(?>lo|o)|y))|allo|ola)|ciao|s(?>halom|a(?>lam|wubona))|zdravo|pozdrav|witam|tere)")]
+        [InlineData("\\w\\d123|\\w\\dabc", "\\w\\d(?:123|abc)")]
          // Auto-atomicity
          [InlineData("a*b", "(?>a*)b")]
          [InlineData("a*b+", "(?>a*)b+")]
@@ -391,6 +392,16 @@ namespace System.Text.RegularExpressions.Tests
          [InlineData("(?:w*)+\\.", "(?>w*)+\\.")]
          [InlineData("(a[bcd]e*)*fg", "(a[bcd](?>e*))*fg")]
          [InlineData("(\\w[bcd]\\s*)*fg", "(\\w[bcd](?>\\s*))*fg")]
+        // IgnoreCase set creation
+        [InlineData("(?i)abcd", "[Aa][Bb][Cc][Dd]")]
+        [InlineData("(?i)abcd|efgh", "[Aa][Bb][Cc][Dd]|[Ee][Ff][Gg][Hh]")]
+        [InlineData("(?i)a|b", "[AaBb]")]
+        [InlineData("(?i)[abcd]", "[AaBbCcDd]")]
+        [InlineData("(?i)[acexyz]", "[AaCcEeXxYyZz]")]
+        [InlineData("(?i)\\w", "\\w")]
+        [InlineData("(?i)\\d", "\\d")]
+        [InlineData("(?i).", ".")]
+        [InlineData("(?i)\\$", "\\$")]
          public void PatternsReduceIdentically(string pattern1, string pattern2)
          {
              string result1 = GetRegexCodes(new Regex(pattern1));
@@ -401,10 +412,6 @@ namespace System.Text.RegularExpressions.Tests
              }
  
              Assert.NotEqual(GetRegexCodes(new Regex(pattern1, RegexOptions.RightToLeft)), GetRegexCodes(new Regex(pattern2)));
-            if (!pattern1.Contains("?i:") && !pattern2.Contains("?i:"))
-            {
-                Assert.NotEqual(GetRegexCodes(new Regex(pattern1, RegexOptions.IgnoreCase)), GetRegexCodes(new Regex(pattern2)));
-            }
          }
  
          [Theory]
@@ -450,7 +457,6 @@ namespace System.Text.RegularExpressions.Tests
          // Not reducing branches of alternations with different casing
          [InlineData("(?i:abcd)|abcd", "abcd|abcd")]
          [InlineData("abcd|(?i:abcd)", "abcd|abcd")]
-        [InlineData("abc(?:(?i:e)|f)", "abc[ef]")]
          // Not applying auto-atomicity
          [InlineData("a*b*", "(?>a*)b*")]
          [InlineData("[ab]*[^a]", "(?>[ab]*)[^a]")]
author	Stephen Toub <stoub@microsoft.com>
	Wed, 17 Nov 2021 16:41:12 +0000 (11:41 -0500)
committer	GitHub <noreply@github.com>
	Wed, 17 Nov 2021 16:41:12 +0000 (11:41 -0500)
src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Emitter.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/gen/RegexGenerator.Parser.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/gen/Stubs.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/gen/System.Text.RegularExpressions.Generator.csproj		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System.Text.RegularExpressions.csproj		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.Cache.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Regex.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs	[deleted file]	patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexFindOptimizations.cs	[new file with mode: 0644]	patch \| blob
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexLWCGCompiler.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexParser.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexPrefixAnalyzer.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexWriter.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Algebras/BDD.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexBuilder.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexInfo.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexMatcher.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexNode.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexRunnerFactory.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/SymbolicRegexSampler.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/GeneratorHelper.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/IgnoreCaseRelationGenerator.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Symbolic/Unicode/UnicodeCategoryRangesGenerator.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/src/System/Threading/StackHelper.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/tests/Regex.Groups.Tests.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/tests/Regex.Match.Tests.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/tests/Regex.Tests.Common.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/tests/RegexCultureTests.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/tests/RegexExperiment.cs		patch \| blob \| history
src/libraries/System.Text.RegularExpressions/tests/RegexReductionTests.cs		patch \| blob \| history