Improve Regex performance (mainly interpreted) (#449)
author Stephen Toub <stoub@microsoft.com>
Tue, 3 Dec 2019 00:09:19 +0000 (19:09 -0500)
committer GitHub <noreply@github.com>
Tue, 3 Dec 2019 00:09:19 +0000 (19:09 -0500)
* Remove branches from tight inner interpreter loop in FindFirstChar

* Tweak RegexBoyerMoore.IsMatch

Reduce the checks needed and eliminate unnecessary layers of function calls.

* Remove IsSingleton optimization

This doesn't show up in real regexes and is just adding unnecessary complication to the code.  No one writes `[a]`... they just write `a`.  SingletonInverse is more useful, as you can search for any character except for a specific one, e.g. find the first character that's not a dash.

* Cache CharInClass results for ASCII lookups

* Improve codegen in a few places (and a little cleanup)

* Mark RegexInterpreter.SetOperator aggressive inlining

It's small but isn't getting inlined; it's only called in 4 places, but on hot paths, and inlining it nets roughly an 8% throughput win.

src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/Match.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexBoyerMoore.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCharClass.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCode.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexCompiler.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexInterpreter.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexNode.cs
src/libraries/System.Text.RegularExpressions/src/System/Text/RegularExpressions/RegexRunner.cs

index 9c68d11..a29209b 100644 (file)
@@ -81,9 +81,10 @@ namespace System.Text.RegularExpressions
             _textend = textend;
             _textstart = textstart;
 
-            for (int i = 0; i < _matchcount.Length; i++)
+            int[] matchcount = _matchcount;
+            for (int i = 0; i < matchcount.Length; i++)
             {
-                _matchcount[i] = 0;
+                matchcount[i] = 0;
             }
 
             _balancing = false;
@@ -170,21 +171,23 @@ namespace System.Text.RegularExpressions
         internal void AddMatch(int cap, int start, int len)
         {
             _matches[cap] ??= new int[2];
+            int[][] matches = _matches;
 
-            int capcount = _matchcount[cap];
+            int[] matchcount = _matchcount;
+            int capcount = matchcount[cap];
 
-            if (capcount * 2 + 2 > _matches[cap].Length)
+            if (capcount * 2 + 2 > matches[cap].Length)
             {
-                int[] oldmatches = _matches[cap];
+                int[] oldmatches = matches[cap];
                 int[] newmatches = new int[capcount * 8];
                 for (int j = 0; j < capcount * 2; j++)
                     newmatches[j] = oldmatches[j];
-                _matches[cap] = newmatches;
+                matches[cap] = newmatches;
             }
 
-            _matches[cap][capcount * 2] = start;
-            _matches[cap][capcount * 2 + 1] = len;
-            _matchcount[cap] = capcount + 1;
+            matches[cap][capcount * 2] = start;
+            matches[cap][capcount * 2 + 1] = len;
+            matchcount[cap] = capcount + 1;
         }
 
         /*
@@ -204,15 +207,16 @@ namespace System.Text.RegularExpressions
 
             // first see if it is negative, and therefore is a reference to the next available
             // capture group for balancing.  If it is, we'll reset target to point to that capture.
-            if (_matches[cap][target] < 0)
-                target = -3 - _matches[cap][target];
+            int[][] matches = _matches;
+            if (matches[cap][target] < 0)
+                target = -3 - matches[cap][target];
 
             // move back to the previous capture
             target -= 2;
 
             // if the previous capture is a reference, just copy that reference to the end.  Otherwise, point to it.
-            if (target >= 0 && _matches[cap][target] < 0)
-                AddMatch(cap, _matches[cap][target], _matches[cap][target + 1]);
+            if (target >= 0 && matches[cap][target] < 0)
+                AddMatch(cap, matches[cap][target], matches[cap][target + 1]);
             else
                 AddMatch(cap, -3 - target, -4 - target /* == -3 - (target + 1) */ );
         }
@@ -230,7 +234,8 @@ namespace System.Text.RegularExpressions
         /// </summary>
         internal bool IsMatched(int cap)
         {
-            return cap < _matchcount.Length && _matchcount[cap] > 0 && _matches[cap][_matchcount[cap] * 2 - 1] != (-3 + 1);
+            int[] matchcount = _matchcount;
+            return (uint)cap < (uint)matchcount.Length && matchcount[cap] > 0 && _matches[cap][matchcount[cap] * 2 - 1] != (-3 + 1);
         }
 
         /// <summary>
@@ -238,11 +243,13 @@ namespace System.Text.RegularExpressions
         /// </summary>
         internal int MatchIndex(int cap)
         {
-            int i = _matches[cap][_matchcount[cap] * 2 - 2];
+            int[][] matches = _matches;
+
+            int i = matches[cap][_matchcount[cap] * 2 - 2];
             if (i >= 0)
                 return i;
 
-            return _matches[cap][-3 - i];
+            return matches[cap][-3 - i];
         }
 
         /// <summary>
@@ -250,11 +257,13 @@ namespace System.Text.RegularExpressions
         /// </summary>
         internal int MatchLength(int cap)
         {
-            int i = _matches[cap][_matchcount[cap] * 2 - 1];
+            int[][] matches = _matches;
+
+            int i = matches[cap][_matchcount[cap] * 2 - 1];
             if (i >= 0)
                 return i;
 
-            return _matches[cap][-3 - i];
+            return matches[cap][-3 - i];
         }
 
         /// <summary>
@@ -262,11 +271,15 @@ namespace System.Text.RegularExpressions
         /// </summary>
         internal void Tidy(int textpos)
         {
-            int[] interval = _matches[0];
+            int[][] matches = _matches;
+
+            int[] interval = matches[0];
             Index = interval[0];
             Length = interval[1];
             _textpos = textpos;
-            _capcount = _matchcount[0];
+
+            int[] matchcount = _matchcount;
+            _capcount = matchcount[0];
 
             if (_balancing)
             {
@@ -276,13 +289,13 @@ namespace System.Text.RegularExpressions
                 // until we find a balance captures.  Then we check each subsequent entry.  If it's a balance
                 // capture (it's negative), we decrement j.  If it's a real capture, we increment j and copy
                 // it down to the last free position.
-                for (int cap = 0; cap < _matchcount.Length; cap++)
+                for (int cap = 0; cap < matchcount.Length; cap++)
                 {
                     int limit;
                     int[] matcharray;
 
-                    limit = _matchcount[cap] * 2;
-                    matcharray = _matches[cap];
+                    limit = matchcount[cap] * 2;
+                    matcharray = matches[cap];
 
                     int i = 0;
                     int j;
@@ -310,7 +323,7 @@ namespace System.Text.RegularExpressions
                         }
                     }
 
-                    _matchcount[cap] = j / 2;
+                    matchcount[cap] = j / 2;
                 }
 
                 _balancing = false;
index cc427df..c142f5d 100644 (file)
@@ -205,23 +205,6 @@ namespace System.Text.RegularExpressions
             }
         }
 
-        private bool MatchPattern(string text, int index)
-        {
-            if (CaseInsensitive)
-            {
-                if (text.Length - index < Pattern.Length)
-                {
-                    return false;
-                }
-
-                return (0 == string.Compare(Pattern, 0, text, index, Pattern.Length, CaseInsensitive, _culture));
-            }
-            else
-            {
-                return (0 == string.CompareOrdinal(Pattern, 0, text, index, Pattern.Length));
-            }
-        }
-
         /// <summary>
         /// When a regex is anchored, we can do a quick IsMatch test instead of a Scan
         /// </summary>
@@ -231,16 +214,21 @@ namespace System.Text.RegularExpressions
             {
                 if (index < beglimit || endlimit - index < Pattern.Length)
                     return false;
-
-                return MatchPattern(text, index);
             }
             else
             {
                 if (index > endlimit || index - beglimit < Pattern.Length)
                     return false;
 
-                return MatchPattern(text, index - Pattern.Length);
+                index -= Pattern.Length;
             }
+
+            if (CaseInsensitive)
+            {
+                return string.Compare(Pattern, 0, text, index, Pattern.Length, ignoreCase: true, _culture) == 0;
+            }
+
+            return Pattern.AsSpan().SequenceEqual(text.AsSpan(index, Pattern.Length));
         }
 
         /// <summary>
index 26e4496..2c66ad6 100644 (file)
@@ -5,6 +5,7 @@
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Globalization;
+using System.Threading;
 
 namespace System.Text.RegularExpressions
 {
@@ -732,7 +733,7 @@ namespace System.Text.RegularExpressions
         /// </summary>
         public static char SingletonChar(string set)
         {
-            Debug.Assert(IsSingleton(set) || IsSingletonInverse(set), "Tried to get the singleton char out of a non singleton character class");
+            Debug.Assert(IsSingletonInverse(set), "Tried to get the singleton char out of a non singleton character class");
             return set[SetStartIndex];
         }
 
@@ -747,14 +748,6 @@ namespace System.Text.RegularExpressions
             !IsNegated(charClass) &&
             !IsSubtraction(charClass);
 
-        /// <summary><c>true</c> if the set contains a single character only</summary>
-        public static bool IsSingleton(string set) =>
-            set[CategoryLengthIndex] == 0 &&
-            set[SetLengthIndex] == 2 &&
-            !IsNegated(set) &&
-            !IsSubtraction(set) &&
-            (set[SetStartIndex] == LastChar || set[SetStartIndex] + 1 == set[SetStartIndex + 1]);
-
         public static bool IsSingletonInverse(string set) =>
             set[CategoryLengthIndex] == 0 &&
             set[SetLengthIndex] == 2 &&
@@ -823,6 +816,68 @@ namespace System.Text.RegularExpressions
             }
         }
 
+        public static bool CharInClass(char ch, string set, ref int[]? asciiResultCache)
+        {
+            // The int[] contains 8 ints, or 256 bits.  These are laid out as pairs, where the first bit ("known") in the pair
+            // says whether the second bit ("value") in the pair has already been computed.  Once a value is computed, it's never
+            // changed, so since Int32s are written/read atomically, we can trust the value bit if we see that the known bit
+            // has been set.  If the known bit hasn't been set, then we proceed to look it up, and then swap in the result.
+            const int CacheArrayLength = 8;
+            Debug.Assert(asciiResultCache is null || asciiResultCache.Length == CacheArrayLength, "set lookup should be able to store two bits for each of the first 128 characters");
+
+            if (ch < 128)
+            {
+                // Lazily-initialize the cache for this set.
+                if (asciiResultCache is null)
+                {
+                    Interlocked.CompareExchange(ref asciiResultCache, new int[CacheArrayLength], null);
+                }
+
+                // Determine which int in the lookup array contains the known and value bits for this character,
+                // and compute their bit numbers.
+                ref int slot = ref asciiResultCache[ch >> 4];
+                int knownBit = 1 << ((ch & 0xF) << 1);
+                int valueBit = knownBit << 1;
+
+                // If the value for this bit has already been computed, use it.
+                int current = slot;
+                if ((current & knownBit) != 0)
+                {
+                    return (current & valueBit) != 0;
+                }
+
+                // (After warm-up, we should find ourselves rarely getting here.)
+
+                // Otherwise, compute it normally.
+                bool isInClass = CharInClass(ch, set);
+
+                // Determine which bits to write back to the array.
+                int bitsToSet = knownBit;
+                if (isInClass)
+                {
+                    bitsToSet |= valueBit;
+                }
+
+                // "or" the bits back in a thread-safe manner.
+                while (true)
+                {
+                    int oldValue = Interlocked.CompareExchange(ref slot, current | bitsToSet, current);
+                    if (oldValue == current)
+                    {
+                        break;
+                    }
+
+                    current = oldValue;
+                }
+
+                // Return the computed value.
+                return isInClass;
+            }
+
+            // Non-ASCII.  Fall back to computing the answer.
+            return CharInClassRecursive(ch, set, 0);
+        }
+
         public static bool CharInClass(char ch, string set) =>
             CharInClassRecursive(ch, set, 0);
 
index 818a058..33ef2f0 100644 (file)
@@ -18,7 +18,6 @@
 using System.Collections;
 using System.Collections.Generic;
 using System.Diagnostics;
-using System.Globalization;
 
 namespace System.Text.RegularExpressions
 {
@@ -91,10 +90,12 @@ namespace System.Text.RegularExpressions
 
         public readonly int[] Codes;                     // the code
         public readonly string[] Strings;                // the string/set table
+        public readonly int[]?[] StringsAsciiLookup;     // the ASCII lookup table optimization for the sets in Strings
         public readonly int TrackCount;                  // how many instructions use backtracking
         public readonly Hashtable? Caps;                 // mapping of user group numbers -> impl group slots
         public readonly int CapSize;                     // number of impl group slots
         public readonly RegexPrefix? FCPrefix;           // the set of candidate first characters (may be null)
+        public int[]? FCPrefixAsciiLookup;               // the ASCII lookup table optimization for the set of candidate first characters if there are any
         public readonly RegexBoyerMoore? BMPrefix;       // the fixed prefix string as a Boyer-Moore machine (may be null)
         public readonly int Anchors;                     // the set of zero-length start anchors (RegexFCD.Bol, etc)
         public readonly bool RightToLeft;                // true if right to left
@@ -109,6 +110,7 @@ namespace System.Text.RegularExpressions
 
             Codes = codes;
             Strings = stringlist.ToArray();
+            StringsAsciiLookup = new int[Strings.Length][];
             TrackCount = trackcount;
             Caps = caps;
             CapSize = capsize;
index 4ece103..1f7fe5d 100644 (file)
@@ -1163,29 +1163,14 @@ namespace System.Text.RegularExpressions
                     CallToLower();
                 }
 
-                if (!RegexCharClass.IsSingleton(_fcPrefix.GetValueOrDefault().Prefix))
-                {
-                    EmitCallCharInClass(_fcPrefix.GetValueOrDefault().Prefix, charInClassV);
-                    BrtrueFar(l2);
-                }
-                else
-                {
-                    Ldc(RegexCharClass.SingletonChar(_fcPrefix.GetValueOrDefault().Prefix));
-                    Beq(l2);
-                }
+                EmitCallCharInClass(_fcPrefix.GetValueOrDefault().Prefix, charInClassV);
+                BrtrueFar(l2);
 
                 MarkLabel(l5);
 
                 Ldloc(cV);
                 Ldc(0);
-                if (!RegexCharClass.IsSingleton(_fcPrefix.GetValueOrDefault().Prefix))
-                {
-                    BgtFar(l1);
-                }
-                else
-                {
-                    Bgt(l1);
-                }
+                BgtFar(l1);
 
                 Ldc(0);
                 BrFar(l3);
index 067fb04..4cc0457 100644 (file)
@@ -7,6 +7,7 @@
 
 using System.Diagnostics;
 using System.Globalization;
+using System.Runtime.CompilerServices;
 
 namespace System.Text.RegularExpressions
 {
@@ -83,36 +84,61 @@ namespace System.Text.RegularExpressions
 
         private void TrackPush(int I1)
         {
-            runtrack![--runtrackpos] = I1;
-            runtrack[--runtrackpos] = _codepos;
+            int[] localruntrack = runtrack!;
+            int localruntrackpos = runtrackpos;
+
+            localruntrack[--localruntrackpos] = I1;
+            localruntrack[--localruntrackpos] = _codepos;
+
+            runtrackpos = localruntrackpos;
         }
 
         private void TrackPush(int I1, int I2)
         {
-            runtrack![--runtrackpos] = I1;
-            runtrack[--runtrackpos] = I2;
-            runtrack[--runtrackpos] = _codepos;
+            int[] localruntrack = runtrack!;
+            int localruntrackpos = runtrackpos;
+
+            localruntrack[--localruntrackpos] = I1;
+            localruntrack[--localruntrackpos] = I2;
+            localruntrack[--localruntrackpos] = _codepos;
+
+            runtrackpos = localruntrackpos;
         }
 
         private void TrackPush(int I1, int I2, int I3)
         {
-            runtrack![--runtrackpos] = I1;
-            runtrack[--runtrackpos] = I2;
-            runtrack[--runtrackpos] = I3;
-            runtrack[--runtrackpos] = _codepos;
+            int[] localruntrack = runtrack!;
+            int localruntrackpos = runtrackpos;
+
+            localruntrack[--localruntrackpos] = I1;
+            localruntrack[--localruntrackpos] = I2;
+            localruntrack[--localruntrackpos] = I3;
+            localruntrack[--localruntrackpos] = _codepos;
+
+            runtrackpos = localruntrackpos;
         }
 
         private void TrackPush2(int I1)
         {
-            runtrack![--runtrackpos] = I1;
-            runtrack[--runtrackpos] = -_codepos;
+            int[] localruntrack = runtrack!;
+            int localruntrackpos = runtrackpos;
+
+            localruntrack[--localruntrackpos] = I1;
+            localruntrack[--localruntrackpos] = -_codepos;
+
+            runtrackpos = localruntrackpos;
         }
 
         private void TrackPush2(int I1, int I2)
         {
-            runtrack![--runtrackpos] = I1;
-            runtrack[--runtrackpos] = I2;
-            runtrack[--runtrackpos] = -_codepos;
+            int[] localruntrack = runtrack!;
+            int localruntrackpos = runtrackpos;
+
+            localruntrack[--localruntrackpos] = I1;
+            localruntrack[--localruntrackpos] = I2;
+            localruntrack[--localruntrackpos] = -_codepos;
+
+            runtrackpos = localruntrackpos;
         }
 
         private void Backtrack()
@@ -145,6 +171,7 @@ namespace System.Text.RegularExpressions
             _codepos = newpos;
         }
 
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         private void SetOperator(int op)
         {
             _caseInsensitive = (0 != (op & RegexCode.Ci));
@@ -186,8 +213,13 @@ namespace System.Text.RegularExpressions
 
         private void StackPush(int I1, int I2)
         {
-            runstack![--runstackpos] = I1;
-            runstack[--runstackpos] = I2;
+            int[] localrunstack = runstack!;
+            int localrunstackpos = runstackpos;
+
+            localrunstack[--localrunstackpos] = I1;
+            localrunstack[--localrunstackpos] = I2;
+
+            runstackpos = localrunstackpos;
         }
 
         private void StackPop()
@@ -241,16 +273,13 @@ namespace System.Text.RegularExpressions
             return _rightToLeft ? -1 : 1;
         }
 
-        private int Forwardchars()
-        {
-            return _rightToLeft ? runtextpos - runtextbeg : runtextend - runtextpos;
-        }
+        private int Forwardchars() => _rightToLeft ? runtextpos - runtextbeg : runtextend - runtextpos;
 
         private char Forwardcharnext()
         {
-            char ch = (_rightToLeft ? runtext![--runtextpos] : runtext![runtextpos++]);
+            char ch = _rightToLeft ? runtext![--runtextpos] : runtext![runtextpos++];
 
-            return (_caseInsensitive ? _culture.TextInfo.ToLower(ch) : ch);
+            return _caseInsensitive ? _culture.TextInfo.ToLower(ch) : ch;
         }
 
         private bool Stringmatch(string str)
@@ -281,8 +310,9 @@ namespace System.Text.RegularExpressions
             }
             else
             {
+                TextInfo ti = _culture.TextInfo;
                 while (c != 0)
-                    if (str[--c] != _culture.TextInfo.ToLower(runtext![--pos]))
+                    if (str[--c] != ti.ToLower(runtext![--pos]))
                         return false;
             }
 
@@ -328,8 +358,9 @@ namespace System.Text.RegularExpressions
             }
             else
             {
+                TextInfo ti = _culture.TextInfo;
                 while (c-- != 0)
-                    if (_culture.TextInfo.ToLower(runtext![--cmpos]) != _culture.TextInfo.ToLower(runtext[--pos]))
+                    if (ti.ToLower(runtext![--cmpos]) != ti.ToLower(runtext[--pos]))
                         return false;
             }
 
@@ -418,27 +449,67 @@ namespace System.Text.RegularExpressions
             _caseInsensitive = _code.FCPrefix.GetValueOrDefault().CaseInsensitive;
             string set = _code.FCPrefix.GetValueOrDefault().Prefix;
 
-            if (RegexCharClass.IsSingleton(set))
-            {
-                char ch = RegexCharClass.SingletonChar(set);
+            // We now loop through looking for the first matching character.  This is a hot loop, so we lift out as many
+            // branches as we can.  Each operation requires knowing whether this is a) right-to-left vs left-to-right, and
+            // b) case-sensitive vs case-insensitive.  So, we split it all out into 4 loops, for each combination of these.
+            // It's duplicated code, but it allows the inner loop to be much tighter than if everything were combined with
+            // multiple branches on each operation.  We can also then use spans to avoid bounds checks in at least the forward
+            // iteration direction where the JIT is able to detect the pattern.
 
-                for (int i = Forwardchars(); i > 0; i--)
+            if (!_rightToLeft)
+            {
+                ReadOnlySpan<char> span = runtext.AsSpan(runtextpos, runtextend - runtextpos);
+                if (!_caseInsensitive)
+                {
+                    // left-to-right, case-sensitive
+                    for (int i = 0; i < span.Length; i++)
+                    {
+                        if (RegexCharClass.CharInClass(span[i], set, ref _code.FCPrefixAsciiLookup))
+                        {
+                            runtextpos += i;
+                            return true;
+                        }
+                    }
+                }
+                else
                 {
-                    if (ch == Forwardcharnext())
+                    // left-to-right, case-insensitive
+                    TextInfo ti = _culture.TextInfo;
+                    for (int i = 0; i < span.Length; i++)
                     {
-                        Backwardnext();
-                        return true;
+                        if (RegexCharClass.CharInClass(ti.ToLower(span[i]), set, ref _code.FCPrefixAsciiLookup))
+                        {
+                            runtextpos += i;
+                            return true;
+                        }
                     }
                 }
             }
             else
             {
-                for (int i = Forwardchars(); i > 0; i--)
+                if (!_caseInsensitive)
+                {
+                    // right-to-left, case-sensitive
+                    for (int i = runtextpos - 1; i >= runtextbeg; i--)
+                    {
+                        if (RegexCharClass.CharInClass(runtext![i], set, ref _code.FCPrefixAsciiLookup))
+                        {
+                            runtextpos = i + 1;
+                            return true;
+                        }
+                    }
+                }
+                else
                 {
-                    if (RegexCharClass.CharInClass(Forwardcharnext(), set))
+                    // right-to-left, case-insensitive
+                    TextInfo ti = _culture.TextInfo;
+                    for (int i = runtextpos - 1; i >= runtextbeg; i--)
                     {
-                        Backwardnext();
-                        return true;
+                        if (RegexCharClass.CharInClass(ti.ToLower(runtext![i]), set, ref _code.FCPrefixAsciiLookup))
+                        {
+                            runtextpos = i + 1;
+                            return true;
+                        }
                     }
                 }
             }
@@ -887,9 +958,15 @@ namespace System.Text.RegularExpressions
                         continue;
 
                     case RegexCode.Set:
-                        if (Forwardchars() < 1 || !RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[Operand(0)]))
+                        if (Forwardchars() < 1)
                             break;
 
+                        {
+                            int operand = Operand(0);
+                            if (!RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[operand], ref _code.StringsAsciiLookup[operand]))
+                                break;
+                        }
+
                         advance = 1;
                         continue;
 
@@ -962,7 +1039,9 @@ namespace System.Text.RegularExpressions
                             if (Forwardchars() < c)
                                 break;
 
-                            string set = _code.Strings[Operand(0)];
+                            int operand0 = Operand(0);
+                            string set = _code.Strings[operand0];
+                            ref int[]? setLookup = ref _code.StringsAsciiLookup[operand0];
 
                             while (c-- > 0)
                             {
@@ -974,7 +1053,7 @@ namespace System.Text.RegularExpressions
                                     CheckTimeout();
                                 }
 
-                                if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
+                                if (!RegexCharClass.CharInClass(Forwardcharnext(), set, ref setLookup))
                                     goto BreakBackward;
                             }
 
@@ -986,8 +1065,9 @@ namespace System.Text.RegularExpressions
                         {
                             int c = Operand(1);
 
-                            if (c > Forwardchars())
-                                c = Forwardchars();
+                            int fc = Forwardchars();
+                            if (c > fc)
+                                c = fc;
 
                             char ch = (char)Operand(0);
                             int i;
@@ -1012,8 +1092,9 @@ namespace System.Text.RegularExpressions
                         {
                             int c = Operand(1);
 
-                            if (c > Forwardchars())
-                                c = Forwardchars();
+                            int fc = Forwardchars();
+                            if (c > fc)
+                                c = fc;
 
                             char ch = (char)Operand(0);
                             int i;
@@ -1038,10 +1119,13 @@ namespace System.Text.RegularExpressions
                         {
                             int c = Operand(1);
 
-                            if (c > Forwardchars())
-                                c = Forwardchars();
+                            int fc = Forwardchars();
+                            if (c > fc)
+                                c = fc;
 
-                            string set = _code.Strings[Operand(0)];
+                            int operand0 = Operand(0);
+                            string set = _code.Strings[operand0];
+                            ref int[]? setLookup = ref _code.StringsAsciiLookup[operand0];
                             int i;
 
                             for (i = c; i > 0; i--)
@@ -1054,7 +1138,7 @@ namespace System.Text.RegularExpressions
                                     CheckTimeout();
                                 }
 
-                                if (!RegexCharClass.CharInClass(Forwardcharnext(), set))
+                                if (!RegexCharClass.CharInClass(Forwardcharnext(), set, ref setLookup))
                                 {
                                     Backwardnext();
                                     break;
@@ -1104,8 +1188,9 @@ namespace System.Text.RegularExpressions
                         {
                             int c = Operand(1);
 
-                            if (c > Forwardchars())
-                                c = Forwardchars();
+                            int fc = Forwardchars();
+                            if (c > fc)
+                                c = fc;
 
                             if (c > 0)
                                 TrackPush(c - 1, Textpos());
@@ -1118,8 +1203,9 @@ namespace System.Text.RegularExpressions
                         {
                             int c = Operand(1);
 
-                            if (c > Forwardchars())
-                                c = Forwardchars();
+                            int fc = Forwardchars();
+                            if (c > fc)
+                                c = fc;
 
                             if (c > 0)
                                 TrackPush(c - 1, Textpos());
@@ -1170,7 +1256,8 @@ namespace System.Text.RegularExpressions
                             int pos = TrackPeek(1);
                             Textto(pos);
 
-                            if (!RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[Operand(0)]))
+                            int operand0 = Operand(0);
+                            if (!RegexCharClass.CharInClass(Forwardcharnext(), _code.Strings[operand0], ref _code.StringsAsciiLookup[operand0]))
                                 break;
 
                             int i = TrackPeek();
index 13cff7e..e9e4872 100644 (file)
@@ -279,8 +279,7 @@ namespace System.Text.RegularExpressions
         }
 
         /// <summary>
-        /// Simple optimization. If a set is a singleton, an inverse singleton,
-        /// or empty, it's transformed accordingly.
+        /// Simple optimization. If a set is an inverse singleton or empty, it's transformed accordingly.
         /// </summary>
         private RegexNode ReduceSet()
         {
@@ -293,12 +292,6 @@ namespace System.Text.RegularExpressions
                 NType = Nothing;
                 Str = null;
             }
-            else if (RegexCharClass.IsSingleton(Str))
-            {
-                Ch = RegexCharClass.SingletonChar(Str);
-                Str = null;
-                NType += (One - Set);
-            }
             else if (RegexCharClass.IsSingletonInverse(Str))
             {
                 Ch = RegexCharClass.SingletonChar(Str);
index af52dcc..db3d8b4 100644 (file)
@@ -342,9 +342,12 @@ namespace System.Text.RegularExpressions
         /// </summary>
         protected void EnsureStorage()
         {
-            if (runstackpos < runtrackcount * 4)
+            int limit = runtrackcount * 4;
+
+            if (runstackpos < limit)
                 DoubleStack();
-            if (runtrackpos < runtrackcount * 4)
+
+            if (runtrackpos < limit)
                 DoubleTrack();
         }
 
@@ -382,9 +385,7 @@ namespace System.Text.RegularExpressions
         /// </summary>
         protected void DoubleTrack()
         {
-            int[] newtrack;
-
-            newtrack = new int[runtrack!.Length * 2];
+            int[] newtrack = new int[runtrack!.Length * 2];
 
             Array.Copy(runtrack, 0, newtrack, runtrack.Length, runtrack.Length);
             runtrackpos += runtrack.Length;
@@ -397,9 +398,7 @@ namespace System.Text.RegularExpressions
         /// </summary>
         protected void DoubleStack()
         {
-            int[] newstack;
-
-            newstack = new int[runstack!.Length * 2];
+            int[] newstack = new int[runstack!.Length * 2];
 
             Array.Copy(runstack, 0, newstack, runstack.Length, runstack.Length);
             runstackpos += runstack.Length;
@@ -411,9 +410,7 @@ namespace System.Text.RegularExpressions
         /// </summary>
         protected void DoubleCrawl()
         {
-            int[] newcrawl;
-
-            newcrawl = new int[runcrawl!.Length * 2];
+            int[] newcrawl = new int[runcrawl!.Length * 2];
 
             Array.Copy(runcrawl, 0, newcrawl, runcrawl.Length, runcrawl.Length);
             runcrawlpos += runcrawl.Length;
@@ -456,11 +453,9 @@ namespace System.Text.RegularExpressions
         {
             if (end < start)
             {
-                int T;
-
-                T = end;
+                int t = end;
                 end = start;
-                start = T;
+                start = t;
             }
 
             Crawl(capnum);
@@ -474,22 +469,17 @@ namespace System.Text.RegularExpressions
         /// </summary>
         protected void TransferCapture(int capnum, int uncapnum, int start, int end)
         {
-            int start2;
-            int end2;
-
-            // these are the two intervals that are cancelling each other
+            // these are the two intervals that are canceling each other
 
             if (end < start)
             {
-                int T;
-
-                T = end;
+                int t = end;
                 end = start;
-                start = T;
+                start = t;
             }
 
-            start2 = MatchIndex(uncapnum);
-            end2 = start2 + MatchLength(uncapnum);
+            int start2 = MatchIndex(uncapnum);
+            int end2 = start2 + MatchLength(uncapnum);
 
             // The new capture gets the innermost defined interval