Improve performance of string.IndexOfAny for 2 & 3 char searches (#13219)
author Bruce Bowyer-Smyth <bbowyersmyth@live.com.au>
Tue, 8 Aug 2017 17:25:31 +0000 (03:25 +1000)
committer Jan Kotas <jkotas@microsoft.com>
Tue, 8 Aug 2017 17:25:31 +0000 (10:25 -0700)
src/classlibnative/bcltype/stringnative.cpp
src/mscorlib/src/System/String.Searching.cs
src/vm/ecalllist.h

index af6593a..1a92e04 100644 (file)
@@ -327,8 +327,6 @@ FCIMPL4(INT32, COMString::IndexOfCharArray, StringObject* thisRef, CHARArray* va
 
     if (thisRef == NULL)
         FCThrow(kNullReferenceException);
-    if (valueRef == NULL)
-        FCThrowArgumentNull(W("anyOf"));
 
     WCHAR *thisChars;
     WCHAR *valueChars;
@@ -337,14 +335,6 @@ FCIMPL4(INT32, COMString::IndexOfCharArray, StringObject* thisRef, CHARArray* va
 
     thisRef->RefInterpretGetStringValuesDangerousForGC(&thisChars, &thisLength);
 
-    if (startIndex < 0 || startIndex > thisLength) {
-        FCThrowArgumentOutOfRange(W("startIndex"), W("ArgumentOutOfRange_Index"));
-    }
-
-    if (count < 0 || count > thisLength - startIndex) {
-        FCThrowArgumentOutOfRange(W("count"), W("ArgumentOutOfRange_Count"));
-    }
-
     int endIndex = startIndex + count;
 
     valueLength = valueRef->GetNumComponents();
@@ -494,19 +484,31 @@ void InitializeProbabilisticMap(int* charMap, __in_ecount(length) const WCHAR* c
     _ASSERTE(charArray != NULL);
     _ASSERTE(length >= 0);
 
+    bool hasAscii = false;
+
     for(int i = 0; i < length; ++i) {
         int hi,lo;
 
-        WCHAR c = charArray[i];
+        int c = charArray[i];
 
-        hi = (c >> 8) & 0xFF;
         lo = c & 0xFF;
+        hi = (c >> 8) & 0xFF;
 
         int* value = &charMap[lo & PROBABILISTICMAP_BLOCK_INDEX_MASK];
         SetBit(value, lo >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT);
 
-        value = &charMap[hi & PROBABILISTICMAP_BLOCK_INDEX_MASK];
-        SetBit(value, hi >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT);
+        if (hi > 0) {
+            value = &charMap[hi & PROBABILISTICMAP_BLOCK_INDEX_MASK];
+            SetBit(value, hi >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT);
+        }
+        else {
+            hasAscii = true;
+        }
+    }
+
+    if (hasAscii) {
+        // Searching for chars whose high byte is 0 (e.g. ASCII symbols) is common, so set the high-byte bit for 0 just once here.
+        charMap[0] |= 1;
     }
 }
 
index 22e2a80..b95ed7f 100644 (file)
@@ -96,8 +96,106 @@ namespace System
         }
 
         [Pure]
+        public int IndexOfAny(char[] anyOf, int startIndex, int count) // Validates the arguments, then dispatches on anyOf.Length to a length-specific search and returns its result
+        {
+            if (anyOf == null)
+            {
+                throw new ArgumentNullException(nameof(anyOf));
+            }
+
+            if ((uint)startIndex > (uint)Length) // one unsigned compare; presumably relies on unchecked casts so a negative startIndex wraps to a large value and is rejected too
+            {
+                throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index);
+            }
+
+            if ((uint)count > (uint)(Length - startIndex)) // same pattern: rejects a count that exceeds the chars remaining after startIndex (and, presumably, a negative count)
+            {
+                throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count);
+            }
+
+            if (anyOf.Length == 2)
+            {
+                // Very common optimization for directory separators (/, \), quotes (", '), brackets, etc
+                return IndexOfAny(anyOf[0], anyOf[1], startIndex, count);
+            }
+            else if (anyOf.Length == 3)
+            {
+                return IndexOfAny(anyOf[0], anyOf[1], anyOf[2], startIndex, count);
+            }
+            else if (anyOf.Length > 3) // no managed fast path for longer sets: hand off to the InternalCall extern IndexOfCharArray
+            {
+                return IndexOfCharArray(anyOf, startIndex, count);
+            }
+            else if (anyOf.Length == 1) // a single candidate is an ordinary single-char search
+            {
+                return IndexOf(anyOf[0], startIndex, count);
+            }
+            else // anyOf.Length == 0: nothing to search for, so report "not found"
+            {
+                return -1;
+            }
+        }
+
+        private unsafe int IndexOfAny(char value1, char value2, int startIndex, int count) // Scans count chars from startIndex for value1 or value2, two chars per iteration; no range checks here (the public overload validates before calling)
+        {
+            fixed (char* pChars = &_firstChar)
+            {
+                char* pCh = pChars + startIndex;
+
+                while (count > 0)
+                {
+                    char c = *pCh;
+
+                    if (c == value1 || c == value2)
+                        goto ReturnIndex;
+
+                    // When count is odd, this second read lands one char past the requested range
+                    // (possibly on the null terminator); ReturnIndex1 discards such a match
+                    c = *(pCh + 1);
+
+                    if (c == value1 || c == value2)
+                        goto ReturnIndex1;
+
+                    pCh += 2;
+                    count -= 2; // may drop to -1 for an odd count; the loop condition still terminates
+                }
+
+                return -1;
+
+            ReturnIndex:
+                return (int)(pCh - pChars); // pointer difference in chars == index from the start of the string
+
+            ReturnIndex1:
+                return (count == 1 ? -1 : (int)(pCh - pChars) + 1); // count is not yet decremented here: 1 means only *pCh was in range, so the match at pCh + 1 does not count
+            }
+        }
+
+        private unsafe int IndexOfAny(char value1, char value2, char value3, int startIndex, int count) // Scans count chars from startIndex for any of the three values, one char per iteration; no range checks here (the public overload validates before calling)
+        {
+            fixed (char* pChars = &_firstChar)
+            {
+                char* pCh = pChars + startIndex;
+
+                while (count > 0) // unlike the two-value overload, this loop never reads past the requested range
+                {
+                    char c = *pCh;
+
+                    if (c == value1 || c == value2 || c == value3)
+                        goto ReturnIndex;
+
+                    pCh++;
+                    count--;
+                }
+
+                return -1; // range exhausted without a match
+
+            ReturnIndex:
+                return (int)(pCh - pChars); // pointer difference in chars == index from the start of the string
+            }
+        }
+
         [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        public extern int IndexOfAny(char[] anyOf, int startIndex, int count);
+        private extern int IndexOfCharArray(char[] anyOf, int startIndex, int count);
 
 
         // Determines the position within this string of the first occurrence of the specified
index f5dfc9d..114d2cf 100644 (file)
@@ -119,7 +119,7 @@ FCFuncStart(gStringFuncs)
     FCIntrinsic("get_Chars", COMString::GetCharAt, CORINFO_INTRINSIC_StringGetChar)
     FCFuncElement("IsAscii", COMString::IsAscii)
     FCFuncElement("CompareOrdinalHelper", COMString::CompareOrdinalEx)
-    FCFuncElement("IndexOfAny", COMString::IndexOfCharArray)
+    FCFuncElement("IndexOfCharArray", COMString::IndexOfCharArray)
     FCFuncElement("LastIndexOfAny", COMString::LastIndexOfCharArray)
     FCFuncElementSig("ReplaceInternal", &gsig_IM_Str_Str_RetStr, COMString::ReplaceString)
 #ifdef FEATURE_COMINTEROP