From: Bruce Bowyer-Smyth Date: Tue, 8 Aug 2017 17:25:31 +0000 (+1000) Subject: Improve performance of string.IndexOfAny for 2 & 3 char searches (#13219) X-Git-Tag: accepted/tizen/base/20180629.140029~670^2~390 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=9a405f2f61e477fc8964ded0788d4c20a1abd8da;p=platform%2Fupstream%2Fcoreclr.git Improve performance of string.IndexOfAny for 2 & 3 char searches (#13219) --- diff --git a/src/classlibnative/bcltype/stringnative.cpp b/src/classlibnative/bcltype/stringnative.cpp index af6593a..1a92e04 100644 --- a/src/classlibnative/bcltype/stringnative.cpp +++ b/src/classlibnative/bcltype/stringnative.cpp @@ -327,8 +327,6 @@ FCIMPL4(INT32, COMString::IndexOfCharArray, StringObject* thisRef, CHARArray* va if (thisRef == NULL) FCThrow(kNullReferenceException); - if (valueRef == NULL) - FCThrowArgumentNull(W("anyOf")); WCHAR *thisChars; WCHAR *valueChars; @@ -337,14 +335,6 @@ FCIMPL4(INT32, COMString::IndexOfCharArray, StringObject* thisRef, CHARArray* va thisRef->RefInterpretGetStringValuesDangerousForGC(&thisChars, &thisLength); - if (startIndex < 0 || startIndex > thisLength) { - FCThrowArgumentOutOfRange(W("startIndex"), W("ArgumentOutOfRange_Index")); - } - - if (count < 0 || count > thisLength - startIndex) { - FCThrowArgumentOutOfRange(W("count"), W("ArgumentOutOfRange_Count")); - } - int endIndex = startIndex + count; valueLength = valueRef->GetNumComponents(); @@ -494,19 +484,31 @@ void InitializeProbabilisticMap(int* charMap, __in_ecount(length) const WCHAR* c _ASSERTE(charArray != NULL); _ASSERTE(length >= 0); + bool hasAscii = false; + for(int i = 0; i < length; ++i) { int hi,lo; - WCHAR c = charArray[i]; + int c = charArray[i]; - hi = (c >> 8) & 0xFF; lo = c & 0xFF; + hi = (c >> 8) & 0xFF; int* value = &charMap[lo & PROBABILISTICMAP_BLOCK_INDEX_MASK]; SetBit(value, lo >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT); - value = &charMap[hi & PROBABILISTICMAP_BLOCK_INDEX_MASK]; - SetBit(value, hi >> 
PROBABILISTICMAP_BLOCK_INDEX_SHIFT); + if (hi > 0) { + value = &charMap[hi & PROBABILISTICMAP_BLOCK_INDEX_MASK]; + SetBit(value, hi >> PROBABILISTICMAP_BLOCK_INDEX_SHIFT); + } + else { + hasAscii = true; + } + } + + if (hasAscii) { + // Common to search for ASCII symbols. Just set the high value once. + charMap[0] |= 1; } } diff --git a/src/mscorlib/src/System/String.Searching.cs b/src/mscorlib/src/System/String.Searching.cs index 22e2a80..b95ed7f 100644 --- a/src/mscorlib/src/System/String.Searching.cs +++ b/src/mscorlib/src/System/String.Searching.cs @@ -96,8 +96,106 @@ namespace System } [Pure] + public int IndexOfAny(char[] anyOf, int startIndex, int count) + { + if (anyOf == null) + { + throw new ArgumentNullException(nameof(anyOf)); + } + + if ((uint)startIndex > (uint)Length) + { + throw new ArgumentOutOfRangeException(nameof(startIndex), SR.ArgumentOutOfRange_Index); + } + + if ((uint)count > (uint)(Length - startIndex)) + { + throw new ArgumentOutOfRangeException(nameof(count), SR.ArgumentOutOfRange_Count); + } + + if (anyOf.Length == 2) + { + // Very common optimization for directory separators (/, \), quotes (", '), brackets, etc + return IndexOfAny(anyOf[0], anyOf[1], startIndex, count); + } + else if (anyOf.Length == 3) + { + return IndexOfAny(anyOf[0], anyOf[1], anyOf[2], startIndex, count); + } + else if (anyOf.Length > 3) + { + return IndexOfCharArray(anyOf, startIndex, count); + } + else if (anyOf.Length == 1) + { + return IndexOf(anyOf[0], startIndex, count); + } + else // anyOf.Length == 0 + { + return -1; + } + } + + private unsafe int IndexOfAny(char value1, char value2, int startIndex, int count) + { + fixed (char* pChars = &_firstChar) + { + char* pCh = pChars + startIndex; + + while (count > 0) + { + char c = *pCh; + + if (c == value1 || c == value2) + goto ReturnIndex; + + // Possibly reads outside of count and can include null terminator + // Handled in the return logic + c = *(pCh + 1); + + if (c == value1 || c == value2) + goto ReturnIndex1; 
+ + pCh += 2; + count -= 2; + } + + return -1; + + ReturnIndex: + return (int)(pCh - pChars); + + ReturnIndex1: + return (count == 1 ? -1 : (int)(pCh - pChars) + 1); + } + } + + private unsafe int IndexOfAny(char value1, char value2, char value3, int startIndex, int count) + { + fixed (char* pChars = &_firstChar) + { + char* pCh = pChars + startIndex; + + while (count > 0) + { + char c = *pCh; + + if (c == value1 || c == value2 || c == value3) + goto ReturnIndex; + + pCh++; + count--; + } + + return -1; + + ReturnIndex: + return (int)(pCh - pChars); + } + } + [MethodImplAttribute(MethodImplOptions.InternalCall)] - public extern int IndexOfAny(char[] anyOf, int startIndex, int count); + private extern int IndexOfCharArray(char[] anyOf, int startIndex, int count); // Determines the position within this string of the first occurrence of the specified diff --git a/src/vm/ecalllist.h b/src/vm/ecalllist.h index f5dfc9d..114d2cf 100644 --- a/src/vm/ecalllist.h +++ b/src/vm/ecalllist.h @@ -119,7 +119,7 @@ FCFuncStart(gStringFuncs) FCIntrinsic("get_Chars", COMString::GetCharAt, CORINFO_INTRINSIC_StringGetChar) FCFuncElement("IsAscii", COMString::IsAscii) FCFuncElement("CompareOrdinalHelper", COMString::CompareOrdinalEx) - FCFuncElement("IndexOfAny", COMString::IndexOfCharArray) + FCFuncElement("IndexOfCharArray", COMString::IndexOfCharArray) FCFuncElement("LastIndexOfAny", COMString::LastIndexOfCharArray) FCFuncElementSig("ReplaceInternal", &gsig_IM_Str_Str_RetStr, COMString::ReplaceString) #ifdef FEATURE_COMINTEROP