Reimplement string.Replace in managed code (#16214)
authorJan Kotas <jkotas@microsoft.com>
Tue, 6 Feb 2018 05:24:50 +0000 (21:24 -0800)
committerGitHub <noreply@github.com>
Tue, 6 Feb 2018 05:24:50 +0000 (21:24 -0800)
src/classlibnative/bcltype/stringnative.cpp
src/classlibnative/bcltype/stringnative.h
src/mscorlib/src/System/String.Manipulation.cs
src/vm/ecalllist.h
src/vm/object.cpp
src/vm/object.h

index dc9be01..cc91d01 100644 (file)
@@ -321,137 +321,6 @@ FCIMPL1(INT32, COMString::Length, StringObject* str) {
 FCIMPLEND
 
 
-/*================================ReplaceString=================================
-**Action:
-**Returns:
-**Arguments:
-**Exceptions:
-==============================================================================*/
-FCIMPL3(Object*, COMString::ReplaceString, StringObject* thisRefUNSAFE, StringObject* oldValueUNSAFE, StringObject* newValueUNSAFE)
-{
-    FCALL_CONTRACT;
-
-    struct _gc
-    {
-        STRINGREF     thisRef;
-        STRINGREF     oldValue;
-        STRINGREF     newValue;
-        STRINGREF     retValString;
-    } gc;
-
-    gc.thisRef        = ObjectToSTRINGREF(thisRefUNSAFE);
-    gc.oldValue       = ObjectToSTRINGREF(oldValueUNSAFE);
-    gc.newValue       = ObjectToSTRINGREF(newValueUNSAFE);
-    gc.retValString   = NULL;
-
-    HELPER_METHOD_FRAME_BEGIN_RET_PROTECT(gc);
-
-    int *replaceIndex;
-    int index=0;
-    int replaceCount=0;
-    int readPos, writePos;
-    WCHAR *thisBuffer, *oldBuffer, *newBuffer, *retValBuffer;
-    int thisLength, oldLength, newLength;
-    int endIndex;
-    CQuickBytes replaceIndices;
-
-
-    if (gc.thisRef==NULL) {
-        COMPlusThrow(kNullReferenceException, W("NullReference_This"));
-    }
-
-    //Verify all of the arguments.
-    if (!gc.oldValue) {
-        COMPlusThrowArgumentNull(W("oldValue"), W("ArgumentNull_Generic"));
-    }
-
-    //If they asked to replace oldValue with a null, replace all occurances
-    //with the empty string.
-    if (!gc.newValue) {
-        gc.newValue = StringObject::GetEmptyString();
-    }
-
-    gc.thisRef->RefInterpretGetStringValuesDangerousForGC(&thisBuffer, &thisLength);
-    gc.oldValue->RefInterpretGetStringValuesDangerousForGC(&oldBuffer,  &oldLength);
-    gc.newValue->RefInterpretGetStringValuesDangerousForGC(&newBuffer,  &newLength);
-
-    //Record the endIndex so that we don't need to do this calculation all over the place.
-    endIndex = thisLength;
-
-    //If our old Length is 0, we won't know what to replace
-    if (oldLength==0) {
-        COMPlusThrowArgumentException(W("oldValue"), W("Argument_StringZeroLength"));
-    }
-
-    //replaceIndex is made large enough to hold the maximum number of replacements possible:
-    //The case where every character in the current buffer gets replaced.
-    replaceIndex = (int *)replaceIndices.AllocThrows((thisLength/oldLength+1)*sizeof(int));
-    index=0;
-
-    _ASSERTE(endIndex - oldLength <= endIndex);
-    //Prefix: oldLength validated in mscorlib.dll!String.Replace
-    PREFIX_ASSUME(endIndex - oldLength <= endIndex);
-
-    while (((index=StringBufferObject::LocalIndexOfString(thisBuffer,oldBuffer,thisLength,oldLength,index))>-1) && (index<=endIndex-oldLength))
-    {
-        replaceIndex[replaceCount++] = index;
-        index+=oldLength;
-    }
-
-    if (replaceCount != 0)
-    {
-        //Calculate the new length of the string and ensure that we have sufficent room.
-        INT64 retValBuffLength = thisLength - ((oldLength - newLength) * (INT64)replaceCount);
-        _ASSERTE(retValBuffLength >= 0);
-        if (retValBuffLength > 0x7FFFFFFF)
-            COMPlusThrowOM();
-
-        gc.retValString = StringObject::NewString((INT32)retValBuffLength);
-        retValBuffer = gc.retValString->GetBuffer();
-
-        //Get the update buffers for all the Strings since the allocation could have triggered a GC.
-        thisBuffer  = gc.thisRef->GetBuffer();
-        newBuffer   = gc.newValue->GetBuffer();
-        oldBuffer   = gc.oldValue->GetBuffer();
-
-
-        //Set replaceHolder to be the upper limit of our array.
-        int replaceHolder = replaceCount;
-        replaceCount=0;
-
-        //Walk the array forwards copying each character as we go.  If we reach an instance
-        //of the string being replaced, replace the old string with the new string.
-        readPos = 0;
-        writePos = 0;
-        while (readPos<thisLength)
-        {
-            if (replaceCount<replaceHolder&&readPos==replaceIndex[replaceCount])
-            {
-                replaceCount++;
-                readPos+=(oldLength);
-                memcpyNoGCRefs(&retValBuffer[writePos], newBuffer, newLength*sizeof(WCHAR));
-                writePos+=(newLength);
-            }
-            else
-            {
-                retValBuffer[writePos++] = thisBuffer[readPos++];
-            }
-        }
-        retValBuffer[retValBuffLength]='\0';
-    }
-    else
-    {
-        gc.retValString = gc.thisRef;
-    }
-
-    HELPER_METHOD_FRAME_END();
-
-    return OBJECTREFToObject(gc.retValString);
-}
-FCIMPLEND
-
-
-
 #ifdef FEATURE_COMINTEROP
 
 FCIMPL2(FC_BOOL_RET, COMString::FCTryGetTrailByte, StringObject* thisRefUNSAFE, UINT8 *pbData)
index 24326ed..53b7b86 100644 (file)
@@ -65,15 +65,6 @@ public:
     static FCDECL1(INT32, Length, StringObject* pThisRef);
 
     //
-    // Modifiers
-    //
-    static FCDECL3(Object*, ReplaceString, StringObject* thisRef, StringObject* oldValue, StringObject* newValue);
-
-    static FCDECL3(Object*, Insert, StringObject* thisRefUNSAFE, INT32 startIndex, StringObject* valueUNSAFE);
-
-    static FCDECL3(Object*, Remove, StringObject* thisRefUNSAFE, INT32 startIndex, INT32 count);
-
-    //
     // Interop
     //
 #ifdef FEATURE_COMINTEROP
index 025bf2e..6ee5444 100644 (file)
@@ -2,13 +2,14 @@
 // The .NET Foundation licenses this file to you under the MIT license.
 // See the LICENSE file in the project root for more information.
 
+using System.Buffers;
 using System.Collections.Generic;
 using System.Diagnostics;
 using System.Globalization;
 using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
 using System.Text;
 using Internal.Runtime.CompilerServices;
-using System.Buffers;
 
 namespace System
 {
@@ -16,6 +17,13 @@ namespace System
     {
         private const int StackallocIntBufferSizeLimit = 128;
 
+        // Workaround for https://github.com/dotnet/coreclr/issues/16197
+        [StructLayout(LayoutKind.Sequential, Size = StackallocIntBufferSizeLimit * sizeof(int))]
+        struct StackallocIntBuffer
+        {
+            private int _dummy;
+        }
+
         unsafe private static void FillStringChecked(String dest, int destPos, String src)
         {
             Debug.Assert(dest != null);
@@ -949,7 +957,7 @@ namespace System
                     return ReplaceCore(oldValue, newValue, CultureInfo.InvariantCulture, CompareOptions.IgnoreCase);
 
                 case StringComparison.Ordinal:
-                    return ReplaceCore(oldValue, newValue, CultureInfo.InvariantCulture, CompareOptions.Ordinal);
+                    return Replace(oldValue, newValue);
 
                 case StringComparison.OrdinalIgnoreCase:
                     return ReplaceCore(oldValue, newValue, CultureInfo.InvariantCulture, CompareOptions.OrdinalIgnoreCase);
@@ -1079,18 +1087,98 @@ namespace System
             }
         }
 
-        // This method contains the same functionality as StringBuilder Replace. The only difference is that
-        // a new String has to be allocated since Strings are immutable
-        [MethodImplAttribute(MethodImplOptions.InternalCall)]
-        private extern String ReplaceInternal(String oldValue, String newValue);
-
-        public String Replace(String oldValue, String newValue)
+        public string Replace(string oldValue, string newValue)
         {
             if (oldValue == null)
                 throw new ArgumentNullException(nameof(oldValue));
-            // Note that if newValue is null, we treat it like String.Empty.
+            if (oldValue.Length == 0)
+                throw new ArgumentException(SR.Argument_StringZeroLength, nameof(oldValue));
+
+            // Api behavior: if newValue is null, instances of oldValue are to be removed.
+            if (newValue == null)
+                newValue = string.Empty;
+
+            // Workaround for https://github.com/dotnet/coreclr/issues/16197
+            // Span<int> initialSpan = stackalloc int[StackallocIntBufferSizeLimit];
+            Span<int> initialSpan; StackallocIntBuffer initialBuffer; unsafe { initialSpan = new Span<int>(&initialBuffer, StackallocIntBufferSizeLimit); }
+            var replacementIndices = new ValueListBuilder<int>(initialSpan);
+
+            unsafe
+            {
+                fixed (char* pThis = &_firstChar)
+                {
+                    int matchIdx = 0;
+                    int lastPossibleMatchIdx = this.Length - oldValue.Length;
+                    while (matchIdx <= lastPossibleMatchIdx)
+                    {
+                        char* pMatch = pThis + matchIdx;
+                        for (int probeIdx = 0; probeIdx < oldValue.Length; probeIdx++)
+                        {
+                            if (pMatch[probeIdx] != oldValue[probeIdx])
+                            {
+                                goto Next;
+                            }
+                        }
+                        // Found a match for the string. Record the location of the match and skip over the "oldValue."
+                        replacementIndices.Append(matchIdx);
+                        matchIdx += oldValue.Length;
+                        continue;
+
+                    Next:
+                        matchIdx++;
+                    }
+                }
+            }
+
+            if (replacementIndices.Length == 0)
+                return this;
+
+            // String allocation and copying is in separate method to make this method faster for the case where
+            // nothing needs replacing.
+            string dst = ReplaceHelper(oldValue.Length, newValue, replacementIndices.AsReadOnlySpan());
+
+            replacementIndices.Dispose();
+
+            return dst;
+        }
+
+        private string ReplaceHelper(int oldValueLength, string newValue, ReadOnlySpan<int> indices)
+        {
+            Debug.Assert(indices.Length > 0);
+
+            long dstLength = this.Length + ((long)(newValue.Length - oldValueLength)) * indices.Length;
+            if (dstLength > int.MaxValue)
+                throw new OutOfMemoryException();
+            string dst = FastAllocateString((int)dstLength);
+
+            Span<char> dstSpan = new Span<char>(ref dst.GetRawStringData(), dst.Length);
+
+            int thisIdx = 0;
+            int dstIdx = 0;
+
+            for (int r = 0; r < indices.Length; r++)
+            {
+                int replacementIdx = indices[r];
+
+                // Copy over the non-matching portion of the original that precedes this occurrence of oldValue.
+                int count = replacementIdx - thisIdx;
+                if (count != 0)
+                {
+                    this.AsReadOnlySpan().Slice(thisIdx, count).CopyTo(dstSpan.Slice(dstIdx));
+                    dstIdx += count;
+                }
+                thisIdx = replacementIdx + oldValueLength;
+
+                // Copy over newValue to replace the oldValue.
+                newValue.AsReadOnlySpan().CopyTo(dstSpan.Slice(dstIdx));
+                dstIdx += newValue.Length;
+            }
+
+            // Copy over the final non-matching portion at the end of the string.
+            Debug.Assert(this.Length - thisIdx == dstSpan.Length - dstIdx);
+            this.AsReadOnlySpan().Slice(thisIdx).CopyTo(dstSpan.Slice(dstIdx));
 
-            return ReplaceInternal(oldValue, newValue);
+            return dst;
         }
 
         public String[] Split(char separator, StringSplitOptions options = StringSplitOptions.None)
index 51c13ac..851ab2f 100644 (file)
@@ -112,7 +112,6 @@ FCFuncStart(gStringFuncs)
     FCIntrinsic("get_Chars", COMString::GetCharAt, CORINFO_INTRINSIC_StringGetChar)
     FCFuncElement("IsAscii", COMString::IsAscii)
     FCFuncElement("CompareOrdinalHelper", COMString::CompareOrdinalEx)
-    FCFuncElementSig("ReplaceInternal", &gsig_IM_Str_Str_RetStr, COMString::ReplaceString)
 #ifdef FEATURE_COMINTEROP
     FCFuncElement("SetTrailByte", COMString::FCSetTrailByte)
     FCFuncElement("TryGetTrailByte", COMString::FCTryGetTrailByte)
index aff6a83..8659084 100644 (file)
@@ -1519,9 +1519,6 @@ BOOL StringObject::SetTrailByte(BYTE bTrailByte) {
 }
 
 
-#define DEFAULT_CAPACITY 16
-#define DEFAULT_MAX_CAPACITY 0x7FFFFFFF
-
 /*================================ReplaceBuffer=================================
 **This is a helper function designed to be used by N/Direct it replaces the entire
 **contents of the String with a new string created by some native method.  This 
@@ -1578,35 +1575,6 @@ void StringBufferObject::ReplaceBufferAnsi(STRINGBUFFERREF *thisRef, __in_ecount
     (*thisRef)->ReplaceBufferWithAnsi(&newCharArray, newBuffer, newCapacity);
 }
 
-
-/*==============================LocalIndexOfString==============================
-**Finds search within base and returns the index where it was found.  The search
-**starts from startPos and we return -1 if search isn't found.  This is a direct 
-**copy from COMString::IndexOfString, but doesn't require that we build up
-**an instance of indexOfStringArgs before calling it.  
-**
-**Args:
-**base -- the string in which to search
-**search -- the string for which to search
-**strLength -- the length of base
-**patternLength -- the length of search
-**startPos -- the place from which to start searching.
-**
-==============================================================================*/
-/* static */ INT32 StringBufferObject::LocalIndexOfString(__in_ecount(strLength) WCHAR *base, __in_ecount(patternLength) WCHAR *search, int strLength, int patternLength, int startPos) {
-    LIMITED_METHOD_CONTRACT
-    _ASSERTE(base != NULL);
-    _ASSERTE(search != NULL);
-
-    int iThis, iPattern;
-    for (iThis=startPos; iThis < (strLength-patternLength+1); iThis++) {
-        for (iPattern=0; iPattern<patternLength && base[iThis+iPattern]==search[iPattern]; iPattern++);
-        if (iPattern == patternLength) return iThis;
-    }
-    return -1;
-}
-
-
 #ifdef USE_CHECKED_OBJECTREFS
 
 //-------------------------------------------------------------
index 3ba892a..3ffbf79 100644 (file)
@@ -2678,7 +2678,6 @@ class StringBufferObject : public Object
 
     static void ReplaceBuffer(STRINGBUFFERREF *thisRef, __in_ecount(newLength) WCHAR *newBuffer, INT32 newLength);
     static void ReplaceBufferAnsi(STRINGBUFFERREF *thisRef, __in_ecount(newCapacity) CHAR *newBuffer, INT32 newCapacity);    
-    static INT32 LocalIndexOfString(__in_ecount(strLength) WCHAR *base, __in_ecount(patternLength) WCHAR *search, int strLength, int patternLength, int startPos);
 };
 
 class SafeHandle : public Object