Use IndexOf in StringBuilder.Replace(string, string, ...) (#81098)
authorStephen Toub <stoub@microsoft.com>
Fri, 27 Jan 2023 18:15:27 +0000 (13:15 -0500)
committerGitHub <noreply@github.com>
Fri, 27 Jan 2023 18:15:27 +0000 (13:15 -0500)
* Use IndexOf in StringBuilder.Replace

StringBuilder.Replace(string, string, ...) currently walks character-by-character to find the next location of the oldValue.  This change adds the use of MemoryExtensions.IndexOf to search for the oldValue, and only falls back to the character-by-character walk when it nears and needs to account for a chunk boundary.  It also switches to track replacement indices using more stack space and the array pool (via ValueListBuilder) rather than less stack space and array allocations.

* Fix faulty assert

src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs

index 3305173..40fd4a2 100644 (file)
@@ -1934,8 +1934,7 @@ namespace System.Text
 
             newValue ??= string.Empty;
 
-            Span<int> replacements = stackalloc int[5]; // A list of replacement positions in a chunk to apply
-            int replacementsCount = 0;
+            var replacements = new ValueListBuilder<int>(stackalloc int[128]); // A list of replacement positions in a chunk to apply
 
             // Find the chunk, indexInChunk for the starting point
             StringBuilder chunk = FindChunkForIndex(startIndex);
@@ -1944,45 +1943,89 @@ namespace System.Text
             {
                 Debug.Assert(chunk != null, "chunk was null in replace");
 
-                // Look for a match in the chunk,indexInChunk reference
-                if (StartsWith(chunk, indexInChunk, count, oldValue))
+                // While the remaining search space is at least as large as the old value being replaced,
+                // find all occurrences of it contained entirely within the chunk. We stop searching
+                // once we're within oldValue.Length from the end of the chunk (or count limit), at which point
+                // we need to consider a value that bridges between two chunks.
+                ReadOnlySpan<char> remainingChunk = chunk.m_ChunkChars.AsSpan(indexInChunk, Math.Min(chunk.m_ChunkLength - indexInChunk, count));
+                while (oldValue.Length <= remainingChunk.Length)
                 {
-                    // Push it on the replacements array (with growth), we will do all replacements in a
-                    // given chunk in one operation below (see ReplaceAllInChunk) so we don't have to slide
-                    // many times.
-                    if (replacementsCount >= replacements.Length)
+                    // Find the next match.
+                    int foundPos = remainingChunk.IndexOf(oldValue);
+                    if (foundPos >= 0)
                     {
-                        int[] tmp = new int[replacements.Length * 3 / 2 + 4]; // Grow by ~1.5x, but more in the beginning
-                        replacements.CopyTo(tmp);
-                        replacements = tmp;
+                        // We found one.  Add it as a location for the replacement.
+                        indexInChunk += foundPos;
+                        replacements.Append(indexInChunk);
+
+                        // Move ahead to the next location.
+                        remainingChunk = remainingChunk.Slice(foundPos + oldValue.Length);
+                        indexInChunk += oldValue.Length;
+                        count -= foundPos + oldValue.Length;
+
+                        // If after accounting for moving past the match our count has
+                        // gone to 0, break out to stop searching.
+                        Debug.Assert(count >= 0, "count should never go negative");
+                        if (count == 0)
+                        {
+                            break;
+                        }
+                    }
+                    else
+                    {
+                        // No match found. Reposition to one character beyond the last starting
+                        // location searched, which will be oldValue.Length - 1 from the end.
+                        // Then break out so that we can start the cross-chunk matching from that location.
+                        int move = remainingChunk.Length - (oldValue.Length - 1);
+                        indexInChunk += move;
+                        count -= move;
+                        break;
                     }
-                    replacements[replacementsCount++] = indexInChunk;
-                    indexInChunk += oldValue.Length;
-                    count -= oldValue.Length;
                 }
-                else
+
+                Debug.Assert(oldValue.Length > Math.Min(count, chunk.m_ChunkLength - indexInChunk),
+                    $"oldValue.Length = {oldValue.Length}, chunk.m_ChunkLength - indexInChunk = {chunk.m_ChunkLength - indexInChunk}, count == {count}");
+
+                // Now do the more complicated cross-chunk matching.
+                while (indexInChunk < chunk.m_ChunkLength && count > 0)
                 {
-                    indexInChunk++;
-                    --count;
+                    if (StartsWith(chunk, indexInChunk, count, oldValue))
+                    {
+                        replacements.Append(indexInChunk);
+                        indexInChunk += oldValue.Length;
+                        count -= oldValue.Length;
+                    }
+                    else
+                    {
+                        indexInChunk++;
+                        --count;
+                    }
                 }
 
-                if (indexInChunk >= chunk.m_ChunkLength || count == 0) // Have we moved out of the current chunk?
-                {
-                    // Replacing mutates the blocks, so we need to convert to a logical index and back afterwards.
-                    int index = indexInChunk + chunk.m_ChunkOffset;
+                // We've either fully explored the chunk or we've reached our count limit.
+                Debug.Assert(indexInChunk >= chunk.m_ChunkLength || count == 0,
+                    $"indexInChunk = {indexInChunk}, chunk.m_ChunkLength == {chunk.m_ChunkLength}, count == {count}");
 
-                    // See if we accumulated any replacements, if so apply them.
-                    ReplaceAllInChunk(replacements.Slice(0, replacementsCount), chunk, oldValue.Length, newValue);
-                    // The replacement has affected the logical index.  Adjust it.
-                    index += ((newValue.Length - oldValue.Length) * replacementsCount);
-                    replacementsCount = 0;
+                // Replacing mutates the blocks, so we need to convert to a logical index and back afterwards.
+                int index = indexInChunk + chunk.m_ChunkOffset;
 
-                    chunk = FindChunkForIndex(index);
-                    indexInChunk = index - chunk.m_ChunkOffset;
-                    Debug.Assert(chunk != null || count == 0, "Chunks ended prematurely!");
+                // Apply any replacements we accumulated.
+                if (replacements.Length != 0)
+                {
+                    // Perform all replacements, and adjust the logical index if the new and old values
+                    // have different lengths, such that the replacements would have impacted it.
+                    ReplaceAllInChunk(replacements.AsSpan(), chunk, oldValue.Length, newValue);
+                    index += (newValue.Length - oldValue.Length) * replacements.Length;
+                    replacements.Length = 0;
                 }
+
+                chunk = FindChunkForIndex(index);
+                indexInChunk = index - chunk.m_ChunkOffset;
+                Debug.Assert(chunk != null || count == 0, "Chunks ended prematurely!");
             }
 
+            replacements.Dispose();
+
             AssertInvariants();
             return this;
         }
@@ -2154,10 +2197,7 @@ namespace System.Text
         /// </remarks>
         private void ReplaceAllInChunk(ReadOnlySpan<int> replacements, StringBuilder sourceChunk, int removeCount, string value)
         {
-            if (replacements.IsEmpty)
-            {
-                return;
-            }
+            Debug.Assert(!replacements.IsEmpty);
 
             // calculate the total amount of extra space or space needed for all the replacements.
             long longDelta = (value.Length - removeCount) * (long)replacements.Length;