From: Stephen Toub Date: Fri, 27 Jan 2023 18:15:27 +0000 (-0500) Subject: Use IndexOf in StringBuilder.Replace(string, string, ...) (#81098) X-Git-Tag: accepted/tizen/unified/riscv/20231226.055536~4385 X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=f388963619f22949798f9445d57e3df01d60335f;p=platform%2Fupstream%2Fdotnet%2Fruntime.git Use IndexOf in StringBuilder.Replace(string, string, ...) (#81098) * Use IndexOf in StringBuilder.Replace StringBuilder.Replace(string, string, ...) currently walks character-by-character to find the next location of the oldValue. This change adds the use of MemoryExtensions.IndexOf to search for the oldValue, and only falls back to the character-by-character walk when it nears and needs to account for a chunk boundary. It also switches to track replacement indices using more stack space and the array pool (via ValueListBuilder) rather than less stack space and array allocations. * Fix faulty assert --- diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs b/src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs index 3305173..40fd4a2 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/StringBuilder.cs @@ -1934,8 +1934,7 @@ namespace System.Text newValue ??= string.Empty; - Span replacements = stackalloc int[5]; // A list of replacement positions in a chunk to apply - int replacementsCount = 0; + var replacements = new ValueListBuilder(stackalloc int[128]); // A list of replacement positions in a chunk to apply // Find the chunk, indexInChunk for the starting point StringBuilder chunk = FindChunkForIndex(startIndex); @@ -1944,45 +1943,89 @@ namespace System.Text { Debug.Assert(chunk != null, "chunk was null in replace"); - // Look for a match in the chunk,indexInChunk reference - if (StartsWith(chunk, indexInChunk, count, oldValue)) + // While the remaining search space is at least as large as the old value being replaced, + // find all occurrences of it contained entirely within the chunk. We stop searching + // once we're within oldValue.Length from the end of the chunk (or count limit), at which point + // we need to consider a value that bridges between two chunks. + ReadOnlySpan remainingChunk = chunk.m_ChunkChars.AsSpan(indexInChunk, Math.Min(chunk.m_ChunkLength - indexInChunk, count)); + while (oldValue.Length <= remainingChunk.Length) { - // Push it on the replacements array (with growth), we will do all replacements in a - // given chunk in one operation below (see ReplaceAllInChunk) so we don't have to slide - // many times. - if (replacementsCount >= replacements.Length) + // Find the next match. + int foundPos = remainingChunk.IndexOf(oldValue); + if (foundPos >= 0) { - int[] tmp = new int[replacements.Length * 3 / 2 + 4]; // Grow by ~1.5x, but more in the beginning - replacements.CopyTo(tmp); - replacements = tmp; + // We found one. Add it as a location for the replacement. + indexInChunk += foundPos; + replacements.Append(indexInChunk); + + // Move ahead to the next location. + remainingChunk = remainingChunk.Slice(foundPos + oldValue.Length); + indexInChunk += oldValue.Length; + count -= foundPos + oldValue.Length; + + // If after accounting for moving past the match our count has + // gone to 0, break out to stop searching. + Debug.Assert(count >= 0, "count should never go negative"); + if (count == 0) + { + break; + } + } + else + { + // No match found. Reposition to one character beyond the last starting + // location searched, which will be oldValue.Length - 1 from the end. + // Then break out so that we can start the cross-chunk matching from that location. + int move = remainingChunk.Length - (oldValue.Length - 1); + indexInChunk += move; + count -= move; + break; } - replacements[replacementsCount++] = indexInChunk; - indexInChunk += oldValue.Length; - count -= oldValue.Length; } - else + + Debug.Assert(oldValue.Length > Math.Min(count, chunk.m_ChunkLength - indexInChunk), + $"oldValue.Length = {oldValue.Length}, chunk.m_ChunkLength - indexInChunk = {chunk.m_ChunkLength - indexInChunk}, count == {count}"); + + // Now do the more complicated cross-chunk matching. + while (indexInChunk < chunk.m_ChunkLength && count > 0) { - indexInChunk++; - --count; + if (StartsWith(chunk, indexInChunk, count, oldValue)) + { + replacements.Append(indexInChunk); + indexInChunk += oldValue.Length; + count -= oldValue.Length; + } + else + { + indexInChunk++; + --count; + } } - if (indexInChunk >= chunk.m_ChunkLength || count == 0) // Have we moved out of the current chunk? - { - // Replacing mutates the blocks, so we need to convert to a logical index and back afterwards. - int index = indexInChunk + chunk.m_ChunkOffset; + // We've either fully explored the chunk or we've reached our count limit. + Debug.Assert(indexInChunk >= chunk.m_ChunkLength || count == 0, + $"indexInChunk = {indexInChunk}, chunk.m_ChunkLength == {chunk.m_ChunkLength}, count == {count}"); - // See if we accumulated any replacements, if so apply them. - ReplaceAllInChunk(replacements.Slice(0, replacementsCount), chunk, oldValue.Length, newValue); - // The replacement has affected the logical index. Adjust it. - index += ((newValue.Length - oldValue.Length) * replacementsCount); - replacementsCount = 0; + // Replacing mutates the blocks, so we need to convert to a logical index and back afterwards. + int index = indexInChunk + chunk.m_ChunkOffset; - chunk = FindChunkForIndex(index); - indexInChunk = index - chunk.m_ChunkOffset; - Debug.Assert(chunk != null || count == 0, "Chunks ended prematurely!"); + // Apply any replacements we accumulated. + if (replacements.Length != 0) + { + // Perform all replacements, and adjust the logical index if the new and old values + // have different lengths, such that the replacements would have impacted it. + ReplaceAllInChunk(replacements.AsSpan(), chunk, oldValue.Length, newValue); + index += (newValue.Length - oldValue.Length) * replacements.Length; + replacements.Length = 0; } + + chunk = FindChunkForIndex(index); + indexInChunk = index - chunk.m_ChunkOffset; + Debug.Assert(chunk != null || count == 0, "Chunks ended prematurely!"); } + replacements.Dispose(); + AssertInvariants(); return this; } @@ -2154,10 +2197,7 @@ namespace System.Text /// private void ReplaceAllInChunk(ReadOnlySpan replacements, StringBuilder sourceChunk, int removeCount, string value) { - if (replacements.IsEmpty) - { - return; - } + Debug.Assert(!replacements.IsEmpty); // calculate the total amount of extra space or space needed for all the replacements. long longDelta = (value.Length - removeCount) * (long)replacements.Length;