From 442a42147ef23c3b9742abcd8b997e8f472af68a Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Tue, 25 Jan 2022 21:54:35 +0300 Subject: [PATCH] Faster IndexOf for substrings (#63285) * Improve "lastChar == firstChar" case, also, use IndexOf directly if value.Length == 1 * Try plain IndexOf first, to optimize cases where even first char of value is never met * add 1-byte implementation * copyrights * fix copy-paste mistake * Initial LastIndexOf impl * More efficient LastIndexOf * fix bug in Char version (we need two clear two lowest bits in the mask) & temporarily remove AdvSimd impl * use ResetLowestSetBit * Fix bug * Add two-byte LastIndexOf * Fix build * Minor optimizations * optimize cases with two-byte/two-char values * Remove gotos, fix build * fix bug in LastIndexOf * Make sure String.LastIndexOf is optimized * Use xplat simd helpers - implicit ARM support * fix arm * Delete \ * Use Vector128.IsHardwareAccelerated * Fix build * Use IsAllZero * Address feedback * Address feedback * micro-optimization, do-while is better here since mask is guaranteed to be non-zero * Address feedabc * Use clever trick I borrowed from IndexOfAny for trailing elements * give up on +1 bump for SequenceCompare * Clean up * Clean up * fix build * Add debug asserts * Clean up: give up on the unrolled trick - too little value from code bloat * Add a test * Fix build * Add byte-specific test * Fix build * Update IndexOfSequence.byte.cs --- THIRD-PARTY-NOTICES.TXT | 29 ++ .../tests/Span/IndexOfSequence.byte.cs | 101 +++++++ .../tests/Span/IndexOfSequence.char.cs | 101 +++++++ .../src/System/MemoryExtensions.cs | 25 +- .../src/System/Numerics/BitOperations.cs | 20 ++ .../src/System/SpanHelpers.Byte.cs | 310 ++++++++++++++++++--- .../src/System/SpanHelpers.Char.cs | 299 +++++++++++++++++++- .../src/System/SpanHelpers.T.cs | 10 +- 8 files changed, 833 insertions(+), 62 deletions(-) diff --git a/THIRD-PARTY-NOTICES.TXT b/THIRD-PARTY-NOTICES.TXT index e38f6ef..55329a8 100644 --- a/THIRD-PARTY-NOTICES.TXT +++ b/THIRD-PARTY-NOTICES.TXT @@ -697,6 +697,35 @@ License for fastmod (https://github.com/lemire/fastmod) and ibm-fpgen (https://g See the License for the specific language governing permissions and limitations under the License. +License for sse4-strstr (https://github.com/WojciechMula/sse4-strstr) +-------------------------------------- + + Copyright (c) 2008-2016, Wojciech Muła + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED + TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + License notice for The C++ REST SDK ----------------------------------- diff --git a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs index 3325056..1a9027d 100644 --- a/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs +++ b/src/libraries/System.Memory/tests/Span/IndexOfSequence.byte.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. using Xunit; +using System.Collections.Generic; +using System.Runtime.InteropServices; namespace System.SpanTests { @@ -115,5 +117,104 @@ namespace System.SpanTests int index = span.IndexOf(value); Assert.Equal(-1, index); } + + public static IEnumerable IndexOfSubSeqData_Byte() + { + // searchSpace, value, expected IndexOf value, expected LastIndexOf value + yield return new object[] { new byte[]{0,0,0,0,0},new byte[]{0,0,0}, 0, 2}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0},new byte[]{0,71,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0},new byte[]{0,0,0}, 0, 7}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,71,0,1,0}, 10, 10}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,71,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,0,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0},new byte[]{0,0,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0},new byte[]{0,0,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0},new byte[]{0,1,0,0,0}, 12, 12}; + yield return new object[] { new byte[]{0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0},new byte[]{0,1,0,0,0}, 11, 11}; + yield return new object[] { new byte[]{0,0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0},new byte[]{0,1,0,0,0}, 6, 6}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,0,0,0,0,1,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,1,0,0,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,1,0,0,0,0,0}, 5, 5}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,1,0,0,0,0,0}, 5, 5}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1},new byte[]{0,1,0,0,0,0,0}, 5, 5}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0}, 0, 44}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0}, 0, 43}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0}, 7, 42}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0}, 7, 41}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0}, 7, 11}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0}, 7, 10}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0}, 7, 9}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0}, 7, 7}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,1,0,0}, 5, 48}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,1,0}, 44, 44}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,1,0,0,0,0}, 5, 19}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,1,0,0,0,1,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0}, 7, 11}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,1,0,0,1,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,1,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,1,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,1,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,1,1,0,0,0,0,0,1,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,0,0,1,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{0,0,0,0,71,0,1,0,0,0,0,0,0,0,0,0,0,0,71,0,1,0,0,0,0,1,1,0,2,0,1,1,0,1,1,0,1,0,0,1,1,0,0,0,0,0,0,1,0,1,0,0,1,0},new byte[]{0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,0,0,0,0}, -1, -1}; + yield return new object[] { new byte[]{159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133},new byte[]{159,133,159,133,159,133}, 0, 22}; + yield return new object[] { new byte[]{159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133},new byte[]{159,133,255,159,133}, -1, -1}; + yield return new object[] { new byte[]{159,133,159,133,159,133,159,133,159,133,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,160,86,160,86,160,86,160,86,160,80},new byte[]{160,86,160,86,160,86,160,86,160,80}, 40, 40}; + yield return new object[] { new byte[]{159,133,159,133,159,133,159,133,159,133,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,159,127,160,86,160,86,160,86,160,86,160,80,160,80,160,80,160,80,160,80,160,80,160,80,160,86,160,86,160,86,160,86},new byte[]{160,86,160,86,160,86,160,86}, 40, 62}; + yield return new object[] { new byte[]{159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133,159,133},new byte[]{0,0,0,1}, -1, -1}; + yield return new object[] { new byte[]{255,160,82,159,134,159,127,255,159,141,160,85,160,82,160,88,255,159,141,159,127,159,134,255,160,88,160,85,160,82,159,141,159,127,159,134,160,88,160,85,160,82,159,141,255,159,127,159,134,160,88,160,85,160,82,159,141,159,127,159,134,160,88,159,141,160,85,255,160,82,159,134,159,141,159,134,160,85,160,82,159,141,159,127,159,134,160,82,159,141,160,85,159,134,255,160,88,159,127,160,82,160,85,159,134,255,159,141,159,127,159,134,160,85,159,141},new byte[]{255,159,141,159,127,159,134,255}, 16, 16}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{49,49}, 29, 29}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49},new byte[]{49,49}, 29, 29}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{49,49}, 29, 29}; + yield return new object[] { new byte[]{49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{49,49}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,49},new byte[]{49,49,49}, 29, 29}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,49,50},new byte[]{49,49,49}, 29, 29}; + yield return new object[] { new byte[]{49,49,49,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{49,49,49}, 0, 1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49},new byte[]{48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{48,48}, -1, -1}; + yield return new object[] { new byte[]{49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,49},new byte[]{48,48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,49,50},new byte[]{48,48,48}, -1, -1}; + yield return new object[] { new byte[]{49,49,49,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{48,48,48}, -1, -1}; + yield return new object[] { new byte[]{48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,49,50},new byte[]{48,49,48,48}, -1, -1}; + yield return new object[] { new byte[]{49,48,49,49,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{49,48,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,48,49,50},new byte[]{160,80,48,160,80,160,80}, -1, -1}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 15}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 17}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 18, 32}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,49,49,49,49,49,49,49,49,49,49,49,49}, 20, 20}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49}, -1, -1}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49},new byte[]{49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49}, 0, 0}; + yield return new object[] { new byte[]{71,71,71,71,71,71,71,71,71,71,71,71,71,71,49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,71,71,71,71,71,71,71,71,71,71,71,71,71,71,71},new byte[]{71,71,71,71,71,71,71,71,71,71,71,71,71,71,71}, 60, 60}; + yield return new object[] { new byte[]{71,71,71,71,71,71,71,71,71,71,71,71,71,71,71,49,48,49,49,49,49,49,49,71,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,49,48,49,49,49,49,49,49,49,49,49,49,49,49,71,71,71,71,71,71,71,71,71,71,71,71,71,71},new byte[]{71,71,71,71,71,71,71,71,71,71,71,71,71,71,71}, 0, 0}; + } + + [Theory] + [MemberData(nameof(IndexOfSubSeqData_Byte))] + public static void ValueStartsAndEndsWithTheSameBytes(byte[] searchSpace, byte[] value, int expectedIndexOfValue, int expectedLastIndexOfValue) + { + Assert.Equal(expectedIndexOfValue, searchSpace.AsSpan().IndexOf(value)); + Assert.Equal(expectedLastIndexOfValue, searchSpace.AsSpan().LastIndexOf(value)); + } } } diff --git a/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs b/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs index bda6261..b341a79 100644 --- a/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs +++ b/src/libraries/System.Memory/tests/Span/IndexOfSequence.char.cs @@ -2,6 +2,8 @@ // The .NET Foundation licenses this file to you under the MIT license. using Xunit; +using System.Collections.Generic; +using System.Runtime.InteropServices; namespace System.SpanTests { @@ -115,5 +117,104 @@ namespace System.SpanTests int index = span.IndexOf(value); Assert.Equal(-1, index); } + + public static IEnumerable IndexOfSubSeqData_Char() + { + // searchSpace, value, expected IndexOf value, expected LastIndexOf value + yield return new object[] { "11111", "111", 0, 2 }; + yield return new object[] { "1111111111", "1x1", -1, -1 }; + yield return new object[] { "1111111111", "111", 0, 7 }; + yield return new object[] { "11111111111x12111", "1x121", 10, 10 }; + yield return new object[] { "11111111111x12111", "11121", -1, -1 }; + yield return new object[] { "1111111111x121111", "11121", -1, -1 }; + yield return new object[] { "11111x12111111111", "11121", -1, -1 }; + yield return new object[] { "11111111111x12111", "1x211", -1, -1 }; + yield return new object[] { "11111111111x12111", "11211", -1, -1 }; + yield return new object[] { "1111111111x121111", "11211", -1, -1 }; + yield return new object[] { "11111x12111111111", "11211", -1, -1 }; + yield return new object[] { "11111111111x12111", "12111", 12, 12 }; + yield return new object[] { "1111111111x121111", "12111", 11, 11 }; + yield return new object[] { "11111x12111111111", "12111", 6, 6 }; + yield return new object[] { "1111x1211111111111x12", "11121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "11121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1111121", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1211211", -1, -1 }; + yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; + yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; + yield return new object[] { "1111x1211111111111x12", "1211111", 5, 5 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111", 0, 44 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111", 0, 43 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111", 7, 42 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111", 7, 41 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111", 7, 11 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111", 7, 10 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111", 7, 9 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111", 7, 7 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1211", 5, 48 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11121", 44, 44 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "121111", 5, 19 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "12111211", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111", 7, 11 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1121121111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111211111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111111211111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1121111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11122111112111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "1111111211111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "111211111111111111", -1, -1 }; + yield return new object[] { "1111x1211111111111x12111122131221221211221111112121121", "11111211111121111111", -1, -1 }; + yield return new object[] { "жжжжжжжжжжжжжж", "жжж", 0, 11 }; + yield return new object[] { "жжжжжжжжжжжжжжжжжжжжжжжжжжжж", "ж0ж", -1, -1 }; + yield return new object[] { "жжжжжаааааааааааааааччччс", "ччччс", 20, 20 }; + yield return new object[] { "жжжжжаааааааааааааааччччсссссссчччч", "чччч", 20, 31 }; + yield return new object[] { "жжжжжжжжжжжжжжжжжжжжжжжжжжжж", "1112", -1, -1 }; + yield return new object[] { "0уза0оцущ0оаз0щцуоазщцуо0азщцуоазщоц0узозцуоазуоцз0щауцз0оазцо", "0оаз0", 9, 9 }; + yield return new object[] { "abababababababababababababababbc", "bb", 29, 29 }; + yield return new object[] { "abababababababababababababababb", "bb", 29, 29 }; + yield return new object[] { "abababababababababababababababbc", "bb", 29, 29 }; + yield return new object[] { "babababababababababababababababc", "bb", -1, -1 }; + yield return new object[] { "abababababababababababababababbb", "bbb", 29, 29 }; + yield return new object[] { "abababababababababababababababbbc", "bbb", 29, 29 }; + yield return new object[] { "bbbbabababababababababababababababc", "bbb", 0, 1 }; + yield return new object[] { "abababababababababababababababbc", "aa", -1, -1 }; + yield return new object[] { "abababababababababababababababb", "aa", -1, -1 }; + yield return new object[] { "abababababababababababababababbc", "aa", -1, -1 }; + yield return new object[] { "babababababababababababababababc", "aa", -1, -1 }; + yield return new object[] { "abababababababababababababababbb", "aaa", -1, -1 }; + yield return new object[] { "abababababababababababababababbbc", "aaa", -1, -1 }; + yield return new object[] { "bbbbabababababababababababababababc", "aaa", -1, -1 }; + yield return new object[] { "ababababababababababababababababbc", "abaa", -1, -1 }; + yield return new object[] { "babbbabababababababababababababababc", "babb", 0, 0 }; + yield return new object[] { "babbbabababababababababababababababc", "сaсс", -1, -1 }; + yield return new object[] { "babbbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 15 }; + yield return new object[] { "babbbbbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 0, 17 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbbbbbbb", 18, 32 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "bbbbbbbbbbbbb", 20, 20 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbb", -1, -1 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", 0, 0 }; + yield return new object[] { "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbb", "babbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbb", 0, 0 }; + yield return new object[] { "xxxxxxxxxxxxxxbabbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbxxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxx", 60, 60 }; + yield return new object[] { "xxxxxxxxxxxxxxxbabbbbbbxbbbbbbbbbbabbbbbbbbbbbbbabbbbbbbbbbbbxxxxxxxxxxxxxx", "xxxxxxxxxxxxxxx", 0, 0 }; + } + + [Theory] + [MemberData(nameof(IndexOfSubSeqData_Char))] + public static void ValueStartsAndEndsWithTheSameChars(string searchSpace, string value, int expectedIndexOfValue, int expectedLastIndexOfValue) + { + Assert.Equal(expectedIndexOfValue, searchSpace.AsSpan().IndexOf(value)); + Assert.Equal(expectedLastIndexOfValue, searchSpace.AsSpan().LastIndexOf(value)); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs index b49dea6..3c7c13f 100644 --- a/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs +++ b/src/libraries/System.Private.CoreLib/src/System/MemoryExtensions.cs @@ -580,12 +580,25 @@ namespace System [MethodImpl(MethodImplOptions.AggressiveInlining)] public static int LastIndexOf(this ReadOnlySpan span, ReadOnlySpan value) where T : IEquatable { - if (Unsafe.SizeOf() == sizeof(byte) && RuntimeHelpers.IsBitwiseEquatable()) - return SpanHelpers.LastIndexOf( - ref Unsafe.As(ref MemoryMarshal.GetReference(span)), - span.Length, - ref Unsafe.As(ref MemoryMarshal.GetReference(value)), - value.Length); + if (RuntimeHelpers.IsBitwiseEquatable()) + { + if (Unsafe.SizeOf() == sizeof(byte)) + { + return SpanHelpers.LastIndexOf( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref Unsafe.As(ref MemoryMarshal.GetReference(value)), + value.Length); + } + if (Unsafe.SizeOf() == sizeof(char)) + { + return SpanHelpers.LastIndexOf( + ref Unsafe.As(ref MemoryMarshal.GetReference(span)), + span.Length, + ref Unsafe.As(ref MemoryMarshal.GetReference(value)), + value.Length); + } + } return SpanHelpers.LastIndexOf(ref MemoryMarshal.GetReference(span), span.Length, ref MemoryMarshal.GetReference(value), value.Length); } diff --git a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs index 6a7141c..419a1c9 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Numerics/BitOperations.cs @@ -708,5 +708,25 @@ namespace System.Numerics return (nuint)RotateRight((uint)value, offset); #endif } + + /// + /// Reset the lowest significant bit in the given value + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static uint ResetLowestSetBit(uint value) + { + // It's lowered to BLSR on x86 + return value & (value - 1); + } + + /// + /// Reset specific bit in the given value + /// + [MethodImpl(MethodImplOptions.AggressiveInlining)] + internal static uint ResetBit(uint value, int bitPos) + { + // TODO: Recognize BTR on x86 and LSL+BIC on ARM + return value & ~(uint)(1 << bitPos); + } } } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs index e94b1df..3a3bc58 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Byte.cs @@ -22,12 +22,21 @@ namespace System if (valueLength == 0) return 0; // A zero-length sequence is always treated as "found" at the start of the search space. - byte valueHead = value; - ref byte valueTail = ref Unsafe.Add(ref value, 1); int valueTailLength = valueLength - 1; - int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; + if (valueTailLength == 0) + return IndexOf(ref searchSpace, value, searchSpaceLength); // for single-byte values use plain IndexOf int offset = 0; + byte valueHead = value; + int searchSpaceMinusValueTailLength = searchSpaceLength - valueTailLength; + if (Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_BYTES; + } + + ref byte valueTail = ref Unsafe.Add(ref value, 1); + int remainingSearchSpaceLength = searchSpaceMinusValueTailLength; + while (remainingSearchSpaceLength > 0) { // Do a quick search for the first element of "value". @@ -42,13 +51,264 @@ namespace System break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. // Found the first element of "value". See if the tail matches. - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, offset + 1), ref valueTail, (nuint)valueTailLength)) // The (nuint)-cast is necessary to pick the correct overload + if (SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + 1), + ref valueTail, (nuint)(uint)valueTailLength)) // The (nuint)-cast is necessary to pick the correct overload return offset; // The tail matched. Return a successful find. remainingSearchSpaceLength--; offset++; } return -1; + + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + SEARCH_TWO_BYTES: + if (Avx2.IsSupported && searchSpaceMinusValueTailLength - Vector256.Count >= 0) + { + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector256 ch1 = Vector256.Create(value); + Vector256 ch2 = Vector256.Create(ch2Val); + + do + { + Debug.Assert(offset >= 0); + // Make sure we don't go out of bounds + Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= searchSpaceLength); + + Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector256.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + if (valueLength == 2 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload + { + return offset + bitPos; + } + mask = BitOperations.ResetLowestSetBit(mask); // Clear the lowest set bit + } while (mask != 0); + } + offset += Vector256.Count; + + if (offset == searchSpaceMinusValueTailLength) + return -1; + + // Overlap with the current chunk for trailing elements + if (offset > searchSpaceMinusValueTailLength - Vector256.Count) + offset = searchSpaceMinusValueTailLength - Vector256.Count; + } while (true); + } + else // 128bit vector path (SSE2 or AdvSimd) + { + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create(value); + Vector128 ch2 = Vector128.Create(ch2Val); + + do + { + Debug.Assert(offset >= 0); + // Make sure we don't go out of bounds + Debug.Assert(offset + ch1ch2Distance + Vector128.Count <= searchSpaceLength); + + Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector128.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + if (valueLength == 2 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload + { + return offset + bitPos; + } + // Clear the lowest set bit + mask = BitOperations.ResetLowestSetBit(mask); + } while (mask != 0); + } + offset += Vector128.Count; + + if (offset == searchSpaceMinusValueTailLength) + return -1; + + // Overlap with the current chunk for trailing elements + if (offset > searchSpaceMinusValueTailLength - Vector128.Count) + offset = searchSpaceMinusValueTailLength - Vector128.Count; + } while (true); + } + } + + public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength) + { + Debug.Assert(searchSpaceLength >= 0); + Debug.Assert(valueLength >= 0); + + if (valueLength == 0) + return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. + + int valueTailLength = valueLength - 1; + if (valueTailLength == 0) + return LastIndexOf(ref searchSpace, value, searchSpaceLength); // for single-byte values use plain LastIndexOf + + int offset = 0; + byte valueHead = value; + int searchSpaceMinusValueTailLength = searchSpaceLength - valueTailLength; + if (Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_BYTES; + } + + ref byte valueTail = ref Unsafe.Add(ref value, 1); + + while (true) + { + Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". + int remainingSearchSpaceLength = searchSpaceLength - offset - valueTailLength; + if (remainingSearchSpaceLength <= 0) + break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. + + // Do a quick search for the first element of "value". + int relativeIndex = LastIndexOf(ref searchSpace, valueHead, remainingSearchSpaceLength); + if (relativeIndex < 0) + break; + + // Found the first element of "value". See if the tail matches. + if (SequenceEqual( + ref Unsafe.Add(ref searchSpace, relativeIndex + 1), + ref valueTail, (nuint)(uint)valueTailLength)) // The (nuint)-cast is necessary to pick the correct overload + return relativeIndex; // The tail matched. Return a successful find. + + offset += remainingSearchSpaceLength - relativeIndex; + } + return -1; + + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + SEARCH_TWO_BYTES: + if (Avx2.IsSupported && searchSpaceMinusValueTailLength >= Vector256.Count) + { + offset = searchSpaceMinusValueTailLength - Vector256.Count; + + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector256 ch1 = Vector256.Create(value); + Vector256 ch2 = Vector256.Create(ch2Val); + do + { + Vector256 cmpCh1 = Vector256.Equals(ch1, Vector256.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, Vector256.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector256.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + if (valueLength == 2 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload + { + return bitPos + offset; + } + // Clear the highest set bit. + mask = BitOperations.ResetBit(mask, bitPos); + } while (mask != 0); + } + + offset -= Vector256.Count; + if (offset == -Vector256.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + } while (true); + } + else // 128bit vector path (SSE2 or AdvSimd) + { + offset = searchSpaceMinusValueTailLength - Vector128.Count; + + // Find the last unique (which is not equal to ch1) byte + // the algorithm is fine if both are equal, just a little bit less efficient + byte ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create(value); + Vector128 ch2 = Vector128.Create(ch2Val); + + do + { + Vector128 cmpCh1 = Vector128.Equals(ch1, Vector128.LoadUnsafe(ref searchSpace, (nuint)offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, Vector128.LoadUnsafe(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + // it's especially important for ARM where ExtractMostSignificantBits is not cheap + if (cmpAnd != Vector128.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 31 - BitOperations.LeadingZeroCount(mask); + if (valueLength == 2 || // we already matched two bytes + SequenceEqual( + ref Unsafe.Add(ref searchSpace, offset + bitPos), + ref value, (nuint)(uint)valueLength)) // The (nuint)-cast is necessary to pick the correct overload + { + return bitPos + offset; + } + // Clear the highest set bit. + mask = BitOperations.ResetBit(mask, bitPos); + } while (mask != 0); + } + + offset -= Vector128.Count; + if (offset == -Vector128.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + + } while (true); + } } // Adapted from IndexOf(...) @@ -408,40 +668,6 @@ namespace System return (int)(offset + 7); } - public static int LastIndexOf(ref byte searchSpace, int searchSpaceLength, ref byte value, int valueLength) - { - Debug.Assert(searchSpaceLength >= 0); - Debug.Assert(valueLength >= 0); - - if (valueLength == 0) - return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. - - byte valueHead = value; - ref byte valueTail = ref Unsafe.Add(ref value, 1); - int valueTailLength = valueLength - 1; - - int offset = 0; - while (true) - { - Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". - int remainingSearchSpaceLength = searchSpaceLength - offset - valueTailLength; - if (remainingSearchSpaceLength <= 0) - break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. - - // Do a quick search for the first element of "value". - int relativeIndex = LastIndexOf(ref searchSpace, valueHead, remainingSearchSpaceLength); - if (relativeIndex < 0) - break; - - // Found the first element of "value". See if the tail matches. - if (SequenceEqual(ref Unsafe.Add(ref searchSpace, relativeIndex + 1), ref valueTail, (nuint)(uint)valueTailLength)) // The (nunit)-cast is necessary to pick the correct overload - return relativeIndex; // The tail matched. Return a successful find. - - offset += remainingSearchSpaceLength - relativeIndex; - } - return -1; - } - [MethodImpl(MethodImplOptions.AggressiveOptimization)] public static int LastIndexOf(ref byte searchSpace, byte value, int length) { @@ -1564,13 +1790,11 @@ namespace System // This becomes a conditional jmp foward to not favor it. goto NotEqual; } - // Use Vector128.Size as Vector128.Count doesn't inline at R2R time - // https://github.com/dotnet/runtime/issues/32714 - else if (length >= Vector128.Size) + else if (length >= (nuint)Vector128.Count) { Vector128 vecResult; nuint offset = 0; - nuint lengthToExamine = length - Vector128.Size; + nuint lengthToExamine = length - (nuint)Vector128.Count; // Unsigned, so it shouldn't have overflowed larger than length (rather than negative) Debug.Assert(lengthToExamine < length); if (lengthToExamine != 0) @@ -1584,7 +1808,7 @@ namespace System { goto NotEqual; } - offset += Vector128.Size; + offset += (nuint)Vector128.Count; } while (lengthToExamine > offset); } diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs index 753e530..fb00b06 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.Char.cs @@ -22,38 +22,315 @@ namespace System if (valueLength == 0) return 0; // A zero-length sequence is always treated as "found" at the start of the search space. - char valueHead = value; - ref char valueTail = ref Unsafe.Add(ref value, 1); int valueTailLength = valueLength - 1; - int remainingSearchSpaceLength = searchSpaceLength - valueTailLength; + if (valueTailLength == 0) + { + // for single-char values use plain IndexOf + return IndexOf(ref searchSpace, value, searchSpaceLength); + } + + int offset = 0; + char valueHead = value; + int searchSpaceMinusValueTailLength = searchSpaceLength - valueTailLength; + if (Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_CHARS; + } + + ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); + int remainingSearchSpaceLength = searchSpaceMinusValueTailLength; - int index = 0; while (remainingSearchSpaceLength > 0) { // Do a quick search for the first element of "value". - int relativeIndex = IndexOf(ref Unsafe.Add(ref searchSpace, index), valueHead, remainingSearchSpaceLength); + int relativeIndex = IndexOf(ref Unsafe.Add(ref searchSpace, offset), valueHead, remainingSearchSpaceLength); if (relativeIndex < 0) break; remainingSearchSpaceLength -= relativeIndex; - index += relativeIndex; + offset += relativeIndex; if (remainingSearchSpaceLength <= 0) break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. // Found the first element of "value". See if the tail matches. if (SequenceEqual( - ref Unsafe.As(ref Unsafe.Add(ref searchSpace, index + 1)), - ref Unsafe.As(ref valueTail), - (nuint)(uint)valueTailLength * 2)) + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + 1)), + ref valueTail, + (nuint)(uint)valueTailLength * 2)) { - return index; // The tail matched. Return a successful find. + return offset; // The tail matched. Return a successful find. } remainingSearchSpaceLength--; - index++; + offset++; } return -1; + + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + SEARCH_TWO_CHARS: + if (Avx2.IsSupported && searchSpaceMinusValueTailLength - Vector256.Count >= 0) + { + // Find the last unique (which is not equal to ch1) character + // the algorithm is fine if both are equal, just a little bit less efficient + ushort ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == valueHead && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector256 ch1 = Vector256.Create((ushort)valueHead); + Vector256 ch2 = Vector256.Create(ch2Val); + + do + { + // Make sure we don't go out of bounds + Debug.Assert(offset + ch1ch2Distance + Vector256.Count <= searchSpaceLength); + + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, offset + ch1ch2Distance)); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector256.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + // div by 2 (shr) because we work with 2-byte chars + int charPos = (int)((uint)bitPos / 2); + if (valueLength == 2 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + { + return offset + charPos; + } + + // Clear two the lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); + } while (mask != 0); + } + offset += Vector256.Count; + + if (offset == searchSpaceMinusValueTailLength) + return -1; + + // Overlap with the current chunk for trailing elements + if (offset > searchSpaceMinusValueTailLength - Vector256.Count) + offset = searchSpaceMinusValueTailLength - Vector256.Count; + } while (true); + } + else // 128bit vector path (SSE2 or AdvSimd) + { + // Find the last unique (which is not equal to ch1) character + // the algorithm is fine if both are equal, just a little bit less efficient + ushort ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == valueHead && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create((ushort)valueHead); + Vector128 ch2 = Vector128.Create(ch2Val); + + do + { + // Make sure we don't go out of bounds + Debug.Assert(offset + ch1ch2Distance + Vector128.Count <= searchSpaceLength); + + Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, offset + ch1ch2Distance)); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector128.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + int bitPos = BitOperations.TrailingZeroCount(mask); + // div by 2 (shr) because we work with 2-byte chars + int charPos = (int)((uint)bitPos / 2); + if (valueLength == 2 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + { + return offset + charPos; + } + + // Clear two lowest set bits + if (Bmi1.IsSupported) + mask = Bmi1.ResetLowestSetBit(Bmi1.ResetLowestSetBit(mask)); + else + mask &= ~(uint)(0b11 << bitPos); + } while (mask != 0); + } + offset += Vector128.Count; + + if (offset == searchSpaceMinusValueTailLength) + return -1; + + // Overlap with the current chunk for trailing elements + if (offset > searchSpaceMinusValueTailLength - Vector128.Count) + offset = searchSpaceMinusValueTailLength - Vector128.Count; + } while (true); + } + } + + public static int LastIndexOf(ref char searchSpace, int searchSpaceLength, ref char value, int valueLength) + { + Debug.Assert(searchSpaceLength >= 0); + Debug.Assert(valueLength >= 0); + + if (valueLength == 0) + return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. + + int valueTailLength = valueLength - 1; + if (valueTailLength == 0) + return LastIndexOf(ref searchSpace, value, searchSpaceLength); // for single-char values use plain LastIndexOf + + int offset = 0; + char valueHead = value; + int searchSpaceMinusValueTailLength = searchSpaceLength - valueTailLength; + if (Vector128.IsHardwareAccelerated && searchSpaceMinusValueTailLength >= Vector128.Count) + { + goto SEARCH_TWO_CHARS; + } + + ref byte valueTail = ref Unsafe.As(ref Unsafe.Add(ref value, 1)); + + while (true) + { + Debug.Assert(0 <= offset && offset <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". + int remainingSearchSpaceLength = searchSpaceLength - offset - valueTailLength; + if (remainingSearchSpaceLength <= 0) + break; // The unsearched portion is now shorter than the sequence we're looking for. So it can't be there. + + // Do a quick search for the first element of "value". + int relativeIndex = LastIndexOf(ref searchSpace, valueHead, remainingSearchSpaceLength); + if (relativeIndex == -1) + break; + + // Found the first element of "value". See if the tail matches. + if (SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, relativeIndex + 1)), + ref valueTail, (nuint)(uint)valueTailLength * 2)) + { + return relativeIndex; // The tail matched. Return a successful find. + } + + offset += remainingSearchSpaceLength - relativeIndex; + } + return -1; + + // Based on http://0x80.pl/articles/simd-strfind.html#algorithm-1-generic-simd "Algorithm 1: Generic SIMD" by Wojciech Muła + // Some details about the implementation can also be found in https://github.com/dotnet/runtime/pull/63285 + SEARCH_TWO_CHARS: + if (Avx2.IsSupported && searchSpaceMinusValueTailLength >= Vector256.Count) + { + offset = searchSpaceMinusValueTailLength - Vector256.Count; + + // Find the last unique (which is not equal to ch1) char + // the algorithm is fine if both are equal, just a little bit less efficient + char ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == valueHead && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector256 ch1 = Vector256.Create((ushort)valueHead); + Vector256 ch2 = Vector256.Create((ushort)ch2Val); + + do + { + + Vector256 cmpCh1 = Vector256.Equals(ch1, LoadVector256(ref searchSpace, (nuint)offset)); + Vector256 cmpCh2 = Vector256.Equals(ch2, LoadVector256(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + Vector256 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + if (cmpAnd != Vector256.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 30 - BitOperations.LeadingZeroCount(mask); + int charPos = (int)((uint)bitPos / 2); + + if (valueLength == 2 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + { + return charPos + offset; + } + mask &= ~(uint)(0b11 << bitPos); // clear two highest set bits. + } while (mask != 0); + } + + offset -= Vector256.Count; + if (offset == -Vector256.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + } while (true); + } + else // 128bit vector path (SSE2 or AdvSimd) + { + offset = searchSpaceMinusValueTailLength - Vector128.Count; + + // Find the last unique (which is not equal to ch1) char + // the algorithm is fine if both are equal, just a little bit less efficient + char ch2Val = Unsafe.Add(ref value, valueTailLength); + int ch1ch2Distance = valueTailLength; + while (ch2Val == value && ch1ch2Distance > 1) + ch2Val = Unsafe.Add(ref value, --ch1ch2Distance); + + Vector128 ch1 = Vector128.Create((ushort)value); + Vector128 ch2 = Vector128.Create((ushort)ch2Val); + + do + { + Vector128 cmpCh1 = Vector128.Equals(ch1, LoadVector128(ref searchSpace, (nuint)offset)); + Vector128 cmpCh2 = Vector128.Equals(ch2, LoadVector128(ref searchSpace, (nuint)(offset + ch1ch2Distance))); + Vector128 cmpAnd = (cmpCh1 & cmpCh2).AsByte(); + + // Early out: cmpAnd is all zeros + // it's especially important for ARM where ExtractMostSignificantBits is not cheap + if (cmpAnd != Vector128.Zero) + { + uint mask = cmpAnd.ExtractMostSignificantBits(); + do + { + // unlike IndexOf, here we use LZCNT to process matches starting from the end + int bitPos = 30 - BitOperations.LeadingZeroCount(mask); + int charPos = (int)((uint)bitPos / 2); + + if (valueLength == 2 || // we already matched two chars + SequenceEqual( + ref Unsafe.As(ref Unsafe.Add(ref searchSpace, offset + charPos)), + ref Unsafe.As(ref value), (nuint)(uint)valueLength * 2)) + { + return charPos + offset; + } + mask &= ~(uint)(0b11 << bitPos); // clear two the highest set bits. + } while (mask != 0); + } + + offset -= Vector128.Count; + if (offset == -Vector128.Count) + return -1; + // Overlap with the current chunk if there is not enough room for the next one + if (offset < 0) + offset = 0; + } while (true); + } } [MethodImpl(MethodImplOptions.AggressiveOptimization)] diff --git a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs index 91d2a34..27944af 100644 --- a/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs +++ b/src/libraries/System.Private.CoreLib/src/System/SpanHelpers.T.cs @@ -774,11 +774,17 @@ namespace System if (valueLength == 0) return searchSpaceLength; // A zero-length sequence is always treated as "found" at the end of the search space. - T valueHead = value; - ref T valueTail = ref Unsafe.Add(ref value, 1); int valueTailLength = valueLength - 1; + if (valueTailLength == 0) + { + return LastIndexOf(ref searchSpace, value, searchSpaceLength); + } int index = 0; + + T valueHead = value; + ref T valueTail = ref Unsafe.Add(ref value, 1); + while (true) { Debug.Assert(0 <= index && index <= searchSpaceLength); // Ensures no deceptive underflows in the computation of "remainingSearchSpaceLength". -- 2.7.4