{
internal static bool IsVectorizationSupported => Ssse3.IsSupported || AdvSimd.Arm64.IsSupported || PackedSimd.IsSupported;
- internal static unsafe void ComputeBitmap256(ReadOnlySpan<byte> values, out Vector128<byte> bitmap0, out Vector128<byte> bitmap1, out BitVector256 lookup)
+ internal static unsafe void ComputeBitmap256(ReadOnlySpan<byte> values, out Vector256<byte> bitmap0, out Vector256<byte> bitmap1, out BitVector256 lookup)
{
// The exact format of these bitmaps differs from the other ComputeBitmap overloads as it's meant for the full [0, 255] range algorithm.
// See http://0x80.pl/articles/simd-byte-lookup.html#universal-algorithm
}
}
- bitmap0 = bitmapSpace0;
- bitmap1 = bitmapSpace1;
+ bitmap0 = Vector256.Create(bitmapSpace0, bitmapSpace0);
+ bitmap1 = Vector256.Create(bitmapSpace1, bitmapSpace1);
lookup = lookupLocal;
}
- internal static unsafe void ComputeBitmap<T>(ReadOnlySpan<T> values, out Vector128<byte> bitmap, out BitVector256 lookup)
+ internal static unsafe void ComputeBitmap<T>(ReadOnlySpan<T> values, out Vector256<byte> bitmap, out BitVector256 lookup)
where T : struct, IUnsignedNumber<T>
{
Debug.Assert(typeof(T) == typeof(byte) || typeof(T) == typeof(char));
bitmapLocal[(uint)lowNibble] |= (byte)(1 << highNibble);
}
- bitmap = bitmapSpace;
+ bitmap = Vector256.Create(bitmapSpace, bitmapSpace);
lookup = lookupLocal;
}
Vector128<byte> bitmap = default;
if (TryComputeBitmap(asciiValues, (byte*)&bitmap, out bool needleContainsZero))
{
+ Vector256<byte> bitmap256 = Vector256.Create(bitmap, bitmap);
+
index = (Ssse3.IsSupported || PackedSimd.IsSupported) && needleContainsZero
- ? IndexOfAnyVectorized<TNegator, Ssse3AndWasmHandleZeroInNeedle>(ref searchSpace, searchSpaceLength, bitmap)
- : IndexOfAnyVectorized<TNegator, Default>(ref searchSpace, searchSpaceLength, bitmap);
+ ? IndexOfAnyVectorized<TNegator, Ssse3AndWasmHandleZeroInNeedle>(ref searchSpace, searchSpaceLength, ref bitmap256)
+ : IndexOfAnyVectorized<TNegator, Default>(ref searchSpace, searchSpaceLength, ref bitmap256);
return true;
}
}
Vector128<byte> bitmap = default;
if (TryComputeBitmap(asciiValues, (byte*)&bitmap, out bool needleContainsZero))
{
+ Vector256<byte> bitmap256 = Vector256.Create(bitmap, bitmap);
+
index = (Ssse3.IsSupported || PackedSimd.IsSupported) && needleContainsZero
- ? LastIndexOfAnyVectorized<TNegator, Ssse3AndWasmHandleZeroInNeedle>(ref searchSpace, searchSpaceLength, bitmap)
- : LastIndexOfAnyVectorized<TNegator, Default>(ref searchSpace, searchSpaceLength, bitmap);
+ ? LastIndexOfAnyVectorized<TNegator, Ssse3AndWasmHandleZeroInNeedle>(ref searchSpace, searchSpaceLength, ref bitmap256)
+ : LastIndexOfAnyVectorized<TNegator, Default>(ref searchSpace, searchSpaceLength, ref bitmap256);
return true;
}
}
return false;
}
- internal static int IndexOfAnyVectorized<TNegator, TOptimizations>(ref short searchSpace, int searchSpaceLength, Vector128<byte> bitmap)
+ internal static int IndexOfAnyVectorized<TNegator, TOptimizations>(ref short searchSpace, int searchSpaceLength, ref Vector256<byte> bitmapRef)
where TNegator : struct, INegator
where TOptimizations : struct, IOptimizations
{
ref short currentSearchSpace = ref searchSpace;
- if (searchSpaceLength > 2 * Vector128<short>.Count)
+ if (Avx2.IsSupported && searchSpaceLength > 2 * Vector128<short>.Count)
{
- if (Avx2.IsSupported)
- {
- Vector256<byte> bitmap256 = Vector256.Create(bitmap, bitmap);
+ Vector256<byte> bitmap256 = bitmapRef;
- if (searchSpaceLength > 2 * Vector256<short>.Count)
- {
- // Process the input in chunks of 32 characters (2 * Vector256<short>).
- // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256<byte>.
- // As packing two Vector256<short>s into a Vector256<byte> is cheap compared to the lookup, we can effectively double the throughput.
- // If the input length is a multiple of 32, don't consume the last 32 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
- ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - (2 * Vector256<short>.Count));
-
- do
- {
- Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
- Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
-
- Vector256<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap256);
- if (result != Vector256<byte>.Zero)
- {
- return ComputeFirstIndex<short, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
-
- currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256<short>.Count);
- }
- while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
- }
+ if (searchSpaceLength > 2 * Vector256<short>.Count)
+ {
+ // Process the input in chunks of 32 characters (2 * Vector256<short>).
+ // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256<byte>.
+ // As packing two Vector256<short>s into a Vector256<byte> is cheap compared to the lookup, we can effectively double the throughput.
+ // If the input length is a multiple of 32, don't consume the last 32 characters in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
+ ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - (2 * Vector256<short>.Count));
- // We have 1-32 characters remaining. Process the first and last vector in the search space.
- // They may overlap, but we'll handle that in the index calculation if we do get a match.
- Debug.Assert(searchSpaceLength >= Vector256<short>.Count, "We expect that the input is long enough for us to load a whole vector.");
+ do
{
- ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256<short>.Count);
-
- ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
- ? ref oneVectorAwayFromEnd
- : ref currentSearchSpace;
-
- Vector256<short> source0 = Vector256.LoadUnsafe(ref firstVector);
- Vector256<short> source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
+ Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
+ Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
Vector256<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap256);
if (result != Vector256<byte>.Zero)
{
- return ComputeFirstIndexOverlapped<short, TNegator>(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
+ return ComputeFirstIndex<short, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
- }
- return -1;
+ currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector256<short>.Count);
+ }
+ while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
- else
+
+ // We have 1-32 characters remaining. Process the first and last vector in the search space.
+ // They may overlap, but we'll handle that in the index calculation if we do get a match.
+ Debug.Assert(searchSpaceLength >= Vector256<short>.Count, "We expect that the input is long enough for us to load a whole vector.");
{
- // Process the input in chunks of 16 characters (2 * Vector128<short>).
- // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector128<byte>.
- // As packing two Vector128<short>s into a Vector128<byte> is cheap compared to the lookup, we can effectively double the throughput.
- // If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
- ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - (2 * Vector128<short>.Count));
+ ref short oneVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256<short>.Count);
- do
+ ref short firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAwayFromEnd)
+ ? ref oneVectorAwayFromEnd
+ : ref currentSearchSpace;
+
+ Vector256<short> source0 = Vector256.LoadUnsafe(ref firstVector);
+ Vector256<short> source1 = Vector256.LoadUnsafe(ref oneVectorAwayFromEnd);
+
+ Vector256<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap256);
+ if (result != Vector256<byte>.Zero)
{
- Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
- Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
+ return ComputeFirstIndexOverlapped<short, TNegator>(ref searchSpace, ref firstVector, ref oneVectorAwayFromEnd, result);
+ }
+ }
- Vector128<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap);
- if (result != Vector128<byte>.Zero)
- {
- return ComputeFirstIndex<short, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
+ return -1;
+ }
+
+ Vector128<byte> bitmap = bitmapRef._lower;
- currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector128<short>.Count);
+ if (!Avx2.IsSupported && searchSpaceLength > 2 * Vector128<short>.Count)
+ {
+ // Process the input in chunks of 16 characters (2 * Vector128<short>).
+ // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector128<byte>.
+ // As packing two Vector128<short>s into a Vector128<byte> is cheap compared to the lookup, we can effectively double the throughput.
+ // If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
+ ref short twoVectorsAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - (2 * Vector128<short>.Count));
+
+ do
+ {
+ Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
+ Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
+
+ Vector128<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap);
+ if (result != Vector128<byte>.Zero)
+ {
+ return ComputeFirstIndex<short, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
- while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
+
+ currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, 2 * Vector128<short>.Count);
}
+ while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref twoVectorsAwayFromEnd));
}
// We have 1-16 characters remaining. Process the first and last vector in the search space.
return -1;
}
- internal static int LastIndexOfAnyVectorized<TNegator, TOptimizations>(ref short searchSpace, int searchSpaceLength, Vector128<byte> bitmap)
+ internal static int LastIndexOfAnyVectorized<TNegator, TOptimizations>(ref short searchSpace, int searchSpaceLength, ref Vector256<byte> bitmapRef)
where TNegator : struct, INegator
where TOptimizations : struct, IOptimizations
{
ref short currentSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
- if (searchSpaceLength > 2 * Vector128<short>.Count)
+ if (Avx2.IsSupported && searchSpaceLength > 2 * Vector128<short>.Count)
{
- if (Avx2.IsSupported)
- {
- Vector256<byte> bitmap256 = Vector256.Create(bitmap, bitmap);
-
- if (searchSpaceLength > 2 * Vector256<short>.Count)
- {
- // Process the input in chunks of 32 characters (2 * Vector256<short>).
- // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256<byte>.
- // As packing two Vector256<short>s into a Vector256<byte> is cheap compared to the lookup, we can effectively double the throughput.
- // If the input length is a multiple of 32, don't consume the last 32 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
- ref short twoVectorsAfterStart = ref Unsafe.Add(ref searchSpace, 2 * Vector256<short>.Count);
-
- do
- {
- currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, 2 * Vector256<short>.Count);
+ Vector256<byte> bitmap256 = bitmapRef;
- Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
- Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
-
- Vector256<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap256);
- if (result != Vector256<byte>.Zero)
- {
- return ComputeLastIndex<short, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
- }
- while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart));
- }
+ if (searchSpaceLength > 2 * Vector256<short>.Count)
+ {
+ // Process the input in chunks of 32 characters (2 * Vector256<short>).
+ // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector256<byte>.
+ // As packing two Vector256<short>s into a Vector256<byte> is cheap compared to the lookup, we can effectively double the throughput.
+ // If the input length is a multiple of 32, don't consume the last 32 characters in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
+ ref short twoVectorsAfterStart = ref Unsafe.Add(ref searchSpace, 2 * Vector256<short>.Count);
- // We have 1-32 characters remaining. Process the first and last vector in the search space.
- // They may overlap, but we'll handle that in the index calculation if we do get a match.
- Debug.Assert(searchSpaceLength >= Vector256<short>.Count, "We expect that the input is long enough for us to load a whole vector.");
+ do
{
- ref short oneVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256<short>.Count);
+ currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, 2 * Vector256<short>.Count);
- ref short secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAfterStart)
- ? ref Unsafe.Subtract(ref currentSearchSpace, Vector256<short>.Count)
- : ref searchSpace;
-
- Vector256<short> source0 = Vector256.LoadUnsafe(ref searchSpace);
- Vector256<short> source1 = Vector256.LoadUnsafe(ref secondVector);
+ Vector256<short> source0 = Vector256.LoadUnsafe(ref currentSearchSpace);
+ Vector256<short> source1 = Vector256.LoadUnsafe(ref currentSearchSpace, (nuint)Vector256<short>.Count);
Vector256<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap256);
if (result != Vector256<byte>.Zero)
{
- return ComputeLastIndexOverlapped<short, TNegator>(ref searchSpace, ref secondVector, result);
+ return ComputeLastIndex<short, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
}
-
- return -1;
+ while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart));
}
- else
+
+ // We have 1-32 characters remaining. Process the first and last vector in the search space.
+ // They may overlap, but we'll handle that in the index calculation if we do get a match.
+ Debug.Assert(searchSpaceLength >= Vector256<short>.Count, "We expect that the input is long enough for us to load a whole vector.");
{
- // Process the input in chunks of 16 characters (2 * Vector128<short>).
- // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector128<byte>.
- // As packing two Vector128<short>s into a Vector128<byte> is cheap compared to the lookup, we can effectively double the throughput.
- // If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
- ref short twoVectorsAfterStart = ref Unsafe.Add(ref searchSpace, 2 * Vector128<short>.Count);
+ ref short oneVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256<short>.Count);
- do
+ ref short secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref oneVectorAfterStart)
+ ? ref Unsafe.Subtract(ref currentSearchSpace, Vector256<short>.Count)
+ : ref searchSpace;
+
+ Vector256<short> source0 = Vector256.LoadUnsafe(ref searchSpace);
+ Vector256<short> source1 = Vector256.LoadUnsafe(ref secondVector);
+
+ Vector256<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap256);
+ if (result != Vector256<byte>.Zero)
{
- currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, 2 * Vector128<short>.Count);
+ return ComputeLastIndexOverlapped<short, TNegator>(ref searchSpace, ref secondVector, result);
+ }
+ }
- Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
- Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
+ return -1;
+ }
- Vector128<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap);
- if (result != Vector128<byte>.Zero)
- {
- return ComputeLastIndex<short, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
+ Vector128<byte> bitmap = bitmapRef._lower;
+
+ if (!Avx2.IsSupported && searchSpaceLength > 2 * Vector128<short>.Count)
+ {
+ // Process the input in chunks of 16 characters (2 * Vector128<short>).
+ // We're mainly interested in a single byte of each character, and the core lookup operates on a Vector128<byte>.
+ // As packing two Vector128<short>s into a Vector128<byte> is cheap compared to the lookup, we can effectively double the throughput.
+ // If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
+ ref short twoVectorsAfterStart = ref Unsafe.Add(ref searchSpace, 2 * Vector128<short>.Count);
+
+ do
+ {
+ currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, 2 * Vector128<short>.Count);
+
+ Vector128<short> source0 = Vector128.LoadUnsafe(ref currentSearchSpace);
+ Vector128<short> source1 = Vector128.LoadUnsafe(ref currentSearchSpace, (nuint)Vector128<short>.Count);
+
+ Vector128<byte> result = IndexOfAnyLookup<TNegator, TOptimizations>(source0, source1, bitmap);
+ if (result != Vector128<byte>.Zero)
+ {
+ return ComputeLastIndex<short, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
- while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart));
}
+ while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref twoVectorsAfterStart));
}
// We have 1-16 characters remaining. Process the first and last vector in the search space.
return -1;
}
- internal static int IndexOfAnyVectorized<TNegator>(ref byte searchSpace, int searchSpaceLength, Vector128<byte> bitmap)
+ internal static int IndexOfAnyVectorized<TNegator>(ref byte searchSpace, int searchSpaceLength, ref Vector256<byte> bitmapRef)
where TNegator : struct, INegator
{
ref byte currentSearchSpace = ref searchSpace;
- if (searchSpaceLength > Vector128<byte>.Count)
+ if (Avx2.IsSupported && searchSpaceLength > Vector128<byte>.Count)
{
- if (Avx2.IsSupported)
+ Vector256<byte> bitmap256 = bitmapRef;
+
+ if (searchSpaceLength > Vector256<byte>.Count)
{
- Vector256<byte> bitmap256 = Vector256.Create(bitmap, bitmap);
+ // Process the input in chunks of 32 bytes.
+ // If the input length is a multiple of 32, don't consume the last 32 bytes in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
+ ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256<byte>.Count);
- if (searchSpaceLength > Vector256<byte>.Count)
+ do
{
- // Process the input in chunks of 32 bytes.
- // If the input length is a multiple of 32, don't consume the last 32 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
- ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256<byte>.Count);
+ Vector256<byte> source = Vector256.LoadUnsafe(ref currentSearchSpace);
- do
+ Vector256<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
+ if (result != Vector256<byte>.Zero)
{
- Vector256<byte> source = Vector256.LoadUnsafe(ref currentSearchSpace);
-
- Vector256<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
- if (result != Vector256<byte>.Zero)
- {
- return ComputeFirstIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
-
- currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256<byte>.Count);
+ return ComputeFirstIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
- while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd));
+
+ currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256<byte>.Count);
}
+ while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd));
+ }
- // We have 1-32 bytes remaining. Process the first and last half vectors in the search space.
- // They may overlap, but we'll handle that in the index calculation if we do get a match.
- Debug.Assert(searchSpaceLength >= Vector128<byte>.Count, "We expect that the input is long enough for us to load a Vector128.");
- {
- ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128<byte>.Count);
+ // We have 1-32 bytes remaining. Process the first and last half vectors in the search space.
+ // They may overlap, but we'll handle that in the index calculation if we do get a match.
+ Debug.Assert(searchSpaceLength >= Vector128<byte>.Count, "We expect that the input is long enough for us to load a Vector128.");
+ {
+ ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128<byte>.Count);
- ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd)
- ? ref halfVectorAwayFromEnd
- : ref currentSearchSpace;
+ ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd)
+ ? ref halfVectorAwayFromEnd
+ : ref currentSearchSpace;
- Vector128<byte> source0 = Vector128.LoadUnsafe(ref firstVector);
- Vector128<byte> source1 = Vector128.LoadUnsafe(ref halfVectorAwayFromEnd);
- Vector256<byte> source = Vector256.Create(source0, source1);
+ Vector128<byte> source0 = Vector128.LoadUnsafe(ref firstVector);
+ Vector128<byte> source1 = Vector128.LoadUnsafe(ref halfVectorAwayFromEnd);
+ Vector256<byte> source = Vector256.Create(source0, source1);
- Vector256<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
- if (result != Vector256<byte>.Zero)
- {
- return ComputeFirstIndexOverlapped<byte, TNegator>(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result);
- }
+ Vector256<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
+ if (result != Vector256<byte>.Zero)
+ {
+ return ComputeFirstIndexOverlapped<byte, TNegator>(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result);
}
-
- return -1;
}
- else
- {
- // Process the input in chunks of 16 bytes.
- // If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
- ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128<byte>.Count);
- do
- {
- Vector128<byte> source = Vector128.LoadUnsafe(ref currentSearchSpace);
+ return -1;
+ }
- Vector128<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap));
- if (result != Vector128<byte>.Zero)
- {
- return ComputeFirstIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
+ Vector128<byte> bitmap = bitmapRef._lower;
+
+ if (!Avx2.IsSupported && searchSpaceLength > Vector128<byte>.Count)
+ {
+ // Process the input in chunks of 16 bytes.
+ // If the input length is a multiple of 16, don't consume the last 16 bytes in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
+ ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128<byte>.Count);
- currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector128<byte>.Count);
+ do
+ {
+ Vector128<byte> source = Vector128.LoadUnsafe(ref currentSearchSpace);
+
+ Vector128<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap));
+ if (result != Vector128<byte>.Zero)
+ {
+ return ComputeFirstIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
- while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd));
+
+ currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector128<byte>.Count);
}
+ while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd));
}
// We have 1-16 bytes remaining. Process the first and last half vectors in the search space.
return -1;
}
- internal static int LastIndexOfAnyVectorized<TNegator>(ref byte searchSpace, int searchSpaceLength, Vector128<byte> bitmap)
+ internal static int LastIndexOfAnyVectorized<TNegator>(ref byte searchSpace, int searchSpaceLength, ref Vector256<byte> bitmapRef)
where TNegator : struct, INegator
{
ref byte currentSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
- if (searchSpaceLength > Vector128<byte>.Count)
+ if (Avx2.IsSupported && searchSpaceLength > Vector128<byte>.Count)
{
- if (Avx2.IsSupported)
- {
- Vector256<byte> bitmap256 = Vector256.Create(bitmap, bitmap);
-
- if (searchSpaceLength > Vector256<byte>.Count)
- {
- // Process the input in chunks of 32 bytes.
- // If the input length is a multiple of 32, don't consume the last 32 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
- ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256<byte>.Count);
-
- do
- {
- currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector256<byte>.Count);
+ Vector256<byte> bitmap256 = bitmapRef;
- Vector256<byte> source = Vector256.LoadUnsafe(ref currentSearchSpace);
-
- Vector256<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
- if (result != Vector256<byte>.Zero)
- {
- return ComputeLastIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
- }
- while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
- }
+ if (searchSpaceLength > Vector256<byte>.Count)
+ {
+ // Process the input in chunks of 32 bytes.
+ // If the input length is a multiple of 32, don't consume the last 32 bytes in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
+ ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256<byte>.Count);
- // We have 1-32 bytes remaining. Process the first and last half vectors in the search space.
- // They may overlap, but we'll handle that in the index calculation if we do get a match.
- Debug.Assert(searchSpaceLength >= Vector128<byte>.Count, "We expect that the input is long enough for us to load a Vector128.");
+ do
{
- ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128<byte>.Count);
+ currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector256<byte>.Count);
- ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart)
- ? ref Unsafe.Subtract(ref currentSearchSpace, Vector128<byte>.Count)
- : ref searchSpace;
-
- Vector128<byte> source0 = Vector128.LoadUnsafe(ref searchSpace);
- Vector128<byte> source1 = Vector128.LoadUnsafe(ref secondVector);
- Vector256<byte> source = Vector256.Create(source0, source1);
+ Vector256<byte> source = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
if (result != Vector256<byte>.Zero)
{
- return ComputeLastIndexOverlapped<byte, TNegator>(ref searchSpace, ref secondVector, result);
+ return ComputeLastIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
}
-
- return -1;
+ while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
}
- else
+
+ // We have 1-32 bytes remaining. Process the first and last half vectors in the search space.
+ // They may overlap, but we'll handle that in the index calculation if we do get a match.
+ Debug.Assert(searchSpaceLength >= Vector128<byte>.Count, "We expect that the input is long enough for us to load a Vector128.");
{
- // Process the input in chunks of 16 bytes.
- // If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
- ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128<byte>.Count);
+ ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128<byte>.Count);
- do
+ ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart)
+ ? ref Unsafe.Subtract(ref currentSearchSpace, Vector128<byte>.Count)
+ : ref searchSpace;
+
+ Vector128<byte> source0 = Vector128.LoadUnsafe(ref searchSpace);
+ Vector128<byte> source1 = Vector128.LoadUnsafe(ref secondVector);
+ Vector256<byte> source = Vector256.Create(source0, source1);
+
+ Vector256<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap256));
+ if (result != Vector256<byte>.Zero)
{
- currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector128<byte>.Count);
+ return ComputeLastIndexOverlapped<byte, TNegator>(ref searchSpace, ref secondVector, result);
+ }
+ }
- Vector128<byte> source = Vector128.LoadUnsafe(ref currentSearchSpace);
+ return -1;
+ }
- Vector128<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap));
- if (result != Vector128<byte>.Zero)
- {
- return ComputeLastIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
+ Vector128<byte> bitmap = bitmapRef._lower;
+
+ if (!Avx2.IsSupported && searchSpaceLength > Vector128<byte>.Count)
+ {
+ // Process the input in chunks of 16 bytes.
+ // If the input length is a multiple of 16, don't consume the last 16 bytes in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
+ ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128<byte>.Count);
+
+ do
+ {
+ currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector128<byte>.Count);
+
+ Vector128<byte> source = Vector128.LoadUnsafe(ref currentSearchSpace);
+
+ Vector128<byte> result = TNegator.NegateIfNeeded(IndexOfAnyLookupCore(source, bitmap));
+ if (result != Vector128<byte>.Zero)
+ {
+ return ComputeLastIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
- while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
}
+ while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
}
// We have 1-16 bytes remaining. Process the first and last half vectors in the search space.
return -1;
}
- internal static int IndexOfAnyVectorized<TNegator>(ref byte searchSpace, int searchSpaceLength, Vector128<byte> bitmap0, Vector128<byte> bitmap1)
+ internal static int IndexOfAnyVectorizedAnyByte<TNegator>(ref byte searchSpace, int searchSpaceLength, ref Vector512<byte> bitmapsRef)
where TNegator : struct, INegator
{
ref byte currentSearchSpace = ref searchSpace;
- if (searchSpaceLength > Vector128<byte>.Count)
+ if (Avx2.IsSupported && searchSpaceLength > Vector128<byte>.Count)
{
- if (Avx2.IsSupported)
+ Vector256<byte> bitmap256_0 = bitmapsRef._lower;
+ Vector256<byte> bitmap256_1 = bitmapsRef._upper;
+
+ if (searchSpaceLength > Vector256<byte>.Count)
{
- Vector256<byte> bitmap256_0 = Vector256.Create(bitmap0, bitmap0);
- Vector256<byte> bitmap256_1 = Vector256.Create(bitmap1, bitmap1);
+ // AVX2 path: process the input in chunks of 32 bytes (one Vector256 load per iteration).
+ // If the input length is a multiple of 32, don't consume the last 32 bytes in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
+ ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256<byte>.Count);
- if (searchSpaceLength > Vector256<byte>.Count)
+ do
{
- // Process the input in chunks of 32 bytes.
- // If the input length is a multiple of 32, don't consume the last 32 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
- ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector256<byte>.Count);
+ Vector256<byte> source = Vector256.LoadUnsafe(ref currentSearchSpace);
- do
+ Vector256<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap256_0, bitmap256_1);
+ if (result != Vector256<byte>.Zero)
{
- Vector256<byte> source = Vector256.LoadUnsafe(ref currentSearchSpace);
-
- Vector256<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap256_0, bitmap256_1);
- if (result != Vector256<byte>.Zero)
- {
- return ComputeFirstIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
-
- currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256<byte>.Count);
+ return ComputeFirstIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
- while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd));
+
+ currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector256<byte>.Count);
}
+ while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd));
+ }
- // We have 1-32 bytes remaining. Process the first and last half vectors in the search space.
- // They may overlap, but we'll handle that in the index calculation if we do get a match.
- Debug.Assert(searchSpaceLength >= Vector128<byte>.Count, "We expect that the input is long enough for us to load a Vector128.");
- {
- ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128<byte>.Count);
+ // We have 1-32 bytes remaining. Load the first and last 16-byte half vectors of what is left and check both with a single 256-bit lookup.
+ // They may overlap, but we'll handle that in the index calculation (ComputeFirstIndexOverlapped) if we do get a match.
+ Debug.Assert(searchSpaceLength >= Vector128<byte>.Count, "We expect that the input is long enough for us to load a Vector128.");
+ {
+ ref byte halfVectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128<byte>.Count);
- ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd)
- ? ref halfVectorAwayFromEnd
- : ref currentSearchSpace;
+ ref byte firstVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAwayFromEnd)
+ ? ref halfVectorAwayFromEnd
+ : ref currentSearchSpace;
- Vector128<byte> source0 = Vector128.LoadUnsafe(ref firstVector);
- Vector128<byte> source1 = Vector128.LoadUnsafe(ref halfVectorAwayFromEnd);
- Vector256<byte> source = Vector256.Create(source0, source1);
+ Vector128<byte> source0 = Vector128.LoadUnsafe(ref firstVector);
+ Vector128<byte> source1 = Vector128.LoadUnsafe(ref halfVectorAwayFromEnd);
+ Vector256<byte> source = Vector256.Create(source0, source1);
- Vector256<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap256_0, bitmap256_1);
- if (result != Vector256<byte>.Zero)
- {
- return ComputeFirstIndexOverlapped<byte, TNegator>(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result);
- }
+ Vector256<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap256_0, bitmap256_1);
+ if (result != Vector256<byte>.Zero)
+ {
+ return ComputeFirstIndexOverlapped<byte, TNegator>(ref searchSpace, ref firstVector, ref halfVectorAwayFromEnd, result);
}
-
- return -1;
}
- else
- {
- // Process the input in chunks of 16 bytes.
- // If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
- ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128<byte>.Count);
- do
- {
- Vector128<byte> source = Vector128.LoadUnsafe(ref currentSearchSpace);
+ return -1;
+ }
- Vector128<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap0, bitmap1);
- if (result != Vector128<byte>.Zero)
- {
- return ComputeFirstIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
+ Vector128<byte> bitmap0 = bitmapsRef._lower._lower;
+ Vector128<byte> bitmap1 = bitmapsRef._upper._lower;
+
+ if (!Avx2.IsSupported && searchSpaceLength > Vector128<byte>.Count)
+ {
+ // Non-AVX2 path: process the input in chunks of 16 bytes (one Vector128 load per iteration).
+ // If the input length is a multiple of 16, don't consume the last 16 bytes in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressLessThan" is used instead of "!IsAddressGreaterThan".
+ ref byte vectorAwayFromEnd = ref Unsafe.Add(ref searchSpace, searchSpaceLength - Vector128<byte>.Count);
- currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector128<byte>.Count);
+ do
+ {
+ Vector128<byte> source = Vector128.LoadUnsafe(ref currentSearchSpace);
+
+ Vector128<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap0, bitmap1);
+ if (result != Vector128<byte>.Zero)
+ {
+ return ComputeFirstIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
- while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd));
+
+ currentSearchSpace = ref Unsafe.Add(ref currentSearchSpace, Vector128<byte>.Count);
}
+ while (Unsafe.IsAddressLessThan(ref currentSearchSpace, ref vectorAwayFromEnd));
}
// We have 1-16 bytes remaining. Process the first and last half vectors in the search space.
return -1;
}
- internal static int LastIndexOfAnyVectorized<TNegator>(ref byte searchSpace, int searchSpaceLength, Vector128<byte> bitmap0, Vector128<byte> bitmap1)
+ internal static int LastIndexOfAnyVectorizedAnyByte<TNegator>(ref byte searchSpace, int searchSpaceLength, ref Vector512<byte> bitmapsRef)
where TNegator : struct, INegator
{
ref byte currentSearchSpace = ref Unsafe.Add(ref searchSpace, searchSpaceLength);
- if (searchSpaceLength > Vector128<byte>.Count)
+ if (Avx2.IsSupported && searchSpaceLength > Vector128<byte>.Count)
{
- if (Avx2.IsSupported)
- {
- Vector256<byte> bitmap256_0 = Vector256.Create(bitmap0, bitmap0);
- Vector256<byte> bitmap256_1 = Vector256.Create(bitmap1, bitmap1);
-
- if (searchSpaceLength > Vector256<byte>.Count)
- {
- // Process the input in chunks of 32 bytes.
- // If the input length is a multiple of 32, don't consume the last 32 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
- ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256<byte>.Count);
-
- do
- {
- currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector256<byte>.Count);
-
- Vector256<byte> source = Vector256.LoadUnsafe(ref currentSearchSpace);
+ Vector256<byte> bitmap256_0 = bitmapsRef._lower;
+ Vector256<byte> bitmap256_1 = bitmapsRef._upper;
- Vector256<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap256_0, bitmap256_1);
- if (result != Vector256<byte>.Zero)
- {
- return ComputeLastIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
- }
- while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
- }
+ if (searchSpaceLength > Vector256<byte>.Count)
+ {
+ // Process the input in chunks of 32 bytes, scanning backwards from the end.
+ // If the input length is a multiple of 32, don't consume the final 32 bytes of the scan (the first 32 bytes of the input) in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
+ ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector256<byte>.Count);
- // We have 1-32 bytes remaining. Process the first and last half vectors in the search space.
- // They may overlap, but we'll handle that in the index calculation if we do get a match.
- Debug.Assert(searchSpaceLength >= Vector128<byte>.Count, "We expect that the input is long enough for us to load a Vector128.");
+ do
{
- ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128<byte>.Count);
-
- ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart)
- ? ref Unsafe.Subtract(ref currentSearchSpace, Vector128<byte>.Count)
- : ref searchSpace;
+ currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector256<byte>.Count);
- Vector128<byte> source0 = Vector128.LoadUnsafe(ref searchSpace);
- Vector128<byte> source1 = Vector128.LoadUnsafe(ref secondVector);
- Vector256<byte> source = Vector256.Create(source0, source1);
+ Vector256<byte> source = Vector256.LoadUnsafe(ref currentSearchSpace);
Vector256<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap256_0, bitmap256_1);
if (result != Vector256<byte>.Zero)
{
- return ComputeLastIndexOverlapped<byte, TNegator>(ref searchSpace, ref secondVector, result);
+ return ComputeLastIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
}
-
- return -1;
+ while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
}
- else
+
+ // We have 1-32 bytes remaining. Process the first and last half vectors in the search space.
+ // They may overlap, but we'll handle that in the index calculation if we do get a match.
+ Debug.Assert(searchSpaceLength >= Vector128<byte>.Count, "We expect that the input is long enough for us to load a Vector128.");
{
- // Process the input in chunks of 16 bytes.
- // If the input length is a multiple of 16, don't consume the last 16 characters in this loop.
- // Let the fallback below handle it instead. This is why the condition is
- // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
- ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128<byte>.Count);
+ ref byte halfVectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128<byte>.Count);
- do
+ ref byte secondVector = ref Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref halfVectorAfterStart)
+ ? ref Unsafe.Subtract(ref currentSearchSpace, Vector128<byte>.Count)
+ : ref searchSpace;
+
+ Vector128<byte> source0 = Vector128.LoadUnsafe(ref searchSpace);
+ Vector128<byte> source1 = Vector128.LoadUnsafe(ref secondVector);
+ Vector256<byte> source = Vector256.Create(source0, source1);
+
+ Vector256<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap256_0, bitmap256_1);
+ if (result != Vector256<byte>.Zero)
{
- currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector128<byte>.Count);
+ return ComputeLastIndexOverlapped<byte, TNegator>(ref searchSpace, ref secondVector, result);
+ }
+ }
- Vector128<byte> source = Vector128.LoadUnsafe(ref currentSearchSpace);
+ return -1;
+ }
- Vector128<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap0, bitmap1);
- if (result != Vector128<byte>.Zero)
- {
- return ComputeLastIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
- }
+ Vector128<byte> bitmap0 = bitmapsRef._lower._lower;
+ Vector128<byte> bitmap1 = bitmapsRef._upper._lower;
+
+ if (!Avx2.IsSupported && searchSpaceLength > Vector128<byte>.Count)
+ {
+ // Process the input in chunks of 16 bytes, scanning backwards from the end.
+ // If the input length is a multiple of 16, don't consume the final 16 bytes of the scan (the first 16 bytes of the input) in this loop.
+ // Let the fallback below handle it instead. This is why the condition is
+ // ">" instead of ">=" above, and why "IsAddressGreaterThan" is used instead of "!IsAddressLessThan".
+ ref byte vectorAfterStart = ref Unsafe.Add(ref searchSpace, Vector128<byte>.Count);
+
+ do
+ {
+ currentSearchSpace = ref Unsafe.Subtract(ref currentSearchSpace, Vector128<byte>.Count);
+
+ Vector128<byte> source = Vector128.LoadUnsafe(ref currentSearchSpace);
+
+ Vector128<byte> result = IndexOfAnyLookup<TNegator>(source, bitmap0, bitmap1);
+ if (result != Vector128<byte>.Zero)
+ {
+ return ComputeLastIndex<byte, TNegator>(ref searchSpace, ref currentSearchSpace, result);
}
- while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
}
+ while (Unsafe.IsAddressGreaterThan(ref currentSearchSpace, ref vectorAfterStart));
}
// We have 1-16 bytes remaining. Process the first and last half vectors in the search space.