From 9d88a948ba3c18a9bea2217ad51828913701c2eb Mon Sep 17 00:00:00 2001 From: Carlos Sanchez Lopez <1175054+carlossanlop@users.noreply.github.com> Date: Mon, 20 Jul 2020 15:26:22 -0700 Subject: [PATCH] AdvSimd support for System.Text.Unicode.Utf8Utility.GetPointerToFirstInvalidByte (#38653) * AdvSimd support for System.Text.Unicode.Utf8Utility.GetPointerToFirstInvalidByte * Move comment to the top, add shims. * Little endian checks * Use custom MoveMask method for AdvSimd * Address suggestions to improve the AdvSimdMoveMask method * Define initialMask outside MoveMask method * UInt64 in Arm64MoveMask * Add unit test case to verify intrinsics improvement * Avoid casting to smaller integer type * Typo and comment * Use ShiftRightArithmetic instead of CompareEqual + And. Remove test case causing other unit tests to fail. * Use AddPairwise version of GetNotAsciiBytes * Add missing shims causing Linux build to fail * Simplify GetNonAsciiBytes to only one AddPairwise call, shorter bitmask * Respect data type returned by masking method * Address suggestions - assert trailingzerocount and bring back uint mask * Trailing zeroes in AdvSimd need to be divided by 4, and total number should not be larger than 16 * Avoid declaring static field which causes PNSE in Utf8String.Experimental (S.P.Corelib code is used for being NetStandard) * Prefer using nuint for BitConverter.TrailingZeroCount --- .../System/Text/Unicode/Utf8Utility.Validation.cs | 64 ++++++++++++++++------ 1 file changed, 47 insertions(+), 17 deletions(-) diff --git a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs index 33e5181..7730708 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Text/Unicode/Utf8Utility.Validation.cs @@ -4,6 +4,8 @@ using System.Diagnostics; using 
System.Numerics; using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; using System.Runtime.Intrinsics.X86; #if SYSTEM_PRIVATE_CORELIB @@ -117,22 +119,35 @@ namespace System.Text.Unicode // the alignment check consumes at most a single DWORD.) byte* pInputBufferFinalPosAtWhichCanSafelyLoop = pFinalPosWhereCanReadDWordFromInputBuffer - 3 * sizeof(uint); // can safely read 4 DWORDs here - uint mask; + nuint trailingZeroCount; + + Vector128<byte> bitMask128 = BitConverter.IsLittleEndian ? + Vector128.Create((ushort)0x1001).AsByte() : + Vector128.Create((ushort)0x0110).AsByte(); do { - if (Sse2.IsSupported) + // pInputBuffer is 32-bit aligned but not necessary 128-bit aligned, so we're + // going to perform an unaligned load. We don't necessarily care about aligning + // this because we pessimistically assume we'll encounter non-ASCII data at some + // point in the not-too-distant future (otherwise we would've stayed entirely + // within the all-ASCII vectorized code at the entry to this method). + if (AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) { - // pInputBuffer is 32-bit aligned but not necessary 128-bit aligned, so we're - // going to perform an unaligned load. We don't necessarily care about aligning - // this because we pessimistically assume we'll encounter non-ASCII data at some - // point in the not-too-distant future (otherwise we would've stayed entirely - // within the all-ASCII vectorized code at the entry to this method). 
- - mask = (uint)Sse2.MoveMask(Sse2.LoadVector128((byte*)pInputBuffer)); + ulong mask = GetNonAsciiBytes(AdvSimd.LoadVector128(pInputBuffer), bitMask128); + if (mask != 0) + { + trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask) >> 2; + goto LoopTerminatedEarlyDueToNonAsciiData; + } + } + else if (Sse2.IsSupported) + { + uint mask = (uint)Sse2.MoveMask(Sse2.LoadVector128(pInputBuffer)); if (mask != 0) { - goto Sse2LoopTerminatedEarlyDueToNonAsciiData; + trailingZeroCount = (nuint)BitOperations.TrailingZeroCount(mask); + goto LoopTerminatedEarlyDueToNonAsciiData; } } else @@ -153,19 +168,20 @@ namespace System.Text.Unicode continue; // need to perform a bounds check because we might be running out of data - Sse2LoopTerminatedEarlyDueToNonAsciiData: + LoopTerminatedEarlyDueToNonAsciiData: + // x86 can only be little endian, while ARM can be big or little endian + // so if we reached this label we need to check both combinations are supported + Debug.Assert((AdvSimd.Arm64.IsSupported && BitConverter.IsLittleEndian) || Sse2.IsSupported); - Debug.Assert(BitConverter.IsLittleEndian); - Debug.Assert(Sse2.IsSupported); // The 'mask' value will have a 0 bit for each ASCII byte we saw and a 1 bit - // for each non-ASCII byte we saw. We can count the number of ASCII bytes, + // for each non-ASCII byte we saw. trailingZeroCount will count the number of ASCII bytes, // bump our input counter by that amount, and resume processing from the // "the first byte is no longer ASCII" portion of the main loop. + // We should not expect a total number of zeroes equal or larger than 16. 
+ Debug.Assert(trailingZeroCount < 16); - Debug.Assert(mask != 0); - - pInputBuffer += BitOperations.TrailingZeroCount(mask); + pInputBuffer += trailingZeroCount; if (pInputBuffer > pFinalPosWhereCanReadDWordFromInputBuffer) { goto ProcessRemainingBytesSlow; @@ -719,5 +735,19 @@ namespace System.Text.Unicode scalarCountAdjustment = tempScalarCountAdjustment; return pInputBuffer; } + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + private static ulong GetNonAsciiBytes(Vector128<byte> value, Vector128<byte> bitMask128) // packs the per-byte "high bit set" flags of 'value', filtered through 'bitMask128', into a ulong + { + if (!AdvSimd.Arm64.IsSupported || !BitConverter.IsLittleEndian) // only the little-endian Arm64 AdvSimd path is implemented + { + throw new PlatformNotSupportedException(); + } + + Vector128<byte> mostSignificantBitIsSet = AdvSimd.ShiftRightArithmetic(value.AsSByte(), 7).AsByte(); // arithmetic shift replicates the sign bit: 0xFF where the byte's high bit was set, 0x00 otherwise + Vector128<byte> extractedBits = AdvSimd.And(mostSignificantBitIsSet, bitMask128); // keep only the bits that bitMask128 assigns to each byte + extractedBits = AdvSimd.Arm64.AddPairwise(extractedBits, extractedBits); // sums adjacent byte pairs; with both operands equal, the low 8 bytes cover all 16 input bytes + return extractedBits.AsUInt64().ToScalar(); // element 0, i.e. the low 64 bits + } } } -- 2.7.4