Vectorize Convert.FromHexString (#82521)
author Egor Bogatov <egorbo@gmail.com>
Fri, 31 Mar 2023 19:02:21 +0000 (21:02 +0200)
committer GitHub <noreply@github.com>
Fri, 31 Mar 2023 19:02:21 +0000 (21:02 +0200)
Co-authored-by: Günther Foidl <gue@korporal.at>
THIRD-PARTY-NOTICES.TXT
src/libraries/Common/src/System/HexConverter.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
src/libraries/System.Runtime.Extensions/tests/System/Convert.FromHexString.cs

index feb4d4f..f60a240 100644 (file)
@@ -374,6 +374,36 @@ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+License notice for vectorized hex parsing
+--------------------------------------------------------
+
+Copyright (c) 2022, Geoff Langdale
+Copyright (c) 2022, Wojciech Mula
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+- Redistributions of source code must retain the above copyright notice,
+  this list of conditions and the following disclaimer.
+
+- Redistributions in binary form must reproduce the above copyright
+  notice, this list of conditions and the following disclaimer in the
+  documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
 License notice for RFC 3492
 ---------------------------
 
index 81b5697..b80e404 100644 (file)
@@ -4,10 +4,12 @@
 using System.Diagnostics;
 using System.Runtime.CompilerServices;
 #if SYSTEM_PRIVATE_CORELIB
+using System.Buffers.Binary;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.Arm;
 using System.Runtime.Intrinsics.X86;
+using System.Text.Unicode;
 #endif
 
 namespace System
@@ -223,9 +225,99 @@ namespace System
 
         public static bool TryDecodeFromUtf16(ReadOnlySpan<char> chars, Span<byte> bytes)
         {
+#if SYSTEM_PRIVATE_CORELIB
+            if (BitConverter.IsLittleEndian && (Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) &&
+                chars.Length >= Vector128<ushort>.Count * 2) // the vectorized routine loads two Vector128<ushort> per iteration, so it needs at least that many chars
+            {
+                return TryDecodeFromUtf16_Vector128(chars, bytes); // hands the failing chunk to the scalar routine itself when validation fails
+            }
+#endif
             return TryDecodeFromUtf16(chars, bytes, out _);
         }
 
+#if SYSTEM_PRIVATE_CORELIB
+        public static bool TryDecodeFromUtf16_Vector128(ReadOnlySpan<char> chars, Span<byte> bytes)
+        {
+            Debug.Assert(Ssse3.IsSupported || AdvSimd.Arm64.IsSupported);
+            Debug.Assert(chars.Length <= bytes.Length * 2);
+            Debug.Assert(chars.Length % 2 == 0);
+            Debug.Assert(chars.Length >= Vector128<ushort>.Count * 2);
+
+            nuint offset = 0;
+            nuint lengthSubTwoVector128 = (nuint)chars.Length - ((nuint)Vector128<ushort>.Count * 2);
+
+            ref ushort srcRef = ref Unsafe.As<char, ushort>(ref MemoryMarshal.GetReference(chars));
+            ref byte destRef = ref MemoryMarshal.GetReference(bytes);
+
+            do
+            {
+                // The algorithm operates on single-byte (ASCII/UTF-8) input, so we load two UTF-16 vectors and narrow
+                // them into one vector of bytes - the implementation can then be shared with UTF-8 paths.
+                Vector128<ushort> vec1 = Vector128.LoadUnsafe(ref srcRef, offset);
+                Vector128<ushort> vec2 = Vector128.LoadUnsafe(ref srcRef, offset + (nuint)Vector128<ushort>.Count);
+                Vector128<byte> vec = Vector128.Narrow(vec1, vec2);
+
+                // Based on "Algorithm #3" https://github.com/WojciechMula/toys/blob/master/simd-parse-hex/geoff_algorithm.cpp
+                // by Geoff Langdale and Wojciech Mula
+                // Move digits '0'..'9' into the range 0xf6..0xff.
+                Vector128<byte> t1 = vec + Vector128.Create((byte)(0xFF - '9'));
+                // Then correct the range to 0xf0..0xf9.
+                // All other bytes become less than 0xf0.
+                Vector128<byte> t2 = Vector128.SubtractSaturate(t1, Vector128.Create((byte)6));
+                // Convert to uppercase ('a'..'f' => 'A'..'F') and
+                // move the hex letters 'A'..'F' into the range 0..5.
+                Vector128<byte> t3 = (vec & Vector128.Create((byte)0xDF)) - Vector128.Create((byte)'A');
+                // Then correct the range to 10..15.
+                // Bytes that are not hex letters become greater than 0x0f.
+                Vector128<byte> t4 = Vector128.AddSaturate(t3, Vector128.Create((byte)10));
+                // Convert '0'..'9' into nibbles 0..9; non-digit bytes become
+                // greater than 0x0f. Finally choose the result: either a valid nibble (0..9/10..15)
+                // or some byte greater than 0x0f.
+                Vector128<byte> nibbles = Vector128.Min(t2 - Vector128.Create((byte)0xF0), t4);
+                // A valid nibble is at most 0x0f, so the saturating add of (127 - 15) sets the high bit only for invalid bytes
+                if (!Utf16Utility.AllCharsInVector128AreAscii(vec1 | vec2) ||
+                    Vector128.AddSaturate(nibbles, Vector128.Create((byte)(127 - 15))).ExtractMostSignificantBits() != 0)
+                {
+                    // The input is either non-ASCII or not valid hex data
+                    break;
+                }
+                Vector128<byte> output;
+                if (Ssse3.IsSupported)
+                {
+                    output = Ssse3.MultiplyAddAdjacent(nibbles,
+                        Vector128.Create((short)0x0110).AsSByte()).AsByte();
+                }
+                else
+                {
+                    // Workaround for the missing MultiplyAddAdjacent on ARM: compute (even << 4) + odd per 16-bit lane
+                    Vector128<short> even = AdvSimd.Arm64.TransposeEven(nibbles, Vector128<byte>.Zero).AsInt16();
+                    Vector128<short> odd = AdvSimd.Arm64.TransposeOdd(nibbles, Vector128<byte>.Zero).AsInt16();
+                    even = AdvSimd.ShiftLeftLogical(even, 4).AsInt16();
+                    output = AdvSimd.AddSaturate(even, odd).AsByte();
+                }
+                // Pack the even-indexed bytes (the low byte of each 16-bit lane on little-endian) into the lower 64 bits
+                output = Vector128.Shuffle(output, Vector128.Create((byte)0, 2, 4, 6, 8, 10, 12, 14, 0, 0, 0, 0, 0, 0, 0, 0));
+                // Store the 8 decoded bytes into dest at offset / 2 (two hex chars per output byte)
+                Unsafe.WriteUnaligned(ref Unsafe.Add(ref destRef, offset / 2), output.AsUInt64().ToScalar());
+
+                offset += (nuint)Vector128<ushort>.Count * 2;
+                if (offset == (nuint)chars.Length)
+                {
+                    return true;
+                }
+                // Less than a full chunk remains: step back so the final iteration overlaps the previous chunk and ends at chars.Length
+                if (offset > lengthSubTwoVector128)
+                {
+                    offset = lengthSubTwoVector128;
+                }
+            }
+            while (true);
+
+            // Fall back to the scalar routine, starting at the chunk that failed validation.
+            return TryDecodeFromUtf16(chars.Slice((int)offset), bytes.Slice((int)(offset / 2)), out _);
+        }
+#endif
+
         public static bool TryDecodeFromUtf16(ReadOnlySpan<char> chars, Span<byte> bytes, out int charsProcessed)
         {
             Debug.Assert(chars.Length % 2 == 0, "Un-even number of characters provided");
index 69426ca..d1c7d5e 100644 (file)
@@ -3244,5 +3244,35 @@ namespace System.Runtime.Intrinsics
             }
             return AdvSimd.Arm64.ZipHigh(left, right);
         }
+
+        // TODO: Make generic versions of these public, see https://github.com/dotnet/runtime/issues/82559
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector128<byte> AddSaturate(Vector128<byte> left, Vector128<byte> right)
+        {
+            if (Sse2.IsSupported) // prefer the SSE2 saturating byte add when available
+            {
+                return Sse2.AddSaturate(left, right);
+            }
+            else if (!AdvSimd.Arm64.IsSupported) // neither SSE2 nor AdvSimd.Arm64: no implementation to dispatch to
+            {
+                ThrowHelper.ThrowNotSupportedException();
+            }
+            return AdvSimd.AddSaturate(left, right); // AdvSimd.Arm64 is supported here, assuming ThrowNotSupportedException does not return - TODO confirm
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector128<byte> SubtractSaturate(Vector128<byte> left, Vector128<byte> right)
+        {
+            if (Sse2.IsSupported) // prefer the SSE2 saturating byte subtract when available
+            {
+                return Sse2.SubtractSaturate(left, right);
+            }
+            else if (!AdvSimd.Arm64.IsSupported) // neither SSE2 nor AdvSimd.Arm64: no implementation to dispatch to
+            {
+                ThrowHelper.ThrowNotSupportedException();
+            }
+            return AdvSimd.SubtractSaturate(left, right); // AdvSimd.Arm64 is supported here, assuming ThrowNotSupportedException does not return - TODO confirm
+        }
     }
 }
index da7a83a..6ed2c2e 100644 (file)
@@ -102,5 +102,25 @@ namespace System.Tests
         {
             Assert.Same(Array.Empty<byte>(), Convert.FromHexString(string.Empty));
         }
+
+        [Fact]
+        public static void ToHexFromHexRoundtrip()
+        {
+            for (int i = 1; i < 50; i++) // payload sizes of 1..49 random bytes
+            {
+                byte[] data = System.Security.Cryptography.RandomNumberGenerator.GetBytes(i);
+                string hex = Convert.ToHexString(data);
+                Assert.Equal(data, Convert.FromHexString(hex.ToLowerInvariant())); // all-lowercase digits
+                Assert.Equal(data, Convert.FromHexString(hex.ToUpperInvariant())); // all-uppercase digits
+                string mixedCase1 = hex.Substring(0, hex.Length / 2).ToUpperInvariant() + // first half upper, second half lower
+                                    hex.Substring(hex.Length / 2).ToLowerInvariant();
+                string mixedCase2 = hex.Substring(0, hex.Length / 2).ToLowerInvariant() + // first half lower, second half upper
+                                    hex.Substring(hex.Length / 2).ToUpperInvariant();
+                Assert.Equal(data, Convert.FromHexString(mixedCase1));
+                Assert.Equal(data, Convert.FromHexString(mixedCase2));
+                Assert.Throws<FormatException>(() => Convert.FromHexString(hex + "  ")); // two trailing spaces: length stays even, but they are not hex digits
+                Assert.Throws<FormatException>(() => Convert.FromHexString("\uAAAA" + hex)); // one leading non-ASCII char (which also makes the length odd)
+            }
+        }
     }
 }