From b70829298deab33c60bb83b4cbdfa5413aef45c2 Mon Sep 17 00:00:00 2001 From: Egor Bogatov Date: Sun, 5 Feb 2023 19:55:37 +0100 Subject: [PATCH] Vectorize Guid.ToString (#81650) Co-authored-by: Miha Zupan Co-authored-by: Stephen Toub --- .../System.Private.CoreLib/src/System/Guid.cs | 67 ++++++++++++++++++++++ 1 file changed, 67 insertions(+) diff --git a/src/libraries/System.Private.CoreLib/src/System/Guid.cs b/src/libraries/System.Private.CoreLib/src/System/Guid.cs index 1ce66bb..1c16da3 100644 --- a/src/libraries/System.Private.CoreLib/src/System/Guid.cs +++ b/src/libraries/System.Private.CoreLib/src/System/Guid.cs @@ -9,6 +9,8 @@ using System.Numerics; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Runtime.Intrinsics; +using System.Runtime.Intrinsics.Arm; +using System.Runtime.Intrinsics.X86; using System.Runtime.Versioning; namespace System @@ -1203,8 +1205,73 @@ namespace System p += HexsToCharsHexOutput(p, _j, _k); *p++ = '}'; } + else if ((Ssse3.IsSupported || AdvSimd.Arm64.IsSupported) && BitConverter.IsLittleEndian) + { + // Vectorized implementation for D, N, P and B formats: + // [{|(]dddddddd[-]dddd[-]dddd[-]dddd[-]dddddddddddd[}|)] + + Vector128<byte> srcVec = Unsafe.As<Guid, Vector128<byte>>(ref Unsafe.AsRef(in this)); + + // The algorithm is simple: a single srcVec (contains the whole 16-byte Guid) is split + // into nibbles, and each nibble is then mapped to its hex character by looking it up in hexMap via + // Shuffle(hexMap, nibbles). ASCII is then expanded to UTF-16.
+ Vector128<byte> hexMap = Vector128.Create("0123456789abcdef"u8); + Vector128<byte> nibbles = Vector128.ShiftRightLogical(srcVec.AsUInt64(), 4).AsByte(); + Vector128<byte> lowNibbles = UnpackLow(nibbles, srcVec) & Vector128.Create((byte)0xF); + Vector128<byte> highNibbles = UnpackHigh(nibbles, srcVec) & Vector128.Create((byte)0xF); + (Vector128<ushort> v0, Vector128<ushort> v1) = Vector128.Widen(Shuffle(hexMap, lowNibbles)); + (Vector128<ushort> v2, Vector128<ushort> v3) = Vector128.Widen(Shuffle(hexMap, highNibbles)); + + // Because of Guid's layout (int _a, short _b, _c, byte ...) + // we have to handle v0 and v1 separately: + v0 = Vector128.Shuffle(v0.AsInt32(), Vector128.Create(3, 2, 1, 0)).AsUInt16(); + v1 = Vector128.Shuffle(v1.AsInt32(), Vector128.Create(1, 0, 3, 2)).AsUInt16(); + + ushort* pChar = (ushort*)p; + if (dash) + { + // v0v0v0v0-v1v1-v1v1-v2v2-v2v2v3v3v3v3 + v0.Store(pChar + 0); + v1.Store(pChar + 9); + v1 = Vector128.Shuffle(v1.AsInt64(), Vector128.Create(1, 0)).AsUInt16(); + v1.Store(pChar + 14); + v2.Store(pChar + 19); + v2 = Vector128.Shuffle(v2.AsInt64(), Vector128.Create(1, 0)).AsUInt16(); + v2.Store(pChar + 24); + v3.Store(pChar + 28); + pChar[8] = pChar[13] = pChar[18] = pChar[23] = '-'; + + // We could be smarter here by doing only 5 SIMD stores + permutations + // but extra complexity is not worth it according to benchmarks + p += 36; + } + else + { + // v0v0v0v0v1v1v1v1v2v2v2v2v3v3v3v3 + v0.Store(pChar + 0); + v1.Store(pChar + 8); + v2.Store(pChar + 16); + v3.Store(pChar + 24); + p += 32; + } + + // https://github.com/dotnet/runtime/issues/81609 + // VectorTableLookup is not exactly the same but it doesn't matter for the given use case + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128<byte> Shuffle(Vector128<byte> value, Vector128<byte> mask) => + Ssse3.IsSupported ? Ssse3.Shuffle(value, mask) : AdvSimd.Arm64.VectorTableLookup(value, mask); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128<byte> UnpackLow(Vector128<byte> left, Vector128<byte> right) => + Sse2.IsSupported ?
Sse2.UnpackLow(left, right) : AdvSimd.Arm64.ZipLow(left, right); + + [MethodImpl(MethodImplOptions.AggressiveInlining)] + static Vector128<byte> UnpackHigh(Vector128<byte> left, Vector128<byte> right) => + Sse2.IsSupported ? Sse2.UnpackHigh(left, right) : AdvSimd.Arm64.ZipHigh(left, right); + } else { + // Non-vectorized fallback for D, N, P and B formats: // [{|(]dddddddd[-]dddd[-]dddd[-]dddd[-]dddddddddddd[}|)] p += HexsToChars(p, _a >> 24, _a >> 16); p += HexsToChars(p, _a >> 8, _a); -- 2.7.4