From 615a321f5226e0d9389741f4fcec9b6944e319cb Mon Sep 17 00:00:00 2001 From: Tanner Gooding Date: Fri, 7 Dec 2018 15:01:23 -0800 Subject: [PATCH] Moving various Vector128/256 helper method to be implemented using other intrinsics (#21432) * Updating Vector128.CreateScalar and Vector256.CreateScalar to be implemented using other intrinsics * Updating Vector128.Equals and Vector256.Equals to be implemented using other intrinsics * Updating Vector256.WithLower, Vector256.GetUpper, and Vector256.WithUpper to be implemented using other intrinsics * Updating Vector128.Create(T) and Vector256.Create(T) to be implemented using other intrinsics * Fixing the `NI_Base_Vector256_As` intrinsics to only fold the cast if AVX is supported and add a clarifying comment to the Vector128/256.Equals code * Changing the various `*Software()` local functions in Vector128/256 to be `SoftwareFallback()` --- .../shared/System/Runtime/Intrinsics/Vector128.cs | 586 +++++++++++++---- .../System/Runtime/Intrinsics/Vector128_1.cs | 51 +- .../shared/System/Runtime/Intrinsics/Vector256.cs | 694 +++++++++++++++------ .../System/Runtime/Intrinsics/Vector256_1.cs | 119 +++- src/jit/importer.cpp | 36 +- 5 files changed, 1132 insertions(+), 354 deletions(-) diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs index 445ccb9..b877bbe 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs @@ -3,10 +3,22 @@ // See the LICENSE file in the project root for more information. using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics.X86; using Internal.Runtime.CompilerServices; namespace System.Runtime.Intrinsics { + // We mark certain methods with AggressiveInlining to ensure that the JIT will + // inline them. 
The JIT would otherwise not inline the method since it, at the + // point it tries to determine inline profitability, currently cannot determine + // that most of the code-paths will be optimized away as "dead code". + // + // We then manually inline cases (such as certain intrinsic code-paths) that + // will generate code small enough to make the AggressiveInlining profitable. The + // other cases (such as the software fallback) are placed in their own method. + // This ensures we get good codegen for the "fast-path" and allows the JIT to + // determine inline profitability of the other paths as it would normally. + public static class Vector128 { internal const int Size = 16; @@ -14,191 +26,399 @@ namespace System.Runtime.Intrinsics /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 Create(byte value) { - var pResult = stackalloc byte[16] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Avx2.BroadcastScalarToVector128(result); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Ssse3.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Ssse3.Shuffle(result, Vector128.Zero); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } + + if (Sse2.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + result = Sse2.UnpackLow(result, result); // < v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
> + result = Sse2.UnpackLow(result.AsUInt16(), result.AsUInt16()).AsByte(); // < v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Sse2.Shuffle(result.AsUInt32(), 0x00).AsByte(); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(byte x) + { + var pResult = stackalloc byte[16] + { + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 Create(double value) { - var pResult = stackalloc double[2] + if (Sse3.IsSupported) { - value, - value, - }; + Vector128 result = CreateScalarUnsafe(value); // < v, ? > + return Sse3.MoveAndDuplicate(result); // < v, v > + } - return Unsafe.AsRef>(pResult); + if (Sse.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ? > + return Sse.MoveLowToHigh(result.AsSingle(), result.AsSingle()).AsDouble(); // < v, v > + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(double x) + { + var pResult = stackalloc double[2] + { + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 Create(short value) { - var pResult = stackalloc short[8] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ? 
> + return Avx2.BroadcastScalarToVector128(result); // < v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Sse2.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ? > + result = Sse2.UnpackLow(result, result); // < v, v, ?, ?, ?, ?, ?, ? > + return Sse2.Shuffle(result.AsInt32(), 0x00).AsInt16(); // < v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(short x) + { + var pResult = stackalloc short[8] + { + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 Create(int value) { - var pResult = stackalloc int[4] + if (Avx2.IsSupported) { - value, - value, - value, - value, - }; + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ? > + return Avx2.BroadcastScalarToVector128(result); // < v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Sse2.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ? > + return Sse2.Shuffle(result, 0x00); // < v, v, v, v > + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(int x) + { + var pResult = stackalloc int[4] + { + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 Create(long value) { - var pResult = stackalloc long[2] + if (Sse2.X64.IsSupported) { - value, - value, - }; - - return Unsafe.AsRef>(pResult); + if (Avx2.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ? 
> + return Avx2.BroadcastScalarToVector128(result); // < v, v > + } + else + { + Vector128 result = CreateScalarUnsafe(value); // < v, ? > + return Sse2.UnpackLow(result, result); // < v, v > + } + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(long x) + { + var pResult = stackalloc long[2] + { + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector128 Create(sbyte value) { - var pResult = stackalloc sbyte[16] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Avx2.BroadcastScalarToVector128(result); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Ssse3.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Ssse3.Shuffle(result, Vector128.Zero); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } + + if (Sse2.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + result = Sse2.UnpackLow(result, result); // < v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + result = Sse2.UnpackLow(result.AsInt16(), result.AsInt16()).AsSByte(); // < v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? 
> + return Sse2.Shuffle(result.AsInt32(), 0x00).AsSByte(); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(sbyte x) + { + var pResult = stackalloc sbyte[16] + { + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 Create(float value) { - var pResult = stackalloc float[4] + if (Avx2.IsSupported) { - value, - value, - value, - value, - }; + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ? > + return Avx2.BroadcastScalarToVector128(result); // < v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Avx.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ? > + return Avx.Permute(result, 0x00); // < v, v, v, v > + } + + if (Sse.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ? > + return Sse.Shuffle(result, result, 0x00); // < v, v, v, v > + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(float x) + { + var pResult = stackalloc float[4] + { + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector128 Create(ushort value) { - var pResult = stackalloc ushort[8] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ? 
> + return Avx2.BroadcastScalarToVector128(result); // < v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Sse2.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ? > + result = Sse2.UnpackLow(result, result); // < v, v, ?, ?, ?, ?, ?, ? > + return Sse2.Shuffle(result.AsUInt32(), 0x00).AsUInt16(); // < v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(ushort x) + { + var pResult = stackalloc ushort[8] + { + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector128 Create(uint value) { - var pResult = stackalloc uint[4] + if (Avx2.IsSupported) { - value, - value, - value, - value, - }; + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ? > + return Avx2.BroadcastScalarToVector128(result); // < v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Sse2.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ?, ?, ? > + return Sse2.Shuffle(result, 0x00); // < v, v, v, v > + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(uint x) + { + var pResult = stackalloc uint[4] + { + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector128 Create(ulong value) { - var pResult = stackalloc ulong[2] + if (Sse2.X64.IsSupported) { - value, - value, - }; - - return Unsafe.AsRef>(pResult); + if (Avx2.IsSupported) + { + Vector128 result = CreateScalarUnsafe(value); // < v, ? > + return Avx2.BroadcastScalarToVector128(result); // < v, v > + } + else + { + Vector128 result = CreateScalarUnsafe(value); // < v, ? > + return Sse2.UnpackLow(result, result); // < v, v > + } + } + + return SoftwareFallback(value); // Sse2.X64 is not supported: build both elements on the stack instead + + Vector128 SoftwareFallback(ulong x) + { + var pResult = stackalloc ulong[2] + { + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -604,41 +824,85 @@ namespace System.Runtime.Intrinsics /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 CreateScalar(byte value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.IsSupported) + { + return Sse2.ConvertScalarToVector128UInt32(value).AsByte(); // byte widens to uint by zero-extension, so no explicit cast is needed + } + + return SoftwareFallback(value); // Sse2 is not supported: start from Zero and store the scalar at the start of the vector + + Vector128 SoftwareFallback(byte x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 CreateScalar(double value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.IsSupported) + { + return Sse2.MoveScalar(Vector128.Zero, CreateScalarUnsafe(value)); + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(double x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 CreateScalar(short value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.IsSupported) + { + return Sse2.ConvertScalarToVector128UInt32((ushort)(value)).AsInt16(); // Convert to ushort so that we zero-extend, rather than sign-extend + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(short x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); // Use the parameter so the local function captures nothing from the caller + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 CreateScalar(int value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.IsSupported) + { + return Sse2.ConvertScalarToVector128Int32(value); + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(int x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); // Use the parameter so the local function captures nothing from the caller + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. @@ -646,63 +910,130 @@ namespace System.Runtime.Intrinsics /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 CreateScalar(long value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.X64.IsSupported) + { + return Sse2.X64.ConvertScalarToVector128Int64(value); + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(long x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); // Use the parameter so the local function captures nothing from the caller + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector128 CreateScalar(sbyte value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.IsSupported) + { + // Convert to byte so that we zero-extend, rather than sign-extend + return Sse2.ConvertScalarToVector128UInt32((byte)(value)).AsSByte(); + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(sbyte x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); // Use the parameter so the local function captures nothing from the caller + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector128 CreateScalar(float value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse.IsSupported) + { + return Sse.MoveScalar(Vector128.Zero, CreateScalarUnsafe(value)); + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(float x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); // Use the parameter so the local function captures nothing from the caller + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector128 CreateScalar(ushort value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.IsSupported) + { + return Sse2.ConvertScalarToVector128UInt32(value).AsUInt16(); + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(ushort x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); // Use the parameter so the local function captures nothing from the caller + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector128 CreateScalar(uint value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.IsSupported) + { + return Sse2.ConvertScalarToVector128UInt32(value); + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(uint x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); // Use the parameter so the local function captures nothing from the caller + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector128 CreateScalar(ulong value) { - var result = Vector128.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.X64.IsSupported) + { + return Sse2.X64.ConvertScalarToVector128UInt64(value); + } + + return SoftwareFallback(value); + + Vector128 SoftwareFallback(ulong x) + { + var result = Vector128.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); // Use the parameter so the local function captures nothing from the caller + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs index 3c15de6..56f0e9e 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs @@ -6,11 +6,23 @@ using System.Diagnostics; using System.Globalization; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; using System.Text; using Internal.Runtime.CompilerServices; namespace System.Runtime.Intrinsics { + // We mark certain methods with AggressiveInlining to ensure that the JIT will + // inline them. The JIT would otherwise not inline the method since it, at the + // point it tries to determine inline profitability, currently cannot determine + // that most of the code-paths will be optimized away as "dead code". + // + // We then manually inline cases (such as certain intrinsic code-paths) that + // will generate code small enough to make the AggressiveInlining profitable. The + // other cases (such as the software fallback) are placed in their own method. 
+ // This ensures we get good codegen for the "fast-path" and allows the JIT to + // determine inline profitability of the other paths as it would normally. + [Intrinsic] [DebuggerDisplay("{DisplayString,nq}")] [DebuggerTypeProxy(typeof(Vector128DebugView<>))] @@ -169,19 +181,54 @@ namespace System.Runtime.Intrinsics /// The to compare with the current instance. /// true if is equal to the current instance; otherwise, false. /// The type of the current instance () is not supported. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public bool Equals(Vector128 other) { ThrowIfUnsupportedType(); - for (int i = 0; i < Count; i++) + if (Sse.IsSupported && (typeof(T) == typeof(float))) + { + // float.Equals treats two NaN values as equal, so lanes where both inputs are NaN must also count as a match + Vector128 result = Sse.CompareEqual(AsSingle(), other.AsSingle()); + Vector128 bothNaN = Sse.And(Sse.CompareUnordered(AsSingle(), AsSingle()), Sse.CompareUnordered(other.AsSingle(), other.AsSingle())); + return Sse.MoveMask(Sse.Or(result, bothNaN)) == 0b1111; // We have one bit per element + } + + if (Sse2.IsSupported) { - if (!((IEquatable)(GetElement(i))).Equals(other.GetElement(i))) + if (typeof(T) == typeof(double)) + { + // double.Equals treats two NaN values as equal, so lanes where both inputs are NaN must also count as a match + Vector128 result = Sse2.CompareEqual(AsDouble(), other.AsDouble()); + Vector128 bothNaN = Sse2.And(Sse2.CompareUnordered(AsDouble(), AsDouble()), Sse2.CompareUnordered(other.AsDouble(), other.AsDouble())); + return Sse2.MoveMask(Sse2.Or(result, bothNaN)) == 0b11; // We have one bit per element + } + else { - return false; + // Unlike float/double, there are no special values to consider + // for integral types and we can just do a comparison that all + // bytes are exactly the same. 
+ + Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))); + Vector128 result = Sse2.CompareEqual(AsByte(), other.AsByte()); + return Sse2.MoveMask(result) == 0b1111_1111_1111_1111; // We have one bit per element } } - return true; + return SoftwareFallback(in this, other); + + bool SoftwareFallback(in Vector128 x, Vector128 y) + { + for (int i = 0; i < Count; i++) + { + if (!((IEquatable)(x.GetElement(i))).Equals(y.GetElement(i))) + { + return false; + } + } + + return true; + } } /// Determines whether the specified object is equal to the current instance. 
diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs index d22e1c0..34066a5 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs @@ -3,10 +3,22 @@ // See the LICENSE file in the project root for more information. using System.Runtime.CompilerServices; +using System.Runtime.Intrinsics.X86; using Internal.Runtime.CompilerServices; namespace System.Runtime.Intrinsics { + // We mark certain methods with AggressiveInlining to ensure that the JIT will + // inline them. The JIT would otherwise not inline the method since it, at the + // point it tries to determine inline profitability, currently cannot determine + // that most of the code-paths will be optimized away as "dead code". + // + // We then manually inline cases (such as certain intrinsic code-paths) that + // will generate code small enough to make the AggressiveInlining profitable. The + // other cases (such as the software fallback) are placed in their own method. + // This ensures we get good codegen for the "fast-path" and allows the JIT to + // determine inline profitability of the other paths as it would normally. + public static class Vector256 { internal const int Size = 32; @@ -14,257 +26,441 @@ namespace System.Runtime.Intrinsics /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 Create(byte value) { - var pResult = stackalloc byte[32] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); // Neither AVX2 nor AVX is supported: build all 32 elements on the stack + + Vector256 SoftwareFallback(byte x) + { + var pResult = stackalloc byte[32] + { + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 Create(double value) { - var pResult = stackalloc double[4] + if (Avx2.IsSupported) { - value, - value, - value, - value, - }; + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ? 
> + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, ?, ? > + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v > + } + + return SoftwareFallback(value); // Neither AVX2 nor AVX is supported: build all 4 elements on the stack + + Vector256 SoftwareFallback(double x) + { + var pResult = stackalloc double[4] + { + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 Create(short value) { - var pResult = stackalloc short[16] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, v, v, v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ? > + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); // Neither AVX2 nor AVX is supported: build all 16 elements on the stack + + Vector256 SoftwareFallback(short x) + { + var pResult = stackalloc short[16] + { + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. 
/// A new with all elements initialized to . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 Create(int value) { - var pResult = stackalloc int[8] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ? > + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, v, v, ?, ?, ?, ? > + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); // Neither AVX2 nor AVX is supported: build all 8 elements on the stack + + Vector256 SoftwareFallback(int x) + { + var pResult = stackalloc int[8] + { + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 Create(long value) { - var pResult = stackalloc long[4] + if (Sse2.X64.IsSupported) { - value, - value, - value, - value, - }; - - return Unsafe.AsRef>(pResult); + if (Avx2.IsSupported) + { + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ? > + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v > + } + else if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, ?, ? 
> + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v > + } + } + + return SoftwareFallback(value); // Also reached when Sse2.X64 is supported but neither AVX2 nor AVX is, since the inner if/else-if has no final else + + Vector256 SoftwareFallback(long x) + { + var pResult = stackalloc long[4] + { + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. 
/// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector256 Create(sbyte value) { - var pResult = stackalloc sbyte[32] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(sbyte x) + { + var pResult = stackalloc sbyte[32] + { + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . 
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 Create(float value) { - var pResult = stackalloc float[8] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ? > + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, v, v, ?, ?, ?, ? > + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(float x) + { + var pResult = stackalloc float[8] + { + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector256 Create(ushort value) { - var pResult = stackalloc ushort[16] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? > + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, v, v, v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ? 
> + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(ushort x) + { + var pResult = stackalloc ushort[16] + { + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector256 Create(uint value) { - var pResult = stackalloc uint[8] + if (Avx2.IsSupported) { - value, - value, - value, - value, - value, - value, - value, - value, - }; + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ?, ?, ?, ?, ? > + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v, v, v, v, v > + } - return Unsafe.AsRef>(pResult); + if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, v, v, ?, ?, ?, ? > + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v, v, v, v, v > + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(uint x) + { + var pResult = stackalloc uint[8] + { + x, + x, + x, + x, + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with all elements initialized to the specified value. /// The value that all elements will be initialized to. /// A new with all elements initialized to . + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector256 Create(ulong value) { - var pResult = stackalloc ulong[4] + if (Sse2.X64.IsSupported) { - value, - value, - value, - value, - }; - - return Unsafe.AsRef>(pResult); + if (Avx2.IsSupported) + { + Vector128 result = Vector128.CreateScalarUnsafe(value); // < v, ?, ?, ? 
> + return Avx2.BroadcastScalarToVector256(result); // < v, v, v, v > + } + else if (Avx.IsSupported) + { + Vector128 result = Vector128.Create(value); // < v, v, ?, ? > + return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1); // < v, v, v, v > + } + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(ulong x) + { + var pResult = stackalloc ulong[4] + { + x, + x, + x, + x, + }; + + return Unsafe.AsRef>(pResult); + } } /// Creates a new instance with each element initialized to the corresponding specified value. @@ -802,105 +998,215 @@ namespace System.Runtime.Intrinsics /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 CreateScalar(byte value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(byte x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 CreateScalar(double value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(double x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 CreateScalar(short value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(short x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 CreateScalar(int value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(int x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 CreateScalar(long value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.X64.IsSupported && Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(long x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector256 CreateScalar(sbyte value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(sbyte x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public static unsafe Vector256 CreateScalar(float value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(float x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector256 CreateScalar(ushort value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(ushort x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero. + [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector256 CreateScalar(uint value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(uint x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements initialized to zero. /// The value that element 0 will be initialized to. /// A new instance with the first element initialized to and the remaining elements initialized to zero.
+ [MethodImpl(MethodImplOptions.AggressiveInlining)] [CLSCompliant(false)] public static unsafe Vector256 CreateScalar(ulong value) { - var result = Vector256.Zero; - Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), value); - return result; + if (Sse2.X64.IsSupported && Avx.IsSupported) + { + return Vector128.CreateScalar(value).ToVector256(); + } + + return SoftwareFallback(value); + + Vector256 SoftwareFallback(ulong x) + { + var result = Vector256.Zero; + Unsafe.WriteUnaligned(ref Unsafe.As, byte>(ref result), x); + return result; + } } /// Creates a new instance with the first element initialized to the specified value and the remaining elements left uninitialized. diff --git a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs index dd18a4c..6c7a108 100644 --- a/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs +++ b/src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs @@ -6,11 +6,23 @@ using System.Diagnostics; using System.Globalization; using System.Runtime.CompilerServices; using System.Runtime.InteropServices; +using System.Runtime.Intrinsics.X86; using System.Text; using Internal.Runtime.CompilerServices; namespace System.Runtime.Intrinsics { + // We mark certain methods with AggressiveInlining to ensure that the JIT will + // inline them. The JIT would otherwise not inline the method since it, at the + // point it tries to determine inline profitability, currently cannot determine + // that most of the code-paths will be optimized away as "dead code". + // + // We then manually inline cases (such as certain intrinsic code-paths) that + // will generate code small enough to make the AggressiveInlining profitable. The + // other cases (such as the software fallback) are placed in their own method.
+ // This ensures we get good codegen for the "fast-path" and allows the JIT to + // determine inline profitability of the other paths as it would normally. + [Intrinsic] [DebuggerDisplay("{DisplayString,nq}")] [DebuggerTypeProxy(typeof(Vector256DebugView<>))] @@ -173,17 +185,46 @@ namespace System.Runtime.Intrinsics /// The type of the current instance () is not supported. public bool Equals(Vector256 other) { - ThrowIfUnsupportedType(); - - for (int i = 0; i < Count; i++) + if (Avx.IsSupported) { - if (!((IEquatable)(GetElement(i))).Equals(other.GetElement(i))) + if (typeof(T) == typeof(float)) + { + Vector256 result = Avx.Compare(AsSingle(), other.AsSingle(), FloatComparisonMode.EqualOrderedNonSignaling); + return Avx.MoveMask(result) == 0b1111_1111; // We have one bit per element + } + + if (typeof(T) == typeof(double)) { - return false; + Vector256 result = Avx.Compare(AsDouble(), other.AsDouble(), FloatComparisonMode.EqualOrderedNonSignaling); + return Avx.MoveMask(result) == 0b1111; // We have one bit per element } } - return true; + if (Avx2.IsSupported) + { + // Unlike float/double, there are no special values to consider + // for integral types and we can just do a comparison that all + // bytes are exactly the same. + + Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double))); + Vector256 result = Avx2.CompareEqual(AsByte(), other.AsByte()); + return Avx2.MoveMask(result) == unchecked((int)(0b1111_1111_1111_1111_1111_1111_1111_1111)); // We have one bit per element + } + + return SoftwareFallback(in this, other); + + bool SoftwareFallback(in Vector256 x, Vector256 y) + { + for (int i = 0; i < Count; i++) + { + if (!((IEquatable)(x.GetElement(i))).Equals(y.GetElement(i))) + { + return false; + } + } + + return true; + } } /// Determines whether the specified object is equal to the current instance. @@ -266,41 +307,89 @@ namespace System.Runtime.Intrinsics /// The value of the lower 128-bits as a . 
/// A new with the lower 128-bits set to the specified value and the lower 128-bits set to the same value as that in the current instance. /// The type of the current instance () is not supported. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector256 WithLower(Vector128 value) { ThrowIfUnsupportedType(); Vector128.ThrowIfUnsupportedType(); - Vector256 result = this; - Unsafe.As, Vector128>(ref result) = value; - return result; + if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) + { + return Avx2.InsertVector128(AsByte(), value.AsByte(), 0).As(); + } + + if (Avx.IsSupported) + { + return Avx.InsertVector128(AsSingle(), value.AsSingle(), 0).As(); + } + + return SoftwareFallback(in this, value); + + Vector256 SoftwareFallback(in Vector256 t, Vector128 x) + { + Vector256 result = t; + Unsafe.As, Vector128>(ref result) = x; + return result; + } } /// Gets the value of the upper 128-bits as a new . /// The value of the upper 128-bits as a new . /// The type of the current instance () is not supported. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector128 GetUpper() { ThrowIfUnsupportedType(); Vector128.ThrowIfUnsupportedType(); - ref Vector128 lower = ref Unsafe.As, Vector128>(ref Unsafe.AsRef(in this)); - return Unsafe.Add(ref lower, 1); + if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) + { + return Avx2.ExtractVector128(AsByte(), 1).As(); + } + + if (Avx.IsSupported) + { + return Avx.ExtractVector128(AsSingle(), 1).As(); + } + + return SoftwareFallback(in this); + + Vector128 SoftwareFallback(in Vector256 t) + { + ref Vector128 lower = ref Unsafe.As, Vector128>(ref Unsafe.AsRef(in t)); + return Unsafe.Add(ref lower, 1); + } } /// Creates a new with the upper 128-bits set to the specified value and the upper 128-bits set to the same value as that in the current instance. /// The value of the upper 128-bits as a . 
/// A new with the upper 128-bits set to the specified value and the upper 128-bits set to the same value as that in the current instance. /// The type of the current instance () is not supported. + [MethodImpl(MethodImplOptions.AggressiveInlining)] public Vector256 WithUpper(Vector128 value) { ThrowIfUnsupportedType(); Vector128.ThrowIfUnsupportedType(); - Vector256 result = this; - ref Vector128 lower = ref Unsafe.As, Vector128>(ref result); - Unsafe.Add(ref lower, 1) = value; - return result; + if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) + { + return Avx2.InsertVector128(AsByte(), value.AsByte(), 1).As(); + } + + if (Avx.IsSupported) + { + return Avx.InsertVector128(AsSingle(), value.AsSingle(), 1).As(); + } + + return SoftwareFallback(in this, value); + + Vector256 SoftwareFallback(in Vector256 t, Vector128 x) + { + Vector256 result = t; + ref Vector128 lower = ref Unsafe.As, Vector128>(ref result); + Unsafe.Add(ref lower, 1) = x; + return result; + } } /// Converts the current instance to a scalar containing the value of the first element. diff --git a/src/jit/importer.cpp b/src/jit/importer.cpp index 96badba..1b101d0 100644 --- a/src/jit/importer.cpp +++ b/src/jit/importer.cpp @@ -4157,6 +4157,29 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, switch (intrinsic) { +#if defined(_TARGET_XARCH_) + case NI_Base_Vector256_As: + case NI_Base_Vector256_AsByte: + case NI_Base_Vector256_AsDouble: + case NI_Base_Vector256_AsInt16: + case NI_Base_Vector256_AsInt32: + case NI_Base_Vector256_AsInt64: + case NI_Base_Vector256_AsSByte: + case NI_Base_Vector256_AsSingle: + case NI_Base_Vector256_AsUInt16: + case NI_Base_Vector256_AsUInt32: + case NI_Base_Vector256_AsUInt64: + { + if (!compSupports(InstructionSet_AVX)) + { + // We don't want to deal with TYP_SIMD32 if the compiler doesn't otherwise support the type. 
+ break; + } + + __fallthrough; + } +#endif // _TARGET_XARCH_ + #if defined(_TARGET_ARM64_) case NI_Base_Vector64_AsByte: case NI_Base_Vector64_AsInt16: @@ -4177,19 +4200,6 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic intrinsic, case NI_Base_Vector128_AsUInt16: case NI_Base_Vector128_AsUInt32: case NI_Base_Vector128_AsUInt64: -#if defined(_TARGET_XARCH_) - case NI_Base_Vector256_As: - case NI_Base_Vector256_AsByte: - case NI_Base_Vector256_AsDouble: - case NI_Base_Vector256_AsInt16: - case NI_Base_Vector256_AsInt32: - case NI_Base_Vector256_AsInt64: - case NI_Base_Vector256_AsSByte: - case NI_Base_Vector256_AsSingle: - case NI_Base_Vector256_AsUInt16: - case NI_Base_Vector256_AsUInt32: - case NI_Base_Vector256_AsUInt64: -#endif // _TARGET_XARCH_ { // We fold away the cast here, as it only exists to satisfy // the type system. It is safe to do this here since the retNode type -- 2.7.4