Moving various Vector128/256 helper methods to be implemented using other intrinsics...
author Tanner Gooding <tagoo@outlook.com>
Fri, 7 Dec 2018 23:01:23 +0000 (15:01 -0800)
committer GitHub <noreply@github.com>
Fri, 7 Dec 2018 23:01:23 +0000 (15:01 -0800)
* Updating Vector128.CreateScalar and Vector256.CreateScalar to be implemented using other intrinsics

* Updating Vector128.Equals and Vector256.Equals to be implemented using other intrinsics

* Updating Vector256.WithLower, Vector256.GetUpper, and Vector256.WithUpper to be implemented using other intrinsics

* Updating Vector128.Create(T) and Vector256.Create(T) to be implemented using other intrinsics

* Fixing the `NI_Base_Vector256_As` intrinsics to only fold the cast if AVX is supported and add a clarifying comment to the Vector128/256.Equals code

* Changing the various `*Software()` local functions in Vector128/256 to be `SoftwareFallback()`

src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128.cs
src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector128_1.cs
src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256.cs
src/System.Private.CoreLib/shared/System/Runtime/Intrinsics/Vector256_1.cs
src/jit/importer.cpp

index 445ccb9..b877bbe 100644 (file)
@@ -3,10 +3,22 @@
 // See the LICENSE file in the project root for more information.
 
 using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics.X86;
 using Internal.Runtime.CompilerServices;
 
 namespace System.Runtime.Intrinsics
 {
+    // We mark certain methods with AggressiveInlining to ensure that the JIT will
+    // inline them. The JIT would otherwise not inline the method since it, at the
+    // point it tries to determine inline profitability, currently cannot determine
+    // that most of the code-paths will be optimized away as "dead code".
+    //
+    // We then manually inline cases (such as certain intrinsic code-paths) that
+    // will generate code small enough to make the AggressiveInlining profitable. The
+    // other cases (such as the software fallback) are placed in their own method.
+    // This ensures we get good codegen for the "fast-path" and allows the JIT to
+    // determine inline profitability of the other paths as it would normally.
+
     public static class Vector128
     {
         internal const int Size = 16;
@@ -14,191 +26,399 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{Byte}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Byte}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<byte> Create(byte value)
         {
-            var pResult = stackalloc byte[16]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<byte> result = CreateScalarUnsafe(value);                         // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector128(result);                             // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector128<byte>>(pResult);
+            if (Ssse3.IsSupported)
+            {
+                Vector128<byte> result = CreateScalarUnsafe(value);                         // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Ssse3.Shuffle(result, Vector128<byte>.Zero);                         // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
+
+            if (Sse2.IsSupported)
+            {
+                Vector128<byte> result = CreateScalarUnsafe(value);                         // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                result = Sse2.UnpackLow(result, result);                                    // < v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                result = Sse2.UnpackLow(result.AsUInt16(), result.AsUInt16()).AsByte();     // < v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Sse2.Shuffle(result.AsUInt32(), 0x00).AsByte();                      // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<byte> SoftwareFallback(byte x)
+            {
+                var pResult = stackalloc byte[16]
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<byte>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Double}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Double}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<double> Create(double value)
         {
-            var pResult = stackalloc double[2]
+            if (Sse3.IsSupported)
             {
-                value,
-                value,
-            };
+                Vector128<double> result = CreateScalarUnsafe(value);                       // < v, ? >
+                return Sse3.MoveAndDuplicate(result);                                       // < v, v >
+            }
 
-            return Unsafe.AsRef<Vector128<double>>(pResult);
+            if (Sse.IsSupported)
+            {
+                Vector128<double> result = CreateScalarUnsafe(value);                       // < v, ? >
+                return Sse.MoveLowToHigh(result.AsSingle(), result.AsSingle()).AsDouble();  // < v, v >
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<double> SoftwareFallback(double x)
+            {
+                var pResult = stackalloc double[2]
+                {
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<double>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Int16}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Int16}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<short> Create(short value)
         {
-            var pResult = stackalloc short[8]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<short> result = CreateScalarUnsafe(value);                        // < v, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector128(result);                             // < v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector128<short>>(pResult);
+            if (Sse2.IsSupported)
+            {
+                Vector128<short> result = CreateScalarUnsafe(value);                        // < v, ?, ?, ?, ?, ?, ?, ? >
+                result = Sse2.UnpackLow(result, result);                                    // < v, v, ?, ?, ?, ?, ?, ? >
+                return Sse2.Shuffle(result.AsInt32(), 0x00).AsInt16();                      // < v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<short> SoftwareFallback(short x)
+            {
+                var pResult = stackalloc short[8]
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<short>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Int32}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Int32}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<int> Create(int value)
         {
-            var pResult = stackalloc int[4]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<int> result = CreateScalarUnsafe(value);                          // < v, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector128(result);                             // < v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector128<int>>(pResult);
+            if (Sse2.IsSupported)
+            {
+                Vector128<int> result = CreateScalarUnsafe(value);                          // < v, ?, ?, ? >
+                return Sse2.Shuffle(result, 0x00);                                          // < v, v, v, v >
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<int> SoftwareFallback(int x)
+            {
+                var pResult = stackalloc int[4]
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<int>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Int64}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Int64}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<long> Create(long value)
         {
-            var pResult = stackalloc long[2]
+            if (Sse2.X64.IsSupported)
             {
-                value,
-                value,
-            };
-
-            return Unsafe.AsRef<Vector128<long>>(pResult);
+                if (Avx2.IsSupported)
+                {
+                    Vector128<long> result = CreateScalarUnsafe(value);                     // < v, ? >
+                    return Avx2.BroadcastScalarToVector128(result);                         // < v, v >
+                }
+                else
+                {
+                    Vector128<long> result = CreateScalarUnsafe(value);                     // < v, ? >
+                    return Sse2.UnpackLow(result, result);                                  // < v, v >
+                }
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<long> SoftwareFallback(long x)
+            {
+                var pResult = stackalloc long[2]
+                {
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<long>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{SByte}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{SByte}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector128<sbyte> Create(sbyte value)
         {
-            var pResult = stackalloc sbyte[16]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<sbyte> result = CreateScalarUnsafe(value);                        // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector128(result);                             // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector128<sbyte>>(pResult);
+            if (Ssse3.IsSupported)
+            {
+                Vector128<sbyte> result = CreateScalarUnsafe(value);                        // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Ssse3.Shuffle(result, Vector128<sbyte>.Zero);                        // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
+
+            if (Sse2.IsSupported)
+            {
+                Vector128<sbyte> result = CreateScalarUnsafe(value);                        // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                result = Sse2.UnpackLow(result, result);                                    // < v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                result = Sse2.UnpackLow(result.AsInt16(), result.AsInt16()).AsSByte();      // < v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Sse2.Shuffle(result.AsInt32(), 0x00).AsSByte();                      // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<sbyte> SoftwareFallback(sbyte x)
+            {
+                var pResult = stackalloc sbyte[16]
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<sbyte>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Single}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Single}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<float> Create(float value)
         {
-            var pResult = stackalloc float[4]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<float> result = CreateScalarUnsafe(value);                        // < v, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector128(result);                             // < v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector128<float>>(pResult);
+            if (Avx.IsSupported)
+            {
+                Vector128<float> result = CreateScalarUnsafe(value);                        // < v, ?, ?, ? >
+                return Avx.Permute(result, 0x00);                                           // < v, v, v, v >
+            }
+
+            if (Sse.IsSupported)
+            {
+                Vector128<float> result = CreateScalarUnsafe(value);                        // < v, ?, ?, ? >
+                return Sse.Shuffle(result, result, 0x00);                                   // < v, v, v, v >
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<float> SoftwareFallback(float x)
+            {
+                var pResult = stackalloc float[4]
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<float>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{UInt16}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{UInt16}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector128<ushort> Create(ushort value)
         {
-            var pResult = stackalloc ushort[8]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<ushort> result = CreateScalarUnsafe(value);                       // < v, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector128(result);                             // < v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector128<ushort>>(pResult);
+            if (Sse2.IsSupported)
+            {
+                Vector128<ushort> result = CreateScalarUnsafe(value);                       // < v, ?, ?, ?, ?, ?, ?, ? >
+                result = Sse2.UnpackLow(result, result);                                    // < v, v, ?, ?, ?, ?, ?, ? >
+                return Sse2.Shuffle(result.AsUInt32(), 0x00).AsUInt16();                    // < v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<ushort> SoftwareFallback(ushort x)
+            {
+                var pResult = stackalloc ushort[8]
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<ushort>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{UInt32}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{UInt32}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector128<uint> Create(uint value)
         {
-            var pResult = stackalloc uint[4]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<uint> result = CreateScalarUnsafe(value);                         // < v, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector128(result);                             // < v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector128<uint>>(pResult);
+            if (Sse2.IsSupported)
+            {
+                Vector128<uint> result = CreateScalarUnsafe(value);                         // < v, ?, ?, ? >
+                return Sse2.Shuffle(result, 0x00);                                          // < v, v, v, v >
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<uint> SoftwareFallback(uint x)
+            {
+                var pResult = stackalloc uint[4]
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<uint>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{UInt64}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{UInt64}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector128<ulong> Create(ulong value)
         {
-            var pResult = stackalloc ulong[2]
+            if (Sse2.X64.IsSupported)
             {
-                value,
-                value,
-            };
-
-            return Unsafe.AsRef<Vector128<ulong>>(pResult);
+                if (Avx2.IsSupported)
+                {
+                    Vector128<ulong> result = CreateScalarUnsafe(value);                    // < v, ? >
+                    return Avx2.BroadcastScalarToVector128(result);                         // < v, v >
+                }
+                else
+                {
+                    Vector128<ulong> result = CreateScalarUnsafe(value);                    // < v, ? >
+                    return Sse2.UnpackLow(result, result);                                  // < v, v >
+                }
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<ulong> SoftwareFallback(ulong x)
+            {
+                var pResult = stackalloc ulong[2]
+                {
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector128<ulong>>(pResult);
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Byte}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -604,41 +824,85 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector128{Byte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<byte> CreateScalar(byte value)
         {
-            var result = Vector128<byte>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<byte>, byte>(ref result), value);
-            return result;
+            if (Sse2.IsSupported)
+            {
+                return Sse2.ConvertScalarToVector128UInt32(value).AsByte();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<byte> SoftwareFallback(byte x)
+            {
+                var result = Vector128<byte>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<byte>, byte>(ref result), x);
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Double}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Double}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<double> CreateScalar(double value)
         {
-            var result = Vector128<double>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<double>, byte>(ref result), value);
-            return result;
+            if (Sse2.IsSupported)
+            {
+                return Sse2.MoveScalar(Vector128<double>.Zero, CreateScalarUnsafe(value));
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<double> SoftwareFallback(double x)
+            {
+                var result = Vector128<double>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<double>, byte>(ref result), x);
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Int16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Int16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<short> CreateScalar(short value)
         {
-            var result = Vector128<short>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<short>, byte>(ref result), value);
-            return result;
+            if (Sse2.IsSupported)
+            {
+                return Sse2.ConvertScalarToVector128UInt32((ushort)(value)).AsInt16();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<short> SoftwareFallback(short x)                                              // reads only 'x' so the enclosing 'value' is not captured
+            {
+                var result = Vector128<short>.Zero;                                                 // < 0, 0, 0, 0, 0, 0, 0, 0 >
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<short>, byte>(ref result), x);        // < x, 0, 0, 0, 0, 0, 0, 0 >
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Int32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Int32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<int> CreateScalar(int value)
         {
-            var result = Vector128<int>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<int>, byte>(ref result), value);
-            return result;
+            if (Sse2.IsSupported)
+            {
+                return Sse2.ConvertScalarToVector128Int32(value);
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<int> SoftwareFallback(int x)                                                  // reads only 'x' so the enclosing 'value' is not captured
+            {
+                var result = Vector128<int>.Zero;                                                   // < 0, 0, 0, 0 >
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<int>, byte>(ref result), x);          // < x, 0, 0, 0 >
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Int64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
@@ -646,63 +910,129 @@ namespace System.Runtime.Intrinsics
         /// <returns>A new <see cref="Vector128{Int64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
         public static unsafe Vector128<long> CreateScalar(long value)
         {
-            var result = Vector128<long>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<long>, byte>(ref result), value);
-            return result;
+            if (Sse2.X64.IsSupported)
+            {
+                return Sse2.X64.ConvertScalarToVector128Int64(value);
+            }
+
+            return SoftwareFallback(value);
+
+            Vector128<long> SoftwareFallback(long x)
+            {
+                var result = Vector128<long>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<long>, byte>(ref result), value);
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{SByte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{SByte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector128<sbyte> CreateScalar(sbyte value)
         {
-            var result = Vector128<sbyte>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<sbyte>, byte>(ref result), value);
-            return result;
+            if (Sse2.IsSupported)
+            {
+                // Convert to byte so that we zero-extend, rather than sign-extend
+                return Sse2.ConvertScalarToVector128UInt32((byte)(value)).AsSByte();
+            }
+
+            return SoftwareFallback(value); // Only reached when SSE2 is not supported
+
+            Vector128<sbyte> SoftwareFallback(sbyte x)
+            {
+                var result = Vector128<sbyte>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<sbyte>, byte>(ref result), x); // Overwrites element 0 only; uses the parameter so that `value` is not captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Single}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Single}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector128<float> CreateScalar(float value)
         {
-            var result = Vector128<float>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<float>, byte>(ref result), value);
-            return result;
+            if (Sse.IsSupported)
+            {
+                return Sse.MoveScalar(Vector128<float>.Zero, CreateScalarUnsafe(value)); // Element 0 comes from the scalar vector, the remaining elements from Zero
+            }
+
+            return SoftwareFallback(value); // Only reached when SSE is not supported
+
+            Vector128<float> SoftwareFallback(float x)
+            {
+                var result = Vector128<float>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<float>, byte>(ref result), x); // Overwrites element 0 only; uses the parameter so that `value` is not captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{UInt16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{UInt16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector128<ushort> CreateScalar(ushort value)
         {
-            var result = Vector128<ushort>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<ushort>, byte>(ref result), value);
-            return result;
+            if (Sse2.IsSupported)
+            {
+                return Sse2.ConvertScalarToVector128UInt32(value).AsUInt16(); // ushort widens to uint by zero-extension, so element 1 stays zero
+            }
+
+            return SoftwareFallback(value); // Only reached when SSE2 is not supported
+
+            Vector128<ushort> SoftwareFallback(ushort x)
+            {
+                var result = Vector128<ushort>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<ushort>, byte>(ref result), x); // Overwrites element 0 only; uses the parameter so that `value` is not captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{UInt32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{UInt32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector128<uint> CreateScalar(uint value)
         {
-            var result = Vector128<uint>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<uint>, byte>(ref result), value);
-            return result;
+            if (Sse2.IsSupported)
+            {
+                return Sse2.ConvertScalarToVector128UInt32(value);
+            }
+
+            return SoftwareFallback(value); // Only reached when SSE2 is not supported
+
+            Vector128<uint> SoftwareFallback(uint x)
+            {
+                var result = Vector128<uint>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<uint>, byte>(ref result), x); // Overwrites element 0 only; uses the parameter so that `value` is not captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{UInt64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{UInt64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector128<ulong> CreateScalar(ulong value)
         {
-            var result = Vector128<ulong>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<ulong>, byte>(ref result), value);
-            return result;
+            if (Sse2.X64.IsSupported)
+            {
+                return Sse2.X64.ConvertScalarToVector128UInt64(value);
+            }
+
+            return SoftwareFallback(value); // Only reached when Sse2.X64 is not supported
+
+            Vector128<ulong> SoftwareFallback(ulong x)
+            {
+                var result = Vector128<ulong>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector128<ulong>, byte>(ref result), x); // Overwrites element 0 only; uses the parameter so that `value` is not captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector128{Byte}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
index 3c15de6..56f0e9e 100644 (file)
@@ -6,11 +6,23 @@ using System.Diagnostics;
 using System.Globalization;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
 using System.Text;
 using Internal.Runtime.CompilerServices;
 
 namespace System.Runtime.Intrinsics
 {
+    // We mark certain methods with AggressiveInlining to ensure that the JIT will
+    // inline them. The JIT would otherwise not inline the method since it, at the
+    // point it tries to determine inline profitability, currently cannot determine
+    // that most of the code-paths will be optimized away as "dead code".
+    //
+    // We then manually inline cases (such as certain intrinsic code-paths) that
+    // will generate code small enough to make the AggressiveInlining profitable. The
+    // other cases (such as the software fallback) are placed in their own method.
+    // This ensures we get good codegen for the "fast-path" and allows the JIT to
+    // determine inline profitability of the other paths as it would normally.
+
     [Intrinsic]
     [DebuggerDisplay("{DisplayString,nq}")]
     [DebuggerTypeProxy(typeof(Vector128DebugView<>))]
@@ -169,19 +181,60 @@ namespace System.Runtime.Intrinsics
         /// <param name="other">The <see cref="Vector128{T}" /> to compare with the current instance.</param>
         /// <returns><c>true</c> if <paramref name="other" /> is equal to the current instance; otherwise, <c>false</c>.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public bool Equals(Vector128<T> other)
         {
             ThrowIfUnsupportedType();
 
-            for (int i = 0; i < Count; i++)
+            if (Sse.IsSupported && (typeof(T) == typeof(float)))
+            {
+                // CompareEqual reports NaN as unequal to everything, whereas Single.Equals
+                // (used by the software fallback) treats NaN as equal to NaN. Also accept
+                // lanes in which both operands are NaN so that both code paths agree.
+                Vector128<float> left = AsSingle();
+                Vector128<float> right = other.AsSingle();
+                Vector128<float> bothNaN = Sse.And(Sse.CompareUnordered(left, left), Sse.CompareUnordered(right, right));
+                Vector128<float> result = Sse.Or(Sse.CompareEqual(left, right), bothNaN);
+                return Sse.MoveMask(result) == 0b1111; // We have one bit per element
+            }
+
+            if (Sse2.IsSupported)
             {
-                if (!((IEquatable<T>)(GetElement(i))).Equals(other.GetElement(i)))
+                if (typeof(T) == typeof(double))
+                {
+                    // Same NaN handling as the float path, so that we match Double.Equals
+                    Vector128<double> left = AsDouble();
+                    Vector128<double> right = other.AsDouble();
+                    Vector128<double> bothNaN = Sse2.And(Sse2.CompareUnordered(left, left), Sse2.CompareUnordered(right, right));
+                    Vector128<double> result = Sse2.Or(Sse2.CompareEqual(left, right), bothNaN);
+                    return Sse2.MoveMask(result) == 0b11; // We have one bit per element
+                }
+                else
                 {
-                    return false;
+                    // Unlike float/double, there are no special values to consider
+                    // for integral types and we can just do a comparison that all
+                    // bytes are exactly the same.
+
+                    Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)));
+                    Vector128<byte> result = Sse2.CompareEqual(AsByte(), other.AsByte());
+                    return Sse2.MoveMask(result) == 0b1111_1111_1111_1111; // We have one bit per element
                 }
             }
 
-            return true;
+            return SoftwareFallback(in this, other); // No usable SSE/SSE2 path: compare element by element
+
+            bool SoftwareFallback(in Vector128<T> x, Vector128<T> y)
+            {
+                for (int i = 0; i < Count; i++)
+                {
+                    if (!((IEquatable<T>)(x.GetElement(i))).Equals(y.GetElement(i)))
+                    {
+                        return false;
+                    }
+                }
+
+                return true;
+            }
         }
 
         /// <summary>Determines whether the specified object is equal to the current instance.</summary>
index d22e1c0..34066a5 100644 (file)
@@ -3,10 +3,22 @@
 // See the LICENSE file in the project root for more information.
 
 using System.Runtime.CompilerServices;
+using System.Runtime.Intrinsics.X86;
 using Internal.Runtime.CompilerServices;
 
 namespace System.Runtime.Intrinsics
 {
+    // We mark certain methods with AggressiveInlining to ensure that the JIT will
+    // inline them. The JIT would otherwise not inline the method since it, at the
+    // point it tries to determine inline profitability, currently cannot determine
+    // that most of the code-paths will be optimized away as "dead code".
+    //
+    // We then manually inline cases (such as certain intrinsic code-paths) that
+    // will generate code small enough to make the AggressiveInlining profitable. The
+    // other cases (such as the software fallback) are placed in their own method.
+    // This ensures we get good codegen for the "fast-path" and allows the JIT to
+    // determine inline profitability of the other paths as it would normally.
+
     public static class Vector256
     {
         internal const int Size = 32;
@@ -14,257 +26,441 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Byte}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<byte> Create(byte value)
         {
-            var pResult = stackalloc byte[32]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<byte> result = Vector128.CreateScalarUnsafe(value);           // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector256<byte>>(pResult);
+            if (Avx.IsSupported)
+            {
+                Vector128<byte> result = Vector128.Create(value);                       // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value); // Neither AVX2 nor AVX is supported
+
+            Vector256<byte> SoftwareFallback(byte x)
+            {
+                var pResult = stackalloc byte[32] // 32 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<byte>>(pResult); // Reinterpret the stack buffer as a Vector256<byte>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Double}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Double}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<double> Create(double value)
         {
-            var pResult = stackalloc double[4]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<double> result = Vector128.CreateScalarUnsafe(value);         // < v, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector256<double>>(pResult);
+            if (Avx.IsSupported)
+            {
+                Vector128<double> result = Vector128.Create(value);                     // < v, v, ?, ? >
+                return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v >
+            }
+
+            return SoftwareFallback(value); // Neither AVX2 nor AVX is supported
+
+            Vector256<double> SoftwareFallback(double x)
+            {
+                var pResult = stackalloc double[4] // 4 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<double>>(pResult); // Reinterpret the stack buffer as a Vector256<double>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Int16}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int16}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<short> Create(short value)
         {
-            var pResult = stackalloc short[16]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<short> result = Vector128.CreateScalarUnsafe(value);          // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector256<short>>(pResult);
+            if (Avx.IsSupported)
+            {
+                Vector128<short> result = Vector128.Create(value);                      // < v, v, v, v, v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value); // Neither AVX2 nor AVX is supported
+
+            Vector256<short> SoftwareFallback(short x)
+            {
+                var pResult = stackalloc short[16] // 16 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<short>>(pResult); // Reinterpret the stack buffer as a Vector256<short>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Int32}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int32}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<int> Create(int value)
         {
-            var pResult = stackalloc int[8]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<int> result = Vector128.CreateScalarUnsafe(value);            // < v, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector256<int>>(pResult);
+            if (Avx.IsSupported)
+            {
+                Vector128<int> result = Vector128.Create(value);                        // < v, v, v, v, ?, ?, ?, ? >
+                return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value); // Neither AVX2 nor AVX is supported
+
+            Vector256<int> SoftwareFallback(int x)
+            {
+                var pResult = stackalloc int[8] // 8 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<int>>(pResult); // Reinterpret the stack buffer as a Vector256<int>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Int64}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int64}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<long> Create(long value)
         {
-            var pResult = stackalloc long[4]
+            if (Sse2.X64.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-            };
-
-            return Unsafe.AsRef<Vector256<long>>(pResult);
+                if (Avx2.IsSupported)
+                {
+                    Vector128<long> result = Vector128.CreateScalarUnsafe(value);           // < v, ?, ?, ? >
+                    return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v >
+                }
+                else if (Avx.IsSupported)
+                {
+                    Vector128<long> result = Vector128.Create(value);                       // < v, v, ?, ? >
+                    return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v >
+                }
+            }
+
+            return SoftwareFallback(value); // Sse2.X64 is not supported, or neither AVX2 nor AVX is
+
+            Vector256<long> SoftwareFallback(long x)
+            {
+                var pResult = stackalloc long[4] // 4 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<long>>(pResult); // Reinterpret the stack buffer as a Vector256<long>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{SByte}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{SByte}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector256<sbyte> Create(sbyte value)
         {
-            var pResult = stackalloc sbyte[32]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<sbyte> result = Vector128.CreateScalarUnsafe(value);          // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector256<sbyte>>(pResult);
+            if (Avx.IsSupported)
+            {
+                Vector128<sbyte> result = Vector128.Create(value);                      // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value); // Neither AVX2 nor AVX is supported
+
+            Vector256<sbyte> SoftwareFallback(sbyte x)
+            {
+                var pResult = stackalloc sbyte[32] // 32 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<sbyte>>(pResult); // Reinterpret the stack buffer as a Vector256<sbyte>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Single}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Single}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<float> Create(float value)
         {
-            var pResult = stackalloc float[8]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<float> result = Vector128.CreateScalarUnsafe(value);          // < v, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector256<float>>(pResult);
+            if (Avx.IsSupported)
+            {
+                Vector128<float> result = Vector128.Create(value);                      // < v, v, v, v, ?, ?, ?, ? >
+                return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value); // Neither AVX2 nor AVX is supported
+
+            Vector256<float> SoftwareFallback(float x)
+            {
+                var pResult = stackalloc float[8] // 8 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<float>>(pResult); // Reinterpret the stack buffer as a Vector256<float>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt16}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector256<ushort> Create(ushort value)
         {
-            var pResult = stackalloc ushort[16]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<ushort> result = Vector128.CreateScalarUnsafe(value);         // < v, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector256<ushort>>(pResult);
+            if (Avx.IsSupported)
+            {
+                Vector128<ushort> result = Vector128.Create(value);                     // < v, v, v, v, v, v, v, v, ?, ?, ?, ?, ?, ?, ?, ? >
+                return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value); // Neither AVX2 nor AVX is supported
+
+            Vector256<ushort> SoftwareFallback(ushort x)
+            {
+                var pResult = stackalloc ushort[16] // 16 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<ushort>>(pResult); // Reinterpret the stack buffer as a Vector256<ushort>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt32}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector256<uint> Create(uint value)
         {
-            var pResult = stackalloc uint[8]
+            if (Avx2.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
+                Vector128<uint> result = Vector128.CreateScalarUnsafe(value);           // < v, ?, ?, ?, ?, ?, ?, ? >
+                return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v, v, v, v, v >
+            }
 
-            return Unsafe.AsRef<Vector256<uint>>(pResult);
+            if (Avx.IsSupported)
+            {
+                Vector128<uint> result = Vector128.Create(value);                       // < v, v, v, v, ?, ?, ?, ? >
+                return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v, v, v, v, v >
+            }
+
+            return SoftwareFallback(value); // Neither AVX2 nor AVX is supported
+
+            Vector256<uint> SoftwareFallback(uint x)
+            {
+                var pResult = stackalloc uint[8] // 8 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<uint>>(pResult); // Reinterpret the stack buffer as a Vector256<uint>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt64}" /> with all elements initialized to <paramref name="value" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector256<ulong> Create(ulong value)
         {
-            var pResult = stackalloc ulong[4]
+            if (Sse2.X64.IsSupported)
             {
-                value,
-                value,
-                value,
-                value,
-            };
-
-            return Unsafe.AsRef<Vector256<ulong>>(pResult);
+                if (Avx2.IsSupported)
+                {
+                    Vector128<ulong> result = Vector128.CreateScalarUnsafe(value);          // < v, ?, ?, ? >
+                    return Avx2.BroadcastScalarToVector256(result);                         // < v, v, v, v >
+                }
+                else if (Avx.IsSupported)
+                {
+                    Vector128<ulong> result = Vector128.Create(value);                      // < v, v, ?, ? >
+                    return Avx.InsertVector128(result.ToVector256Unsafe(), result, 1);      // < v, v, v, v >
+                }
+            }
+
+            return SoftwareFallback(value); // Sse2.X64 is not supported, or neither AVX2 nor AVX is
+
+            Vector256<ulong> SoftwareFallback(ulong x)
+            {
+                var pResult = stackalloc ulong[4] // 4 copies of x, one per element
+                {
+                    x,
+                    x,
+                    x,
+                    x,
+                };
+
+                return Unsafe.AsRef<Vector256<ulong>>(pResult); // Reinterpret the stack buffer as a Vector256<ulong>
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -802,105 +998,215 @@ namespace System.Runtime.Intrinsics
         /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<byte> CreateScalar(byte value)
         {
-            var result = Vector256<byte>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<byte>, byte>(ref result), value);
-            return result;
+            if (Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<byte> SoftwareFallback(byte x)
+            {
+                var result = Vector256<byte>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<byte>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Double}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Double}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<double> CreateScalar(double value)
         {
-            var result = Vector256<double>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<double>, byte>(ref result), value);
-            return result;
+            if (Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<double> SoftwareFallback(double x)
+            {
+                var result = Vector256<double>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<double>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Int16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<short> CreateScalar(short value)
         {
-            var result = Vector256<short>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<short>, byte>(ref result), value);
-            return result;
+            if (Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<short> SoftwareFallback(short x)
+            {
+                var result = Vector256<short>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<short>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Int32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<int> CreateScalar(int value)
         {
-            var result = Vector256<int>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<int>, byte>(ref result), value);
-            return result;
+            if (Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<int> SoftwareFallback(int x)
+            {
+                var result = Vector256<int>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<int>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Int64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<long> CreateScalar(long value)
         {
-            var result = Vector256<long>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<long>, byte>(ref result), value);
-            return result;
+            if (Sse2.X64.IsSupported && Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<long> SoftwareFallback(long x)
+            {
+                var result = Vector256<long>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<long>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{SByte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{SByte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector256<sbyte> CreateScalar(sbyte value)
         {
-            var result = Vector256<sbyte>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<sbyte>, byte>(ref result), value);
-            return result;
+            if (Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<sbyte> SoftwareFallback(sbyte x)
+            {
+                var result = Vector256<sbyte>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<sbyte>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Single}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Single}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<float> CreateScalar(float value)
         {
-            var result = Vector256<float>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<float>, byte>(ref result), value);
-            return result;
+            if (Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<float> SoftwareFallback(float x)
+            {
+                var result = Vector256<float>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<float>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector256<ushort> CreateScalar(ushort value)
         {
-            var result = Vector256<ushort>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<ushort>, byte>(ref result), value);
-            return result;
+            if (Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<ushort> SoftwareFallback(ushort x)
+            {
+                var result = Vector256<ushort>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<ushort>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector256<uint> CreateScalar(uint value)
         {
-            var result = Vector256<uint>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<uint>, byte>(ref result), value);
-            return result;
+            if (Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<uint> SoftwareFallback(uint x)
+            {
+                var result = Vector256<uint>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<uint>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
         public static unsafe Vector256<ulong> CreateScalar(ulong value)
         {
-            var result = Vector256<ulong>.Zero;
-            Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<ulong>, byte>(ref result), value);
-            return result;
+            if (Sse2.X64.IsSupported && Avx.IsSupported)
+            {
+                return Vector128.CreateScalar(value).ToVector256();
+            }
+
+            return SoftwareFallback(value);
+
+            Vector256<ulong> SoftwareFallback(ulong x)
+            {
+                var result = Vector256<ulong>.Zero;
+                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<ulong>, byte>(ref result), x); // 'x', not the outer 'value', so nothing is captured
+                return result;
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
index dd18a4c..6c7a108 100644 (file)
@@ -6,11 +6,23 @@ using System.Diagnostics;
 using System.Globalization;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics.X86;
 using System.Text;
 using Internal.Runtime.CompilerServices;
 
 namespace System.Runtime.Intrinsics
 {
+    // We mark certain methods with AggressiveInlining to ensure that the JIT will
+    // inline them. The JIT would otherwise not inline the method since it, at the
+    // point it tries to determine inline profitability, currently cannot determine
+    // that most of the code-paths will be optimized away as "dead code".
+    //
+    // We then manually inline cases (such as certain intrinsic code-paths) that
+    // will generate code small enough to make the AggressiveInlining profitable. The
+    // other cases (such as the software fallback) are placed in their own method.
+    // This ensures we get good codegen for the "fast-path" and allows the JIT to
+    // determine inline profitability of the other paths as it would normally.
+
     [Intrinsic]
     [DebuggerDisplay("{DisplayString,nq}")]
     [DebuggerTypeProxy(typeof(Vector256DebugView<>))]
@@ -173,17 +185,55 @@ namespace System.Runtime.Intrinsics
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
         public bool Equals(Vector256<T> other)
         {
-            ThrowIfUnsupportedType();
-
-            for (int i = 0; i < Count; i++)
+            if (Avx.IsSupported)
             {
-                if (!((IEquatable<T>)(GetElement(i))).Equals(other.GetElement(i)))
+                if (typeof(T) == typeof(float))
+                {
+                    // Single.Equals (used by SoftwareFallback) treats NaN as equal to NaN, while an ordered
+                    // compare does not; a lane therefore matches if it compares equal or if both inputs are NaN.
+                    Vector256<float> left = AsSingle();
+                    Vector256<float> right = other.AsSingle();
+                    Vector256<float> equal = Avx.Compare(left, right, FloatComparisonMode.EqualOrderedNonSignaling);
+                    Vector256<float> bothNaN = Avx.And(Avx.Compare(left, left, FloatComparisonMode.UnorderedNonSignaling), Avx.Compare(right, right, FloatComparisonMode.UnorderedNonSignaling));
+                    return Avx.MoveMask(Avx.Or(equal, bothNaN)) == 0b1111_1111; // We have one bit per element
+                }
+
+                if (typeof(T) == typeof(double))
                 {
-                    return false;
+                    // Same NaN handling as the float path above, matching Double.Equals.
+                    Vector256<double> left = AsDouble();
+                    Vector256<double> right = other.AsDouble();
+                    Vector256<double> equal = Avx.Compare(left, right, FloatComparisonMode.EqualOrderedNonSignaling);
+                    Vector256<double> bothNaN = Avx.And(Avx.Compare(left, left, FloatComparisonMode.UnorderedNonSignaling), Avx.Compare(right, right, FloatComparisonMode.UnorderedNonSignaling));
+                    return Avx.MoveMask(Avx.Or(equal, bothNaN)) == 0b1111; // We have one bit per element
                 }
             }
 
-            return true;
+            if (Avx2.IsSupported)
+            {
+                // Unlike float/double, there are no special values to consider
+                // for integral types and we can just do a comparison that all
+                // bytes are exactly the same.
+
+                Debug.Assert((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)));
+                Vector256<byte> result = Avx2.CompareEqual(AsByte(), other.AsByte());
+                return Avx2.MoveMask(result) == unchecked((int)(0b1111_1111_1111_1111_1111_1111_1111_1111)); // We have one bit per element
+            }
+
+            return SoftwareFallback(in this, other);
+
+            bool SoftwareFallback(in Vector256<T> x, Vector256<T> y)
+            {
+                for (int i = 0; i < Count; i++)
+                {
+                    if (!((IEquatable<T>)(x.GetElement(i))).Equals(y.GetElement(i)))
+                    {
+                        return false;
+                    }
+                }
+
+                return true;
+            }
         }
 
         /// <summary>Determines whether the specified object is equal to the current instance.</summary>
@@ -266,41 +307,89 @@ namespace System.Runtime.Intrinsics
         /// <param name="value">The value of the lower 128-bits as a <see cref="Vector128{T}" />.</param>
         /// <returns>A new <see cref="Vector256{T}" /> with the lower 128-bits set to the specified value and the lower 128-bits set to the same value as that in the current instance.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public Vector256<T> WithLower(Vector128<T> value)
         {
             ThrowIfUnsupportedType();
             Vector128<T>.ThrowIfUnsupportedType();
 
-            Vector256<T> result = this;
-            Unsafe.As<Vector256<T>, Vector128<T>>(ref result) = value;
-            return result;
+            if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) // non-floating-point T with AVX2: go through the byte view
+            {
+                return Avx2.InsertVector128(AsByte(), value.AsByte(), 0).As<T>();
+            }
+
+            if (Avx.IsSupported) // float/double, or any other T when AVX2 is unavailable: go through the float view
+            {
+                return Avx.InsertVector128(AsSingle(), value.AsSingle(), 0).As<T>();
+            }
+
+            return SoftwareFallback(in this, value);
+
+            Vector256<T> SoftwareFallback(in Vector256<T> t, Vector128<T> x)
+            {
+                Vector256<T> result = t; // work on a copy; the current instance is not modified
+                Unsafe.As<Vector256<T>, Vector128<T>>(ref result) = x; // overwrite the first 128 bits of the copy
+                return result;
+            }
         }
 
         /// <summary>Gets the value of the upper 128-bits as a new <see cref="Vector128{T}" />.</summary>
         /// <returns>The value of the upper 128-bits as a new <see cref="Vector128{T}" />.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public Vector128<T> GetUpper()
         {
             ThrowIfUnsupportedType();
             Vector128<T>.ThrowIfUnsupportedType();
 
-            ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref Unsafe.AsRef(in this));
-            return Unsafe.Add(ref lower, 1);
+            if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) // non-floating-point T with AVX2: go through the byte view
+            {
+                return Avx2.ExtractVector128(AsByte(), 1).As<T>();
+            }
+
+            if (Avx.IsSupported) // float/double, or any other T when AVX2 is unavailable: go through the float view
+            {
+                return Avx.ExtractVector128(AsSingle(), 1).As<T>();
+            }
+
+            return SoftwareFallback(in this);
+
+            Vector128<T> SoftwareFallback(in Vector256<T> t)
+            {
+                ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref Unsafe.AsRef(in t)); // view the start of 't' as a Vector128<T>
+                return Unsafe.Add(ref lower, 1); // step one Vector128<T> past the start, i.e. the second 128 bits
+            }
         }
 
         /// <summary>Creates a new <see cref="Vector256{T}" /> with the upper 128-bits set to the specified value and the upper 128-bits set to the same value as that in the current instance.</summary>
         /// <param name="value">The value of the upper 128-bits as a <see cref="Vector128{T}" />.</param>
         /// <returns>A new <see cref="Vector256{T}" /> with the upper 128-bits set to the specified value and the upper 128-bits set to the same value as that in the current instance.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public Vector256<T> WithUpper(Vector128<T> value)
         {
             ThrowIfUnsupportedType();
             Vector128<T>.ThrowIfUnsupportedType();
 
-            Vector256<T> result = this;
-            ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref result);
-            Unsafe.Add(ref lower, 1) = value;
-            return result;
+            if (Avx2.IsSupported && ((typeof(T) != typeof(float)) && (typeof(T) != typeof(double)))) // non-floating-point T with AVX2: go through the byte view
+            {
+                return Avx2.InsertVector128(AsByte(), value.AsByte(), 1).As<T>();
+            }
+
+            if (Avx.IsSupported) // float/double, or any other T when AVX2 is unavailable: go through the float view
+            {
+                return Avx.InsertVector128(AsSingle(), value.AsSingle(), 1).As<T>();
+            }
+
+            return SoftwareFallback(in this, value);
+
+            Vector256<T> SoftwareFallback(in Vector256<T> t, Vector128<T> x)
+            {
+                Vector256<T> result = t; // work on a copy; the current instance is not modified
+                ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref result); // view the start of the copy as a Vector128<T>
+                Unsafe.Add(ref lower, 1) = x; // overwrite the second 128 bits of the copy
+                return result;
+            }
         }
 
         /// <summary>Converts the current instance to a scalar containing the value of the first element.</summary>
index 96badba..1b101d0 100644 (file)
@@ -4157,6 +4157,29 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
 
     switch (intrinsic)
     {
+#if defined(_TARGET_XARCH_)
+        case NI_Base_Vector256_As:
+        case NI_Base_Vector256_AsByte:
+        case NI_Base_Vector256_AsDouble:
+        case NI_Base_Vector256_AsInt16:
+        case NI_Base_Vector256_AsInt32:
+        case NI_Base_Vector256_AsInt64:
+        case NI_Base_Vector256_AsSByte:
+        case NI_Base_Vector256_AsSingle:
+        case NI_Base_Vector256_AsUInt16:
+        case NI_Base_Vector256_AsUInt32:
+        case NI_Base_Vector256_AsUInt64:
+        {
+            if (!compSupports(InstructionSet_AVX))
+            {
+                // We don't want to deal with TYP_SIMD32 if the compiler doesn't otherwise support the type.
+                break;
+            }
+
+            __fallthrough;
+        }
+#endif // _TARGET_XARCH_
+
 #if defined(_TARGET_ARM64_)
         case NI_Base_Vector64_AsByte:
         case NI_Base_Vector64_AsInt16:
@@ -4177,19 +4200,6 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
         case NI_Base_Vector128_AsUInt16:
         case NI_Base_Vector128_AsUInt32:
         case NI_Base_Vector128_AsUInt64:
-#if defined(_TARGET_XARCH_)
-        case NI_Base_Vector256_As:
-        case NI_Base_Vector256_AsByte:
-        case NI_Base_Vector256_AsDouble:
-        case NI_Base_Vector256_AsInt16:
-        case NI_Base_Vector256_AsInt32:
-        case NI_Base_Vector256_AsInt64:
-        case NI_Base_Vector256_AsSByte:
-        case NI_Base_Vector256_AsSingle:
-        case NI_Base_Vector256_AsUInt16:
-        case NI_Base_Vector256_AsUInt32:
-        case NI_Base_Vector256_AsUInt64:
-#endif // _TARGET_XARCH_
         {
             // We fold away the cast here, as it only exists to satisfy
             // the type system. It is safe to do this here since the retNode type