Updating Vector256 to have its software fallback be 2x Vector128<T> ops (#76221)
author Tanner Gooding <tagoo@outlook.com>
Mon, 3 Oct 2022 22:22:25 +0000 (15:22 -0700)
committer GitHub <noreply@github.com>
Mon, 3 Oct 2022 22:22:25 +0000 (15:22 -0700)
* Updating Vector256<T> to be implemented as 2x Vector128<T> ops

* Updating Vector256 to be implemented as 2x Vector128<T> ops

* Simplify the NRE handling

* A couple of small bug fixes

* Ensure MONO_TYPE_I and MONO_TYPE_U are handled in simd-intrinsics

* Ensure generic paths are handled for SN_CreateScalar and SN_CreateScalarUnsafe

* Ensure nint/nuint are handled in the relevant fallback paths

* Fixing braces to match mono code-styling

* Fixing the "Any" implementation to use `||`

* Fixing Narrow and Widen

* Use Unsafe.ReadUnaligned for the software fallback of Vector256.AsVector()

* Adjust an assert in fgMorphMultiregStructArg to account for certain Unsafe.As reinterprets

src/coreclr/jit/morph.cpp
src/coreclr/tools/Common/Compiler/VectorFieldLayoutAlgorithm.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
src/libraries/System.Private.CoreLib/src/System/ThrowHelper.cs
src/mono/mono/mini/simd-intrinsics.c

index cea8676..3b6f893 100644 (file)
@@ -3789,9 +3789,19 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg)
     ArgElem&       lastElem                 = elems[elemCount - 1];
     assert((elemCount == arg->AbiInfo.NumRegs) || arg->AbiInfo.IsSplit());
 
+    if (layout != nullptr)
+    {
+        assert(ClassLayout::AreCompatible(typGetObjLayout(arg->GetSignatureClassHandle()), layout));
+    }
+    else
+    {
+        assert(varTypeIsSIMD(argValue) && varTypeIsSIMD(arg->GetSignatureType()));
+    }
+
     if (arg->AbiInfo.IsHfaArg() && arg->AbiInfo.IsPassedInFloatRegisters())
     {
         var_types hfaType = arg->AbiInfo.GetHfaType();
+
         for (unsigned inx = 0; inx < elemCount; inx++)
         {
             elems[inx].Type   = hfaType;
@@ -3801,7 +3811,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg)
     else
     {
         assert(structSize <= MAX_ARG_REG_COUNT * TARGET_POINTER_SIZE);
-        assert((layout != nullptr) || varTypeIsSIMD(argValue));
 
         auto getSlotType = [layout](unsigned inx) {
             return (layout != nullptr) ? layout->GetGCPtrType(inx) : TYP_I_IMPL;
@@ -3974,14 +3983,6 @@ GenTree* Compiler::fgMorphMultiregStructArg(CallArg* arg)
             for (unsigned inx = 0; inx < elemCount; inx++)
             {
                 unsigned offset = lclOffset + elems[inx].Offset;
-#ifdef DEBUG
-                // Make sure we've set up correct GC types above.
-                unsigned  slotIdx   = offset / TARGET_POINTER_SIZE;
-                var_types argGcType = varTypeIsGC(elems[inx].Type) ? elems[inx].Type : TYP_I_IMPL;
-                var_types lclGcType = varDsc->HasGCPtr() ? varDsc->GetLayout()->GetGCPtrType(slotIdx) : TYP_I_IMPL;
-                assert(argGcType == lclGcType);
-#endif // DEBUG
-
                 GenTree* lclFld = gtNewLclFldNode(lclNum, elems[inx].Type, offset);
                 newArg->AddField(this, lclFld, offset, lclFld->TypeGet());
             }
index e2be877..ae5d8c5 100644 (file)
@@ -105,6 +105,7 @@ namespace ILCompiler
                 {
                     8 => ValueTypeShapeCharacteristics.Vector64Aggregate,
                     16 => ValueTypeShapeCharacteristics.Vector128Aggregate,
+                    32 => ValueTypeShapeCharacteristics.Vector128Aggregate,
                     _ => ValueTypeShapeCharacteristics.None
                 };
             }
index 658c48a..b605fa1 100644 (file)
@@ -60,6 +60,10 @@ namespace System.Runtime.Intrinsics
             {
                 return vector;
             }
+            else if (typeof(T) == typeof(nuint))
+            {
+                return vector;
+            }
             else if (typeof(T) == typeof(ushort))
             {
                 return vector;
@@ -709,6 +713,14 @@ namespace System.Runtime.Intrinsics
             {
                 return Create((long)(object)value).As<long, T>();
             }
+            else if (typeof(T) == typeof(nint))
+            {
+                return Create((nint)(object)value).As<nint, T>();
+            }
+            else if (typeof(T) == typeof(nuint))
+            {
+                return Create((nuint)(object)value).As<nuint, T>();
+            }
             else if (typeof(T) == typeof(sbyte))
             {
                 return Create((sbyte)(object)value).As<sbyte, T>();
@@ -1538,6 +1550,20 @@ namespace System.Runtime.Intrinsics
             }
         }
 
+        /// <summary>Creates a new <see cref="Vector128{T}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="value">The value that element 0 will be initialized to.</param>
+        /// <returns>A new <see cref="Vector128{T}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="value" /> (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static unsafe Vector128<T> CreateScalar<T>(T value)
+            where T : struct
+        {
+            Vector128<T> result = Vector128<T>.Zero;
+            result.SetElementUnsafe(0, value);
+            return result;
+        }
+
         /// <summary>Creates a new <see cref="Vector128{Byte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector128{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
index 9f3cc77..fbf87d3 100644 (file)
@@ -27,7 +27,7 @@ namespace System.Runtime.Intrinsics
     // value instead, thus reducing the number of locals and helping prevent us from hitting
     // the internal inlining limits of the JIT.
 
-    public static class Vector256
+    public static unsafe class Vector256
     {
         internal const int Size = 32;
 
@@ -49,72 +49,58 @@ namespace System.Runtime.Intrinsics
         }
 
         /// <summary>Computes the absolute value of each element in a vector.</summary>
-        /// <param name="vector">The vector that will have its absolute value computed.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="vector">The vector that will have its absolute value computed.</param>
         /// <returns>A vector whose elements are the absolute value of the elements in <paramref name="vector" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Abs<T>(Vector256<T> vector)
             where T : struct
         {
-            if (typeof(T) == typeof(byte))
-            {
-                return vector;
-            }
-            else if (typeof(T) == typeof(ushort))
-            {
-                return vector;
-            }
-            else if (typeof(T) == typeof(uint))
-            {
-                return vector;
-            }
-            else if (typeof(T) == typeof(ulong))
-            {
-                return vector;
-            }
-            else
-            {
-                return SoftwareFallback(vector);
-            }
-
-            static Vector256<T> SoftwareFallback(Vector256<T> vector)
-            {
-                Unsafe.SkipInit(out Vector256<T> result);
-
-                for (int index = 0; index < Vector256<T>.Count; index++)
-                {
-                    T value = Scalar<T>.Abs(vector.GetElementUnsafe(index));
-                    result.SetElementUnsafe(index, value);
-                }
-
-                return result;
-            }
+            return Create(
+                Vector128.Abs(vector.GetLower()),
+                Vector128.Abs(vector.GetUpper())
+            );
         }
 
         /// <summary>Adds two vectors to compute their sum.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to add with <paramref name="right" />.</param>
         /// <param name="right">The vector to add with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The sum of <paramref name="left" /> and <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Add<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => left + right;
+            where T : struct
+        {
+            return Create(
+                Vector128.Add(left.GetLower(), right.GetLower()),
+                Vector128.Add(left.GetUpper(), right.GetUpper())
+            );
+        }
 
         /// <summary>Computes the bitwise-and of a given vector and the ones complement of another vector.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to bitwise-and with <paramref name="right" />.</param>
         /// <param name="right">The vector to that is ones-complemented before being bitwise-and with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The bitwise-and of <paramref name="left" /> and the ones-complement of <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> AndNot<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => left & ~right;
+            where T : struct
+        {
+            return Create(
+                Vector128.AndNot(left.GetLower(), right.GetLower()),
+                Vector128.AndNot(left.GetUpper(), right.GetUpper())
+            );
+        }
 
         /// <summary>Reinterprets a <see cref="Vector256{TFrom}" /> as a new <see cref="Vector256{TTo}" />.</summary>
-        /// <typeparam name="TFrom">The type of the input vector.</typeparam>
-        /// <typeparam name="TTo">The type of the vector <paramref name="vector" /> should be reinterpreted as.</typeparam>
+        /// <typeparam name="TFrom">The type of the elements in the input vector.</typeparam>
+        /// <typeparam name="TTo">The type of the elements in the output vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{TTo}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="TFrom" />) or the type of the target (<typeparamref name="TTo" />) is not supported.</exception>
@@ -131,124 +117,137 @@ namespace System.Runtime.Intrinsics
         }
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{Byte}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{Byte}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<byte> AsByte<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, byte>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{Double}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{Double}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<double> AsDouble<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, double>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{Int16}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{Int16}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<short> AsInt16<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, short>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{Int32}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{Int32}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<int> AsInt32<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, int>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{Int64}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{Int64}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<long> AsInt64<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, long>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{IntPtr}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{IntPtr}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<nint> AsNInt<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, nint>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{UIntPtr}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{UIntPtr}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<nuint> AsNUInt<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, nuint>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{SByte}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{SByte}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<sbyte> AsSByte<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, sbyte>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{Single}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{Single}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<float> AsSingle<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, float>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{UInt16}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{UInt16}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<ushort> AsUInt16<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, ushort>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{UInt32}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{UInt32}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<uint> AsUInt32<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, uint>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{UInt64}" />.</summary>
-        /// <typeparam name="T">The type of the input vector.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to reinterpret.</param>
         /// <returns><paramref name="vector" /> reinterpreted as a new <see cref="Vector256{UInt64}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<ulong> AsUInt64<T>(this Vector256<T> vector)
             where T : struct => vector.As<T, ulong>();
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{T}" />.</summary>
-        /// <typeparam name="T">The type of the vectors.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="value">The vector to reinterpret.</param>
         /// <returns><paramref name="value" /> reinterpreted as a new <see cref="Vector256{T}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="value" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> AsVector256<T>(this Vector<T> value)
             where T : struct
         {
@@ -261,55 +260,68 @@ namespace System.Runtime.Intrinsics
         }
 
         /// <summary>Reinterprets a <see cref="Vector256{T}" /> as a new <see cref="Vector256{T}" />.</summary>
-        /// <typeparam name="T">The type of the vectors.</typeparam>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="value">The vector to reinterpret.</param>
         /// <returns><paramref name="value" /> reinterpreted as a new <see cref="Vector256{T}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="value" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector<T> AsVector<T>(this Vector256<T> value)
             where T : struct
         {
             Debug.Assert(Vector256<T>.Count >= Vector<T>.Count);
             ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-            return Unsafe.As<Vector256<T>, Vector<T>>(ref value);
+
+            ref byte address = ref Unsafe.As<Vector256<T>, byte>(ref value);
+            return Unsafe.ReadUnaligned<Vector<T>>(ref address);
         }
 
         /// <summary>Computes the bitwise-and of two vectors.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to bitwise-and with <paramref name="right" />.</param>
         /// <param name="right">The vector to bitwise-and with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The bitwise-and of <paramref name="left" /> and <paramref name="right"/>.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> BitwiseAnd<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => left & right;
+            where T : struct
+        {
+            return Create(
+                Vector128.BitwiseAnd(left.GetLower(), right.GetLower()),
+                Vector128.BitwiseAnd(left.GetUpper(), right.GetUpper())
+            );
+        }
 
         /// <summary>Computes the bitwise-or of two vectors.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to bitwise-or with <paramref name="right" />.</param>
         /// <param name="right">The vector to bitwise-or with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The bitwise-or of <paramref name="left" /> and <paramref name="right"/>.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> BitwiseOr<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => left | right;
+            where T : struct
+        {
+            return Create(
+                Vector128.BitwiseOr(left.GetLower(), right.GetLower()),
+                Vector128.BitwiseOr(left.GetUpper(), right.GetUpper())
+            );
+        }
 
         /// <summary>Computes the ceiling of each element in a vector.</summary>
         /// <param name="vector">The vector that will have its ceiling computed.</param>
         /// <returns>A vector whose elements are the ceiling of the elements in <paramref name="vector" />.</returns>
         /// <seealso cref="MathF.Ceiling(float)" />
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<float> Ceiling(Vector256<float> vector)
         {
-            Unsafe.SkipInit(out Vector256<float> result);
-
-            for (int index = 0; index < Vector256<float>.Count; index++)
-            {
-                float value = Scalar<float>.Ceiling(vector.GetElementUnsafe(index));
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Ceiling(vector.GetLower()),
+                Vector128.Ceiling(vector.GetUpper())
+            );
         }
 
         /// <summary>Computes the ceiling of each element in a vector.</summary>
@@ -317,36 +329,39 @@ namespace System.Runtime.Intrinsics
         /// <returns>A vector whose elements are the ceiling of the elements in <paramref name="vector" />.</returns>
         /// <seealso cref="Math.Ceiling(double)" />
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<double> Ceiling(Vector256<double> vector)
         {
-            Unsafe.SkipInit(out Vector256<double> result);
-
-            for (int index = 0; index < Vector256<double>.Count; index++)
-            {
-                double value = Scalar<double>.Ceiling(vector.GetElementUnsafe(index));
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Ceiling(vector.GetLower()),
+                Vector128.Ceiling(vector.GetUpper())
+            );
         }
 
         /// <summary>Conditionally selects a value from two vectors on a bitwise basis.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="condition">The mask that is used to select a value from <paramref name="left" /> or <paramref name="right" />.</param>
         /// <param name="left">The vector that is selected when the corresponding bit in <paramref name="condition" /> is one.</param>
         /// <param name="right">The vector that is selected when the corresponding bit in <paramref name="condition" /> is zero.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>A vector whose bits come from <paramref name="left" /> or <paramref name="right" /> based on the value of <paramref name="condition" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="condition" />, <paramref name="left" />, and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> ConditionalSelect<T>(Vector256<T> condition, Vector256<T> left, Vector256<T> right)
-            where T : struct => (left & condition) | (right & ~condition);
+            where T : struct
+        {
+            return Create(
+                Vector128.ConditionalSelect(condition.GetLower(), left.GetLower(), right.GetLower()),
+                Vector128.ConditionalSelect(condition.GetUpper(), left.GetUpper(), right.GetUpper())
+            );
+        }
 
         /// <summary>Converts a <see cref="Vector256{Int64}" /> to a <see cref="Vector256{Double}" />.</summary>
         /// <param name="vector">The vector to convert.</param>
         /// <returns>The converted vector.</returns>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<double> ConvertToDouble(Vector256<long> vector)
+        public static Vector256<double> ConvertToDouble(Vector256<long> vector)
         {
             if (Avx2.IsSupported)
             {
@@ -366,30 +381,20 @@ namespace System.Runtime.Intrinsics
             }
             else
             {
-                return SoftwareFallback(vector);
-            }
-
-            static Vector256<double> SoftwareFallback(Vector256<long> vector)
-            {
-                Unsafe.SkipInit(out Vector256<double> result);
-
-                for (int i = 0; i < Vector256<double>.Count; i++)
-                {
-                    double value = vector.GetElementUnsafe(i);
-                    result.SetElementUnsafe(i, value);
-                }
-
-                return result;
+                return Create(
+                    Vector128.ConvertToDouble(vector.GetLower()),
+                    Vector128.ConvertToDouble(vector.GetUpper())
+                );
             }
         }
 
         /// <summary>Converts a <see cref="Vector256{UInt64}" /> to a <see cref="Vector256{Double}" />.</summary>
         /// <param name="vector">The vector to convert.</param>
         /// <returns>The converted vector.</returns>
-        [CLSCompliant(false)]
         [Intrinsic]
+        [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<double> ConvertToDouble(Vector256<ulong> vector)
+        public static Vector256<double> ConvertToDouble(Vector256<ulong> vector)
         {
             if (Avx2.IsSupported)
             {
@@ -409,20 +414,10 @@ namespace System.Runtime.Intrinsics
             }
             else
             {
-                return SoftwareFallback(vector);
-            }
-
-            static Vector256<double> SoftwareFallback(Vector256<ulong> vector)
-            {
-                Unsafe.SkipInit(out Vector256<double> result);
-
-                for (int i = 0; i < Vector256<double>.Count; i++)
-                {
-                    double value = vector.GetElementUnsafe(i);
-                    result.SetElementUnsafe(i, value);
-                }
-
-                return result;
+                return Create(
+                    Vector128.ConvertToDouble(vector.GetLower()),
+                    Vector128.ConvertToDouble(vector.GetUpper())
+                );
             }
         }
 
@@ -430,60 +425,48 @@ namespace System.Runtime.Intrinsics
         /// <param name="vector">The vector to convert.</param>
         /// <returns>The converted vector.</returns>
         [Intrinsic]
-        public static unsafe Vector256<int> ConvertToInt32(Vector256<float> vector)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<int> ConvertToInt32(Vector256<float> vector)
         {
-            Unsafe.SkipInit(out Vector256<int> result);
-
-            for (int i = 0; i < Vector256<int>.Count; i++)
-            {
-                int value = (int)vector.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            // Convert each 128-bit half on its own, then recombine the two results
+            Vector128<int> lower = Vector128.ConvertToInt32(vector.GetLower());
+            Vector128<int> upper = Vector128.ConvertToInt32(vector.GetUpper());
+            return Create(lower, upper);
         }
 
         /// <summary>Converts a <see cref="Vector256{Double}" /> to a <see cref="Vector256{Int64}" />.</summary>
         /// <param name="vector">The vector to convert.</param>
         /// <returns>The converted vector.</returns>
         [Intrinsic]
-        public static unsafe Vector256<long> ConvertToInt64(Vector256<double> vector)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<long> ConvertToInt64(Vector256<double> vector)
         {
-            Unsafe.SkipInit(out Vector256<long> result);
-
-            for (int i = 0; i < Vector256<long>.Count; i++)
-            {
-                long value = (long)vector.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            // Convert each 128-bit half on its own, then recombine the two results
+            Vector128<long> lower = Vector128.ConvertToInt64(vector.GetLower());
+            Vector128<long> upper = Vector128.ConvertToInt64(vector.GetUpper());
+            return Create(lower, upper);
         }
 
         /// <summary>Converts a <see cref="Vector256{Int32}" /> to a <see cref="Vector256{Single}" />.</summary>
         /// <param name="vector">The vector to convert.</param>
         /// <returns>The converted vector.</returns>
         [Intrinsic]
-        public static unsafe Vector256<float> ConvertToSingle(Vector256<int> vector)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> ConvertToSingle(Vector256<int> vector)
         {
-            Unsafe.SkipInit(out Vector256<float> result);
-
-            for (int i = 0; i < Vector256<float>.Count; i++)
-            {
-                float value = vector.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            // Convert each 128-bit half on its own, then recombine the two results
+            Vector128<float> lower = Vector128.ConvertToSingle(vector.GetLower());
+            Vector128<float> upper = Vector128.ConvertToSingle(vector.GetUpper());
+            return Create(lower, upper);
         }
 
         /// <summary>Converts a <see cref="Vector256{UInt32}" /> to a <see cref="Vector256{Single}" />.</summary>
         /// <param name="vector">The vector to convert.</param>
         /// <returns>The converted vector.</returns>
-        [CLSCompliant(false)]
         [Intrinsic]
+        [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<float> ConvertToSingle(Vector256<uint> vector)
+        public static Vector256<float> ConvertToSingle(Vector256<uint> vector)
         {
             if (Avx2.IsSupported)
             {
@@ -517,67 +500,63 @@ namespace System.Runtime.Intrinsics
             }
             else
             {
-                return SoftwareFallback(vector);
-            }
-
-            static Vector256<float> SoftwareFallback(Vector256<uint> vector)
-            {
-                Unsafe.SkipInit(out Vector256<float> result);
-
-                for (int i = 0; i < Vector256<float>.Count; i++)
-                {
-                    float value = vector.GetElementUnsafe(i);
-                    result.SetElementUnsafe(i, value);
-                }
-
-                return result;
+                return Create(
+                    Vector128.ConvertToSingle(vector.GetLower()),
+                    Vector128.ConvertToSingle(vector.GetUpper())
+                );
             }
         }
 
         /// <summary>Converts a <see cref="Vector256{Single}" /> to a <see cref="Vector256{UInt32}" />.</summary>
         /// <param name="vector">The vector to convert.</param>
         /// <returns>The converted vector.</returns>
-        [CLSCompliant(false)]
         [Intrinsic]
-        public static unsafe Vector256<uint> ConvertToUInt32(Vector256<float> vector)
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<uint> ConvertToUInt32(Vector256<float> vector)
         {
-            Unsafe.SkipInit(out Vector256<uint> result);
-
-            for (int i = 0; i < Vector256<uint>.Count; i++)
-            {
-                uint value = (uint)vector.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            // Convert each 128-bit half on its own, then recombine the two results
+            Vector128<uint> lower = Vector128.ConvertToUInt32(vector.GetLower());
+            Vector128<uint> upper = Vector128.ConvertToUInt32(vector.GetUpper());
+            return Create(lower, upper);
         }
 
         /// <summary>Converts a <see cref="Vector256{Double}" /> to a <see cref="Vector256{UInt64}" />.</summary>
         /// <param name="vector">The vector to convert.</param>
         /// <returns>The converted vector.</returns>
-        [CLSCompliant(false)]
         [Intrinsic]
-        public static unsafe Vector256<ulong> ConvertToUInt64(Vector256<double> vector)
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ulong> ConvertToUInt64(Vector256<double> vector)
         {
-            Unsafe.SkipInit(out Vector256<ulong> result);
-
-            for (int i = 0; i < Vector256<ulong>.Count; i++)
-            {
-                ulong value = (ulong)vector.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            // Convert each 128-bit half on its own, then recombine the two results
+            Vector128<ulong> lower = Vector128.ConvertToUInt64(vector.GetLower());
+            Vector128<ulong> upper = Vector128.ConvertToUInt64(vector.GetUpper());
+            return Create(lower, upper);
         }
 
         /// <summary>Copies a <see cref="Vector256{T}" /> to a given array.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="vector">The vector to be copied.</param>
         /// <param name="destination">The array to which <paramref name="vector" /> is copied.</param>
-        /// <exception cref="NullReferenceException"><paramref name="destination" /> is <c>null</c>.</exception>
         /// <exception cref="ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Vector256{T}.Count" />.</exception>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
+        /// <exception cref="NullReferenceException"><paramref name="destination" /> is <c>null</c>.</exception>
         public static void CopyTo<T>(this Vector256<T> vector, T[] destination)
-            where T : struct => vector.CopyTo(destination, startIndex: 0);
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
+
+            if (destination.Length < Vector256<T>.Count)
+            {
+                ThrowHelper.ThrowArgumentException_DestinationTooShort();
+            }
+
+            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(destination));
+            Unsafe.WriteUnaligned(ref address, vector);
+        }
 
         /// <summary>Copies a <see cref="Vector256{T}" /> to a given array starting at the specified index.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
@@ -585,17 +564,15 @@ namespace System.Runtime.Intrinsics
         /// <param name="destination">The array to which <paramref name="vector" /> is copied.</param>
         /// <param name="startIndex">The starting index of <paramref name="destination" /> which <paramref name="vector" /> will be copied to.</param>
         /// <exception cref="ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Vector256{T}.Count" />.</exception>
-        /// <exception cref="NullReferenceException"><paramref name="destination" /> is <c>null</c>.</exception>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="startIndex" /> is negative or greater than the length of <paramref name="destination" />.</exception>
-        public static unsafe void CopyTo<T>(this Vector256<T> vector, T[] destination, int startIndex)
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
+        /// <exception cref="NullReferenceException"><paramref name="destination" /> is <c>null</c>.</exception>
+        public static void CopyTo<T>(this Vector256<T> vector, T[] destination, int startIndex)
             where T : struct
         {
             ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
 
-            if (destination is null)
-            {
-                ThrowHelper.ThrowNullReferenceException();
-            }
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
 
             if ((uint)startIndex >= (uint)destination.Length)
             {
@@ -607,7 +584,8 @@ namespace System.Runtime.Intrinsics
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }
 
-            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination[startIndex]), vector);
+            // `startIndex` counts elements of T, so offset the typed reference first; adding it to a `ref byte` would move by bytes instead
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref Unsafe.Add(ref MemoryMarshal.GetArrayDataReference(destination), startIndex)), vector);
         }
 
         /// <summary>Copies a <see cref="Vector256{T}" /> to a given span.</summary>
@@ -615,6 +593,7 @@ namespace System.Runtime.Intrinsics
         /// <param name="vector">The vector to be copied.</param>
         /// <param name="destination">The span to which the <paramref name="vector" /> is copied.</param>
         /// <exception cref="ArgumentException">The length of <paramref name="destination" /> is less than <see cref="Vector256{T}.Count" />.</exception>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         public static void CopyTo<T>(this Vector256<T> vector, Span<T> destination)
             where T : struct
         {
@@ -625,390 +604,168 @@ namespace System.Runtime.Intrinsics
                 ThrowHelper.ThrowArgumentException_DestinationTooShort();
             }
 
-            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
+            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination));
+            Unsafe.WriteUnaligned(ref address, vector);
         }
 
         /// <summary>Creates a new <see cref="Vector256{T}" /> instance with all elements initialized to the specified value.</summary>
-        /// <param name="value">The value that all elements will be initialized to.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{T}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="value" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<T> Create<T>(T value)
+        public static Vector256<T> Create<T>(T value)
             where T : struct
         {
-            if (typeof(T) == typeof(byte))
-            {
-                return Create((byte)(object)value).As<byte, T>();
-            }
-            else if (typeof(T) == typeof(double))
-            {
-                return Create((double)(object)value).As<double, T>();
-            }
-            else if (typeof(T) == typeof(short))
-            {
-                return Create((short)(object)value).As<short, T>();
-            }
-            else if (typeof(T) == typeof(int))
-            {
-                return Create((int)(object)value).As<int, T>();
-            }
-            else if (typeof(T) == typeof(long))
-            {
-                return Create((long)(object)value).As<long, T>();
-            }
-            else if (typeof(T) == typeof(sbyte))
-            {
-                return Create((sbyte)(object)value).As<sbyte, T>();
-            }
-            else if (typeof(T) == typeof(float))
-            {
-                return Create((float)(object)value).As<float, T>();
-            }
-            else if (typeof(T) == typeof(ushort))
-            {
-                return Create((ushort)(object)value).As<ushort, T>();
-            }
-            else if (typeof(T) == typeof(uint))
-            {
-                return Create((uint)(object)value).As<uint, T>();
-            }
-            else if (typeof(T) == typeof(ulong))
-            {
-                return Create((ulong)(object)value).As<ulong, T>();
-            }
-            else
-            {
-                throw new NotSupportedException(SR.Arg_TypeNotSupported);
-            }
+            Vector128<T> half = Vector128.Create(value); // broadcast once at 128 bits, reused for both halves
+            return Create(half, half);
         }
 
         /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi8</remarks>
         /// <returns>A new <see cref="Vector256{Byte}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi8</remarks>
         [Intrinsic]
-        public static unsafe Vector256<byte> Create(byte value)
-        {
-            byte* pResult = stackalloc byte[32]
-            {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<byte>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<byte> Create(byte value) => Create<byte>(value); // explicit <byte> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{Double}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256d _mm256_set1_pd</remarks>
         /// <returns>A new <see cref="Vector256{Double}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256d _mm256_set1_pd</remarks>
         [Intrinsic]
-        public static unsafe Vector256<double> Create(double value)
-        {
-            double* pResult = stackalloc double[4]
-            {
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<double>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<double> Create(double value) => Create<double>(value); // explicit <double> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{Int16}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi16</remarks>
         /// <returns>A new <see cref="Vector256{Int16}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi16</remarks>
         [Intrinsic]
-        public static unsafe Vector256<short> Create(short value)
-        {
-            short* pResult = stackalloc short[16]
-            {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<short>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<short> Create(short value) => Create<short>(value); // explicit <short> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{Int32}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi32</remarks>
         /// <returns>A new <see cref="Vector256{Int32}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi32</remarks>
         [Intrinsic]
-        public static unsafe Vector256<int> Create(int value)
-        {
-            int* pResult = stackalloc int[8]
-            {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<int>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<int> Create(int value) => Create<int>(value); // explicit <int> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{Int64}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi64x</remarks>
         /// <returns>A new <see cref="Vector256{Int64}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi64x</remarks>
         [Intrinsic]
-        public static unsafe Vector256<long> Create(long value)
-        {
-            long* pResult = stackalloc long[4]
-            {
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<long>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<long> Create(long value) => Create<long>(value); // explicit <long> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{IntPtr}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{IntPtr}" /> with all elements initialized to <paramref name="value" />.</returns>
         [Intrinsic]
-        public static unsafe Vector256<nint> Create(nint value)
-        {
-#if TARGET_64BIT
-            return Create((long)value).AsNInt();
-#else
-            return Create((int)value).AsNInt();
-#endif
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nint> Create(nint value) => Create<nint>(value); // explicit <nint> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{UIntPtr}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UIntPtr}" /> with all elements initialized to <paramref name="value" />.</returns>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<nuint> Create(nuint value)
-        {
-#if TARGET_64BIT
-            return Create((ulong)value).AsNUInt();
-#else
-            return Create((uint)value).AsNUInt();
-#endif
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nuint> Create(nuint value) => Create<nuint>(value); // explicit <nuint> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{SByte}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi8</remarks>
         /// <returns>A new <see cref="Vector256{SByte}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi8</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<sbyte> Create(sbyte value)
-        {
-            sbyte* pResult = stackalloc sbyte[32]
-            {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<sbyte>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<sbyte> Create(sbyte value) => Create<sbyte>(value); // explicit <sbyte> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{Single}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256 _mm256_set1_ps</remarks>
         /// <returns>A new <see cref="Vector256{Single}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256 _mm256_set1_ps</remarks>
         [Intrinsic]
-        public static unsafe Vector256<float> Create(float value)
-        {
-            float* pResult = stackalloc float[8]
-            {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<float>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> Create(float value) => Create<float>(value); // explicit <float> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi16</remarks>
         /// <returns>A new <see cref="Vector256{UInt16}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi16</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ushort> Create(ushort value)
-        {
-            ushort* pResult = stackalloc ushort[16]
-            {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<ushort>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ushort> Create(ushort value) => Create<ushort>(value); // explicit <ushort> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi32</remarks>
         /// <returns>A new <see cref="Vector256{UInt32}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi32</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<uint> Create(uint value)
-        {
-            uint* pResult = stackalloc uint[8]
-            {
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<uint>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<uint> Create(uint value) => Create<uint>(value); // explicit <uint> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance with all elements initialized to the specified value.</summary>
         /// <param name="value">The value that all elements will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi64x</remarks>
         /// <returns>A new <see cref="Vector256{UInt64}" /> with all elements initialized to <paramref name="value" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_set1_epi64x</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ulong> Create(ulong value)
-        {
-            ulong* pResult = stackalloc ulong[4]
-            {
-                value,
-                value,
-                value,
-                value,
-            };
-            return Unsafe.AsRef<Vector256<ulong>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ulong> Create(ulong value) => Create<ulong>(value); // explicit <ulong> binds to the generic Create<T>(T), not back to this overload
 
         /// <summary>Creates a new <see cref="Vector256{T}" /> from a given array.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="values">The array from which the vector is created.</param>
         /// <returns>A new <see cref="Vector256{T}" /> with its elements set to the first <see cref="Vector256{T}.Count" /> elements from <paramref name="values" />.</returns>
-        /// <exception cref="NullReferenceException"><paramref name="values" /> is <c>null</c>.</exception>
         /// <exception cref="ArgumentOutOfRangeException">The length of <paramref name="values" /> is less than <see cref="Vector256{T}.Count" />.</exception>
+        /// <exception cref="NotSupportedException">The type of <paramref name="values" /> (<typeparamref name="T" />) is not supported.</exception>
+        /// <exception cref="NullReferenceException"><paramref name="values" /> is <c>null</c>.</exception>
         public static Vector256<T> Create<T>(T[] values)
-            where T : struct => Create(values, index: 0);
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
+
+            if (values.Length < Vector256<T>.Count)
+            {
+                ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException();
+            }
+
+            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(values));
+            return Unsafe.ReadUnaligned<Vector256<T>>(ref address);
+        }
 
         /// <summary>Creates a new <see cref="Vector256{T}" /> from a given array.</summary>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="values">The array from which the vector is created.</param>
         /// <param name="index">The index in <paramref name="values" /> at which to being reading elements.</param>
         /// <returns>A new <see cref="Vector256{T}" /> with its elements set to the first <see cref="Vector128{T}.Count" /> elements from <paramref name="values" />.</returns>
-        /// <exception cref="NullReferenceException"><paramref name="values" /> is <c>null</c>.</exception>
         /// <exception cref="ArgumentOutOfRangeException">The length of <paramref name="values" />, starting from <paramref name="index" />, is less than <see cref="Vector256{T}.Count" />.</exception>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        /// <exception cref="NotSupportedException">The type of <paramref name="values" /> (<typeparamref name="T" />) is not supported.</exception>
+        /// <exception cref="NullReferenceException"><paramref name="values" /> is <c>null</c>.</exception>
         public static Vector256<T> Create<T>(T[] values, int index)
             where T : struct
         {
             ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
 
-            if (values is null)
-            {
-                ThrowHelper.ThrowNullReferenceException();
-            }
+            // We explicitly don't check for `null` because historically this has thrown `NullReferenceException` for perf reasons
 
             if ((index < 0) || ((values.Length - index) < Vector256<T>.Count))
             {
                 ThrowHelper.ThrowArgumentOutOfRange_IndexMustBeLessOrEqualException();
             }
 
-            return Unsafe.ReadUnaligned<Vector256<T>>(ref Unsafe.As<T, byte>(ref values[index]));
+            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetArrayDataReference(values));
+            return Unsafe.ReadUnaligned<Vector256<T>>(ref Unsafe.Add(ref address, index));
         }
 
         /// <summary>Creates a new <see cref="Vector256{T}" /> from a given readonly span.</summary>
@@ -1016,6 +773,7 @@ namespace System.Runtime.Intrinsics
         /// <param name="values">The readonly span from which the vector is created.</param>
         /// <returns>A new <see cref="Vector256{T}" /> with its elements set to the first <see cref="Vector256{T}.Count" /> elements from <paramref name="values" />.</returns>
         /// <exception cref="ArgumentOutOfRangeException">The length of <paramref name="values" /> is less than <see cref="Vector256{T}.Count" />.</exception>
+        /// <exception cref="NotSupportedException">The type of <paramref name="values" /> (<typeparamref name="T" />) is not supported.</exception>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Create<T>(ReadOnlySpan<T> values)
             where T : struct
@@ -1027,7 +785,8 @@ namespace System.Runtime.Intrinsics
                 ThrowHelper.ThrowArgumentOutOfRangeException(ExceptionArgument.values);
             }
 
-            return Unsafe.ReadUnaligned<Vector256<T>>(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values)));
+            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(values));
+            return Unsafe.ReadUnaligned<Vector256<T>>(ref address);
         }
 
         /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1063,47 +822,16 @@ namespace System.Runtime.Intrinsics
         /// <param name="e29">The value that element 29 will be initialized to.</param>
         /// <param name="e30">The value that element 30 will be initialized to.</param>
         /// <param name="e31">The value that element 31 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi8</remarks>
         /// <returns>A new <see cref="Vector256{Byte}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi8</remarks>
         [Intrinsic]
-        public static unsafe Vector256<byte> Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15, byte e16, byte e17, byte e18, byte e19, byte e20, byte e21, byte e22, byte e23, byte e24, byte e25, byte e26, byte e27, byte e28, byte e29, byte e30, byte e31)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<byte> Create(byte e0, byte e1, byte e2, byte e3, byte e4, byte e5, byte e6, byte e7, byte e8, byte e9, byte e10, byte e11, byte e12, byte e13, byte e14, byte e15, byte e16, byte e17, byte e18, byte e19, byte e20, byte e21, byte e22, byte e23, byte e24, byte e25, byte e26, byte e27, byte e28, byte e29, byte e30, byte e31)
         {
-            byte* pResult = stackalloc byte[32]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-                e4,
-                e5,
-                e6,
-                e7,
-                e8,
-                e9,
-                e10,
-                e11,
-                e12,
-                e13,
-                e14,
-                e15,
-                e16,
-                e17,
-                e18,
-                e19,
-                e20,
-                e21,
-                e22,
-                e23,
-                e24,
-                e25,
-                e26,
-                e27,
-                e28,
-                e29,
-                e30,
-                e31,
-            };
-            return Unsafe.AsRef<Vector256<byte>>(pResult);
+            return Create(
+                Vector128.Create(e0,  e1,  e2,  e3,  e4,  e5,  e6,  e7,  e8,  e9,  e10, e11, e12, e13, e14, e15),
+                Vector128.Create(e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31)
+            );
         }
 
         /// <summary>Creates a new <see cref="Vector256{Double}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1111,19 +839,16 @@ namespace System.Runtime.Intrinsics
         /// <param name="e1">The value that element 1 will be initialized to.</param>
         /// <param name="e2">The value that element 2 will be initialized to.</param>
         /// <param name="e3">The value that element 3 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256d _mm256_setr_pd</remarks>
         /// <returns>A new <see cref="Vector256{Double}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256d _mm256_setr_pd</remarks>
         [Intrinsic]
-        public static unsafe Vector256<double> Create(double e0, double e1, double e2, double e3)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<double> Create(double e0, double e1, double e2, double e3)
         {
-            double* pResult = stackalloc double[4]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-            };
-            return Unsafe.AsRef<Vector256<double>>(pResult);
+            return Create(
+                Vector128.Create(e0, e1),
+                Vector128.Create(e2, e3)
+            );
         }
 
         /// <summary>Creates a new <see cref="Vector256{Int16}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1143,31 +868,16 @@ namespace System.Runtime.Intrinsics
         /// <param name="e13">The value that element 13 will be initialized to.</param>
         /// <param name="e14">The value that element 14 will be initialized to.</param>
         /// <param name="e15">The value that element 15 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi16</remarks>
         /// <returns>A new <see cref="Vector256{Int16}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi16</remarks>
         [Intrinsic]
-        public static unsafe Vector256<short> Create(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7, short e8, short e9, short e10, short e11, short e12, short e13, short e14, short e15)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<short> Create(short e0, short e1, short e2, short e3, short e4, short e5, short e6, short e7, short e8, short e9, short e10, short e11, short e12, short e13, short e14, short e15)
         {
-            short* pResult = stackalloc short[16]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-                e4,
-                e5,
-                e6,
-                e7,
-                e8,
-                e9,
-                e10,
-                e11,
-                e12,
-                e13,
-                e14,
-                e15,
-            };
-            return Unsafe.AsRef<Vector256<short>>(pResult);
+            return Create(
+                Vector128.Create(e0, e1, e2,  e3,  e4,  e5,  e6,  e7),
+                Vector128.Create(e8, e9, e10, e11, e12, e13, e14, e15)
+            );
         }
 
         /// <summary>Creates a new <see cref="Vector256{Int32}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1179,23 +889,16 @@ namespace System.Runtime.Intrinsics
         /// <param name="e5">The value that element 5 will be initialized to.</param>
         /// <param name="e6">The value that element 6 will be initialized to.</param>
         /// <param name="e7">The value that element 7 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi32</remarks>
         /// <returns>A new <see cref="Vector256{Int32}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi32</remarks>
         [Intrinsic]
-        public static unsafe Vector256<int> Create(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<int> Create(int e0, int e1, int e2, int e3, int e4, int e5, int e6, int e7)
         {
-            int* pResult = stackalloc int[8]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-                e4,
-                e5,
-                e6,
-                e7,
-            };
-            return Unsafe.AsRef<Vector256<int>>(pResult);
+            return Create(
+                Vector128.Create(e0, e1, e2, e3),
+                Vector128.Create(e4, e5, e6, e7)
+            );
         }
 
         /// <summary>Creates a new <see cref="Vector256{Int64}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1203,19 +906,16 @@ namespace System.Runtime.Intrinsics
         /// <param name="e1">The value that element 1 will be initialized to.</param>
         /// <param name="e2">The value that element 2 will be initialized to.</param>
         /// <param name="e3">The value that element 3 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi64x</remarks>
         /// <returns>A new <see cref="Vector256{Int64}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi64x</remarks>
         [Intrinsic]
-        public static unsafe Vector256<long> Create(long e0, long e1, long e2, long e3)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<long> Create(long e0, long e1, long e2, long e3)
         {
-            long* pResult = stackalloc long[4]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-            };
-            return Unsafe.AsRef<Vector256<long>>(pResult);
+            return Create(
+                Vector128.Create(e0, e1),
+                Vector128.Create(e2, e3)
+            );
         }
 
         /// <summary>Creates a new <see cref="Vector256{SByte}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1251,48 +951,17 @@ namespace System.Runtime.Intrinsics
         /// <param name="e29">The value that element 29 will be initialized to.</param>
         /// <param name="e30">The value that element 30 will be initialized to.</param>
         /// <param name="e31">The value that element 31 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi8</remarks>
         /// <returns>A new <see cref="Vector256{SByte}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi8</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<sbyte> Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15, sbyte e16, sbyte e17, sbyte e18, sbyte e19, sbyte e20, sbyte e21, sbyte e22, sbyte e23, sbyte e24, sbyte e25, sbyte e26, sbyte e27, sbyte e28, sbyte e29, sbyte e30, sbyte e31)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<sbyte> Create(sbyte e0, sbyte e1, sbyte e2, sbyte e3, sbyte e4, sbyte e5, sbyte e6, sbyte e7, sbyte e8, sbyte e9, sbyte e10, sbyte e11, sbyte e12, sbyte e13, sbyte e14, sbyte e15, sbyte e16, sbyte e17, sbyte e18, sbyte e19, sbyte e20, sbyte e21, sbyte e22, sbyte e23, sbyte e24, sbyte e25, sbyte e26, sbyte e27, sbyte e28, sbyte e29, sbyte e30, sbyte e31)
         {
-            sbyte* pResult = stackalloc sbyte[32]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-                e4,
-                e5,
-                e6,
-                e7,
-                e8,
-                e9,
-                e10,
-                e11,
-                e12,
-                e13,
-                e14,
-                e15,
-                e16,
-                e17,
-                e18,
-                e19,
-                e20,
-                e21,
-                e22,
-                e23,
-                e24,
-                e25,
-                e26,
-                e27,
-                e28,
-                e29,
-                e30,
-                e31,
-            };
-            return Unsafe.AsRef<Vector256<sbyte>>(pResult);
+            return Create(
+                Vector128.Create(e0,  e1,  e2,  e3,  e4,  e5,  e6,  e7,  e8,  e9,  e10, e11, e12, e13, e14, e15),
+                Vector128.Create(e16, e17, e18, e19, e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31)
+            );
         }
 
         /// <summary>Creates a new <see cref="Vector256{Single}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1304,23 +973,16 @@ namespace System.Runtime.Intrinsics
         /// <param name="e5">The value that element 5 will be initialized to.</param>
         /// <param name="e6">The value that element 6 will be initialized to.</param>
         /// <param name="e7">The value that element 7 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256 _mm256_setr_ps</remarks>
         /// <returns>A new <see cref="Vector256{Single}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256 _mm256_setr_ps</remarks>
         [Intrinsic]
-        public static unsafe Vector256<float> Create(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> Create(float e0, float e1, float e2, float e3, float e4, float e5, float e6, float e7)
         {
-            float* pResult = stackalloc float[8]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-                e4,
-                e5,
-                e6,
-                e7,
-            };
-            return Unsafe.AsRef<Vector256<float>>(pResult);
+            return Create(
+                Vector128.Create(e0, e1, e2, e3),
+                Vector128.Create(e4, e5, e6, e7)
+            );
         }
 
         /// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1340,32 +1002,17 @@ namespace System.Runtime.Intrinsics
         /// <param name="e13">The value that element 13 will be initialized to.</param>
         /// <param name="e14">The value that element 14 will be initialized to.</param>
         /// <param name="e15">The value that element 15 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi16</remarks>
         /// <returns>A new <see cref="Vector256{UInt16}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi16</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ushort> Create(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7, ushort e8, ushort e9, ushort e10, ushort e11, ushort e12, ushort e13, ushort e14, ushort e15)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ushort> Create(ushort e0, ushort e1, ushort e2, ushort e3, ushort e4, ushort e5, ushort e6, ushort e7, ushort e8, ushort e9, ushort e10, ushort e11, ushort e12, ushort e13, ushort e14, ushort e15)
         {
-            ushort* pResult = stackalloc ushort[16]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-                e4,
-                e5,
-                e6,
-                e7,
-                e8,
-                e9,
-                e10,
-                e11,
-                e12,
-                e13,
-                e14,
-                e15,
-            };
-            return Unsafe.AsRef<Vector256<ushort>>(pResult);
+            return Create(
+                Vector128.Create(e0, e1, e2,  e3,  e4,  e5,  e6,  e7),
+                Vector128.Create(e8, e9, e10, e11, e12, e13, e14, e15)
+            );
         }
 
         /// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1377,24 +1024,17 @@ namespace System.Runtime.Intrinsics
         /// <param name="e5">The value that element 5 will be initialized to.</param>
         /// <param name="e6">The value that element 6 will be initialized to.</param>
         /// <param name="e7">The value that element 7 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi32</remarks>
         /// <returns>A new <see cref="Vector256{UInt32}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi32</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<uint> Create(uint e0, uint e1, uint e2, uint e3, uint e4, uint e5, uint e6, uint e7)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<uint> Create(uint e0, uint e1, uint e2, uint e3, uint e4, uint e5, uint e6, uint e7)
         {
-            uint* pResult = stackalloc uint[8]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-                e4,
-                e5,
-                e6,
-                e7,
-            };
-            return Unsafe.AsRef<Vector256<uint>>(pResult);
+            return Create(
+                Vector128.Create(e0, e1, e2, e3),
+                Vector128.Create(e4, e5, e6, e7)
+            );
         }
 
         /// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance with each element initialized to the corresponding specified value.</summary>
@@ -1402,781 +1042,388 @@ namespace System.Runtime.Intrinsics
         /// <param name="e1">The value that element 1 will be initialized to.</param>
         /// <param name="e2">The value that element 2 will be initialized to.</param>
         /// <param name="e3">The value that element 3 will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi64x</remarks>
         /// <returns>A new <see cref="Vector256{UInt64}" /> with each element initialized to corresponding specified value.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_epi64x</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ulong> Create(ulong e0, ulong e1, ulong e2, ulong e3)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ulong> Create(ulong e0, ulong e1, ulong e2, ulong e3)
         {
-            ulong* pResult = stackalloc ulong[4]
-            {
-                e0,
-                e1,
-                e2,
-                e3,
-            };
-            return Unsafe.AsRef<Vector256<ulong>>(pResult);
+            return Create(
+                Vector128.Create(e0, e1),
+                Vector128.Create(e2, e3)
+            );
         }
 
-        /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance from two <see cref="Vector128{Byte}" /> instances.</summary>
+        /// <summary>Creates a new <see cref="Vector256{T}" /> instance from two <see cref="Vector128{T}" /> instances.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
-        /// <returns>A new <see cref="Vector256{Byte}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+        /// <returns>A new <see cref="Vector256{T}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="lower" /> and <paramref name="upper" /> (<typeparamref name="T" />) is not supported.</exception>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<byte> Create(Vector128<byte> lower, Vector128<byte> upper)
+        internal static Vector256<T> Create<T>(Vector128<T> lower, Vector128<T> upper)
+            where T : struct
         {
             if (Avx.IsSupported)
             {
-                Vector256<byte> result = lower.ToVector256Unsafe();
+                Vector256<T> result = lower.ToVector256Unsafe();
                 return result.WithUpper(upper);
             }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<byte> SoftwareFallback(Vector128<byte> lower, Vector128<byte> upper)
+            else
             {
-                Vector256<byte> result256 = Vector256<byte>.Zero;
+                ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+                Unsafe.SkipInit(out Vector256<T> result);
 
-                ref Vector128<byte> result128 = ref Unsafe.As<Vector256<byte>, Vector128<byte>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
+                result.SetLowerUnsafe(lower);
+                result.SetUpperUnsafe(upper);
 
-                return result256;
+                return result;
             }
         }
 
+        /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance from two <see cref="Vector128{Byte}" /> instances.</summary>
+        /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
+        /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
+        /// <returns>A new <see cref="Vector256{Byte}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<byte> Create(Vector128<byte> lower, Vector128<byte> upper) => Create<byte>(lower, upper);
+
         /// <summary>Creates a new <see cref="Vector256{Double}" /> instance from two <see cref="Vector128{Double}" /> instances.</summary>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256d _mm256_setr_m128d (__m128d lo, __m128d hi)</remarks>
         /// <returns>A new <see cref="Vector256{Double}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256d _mm256_setr_m128d (__m128d lo, __m128d hi)</remarks>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<double> Create(Vector128<double> lower, Vector128<double> upper)
-        {
-            if (Avx.IsSupported)
-            {
-                Vector256<double> result = lower.ToVector256Unsafe();
-                return result.WithUpper(upper);
-            }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<double> SoftwareFallback(Vector128<double> lower, Vector128<double> upper)
-            {
-                Vector256<double> result256 = Vector256<double>.Zero;
-
-                ref Vector128<double> result128 = ref Unsafe.As<Vector256<double>, Vector128<double>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
-
-                return result256;
-            }
-        }
+        public static Vector256<double> Create(Vector128<double> lower, Vector128<double> upper) => Create<double>(lower, upper);
 
         /// <summary>Creates a new <see cref="Vector256{Int16}" /> instance from two <see cref="Vector128{Int16}" /> instances.</summary>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int16}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<short> Create(Vector128<short> lower, Vector128<short> upper)
-        {
-            if (Avx.IsSupported)
-            {
-                Vector256<short> result = lower.ToVector256Unsafe();
-                return result.WithUpper(upper);
-            }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<short> SoftwareFallback(Vector128<short> lower, Vector128<short> upper)
-            {
-                Vector256<short> result256 = Vector256<short>.Zero;
-
-                ref Vector128<short> result128 = ref Unsafe.As<Vector256<short>, Vector128<short>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
-
-                return result256;
-            }
-        }
+        public static Vector256<short> Create(Vector128<short> lower, Vector128<short> upper) => Create<short>(lower, upper);
 
         /// <summary>Creates a new <see cref="Vector256{Int32}" /> instance from two <see cref="Vector128{Int32}" /> instances.</summary>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_m128i (__m128i lo, __m128i hi)</remarks>
         /// <returns>A new <see cref="Vector256{Int32}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_m128i (__m128i lo, __m128i hi)</remarks>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<int> Create(Vector128<int> lower, Vector128<int> upper)
-        {
-            if (Avx.IsSupported)
-            {
-                Vector256<int> result = lower.ToVector256Unsafe();
-                return result.WithUpper(upper);
-            }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<int> SoftwareFallback(Vector128<int> lower, Vector128<int> upper)
-            {
-                Vector256<int> result256 = Vector256<int>.Zero;
-
-                ref Vector128<int> result128 = ref Unsafe.As<Vector256<int>, Vector128<int>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
-
-                return result256;
-            }
-        }
+        public static Vector256<int> Create(Vector128<int> lower, Vector128<int> upper) => Create<int>(lower, upper);
 
         /// <summary>Creates a new <see cref="Vector256{Int64}" /> instance from two <see cref="Vector128{Int64}" /> instances.</summary>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int64}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<long> Create(Vector128<long> lower, Vector128<long> upper)
-        {
-            if (Avx.IsSupported)
-            {
-                Vector256<long> result = lower.ToVector256Unsafe();
-                return result.WithUpper(upper);
-            }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<long> SoftwareFallback(Vector128<long> lower, Vector128<long> upper)
-            {
-                Vector256<long> result256 = Vector256<long>.Zero;
+        public static Vector256<long> Create(Vector128<long> lower, Vector128<long> upper) => Create<long>(lower, upper);
 
-                ref Vector128<long> result128 = ref Unsafe.As<Vector256<long>, Vector128<long>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
+        /// <summary>Creates a new <see cref="Vector256{IntPtr}" /> instance from two <see cref="Vector128{IntPtr}" /> instances.</summary>
+        /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
+        /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
+        /// <returns>A new <see cref="Vector256{IntPtr}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<nint> Create(Vector128<nint> lower, Vector128<nint> upper) => Create<nint>(lower, upper);
 
-                return result256;
-            }
-        }
+        /// <summary>Creates a new <see cref="Vector256{UIntPtr}" /> instance from two <see cref="Vector128{UIntPtr}" /> instances.</summary>
+        /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
+        /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
+        /// <returns>A new <see cref="Vector256{UIntPtr}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<nuint> Create(Vector128<nuint> lower, Vector128<nuint> upper) => Create<nuint>(lower, upper);
 
         /// <summary>Creates a new <see cref="Vector256{SByte}" /> instance from two <see cref="Vector128{SByte}" /> instances.</summary>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{SByte}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static unsafe Vector256<sbyte> Create(Vector128<sbyte> lower, Vector128<sbyte> upper)
-        {
-            if (Avx.IsSupported)
-            {
-                Vector256<sbyte> result = lower.ToVector256Unsafe();
-                return result.WithUpper(upper);
-            }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<sbyte> SoftwareFallback(Vector128<sbyte> lower, Vector128<sbyte> upper)
-            {
-                Vector256<sbyte> result256 = Vector256<sbyte>.Zero;
-
-                ref Vector128<sbyte> result128 = ref Unsafe.As<Vector256<sbyte>, Vector128<sbyte>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
-
-                return result256;
-            }
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<sbyte> Create(Vector128<sbyte> lower, Vector128<sbyte> upper) => Create<sbyte>(lower, upper);
 
         /// <summary>Creates a new <see cref="Vector256{Single}" /> instance from two <see cref="Vector128{Single}" /> instances.</summary>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256 _mm256_setr_m128 (__m128 lo, __m128 hi)</remarks>
         /// <returns>A new <see cref="Vector256{Single}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
+        /// <remarks>On x86, this method corresponds to __m256 _mm256_setr_m128 (__m128 lo, __m128 hi)</remarks>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<float> Create(Vector128<float> lower, Vector128<float> upper)
-        {
-            if (Avx.IsSupported)
-            {
-                Vector256<float> result = lower.ToVector256Unsafe();
-                return result.WithUpper(upper);
-            }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<float> SoftwareFallback(Vector128<float> lower, Vector128<float> upper)
-            {
-                Vector256<float> result256 = Vector256<float>.Zero;
-
-                ref Vector128<float> result128 = ref Unsafe.As<Vector256<float>, Vector128<float>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
-
-                return result256;
-            }
-        }
+        public static Vector256<float> Create(Vector128<float> lower, Vector128<float> upper) => Create<float>(lower, upper);
 
         /// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance from two <see cref="Vector128{UInt16}" /> instances.</summary>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt16}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ushort> Create(Vector128<ushort> lower, Vector128<ushort> upper)
-        {
-            if (Avx.IsSupported)
-            {
-                Vector256<ushort> result = lower.ToVector256Unsafe();
-                return result.WithUpper(upper);
-            }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<ushort> SoftwareFallback(Vector128<ushort> lower, Vector128<ushort> upper)
-            {
-                Vector256<ushort> result256 = Vector256<ushort>.Zero;
-
-                ref Vector128<ushort> result128 = ref Unsafe.As<Vector256<ushort>, Vector128<ushort>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
-
-                return result256;
-            }
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ushort> Create(Vector128<ushort> lower, Vector128<ushort> upper) => Create<ushort>(lower, upper);
 
         /// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance from two <see cref="Vector128{UInt32}" /> instances.</summary>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
-        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_m128i (__m128i lo, __m128i hi)</remarks>
         /// <returns>A new <see cref="Vector256{UInt32}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        /// <remarks>On x86, this method corresponds to __m256i _mm256_setr_m128i (__m128i lo, __m128i hi)</remarks>
         [CLSCompliant(false)]
-        public static unsafe Vector256<uint> Create(Vector128<uint> lower, Vector128<uint> upper)
-        {
-            if (Avx.IsSupported)
-            {
-                Vector256<uint> result = lower.ToVector256Unsafe();
-                return result.WithUpper(upper);
-            }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<uint> SoftwareFallback(Vector128<uint> lower, Vector128<uint> upper)
-            {
-                Vector256<uint> result256 = Vector256<uint>.Zero;
-
-                ref Vector128<uint> result128 = ref Unsafe.As<Vector256<uint>, Vector128<uint>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
-
-                return result256;
-            }
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<uint> Create(Vector128<uint> lower, Vector128<uint> upper) => Create<uint>(lower, upper);
 
         /// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance from two <see cref="Vector128{UInt64}" /> instances.</summary>
         /// <param name="lower">The value that the lower 128-bits will be initialized to.</param>
         /// <param name="upper">The value that the upper 128-bits will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt64}" /> initialized from <paramref name="lower" /> and <paramref name="upper" />.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ulong> Create(Vector128<ulong> lower, Vector128<ulong> upper)
-        {
-            if (Avx.IsSupported)
-            {
-                Vector256<ulong> result = lower.ToVector256Unsafe();
-                return result.WithUpper(upper);
-            }
-
-            return SoftwareFallback(lower, upper);
-
-            static Vector256<ulong> SoftwareFallback(Vector128<ulong> lower, Vector128<ulong> upper)
-            {
-                Vector256<ulong> result256 = Vector256<ulong>.Zero;
-
-                ref Vector128<ulong> result128 = ref Unsafe.As<Vector256<ulong>, Vector128<ulong>>(ref result256);
-                result128 = lower;
-                Unsafe.Add(ref result128, 1) = upper;
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ulong> Create(Vector128<ulong> lower, Vector128<ulong> upper) => Create<ulong>(lower, upper);
 
-                return result256;
-            }
+        /// <summary>Creates a new <see cref="Vector256{T}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="value">The value that element 0 will be initialized to.</param>
+        /// <returns>A new <see cref="Vector256{T}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="value" /> (<typeparamref name="T" />) is not supported.</exception>
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<T> CreateScalar<T>(T value)
+            where T : struct
+        {
+            return Vector128.CreateScalar(value).ToVector256();
         }
 
         /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<byte> CreateScalar(byte value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<byte> SoftwareFallback(byte value)
-            {
-                Vector256<byte> result = Vector256<byte>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<byte>, byte>(ref result), value);
-                return result;
-            }
-        }
+        public static Vector256<byte> CreateScalar(byte value) => CreateScalar<byte>(value);
 
         /// <summary>Creates a new <see cref="Vector256{Double}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Double}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<double> CreateScalar(double value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<double> SoftwareFallback(double value)
-            {
-                Vector256<double> result = Vector256<double>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<double>, byte>(ref result), value);
-                return result;
-            }
-        }
+        public static Vector256<double> CreateScalar(double value) => CreateScalar<double>(value);
 
         /// <summary>Creates a new <see cref="Vector256{Int16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<short> CreateScalar(short value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<short> SoftwareFallback(short value)
-            {
-                Vector256<short> result = Vector256<short>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<short>, byte>(ref result), value);
-                return result;
-            }
-        }
+        public static Vector256<short> CreateScalar(short value) => CreateScalar<short>(value);
 
         /// <summary>Creates a new <see cref="Vector256{Int32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<int> CreateScalar(int value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<int> SoftwareFallback(int value)
-            {
-                Vector256<int> result = Vector256<int>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<int>, byte>(ref result), value);
-                return result;
-            }
-        }
+        public static Vector256<int> CreateScalar(int value) => CreateScalar<int>(value);
 
         /// <summary>Creates a new <see cref="Vector256{Int64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<long> CreateScalar(long value)
-        {
-            if (Sse2.X64.IsSupported && Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<long> SoftwareFallback(long value)
-            {
-                Vector256<long> result = Vector256<long>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<long>, byte>(ref result), value);
-                return result;
-            }
-        }
+        public static Vector256<long> CreateScalar(long value) => CreateScalar<long>(value);
 
         /// <summary>Creates a new <see cref="Vector256{IntPtr}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{IntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<nint> CreateScalar(nint value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Create(value);
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<nint> SoftwareFallback(nint value)
-            {
-#if TARGET_64BIT
-                return CreateScalar((long)value).AsNInt();
-#else
-                return CreateScalar((int)value).AsNInt();
-#endif
-            }
-        }
+        public static Vector256<nint> CreateScalar(nint value) => CreateScalar<nint>(value);
 
         /// <summary>Creates a new <see cref="Vector256{UIntPtr}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UIntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements initialized to zero.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static unsafe Vector256<nuint> CreateScalar(nuint value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Create(value);
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<nuint> SoftwareFallback(nuint value)
-            {
-#if TARGET_64BIT
-                return CreateScalar((ulong)value).AsNUInt();
-#else
-                return CreateScalar((uint)value).AsNUInt();
-#endif
-            }
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nuint> CreateScalar(nuint value) => CreateScalar<nuint>(value);
 
         /// <summary>Creates a new <see cref="Vector256{SByte}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{SByte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static unsafe Vector256<sbyte> CreateScalar(sbyte value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<sbyte> SoftwareFallback(sbyte value)
-            {
-                Vector256<sbyte> result = Vector256<sbyte>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<sbyte>, byte>(ref result), value);
-                return result;
-            }
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<sbyte> CreateScalar(sbyte value) => CreateScalar<sbyte>(value);
 
         /// <summary>Creates a new <see cref="Vector256{Single}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Single}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<float> CreateScalar(float value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<float> SoftwareFallback(float value)
-            {
-                Vector256<float> result = Vector256<float>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<float>, byte>(ref result), value);
-                return result;
-            }
-        }
+        public static Vector256<float> CreateScalar(float value) => CreateScalar<float>(value);
 
         /// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ushort> CreateScalar(ushort value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<ushort> SoftwareFallback(ushort value)
-            {
-                Vector256<ushort> result = Vector256<ushort>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<ushort>, byte>(ref result), value);
-                return result;
-            }
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ushort> CreateScalar(ushort value) => CreateScalar<ushort>(value);
 
         /// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static unsafe Vector256<uint> CreateScalar(uint value)
-        {
-            if (Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<uint> SoftwareFallback(uint value)
-            {
-                Vector256<uint> result = Vector256<uint>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<uint>, byte>(ref result), value);
-                return result;
-            }
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<uint> CreateScalar(uint value) => CreateScalar<uint>(value);
 
         /// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance with the first element initialized to the specified value and the remaining elements initialized to zero.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements initialized to zero.</returns>
-        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ulong> CreateScalar(ulong value)
-        {
-            if (Sse2.X64.IsSupported && Avx.IsSupported)
-            {
-                return Vector128.CreateScalar(value).ToVector256();
-            }
-
-            return SoftwareFallback(value);
-
-            static Vector256<ulong> SoftwareFallback(ulong value)
-            {
-                Vector256<ulong> result = Vector256<ulong>.Zero;
-                Unsafe.WriteUnaligned(ref Unsafe.As<Vector256<ulong>, byte>(ref result), value);
-                return result;
-            }
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ulong> CreateScalar(ulong value) => CreateScalar<ulong>(value);
 
-        /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
+        /// <summary>Creates a new <see cref="Vector256{T}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="value">The value that element 0 will be initialized to.</param>
-        /// <returns>A new <see cref="Vector256{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        /// <returns>A new <see cref="Vector256{T}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="value" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
-        public static unsafe Vector256<byte> CreateScalarUnsafe(byte value)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<T> CreateScalarUnsafe<T>(T value)
+            where T : struct
         {
             // This relies on us stripping the "init" flag from the ".locals"
             // declaration to let the upper bits be uninitialized.
 
-            byte* pResult = stackalloc byte[32];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<byte>>(pResult);
+            // Validate T against the Vector256 element types first; unsupported types throw NotSupportedException.
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+            Unsafe.SkipInit(out Vector256<T> result);
+
+            result.SetElementUnsafe(0, value);
+            return result;
         }
 
+        /// <summary>Creates a new <see cref="Vector256{Byte}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
+        /// <param name="value">The value that element 0 will be initialized to.</param>
+        /// <returns>A new <see cref="Vector256{Byte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<byte> CreateScalarUnsafe(byte value) => CreateScalarUnsafe<byte>(value);
+
         /// <summary>Creates a new <see cref="Vector256{Double}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Double}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
-        public static unsafe Vector256<double> CreateScalarUnsafe(double value)
-        {
-            // This relies on us stripping the "init" flag from the ".locals"
-            // declaration to let the upper bits be uninitialized.
-
-            double* pResult = stackalloc double[4];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<double>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<double> CreateScalarUnsafe(double value) => CreateScalarUnsafe<double>(value);
 
         /// <summary>Creates a new <see cref="Vector256{Int16}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
-        public static unsafe Vector256<short> CreateScalarUnsafe(short value)
-        {
-            // This relies on us stripping the "init" flag from the ".locals"
-            // declaration to let the upper bits be uninitialized.
-
-            short* pResult = stackalloc short[16];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<short>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<short> CreateScalarUnsafe(short value) => CreateScalarUnsafe<short>(value);
 
         /// <summary>Creates a new <see cref="Vector256{Int32}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
-        public static unsafe Vector256<int> CreateScalarUnsafe(int value)
-        {
-            // This relies on us stripping the "init" flag from the ".locals"
-            // declaration to let the upper bits be uninitialized.
-
-            int* pResult = stackalloc int[8];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<int>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<int> CreateScalarUnsafe(int value) => CreateScalarUnsafe<int>(value);
 
         /// <summary>Creates a new <see cref="Vector256{Int64}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Int64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
-        public static unsafe Vector256<long> CreateScalarUnsafe(long value)
-        {
-            // This relies on us stripping the "init" flag from the ".locals"
-            // declaration to let the upper bits be uninitialized.
-
-            long* pResult = stackalloc long[4];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<long>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<long> CreateScalarUnsafe(long value) => CreateScalarUnsafe<long>(value);
 
         /// <summary>Creates a new <see cref="Vector256{IntPtr}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{IntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
-        public static unsafe Vector256<nint> CreateScalarUnsafe(nint value)
-        {
-#if TARGET_64BIT
-            return CreateScalarUnsafe((long)value).AsNInt();
-#else
-            return CreateScalarUnsafe((int)value).AsNInt();
-#endif
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nint> CreateScalarUnsafe(nint value) => CreateScalarUnsafe<nint>(value);
 
         /// <summary>Creates a new <see cref="Vector256{UIntPtr}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UIntPtr}" /> instance with the first element initialized to <paramref name="value"/> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<nuint> CreateScalarUnsafe(nuint value)
-        {
-#if TARGET_64BIT
-            return CreateScalarUnsafe((ulong)value).AsNUInt();
-#else
-            return CreateScalarUnsafe((uint)value).AsNUInt();
-#endif
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nuint> CreateScalarUnsafe(nuint value) => CreateScalarUnsafe<nuint>(value);
 
         /// <summary>Creates a new <see cref="Vector256{SByte}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{SByte}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<sbyte> CreateScalarUnsafe(sbyte value)
-        {
-            // This relies on us stripping the "init" flag from the ".locals"
-            // declaration to let the upper bits be uninitialized.
-
-            sbyte* pResult = stackalloc sbyte[32];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<sbyte>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<sbyte> CreateScalarUnsafe(sbyte value) => CreateScalarUnsafe<sbyte>(value);
 
         /// <summary>Creates a new <see cref="Vector256{Single}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{Single}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
-        public static unsafe Vector256<float> CreateScalarUnsafe(float value)
-        {
-            // This relies on us stripping the "init" flag from the ".locals"
-            // declaration to let the upper bits be uninitialized.
-
-            float* pResult = stackalloc float[8];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<float>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> CreateScalarUnsafe(float value) => CreateScalarUnsafe<float>(value);
 
         /// <summary>Creates a new <see cref="Vector256{UInt16}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt16}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ushort> CreateScalarUnsafe(ushort value)
-        {
-            // This relies on us stripping the "init" flag from the ".locals"
-            // declaration to let the upper bits be uninitialized.
-
-            ushort* pResult = stackalloc ushort[16];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<ushort>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ushort> CreateScalarUnsafe(ushort value) => CreateScalarUnsafe<ushort>(value);
 
         /// <summary>Creates a new <see cref="Vector256{UInt32}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt32}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<uint> CreateScalarUnsafe(uint value)
-        {
-            // This relies on us stripping the "init" flag from the ".locals"
-            // declaration to let the upper bits be uninitialized.
-
-            uint* pResult = stackalloc uint[8];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<uint>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<uint> CreateScalarUnsafe(uint value) => CreateScalarUnsafe<uint>(value);
 
         /// <summary>Creates a new <see cref="Vector256{UInt64}" /> instance with the first element initialized to the specified value and the remaining elements left uninitialized.</summary>
         /// <param name="value">The value that element 0 will be initialized to.</param>
         /// <returns>A new <see cref="Vector256{UInt64}" /> instance with the first element initialized to <paramref name="value" /> and the remaining elements left uninitialized.</returns>
         [Intrinsic]
         [CLSCompliant(false)]
-        public static unsafe Vector256<ulong> CreateScalarUnsafe(ulong value)
-        {
-            // This relies on us stripping the "init" flag from the ".locals"
-            // declaration to let the upper bits be uninitialized.
-
-            ulong* pResult = stackalloc ulong[4];
-            pResult[0] = value;
-            return Unsafe.AsRef<Vector256<ulong>>(pResult);
-        }
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ulong> CreateScalarUnsafe(ulong value) => CreateScalarUnsafe<ulong>(value);
 
         /// <summary>Divides two vectors to compute their quotient.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector that will be divided by <paramref name="right" />.</param>
         /// <param name="right">The vector that will divide <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The quotient of <paramref name="left" /> divided by <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Divide<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => left / right;
+            where T : struct
+        {
+            return Create(
+                Vector128.Divide(left.GetLower(), right.GetLower()),
+                Vector128.Divide(left.GetUpper(), right.GetUpper())
+            );
+        }
 
         /// <summary>Computes the dot product of two vectors.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector that will be dotted with <paramref name="right" />.</param>
         /// <param name="right">The vector that will be dotted with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The dot product of <paramref name="left" /> and <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static T Dot<T>(Vector256<T> left, Vector256<T> right)
             where T : struct
         {
-            T result = default;
-
             // Doing this as Dot(lower) + Dot(upper) is important for floating-point determinism
             // This is because the underlying dpps instruction on x86/x64 will do this equivalently
             // and otherwise the software vs accelerated implementations may differ in returned result.
 
-            result = Scalar<T>.Add(result, Vector128.Dot(left.GetLower(), right.GetLower()));
+            T result = Vector128.Dot(left.GetLower(), right.GetLower());
             result = Scalar<T>.Add(result, Vector128.Dot(left.GetUpper(), right.GetUpper()));
-
             return result;
         }
 
         /// <summary>Compares two vectors to determine if they are equal on a per-element basis.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>A vector whose elements are all-bits-set or zero, depending on if the corresponding elements in <paramref name="left" /> and <paramref name="right" /> were equal.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Equals<T>(Vector256<T> left, Vector256<T> right)
             where T : struct
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                T value = Scalar<T>.Equals(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar<T>.AllBitsSet : default;
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Equals(left.GetLower(), right.GetLower()),
+                Vector128.Equals(left.GetUpper(), right.GetUpper())
+            );
         }
 
         /// <summary>Compares two vectors to determine if all elements are equal.</summary>
@@ -2184,40 +1431,44 @@ namespace System.Runtime.Intrinsics
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if all elements in <paramref name="left" /> were equal to the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool EqualsAll<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => left == right;
+            where T : struct
+        {
+            return Vector128.EqualsAll(left.GetLower(), right.GetLower())
+                && Vector128.EqualsAll(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Compares two vectors to determine if any elements are equal.</summary>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if any element in <paramref name="left" /> was equal to the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool EqualsAny<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => Equals(left, right).As<T, ulong>() != Vector256<ulong>.Zero;
+            where T : struct
+        {
+            return Vector128.EqualsAny(left.GetLower(), right.GetLower())
+                || Vector128.EqualsAny(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Extracts the most significant bit from each element in a vector.</summary>
         /// <param name="vector">The vector whose elements should have their most significant bit extracted.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The packed most significant bits extracted from the elements in <paramref name="vector" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static uint ExtractMostSignificantBits<T>(this Vector256<T> vector)
             where T : struct
         {
-            uint result = 0;
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                uint value = Scalar<T>.ExtractMostSignificantBit(vector.GetElementUnsafe(index));
-                value <<= index;
-                result |= value;
-            }
-
+            uint result = vector.GetLower().ExtractMostSignificantBits();
+            result |= vector.GetUpper().ExtractMostSignificantBits() << Vector128<T>.Count;
             return result;
         }
 
@@ -2226,17 +1477,13 @@ namespace System.Runtime.Intrinsics
         /// <returns>A vector whose elements are the floor of the elements in <paramref name="vector" />.</returns>
         /// <seealso cref="MathF.Floor(float)" />
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<float> Floor(Vector256<float> vector)
         {
-            Unsafe.SkipInit(out Vector256<float> result);
-
-            for (int index = 0; index < Vector256<float>.Count; index++)
-            {
-                float value = Scalar<float>.Floor(vector.GetElementUnsafe(index));
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Floor(vector.GetLower()),
+                Vector128.Floor(vector.GetUpper())
+            );
         }
 
         /// <summary>Computes the floor of each element in a vector.</summary>
@@ -2244,17 +1491,13 @@ namespace System.Runtime.Intrinsics
         /// <returns>A vector whose elements are the floor of the elements in <paramref name="vector" />.</returns>
         /// <seealso cref="Math.Floor(double)" />
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<double> Floor(Vector256<double> vector)
         {
-            Unsafe.SkipInit(out Vector256<double> result);
-
-            for (int index = 0; index < Vector256<double>.Count; index++)
-            {
-                double value = Scalar<double>.Floor(vector.GetElementUnsafe(index));
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Floor(vector.GetLower()),
+                Vector128.Floor(vector.GetUpper())
+            );
         }
 
         /// <summary>Gets the element at the specified index.</summary>
@@ -2262,9 +1505,10 @@ namespace System.Runtime.Intrinsics
         /// <param name="vector">The vector to get the element from.</param>
         /// <param name="index">The index of the element to get.</param>
         /// <returns>The value of the element at <paramref name="index" />.</returns>
-        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static T GetElement<T>(this Vector256<T> vector, int index)
             where T : struct
         {
@@ -2284,11 +1528,12 @@ namespace System.Runtime.Intrinsics
         /// <returns>The value of the lower 128-bits as a new <see cref="Vector128{T}" />.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector128<T> GetLower<T>(this Vector256<T> vector)
             where T : struct
         {
             ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-            return Unsafe.As<Vector256<T>, Vector128<T>>(ref vector);
+            return vector._lower;
         }
 
         /// <summary>Gets the value of the upper 128-bits as a new <see cref="Vector128{T}" />.</summary>
@@ -2307,255 +1552,271 @@ namespace System.Runtime.Intrinsics
                 // All integral types generate the same instruction, so just pick one rather than handling each T separately
                 return Avx2.ExtractVector128(vector.AsByte(), 1).As<byte, T>();
             }
-
-            if (Avx.IsSupported)
+            else if (Avx.IsSupported)
             {
                 // All floating-point types generate the same instruction, so just pick one rather than handling each T separately
                 // We also just fallback to this for integral types if AVX2 isn't supported, since that is still faster than software
                 return Avx.ExtractVector128(vector.AsSingle(), 1).As<float, T>();
             }
-
-            return SoftwareFallback(vector);
-
-            static Vector128<T> SoftwareFallback(Vector256<T> vector)
+            else
             {
-                ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref vector);
-                return Unsafe.Add(ref lower, 1);
+                return vector._upper;
             }
         }
 
         /// <summary>Compares two vectors to determine which is greater on a per-element basis.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>A vector whose elements are all-bits-set or zero, depending on whether the corresponding element in <paramref name="left" /> was greater than the one in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> GreaterThan<T>(Vector256<T> left, Vector256<T> right)
             where T : struct
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                T value = Scalar<T>.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar<T>.AllBitsSet : default;
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.GreaterThan(left.GetLower(), right.GetLower()),
+                Vector128.GreaterThan(left.GetUpper(), right.GetUpper())
+            );
         }
 
         /// <summary>Compares two vectors to determine if all elements are greater.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if all elements in <paramref name="left" /> were greater than the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool GreaterThanAll<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => GreaterThan(left, right).As<T, ulong>() == Vector256<ulong>.AllBitsSet;
+            where T : struct
+        {
+            return Vector128.GreaterThanAll(left.GetLower(), right.GetLower())
+                && Vector128.GreaterThanAll(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Compares two vectors to determine if any elements are greater.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if any element in <paramref name="left" /> was greater than the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool GreaterThanAny<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => GreaterThan(left, right).As<T, ulong>() != Vector256<ulong>.Zero;
+            where T : struct
+        {
+            return Vector128.GreaterThanAny(left.GetLower(), right.GetLower())
+                || Vector128.GreaterThanAny(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Compares two vectors to determine which is greater or equal on a per-element basis.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>A vector whose elements are all-bits-set or zero, depending on whether the corresponding element in <paramref name="left" /> was greater than or equal to the one in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> GreaterThanOrEqual<T>(Vector256<T> left, Vector256<T> right)
             where T : struct
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                T value = Scalar<T>.GreaterThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar<T>.AllBitsSet : default;
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.GreaterThanOrEqual(left.GetLower(), right.GetLower()),
+                Vector128.GreaterThanOrEqual(left.GetUpper(), right.GetUpper())
+            );
         }
 
         /// <summary>Compares two vectors to determine if all elements are greater or equal.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if all elements in <paramref name="left" /> were greater than or equal to the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool GreaterThanOrEqualAll<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => GreaterThanOrEqual(left, right).As<T, ulong>() == Vector256<ulong>.AllBitsSet;
+            where T : struct
+        {
+            return Vector128.GreaterThanOrEqualAll(left.GetLower(), right.GetLower())
+                && Vector128.GreaterThanOrEqualAll(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Compares two vectors to determine if any elements are greater or equal.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if any element in <paramref name="left" /> was greater than or equal to the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool GreaterThanOrEqualAny<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => GreaterThanOrEqual(left, right).As<T, ulong>() != Vector256<ulong>.Zero;
+            where T : struct
+        {
+            return Vector128.GreaterThanOrEqualAny(left.GetLower(), right.GetLower())
+                || Vector128.GreaterThanOrEqualAny(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Compares two vectors to determine which is less on a per-element basis.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>A vector whose elements are all-bits-set or zero, depending on whether the corresponding element in <paramref name="left" /> was less than the one in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> LessThan<T>(Vector256<T> left, Vector256<T> right)
             where T : struct
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                T value = Scalar<T>.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar<T>.AllBitsSet : default;
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.LessThan(left.GetLower(), right.GetLower()),
+                Vector128.LessThan(left.GetUpper(), right.GetUpper())
+            );
         }
 
         /// <summary>Compares two vectors to determine if all elements are less.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if all elements in <paramref name="left" /> were less than the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool LessThanAll<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => LessThan(left, right).As<T, ulong>() == Vector256<ulong>.AllBitsSet;
+            where T : struct
+        {
+            return Vector128.LessThanAll(left.GetLower(), right.GetLower())
+                && Vector128.LessThanAll(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Compares two vectors to determine if any elements are less.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if any element in <paramref name="left" /> was less than the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool LessThanAny<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => LessThan(left, right).As<T, ulong>() != Vector256<ulong>.Zero;
+            where T : struct
+        {
+            return Vector128.LessThanAny(left.GetLower(), right.GetLower())
+                || Vector128.LessThanAny(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Compares two vectors to determine which is less or equal on a per-element basis.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>A vector whose elements are all-bits-set or zero, depending on whether the corresponding element in <paramref name="left" /> was less than or equal to the one in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> LessThanOrEqual<T>(Vector256<T> left, Vector256<T> right)
             where T : struct
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                T value = Scalar<T>.LessThanOrEqual(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? Scalar<T>.AllBitsSet : default;
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.LessThanOrEqual(left.GetLower(), right.GetLower()),
+                Vector128.LessThanOrEqual(left.GetUpper(), right.GetUpper())
+            );
         }
 
         /// <summary>Compares two vectors to determine if all elements are less or equal.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if all elements in <paramref name="left" /> were less than or equal to the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool LessThanOrEqualAll<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => LessThanOrEqual(left, right).As<T, ulong>() == Vector256<ulong>.AllBitsSet;
+            where T : struct
+        {
+            return Vector128.LessThanOrEqualAll(left.GetLower(), right.GetLower())
+                && Vector128.LessThanOrEqualAll(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Compares two vectors to determine if any elements are less or equal.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns><c>true</c> if any element in <paramref name="left" /> was less than or equal to the corresponding element in <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool LessThanOrEqualAny<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => LessThanOrEqual(left, right).As<T, ulong>() != Vector256<ulong>.Zero;
+            where T : struct
+        {
+            return Vector128.LessThanOrEqualAny(left.GetLower(), right.GetLower())
+                || Vector128.LessThanOrEqualAny(left.GetUpper(), right.GetUpper());
+        }
 
         /// <summary>Loads a vector from the given source.</summary>
-        /// <param name="source">The source from which the vector will be loaded.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="source">The source from which the vector will be loaded.</param>
         /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<T> Load<T>(T* source)
-            where T : unmanaged
-        {
-            return *(Vector256<T>*)source;
-        }
+        public static Vector256<T> Load<T>(T* source)
+            where T : unmanaged => LoadUnsafe(ref *source);
 
         /// <summary>Loads a vector from the given aligned source.</summary>
-        /// <param name="source">The aligned source from which the vector will be loaded.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="source">The aligned source from which the vector will be loaded.</param>
         /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<T> LoadAligned<T>(T* source)
+        public static Vector256<T> LoadAligned<T>(T* source)
             where T : unmanaged
         {
             ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
 
-            if (((nuint)source % Alignment) != 0)
+            if (((nuint)(source) % Alignment) != 0)
             {
-                throw new AccessViolationException();
+                ThrowHelper.ThrowAccessViolationException();
             }
 
-            return *(Vector256<T>*)source;
+            return *(Vector256<T>*)(source);
         }
 
         /// <summary>Loads a vector from the given aligned source.</summary>
-        /// <param name="source">The aligned source from which the vector will be loaded.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="source">The aligned source from which the vector will be loaded.</param>
         /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <remarks>This method may bypass the cache on certain platforms.</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe Vector256<T> LoadAlignedNonTemporal<T>(T* source)
-            where T : unmanaged
-        {
-            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-
-            if (((nuint)source % Alignment) != 0)
-            {
-                throw new AccessViolationException();
-            }
-
-            return *(Vector256<T>*)source;
-        }
+        public static Vector256<T> LoadAlignedNonTemporal<T>(T* source)
+            where T : unmanaged => LoadAligned(source);
 
         /// <summary>Loads a vector from the given source.</summary>
-        /// <param name="source">The source from which the vector will be loaded.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="source">The source from which the vector will be loaded.</param>
         /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> LoadUnsafe<T>(ref T source)
             where T : struct
         {
             ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-            return Unsafe.ReadUnaligned<Vector256<T>>(ref Unsafe.As<T, byte>(ref source));
+            ref byte address = ref Unsafe.As<T, byte>(ref source);
+            return Unsafe.ReadUnaligned<Vector256<T>>(ref address);
         }
 
         /// <summary>Loads a vector from the given source and element offset.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="source">The source to which <paramref name="elementOffset" /> will be added before loading the vector.</param>
         /// <param name="elementOffset">The element offset from <paramref name="source" /> from which the vector will be loaded.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The vector loaded from <paramref name="source" /> plus <paramref name="elementOffset" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -2568,122 +1829,117 @@ namespace System.Runtime.Intrinsics
         }
 
         /// <summary>Computes the maximum of two vectors on a per-element basis.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>A vector whose elements are the maximum of the corresponding elements in <paramref name="left" /> and <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Max<T>(Vector256<T> left, Vector256<T> right)
             where T : struct
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                T value = Scalar<T>.GreaterThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? left.GetElementUnsafe(index) : right.GetElementUnsafe(index);
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Max(left.GetLower(), right.GetLower()),
+                Vector128.Max(left.GetUpper(), right.GetUpper())
+            );
         }
 
         /// <summary>Computes the minimum of two vectors on a per-element basis.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>A vector whose elements are the minimum of the corresponding elements in <paramref name="left" /> and <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Min<T>(Vector256<T> left, Vector256<T> right)
             where T : struct
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                T value = Scalar<T>.LessThan(left.GetElementUnsafe(index), right.GetElementUnsafe(index)) ? left.GetElementUnsafe(index) : right.GetElementUnsafe(index);
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Min(left.GetLower(), right.GetLower()),
+                Vector128.Min(left.GetUpper(), right.GetUpper())
+            );
         }
 
         /// <summary>Multiplies two vectors to compute their element-wise product.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to multiply with <paramref name="right" />.</param>
         /// <param name="right">The vector to multiply with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The element-wise product of <paramref name="left" /> and <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Multiply<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => left * right;
+            where T : struct
+        {
+            return Create(
+                Vector128.Multiply(left.GetLower(), right.GetLower()),
+                Vector128.Multiply(left.GetUpper(), right.GetUpper())
+            );
+        }
 
         /// <summary>Multiplies a vector by a scalar to compute their product.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to multiply with <paramref name="right" />.</param>
         /// <param name="right">The scalar to multiply with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The product of <paramref name="left" /> and <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Multiply<T>(Vector256<T> left, T right)
-            where T : struct => left * right;
+            where T : struct
+        {
+            return Create(
+                Vector128.Multiply(left.GetLower(), right),
+                Vector128.Multiply(left.GetUpper(), right)
+            );
+        }
 
         /// <summary>Multiplies a vector by a scalar to compute their product.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The scalar to multiply with <paramref name="right" />.</param>
         /// <param name="right">The vector to multiply with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The product of <paramref name="left" /> and <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Multiply<T>(T left, Vector256<T> right)
-            where T : struct => left * right;
+            where T : struct
+        {
+            return Create(
+                Vector128.Multiply(left, right.GetLower()),
+                Vector128.Multiply(left, right.GetUpper())
+            );
+        }
 
         /// <summary>Narrows two <see cref="Vector256{Double}"/> instances into one <see cref="Vector256{Single}" />.</summary>
         /// <param name="lower">The vector that will be narrowed to the lower half of the result vector.</param>
         /// <param name="upper">The vector that will be narrowed to the upper half of the result vector.</param>
         /// <returns>A <see cref="Vector256{Single}"/> containing elements narrowed from <paramref name="lower" /> and <paramref name="upper" />.</returns>
         [Intrinsic]
-        public static unsafe Vector256<float> Narrow(Vector256<double> lower, Vector256<double> upper)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<float> Narrow(Vector256<double> lower, Vector256<double> upper)
         {
-            Unsafe.SkipInit(out Vector256<float> result);
-
-            for (int i = 0; i < Vector256<double>.Count; i++)
-            {
-                float value = (float)lower.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            for (int i = Vector256<double>.Count; i < Vector256<float>.Count; i++)
-            {
-                float value = (float)upper.GetElementUnsafe(i - Vector256<double>.Count);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Narrow(lower.GetLower(), lower.GetUpper()),
+                Vector128.Narrow(upper.GetLower(), upper.GetUpper())
+            );
         }
 
         /// <summary>Narrows two <see cref="Vector256{Int16}"/> instances into one <see cref="Vector256{SByte}" />.</summary>
         /// <param name="lower">The vector that will be narrowed to the lower half of the result vector.</param>
         /// <param name="upper">The vector that will be narrowed to the upper half of the result vector.</param>
         /// <returns>A <see cref="Vector256{SByte}"/> containing elements narrowed from <paramref name="lower" /> and <paramref name="upper" />.</returns>
-        [CLSCompliant(false)]
         [Intrinsic]
-        public static unsafe Vector256<sbyte> Narrow(Vector256<short> lower, Vector256<short> upper)
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<sbyte> Narrow(Vector256<short> lower, Vector256<short> upper)
         {
-            Unsafe.SkipInit(out Vector256<sbyte> result);
-
-            for (int i = 0; i < Vector256<short>.Count; i++)
-            {
-                sbyte value = (sbyte)lower.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            for (int i = Vector256<short>.Count; i < Vector256<sbyte>.Count; i++)
-            {
-                sbyte value = (sbyte)upper.GetElementUnsafe(i - Vector256<short>.Count);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Narrow(lower.GetLower(), lower.GetUpper()),
+                Vector128.Narrow(upper.GetLower(), upper.GetUpper())
+            );
         }
 
         /// <summary>Narrows two <see cref="Vector256{Int32}"/> instances into one <see cref="Vector256{Int16}" />.</summary>
@@ -2691,23 +1947,13 @@ namespace System.Runtime.Intrinsics
         /// <param name="upper">The vector that will be narrowed to the upper half of the result vector.</param>
         /// <returns>A <see cref="Vector256{Int16}"/> containing elements narrowed from <paramref name="lower" /> and <paramref name="upper" />.</returns>
         [Intrinsic]
-        public static unsafe Vector256<short> Narrow(Vector256<int> lower, Vector256<int> upper)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<short> Narrow(Vector256<int> lower, Vector256<int> upper)
         {
-            Unsafe.SkipInit(out Vector256<short> result);
-
-            for (int i = 0; i < Vector256<int>.Count; i++)
-            {
-                short value = (short)lower.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            for (int i = Vector256<int>.Count; i < Vector256<short>.Count; i++)
-            {
-                short value = (short)upper.GetElementUnsafe(i - Vector256<int>.Count);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Narrow(lower.GetLower(), lower.GetUpper()),
+                Vector128.Narrow(upper.GetLower(), upper.GetUpper())
+            );
         }
 
         /// <summary>Narrows two <see cref="Vector256{Int64}"/> instances into one <see cref="Vector256{Int32}" />.</summary>
@@ -2715,117 +1961,91 @@ namespace System.Runtime.Intrinsics
         /// <param name="upper">The vector that will be narrowed to the upper half of the result vector.</param>
         /// <returns>A <see cref="Vector256{Int32}"/> containing elements narrowed from <paramref name="lower" /> and <paramref name="upper" />.</returns>
         [Intrinsic]
-        public static unsafe Vector256<int> Narrow(Vector256<long> lower, Vector256<long> upper)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<int> Narrow(Vector256<long> lower, Vector256<long> upper)
         {
-            Unsafe.SkipInit(out Vector256<int> result);
-
-            for (int i = 0; i < Vector256<long>.Count; i++)
-            {
-                int value = (int)lower.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            for (int i = Vector256<long>.Count; i < Vector256<int>.Count; i++)
-            {
-                int value = (int)upper.GetElementUnsafe(i - Vector256<long>.Count);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Narrow(lower.GetLower(), lower.GetUpper()),
+                Vector128.Narrow(upper.GetLower(), upper.GetUpper())
+            );
         }
 
         /// <summary>Narrows two <see cref="Vector256{UInt16}"/> instances into one <see cref="Vector256{Byte}" />.</summary>
         /// <param name="lower">The vector that will be narrowed to the lower half of the result vector.</param>
         /// <param name="upper">The vector that will be narrowed to the upper half of the result vector.</param>
         /// <returns>A <see cref="Vector256{Byte}"/> containing elements narrowed from <paramref name="lower" /> and <paramref name="upper" />.</returns>
-        [CLSCompliant(false)]
         [Intrinsic]
-        public static unsafe Vector256<byte> Narrow(Vector256<ushort> lower, Vector256<ushort> upper)
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<byte> Narrow(Vector256<ushort> lower, Vector256<ushort> upper)
         {
-            Unsafe.SkipInit(out Vector256<byte> result);
-
-            for (int i = 0; i < Vector256<ushort>.Count; i++)
-            {
-                byte value = (byte)lower.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            for (int i = Vector256<ushort>.Count; i < Vector256<byte>.Count; i++)
-            {
-                byte value = (byte)upper.GetElementUnsafe(i - Vector256<ushort>.Count);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Narrow(lower.GetLower(), lower.GetUpper()),
+                Vector128.Narrow(upper.GetLower(), upper.GetUpper())
+            );
         }
 
         /// <summary>Narrows two <see cref="Vector256{UInt32}"/> instances into one <see cref="Vector256{UInt16}" />.</summary>
         /// <param name="lower">The vector that will be narrowed to the lower half of the result vector.</param>
         /// <param name="upper">The vector that will be narrowed to the upper half of the result vector.</param>
         /// <returns>A <see cref="Vector256{UInt16}"/> containing elements narrowed from <paramref name="lower" /> and <paramref name="upper" />.</returns>
-        [CLSCompliant(false)]
         [Intrinsic]
-        public static unsafe Vector256<ushort> Narrow(Vector256<uint> lower, Vector256<uint> upper)
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ushort> Narrow(Vector256<uint> lower, Vector256<uint> upper)
         {
-            Unsafe.SkipInit(out Vector256<ushort> result);
-
-            for (int i = 0; i < Vector256<uint>.Count; i++)
-            {
-                ushort value = (ushort)lower.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            for (int i = Vector256<uint>.Count; i < Vector256<ushort>.Count; i++)
-            {
-                ushort value = (ushort)upper.GetElementUnsafe(i - Vector256<uint>.Count);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Narrow(lower.GetLower(), lower.GetUpper()),
+                Vector128.Narrow(upper.GetLower(), upper.GetUpper())
+            );
         }
 
         /// <summary>Narrows two <see cref="Vector256{UInt64}"/> instances into one <see cref="Vector256{UInt32}" />.</summary>
         /// <param name="lower">The vector that will be narrowed to the lower half of the result vector.</param>
         /// <param name="upper">The vector that will be narrowed to the upper half of the result vector.</param>
         /// <returns>A <see cref="Vector256{UInt32}"/> containing elements narrowed from <paramref name="lower" /> and <paramref name="upper" />.</returns>
-        [CLSCompliant(false)]
         [Intrinsic]
-        public static unsafe Vector256<uint> Narrow(Vector256<ulong> lower, Vector256<ulong> upper)
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<uint> Narrow(Vector256<ulong> lower, Vector256<ulong> upper)
         {
-            Unsafe.SkipInit(out Vector256<uint> result);
-
-            for (int i = 0; i < Vector256<ulong>.Count; i++)
-            {
-                uint value = (uint)lower.GetElementUnsafe(i);
-                result.SetElementUnsafe(i, value);
-            }
-
-            for (int i = Vector256<ulong>.Count; i < Vector256<uint>.Count; i++)
-            {
-                uint value = (uint)upper.GetElementUnsafe(i - Vector256<ulong>.Count);
-                result.SetElementUnsafe(i, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Narrow(lower.GetLower(), lower.GetUpper()),
+                Vector128.Narrow(upper.GetLower(), upper.GetUpper())
+            );
         }
 
         /// <summary>Negates a vector.</summary>
-        /// <param name="vector">The vector to negate.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="vector">The vector to negate.</param>
         /// <returns>A vector whose elements are the negation of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Negate<T>(Vector256<T> vector)
-            where T : struct => -vector;
+            where T : struct
+        {
+            return Create(
+                Vector128.Negate(vector.GetLower()),
+                Vector128.Negate(vector.GetUpper())
+            );
+        }
 
         /// <summary>Computes the ones-complement of a vector.</summary>
-        /// <param name="vector">The vector whose ones-complement is to be computed.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="vector">The vector whose ones-complement is to be computed.</param>
         /// <returns>A vector whose elements are the ones-complement of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> OnesComplement<T>(Vector256<T> vector)
-            where T : struct => ~vector;
+            where T : struct
+        {
+            return Create(
+                Vector128.OnesComplement(vector.GetLower()),
+                Vector128.OnesComplement(vector.GetUpper())
+            );
+        }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
         /// <param name="vector">The vector whose elements are to be shifted.</param>
@@ -2835,15 +2055,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<byte> ShiftLeft(Vector256<byte> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<byte> result);
-
-            for (int index = 0; index < Vector256<byte>.Count; index++)
-            {
-                byte element = Scalar<byte>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
@@ -2854,15 +2069,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<short> ShiftLeft(Vector256<short> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<short> result);
-
-            for (int index = 0; index < Vector256<short>.Count; index++)
-            {
-                short element = Scalar<short>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
@@ -2873,15 +2083,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<int> ShiftLeft(Vector256<int> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<int> result);
-
-            for (int index = 0; index < Vector256<int>.Count; index++)
-            {
-                int element = Scalar<int>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
@@ -2892,15 +2097,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<long> ShiftLeft(Vector256<long> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<long> result);
-
-            for (int index = 0; index < Vector256<long>.Count; index++)
-            {
-                long element = Scalar<long>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
@@ -2911,15 +2111,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<nint> ShiftLeft(Vector256<nint> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<nint> result);
-
-            for (int index = 0; index < Vector256<nint>.Count; index++)
-            {
-                nint element = Scalar<nint>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
@@ -2931,15 +2126,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<nuint> ShiftLeft(Vector256<nuint> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<nuint> result);
-
-            for (int index = 0; index < Vector256<nuint>.Count; index++)
-            {
-                nuint element = Scalar<nuint>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
@@ -2950,16 +2140,11 @@ namespace System.Runtime.Intrinsics
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<sbyte> ShiftLeft(Vector256<sbyte> vector, int shiftCount)
-        {
-            Unsafe.SkipInit(out Vector256<sbyte> result);
-
-            for (int index = 0; index < Vector256<sbyte>.Count; index++)
-            {
-                sbyte element = Scalar<sbyte>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+        {
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
@@ -2971,15 +2156,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<ushort> ShiftLeft(Vector256<ushort> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<ushort> result);
-
-            for (int index = 0; index < Vector256<ushort>.Count; index++)
-            {
-                ushort element = Scalar<ushort>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
@@ -2991,15 +2171,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<uint> ShiftLeft(Vector256<uint> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<uint> result);
-
-            for (int index = 0; index < Vector256<uint>.Count; index++)
-            {
-                uint element = Scalar<uint>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts each element of a vector left by the specified amount.</summary>
@@ -3011,15 +2186,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<ulong> ShiftLeft(Vector256<ulong> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<ulong> result);
-
-            for (int index = 0; index < Vector256<ulong>.Count; index++)
-            {
-                ulong element = Scalar<ulong>.ShiftLeft(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftLeft(vector.GetLower(), shiftCount),
+                Vector128.ShiftLeft(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (signed) each element of a vector right by the specified amount.</summary>
@@ -3030,15 +2200,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<short> ShiftRightArithmetic(Vector256<short> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<short> result);
-
-            for (int index = 0; index < Vector256<short>.Count; index++)
-            {
-                short element = Scalar<short>.ShiftRightArithmetic(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightArithmetic(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightArithmetic(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (signed) each element of a vector right by the specified amount.</summary>
@@ -3049,15 +2214,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<int> ShiftRightArithmetic(Vector256<int> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<int> result);
-
-            for (int index = 0; index < Vector256<int>.Count; index++)
-            {
-                int element = Scalar<int>.ShiftRightArithmetic(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightArithmetic(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightArithmetic(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (signed) each element of a vector right by the specified amount.</summary>
@@ -3068,15 +2228,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<long> ShiftRightArithmetic(Vector256<long> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<long> result);
-
-            for (int index = 0; index < Vector256<long>.Count; index++)
-            {
-                long element = Scalar<long>.ShiftRightArithmetic(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightArithmetic(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightArithmetic(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (signed) each element of a vector right by the specified amount.</summary>
@@ -3087,15 +2242,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<nint> ShiftRightArithmetic(Vector256<nint> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<nint> result);
-
-            for (int index = 0; index < Vector256<nint>.Count; index++)
-            {
-                nint element = Scalar<nint>.ShiftRightArithmetic(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightArithmetic(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightArithmetic(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (signed) each element of a vector right by the specified amount.</summary>
@@ -3107,15 +2257,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<sbyte> ShiftRightArithmetic(Vector256<sbyte> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<sbyte> result);
-
-            for (int index = 0; index < Vector256<sbyte>.Count; index++)
-            {
-                sbyte element = Scalar<sbyte>.ShiftRightArithmetic(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightArithmetic(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightArithmetic(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3126,15 +2271,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<byte> ShiftRightLogical(Vector256<byte> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<byte> result);
-
-            for (int index = 0; index < Vector256<byte>.Count; index++)
-            {
-                byte element = Scalar<byte>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3145,15 +2285,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<short> ShiftRightLogical(Vector256<short> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<short> result);
-
-            for (int index = 0; index < Vector256<short>.Count; index++)
-            {
-                short element = Scalar<short>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3164,15 +2299,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<int> ShiftRightLogical(Vector256<int> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<int> result);
-
-            for (int index = 0; index < Vector256<int>.Count; index++)
-            {
-                int element = Scalar<int>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3183,15 +2313,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<long> ShiftRightLogical(Vector256<long> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<long> result);
-
-            for (int index = 0; index < Vector256<long>.Count; index++)
-            {
-                long element = Scalar<long>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3202,15 +2327,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<nint> ShiftRightLogical(Vector256<nint> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<nint> result);
-
-            for (int index = 0; index < Vector256<nint>.Count; index++)
-            {
-                nint element = Scalar<nint>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3222,15 +2342,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<nuint> ShiftRightLogical(Vector256<nuint> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<nuint> result);
-
-            for (int index = 0; index < Vector256<nuint>.Count; index++)
-            {
-                nuint element = Scalar<nuint>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3242,15 +2357,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<sbyte> ShiftRightLogical(Vector256<sbyte> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<sbyte> result);
-
-            for (int index = 0; index < Vector256<sbyte>.Count; index++)
-            {
-                sbyte element = Scalar<sbyte>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3262,15 +2372,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<ushort> ShiftRightLogical(Vector256<ushort> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<ushort> result);
-
-            for (int index = 0; index < Vector256<ushort>.Count; index++)
-            {
-                ushort element = Scalar<ushort>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3282,15 +2387,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<uint> ShiftRightLogical(Vector256<uint> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<uint> result);
-
-            for (int index = 0; index < Vector256<uint>.Count; index++)
-            {
-                uint element = Scalar<uint>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Shifts (unsigned) each element of a vector right by the specified amount.</summary>
@@ -3302,15 +2402,10 @@ namespace System.Runtime.Intrinsics
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<ulong> ShiftRightLogical(Vector256<ulong> vector, int shiftCount)
         {
-            Unsafe.SkipInit(out Vector256<ulong> result);
-
-            for (int index = 0; index < Vector256<ulong>.Count; index++)
-            {
-                ulong element = Scalar<ulong>.ShiftRightLogical(vector.GetElementUnsafe(index), shiftCount);
-                result.SetElementUnsafe(index, element);
-            }
-
-            return result;
+            return Create(
+                Vector128.ShiftRightLogical(vector.GetLower(), shiftCount),
+                Vector128.ShiftRightLogical(vector.GetUpper(), shiftCount)
+            );
         }
 
         /// <summary>Creates a new vector by selecting values from an input vector using a set of indices.</summary>
@@ -3558,96 +2653,86 @@ namespace System.Runtime.Intrinsics
         }
 
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
-        /// <param name="vector">The vector whose square root is to be computed.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <param name="vector">The vector whose square root is to be computed.</param>
         /// <returns>A vector whose elements are the square root of the corresponding elements in <paramref name="vector" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Sqrt<T>(Vector256<T> vector)
             where T : struct
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                T value = Scalar<T>.Sqrt(vector.GetElementUnsafe(index));
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Create(
+                Vector128.Sqrt(vector.GetLower()),
+                Vector128.Sqrt(vector.GetUpper())
+            );
         }
 
         /// <summary>Stores a vector at the given destination.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="source">The vector that will be stored.</param>
         /// <param name="destination">The destination at which <paramref name="source" /> will be stored.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe void Store<T>(this Vector256<T> source, T* destination)
-            where T : unmanaged
-        {
-            *(Vector256<T>*)destination = source;
-        }
+        public static void Store<T>(this Vector256<T> source, T* destination)
+            where T : unmanaged => source.StoreUnsafe(ref *destination);
 
         /// <summary>Stores a vector at the given aligned destination.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="source">The vector that will be stored.</param>
         /// <param name="destination">The aligned destination at which <paramref name="source" /> will be stored.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe void StoreAligned<T>(this Vector256<T> source, T* destination)
+        public static void StoreAligned<T>(this Vector256<T> source, T* destination)
             where T : unmanaged
         {
             ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
 
-            if (((nuint)destination % Alignment) != 0)
+            if (((nuint)(destination) % Alignment) != 0)
             {
-                throw new AccessViolationException();
+                ThrowHelper.ThrowAccessViolationException();
             }
 
-            *(Vector256<T>*)destination = source;
+            *(Vector256<T>*)(destination) = source;
         }
 
         /// <summary>Stores a vector at the given aligned destination.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="source">The vector that will be stored.</param>
         /// <param name="destination">The aligned destination at which <paramref name="source" /> will be stored.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <remarks>This method may bypass the cache on certain platforms.</remarks>
         [Intrinsic]
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static unsafe void StoreAlignedNonTemporal<T>(this Vector256<T> source, T* destination)
-            where T : unmanaged
-        {
-            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-
-            if (((nuint)destination % Alignment) != 0)
-            {
-                throw new AccessViolationException();
-            }
-
-            *(Vector256<T>*)destination = source;
-        }
+        public static void StoreAlignedNonTemporal<T>(this Vector256<T> source, T* destination)
+            where T : unmanaged => source.StoreAligned(destination);
 
         /// <summary>Stores a vector at the given destination.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="source">The vector that will be stored.</param>
         /// <param name="destination">The destination at which <paramref name="source" /> will be stored.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static void StoreUnsafe<T>(this Vector256<T> source, ref T destination)
             where T : struct
         {
             ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination), source);
+            ref byte address = ref Unsafe.As<T, byte>(ref destination);
+            Unsafe.WriteUnaligned(ref address, source);
         }
 
         /// <summary>Stores a vector at the given destination.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="source">The vector that will be stored.</param>
         /// <param name="destination">The destination to which <paramref name="elementOffset" /> will be added before the vector will be stored.</param>
         /// <param name="elementOffset">The element offset from <paramref name="destination" /> from which the vector will be stored.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [CLSCompliant(false)]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -3664,27 +2749,35 @@ namespace System.Runtime.Intrinsics
         /// <param name="right">The vector to subtract from <paramref name="left" />.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The difference of <paramref name="left" /> and <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Subtract<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => left - right;
+            where T : struct
+        {
+            return Create(
+                Vector128.Subtract(left.GetLower(), right.GetLower()),
+                Vector128.Subtract(left.GetUpper(), right.GetUpper())
+            );
+        }
 
         /// <summary>Computes the sum of all elements in a vector.</summary>
         /// <param name="vector">The vector whose elements will be summed.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The sum of all elements in <paramref name="vector" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static T Sum<T>(Vector256<T> vector)
             where T : struct
         {
-            T sum = default;
-
-            for (int index = 0; index < Vector256<T>.Count; index++)
-            {
-                sum = Scalar<T>.Add(sum, vector.GetElementUnsafe(index));
-            }
+            // Computing this as Sum(lower) + Sum(upper) is important for floating-point determinism.
+            // The underlying dpps instruction on x86/x64 computes the result in an equivalent way;
+            // otherwise the software and accelerated implementations may differ in the returned result.
 
-            return sum;
+            T result = Vector128.Sum(vector.GetLower());
+            result = Scalar<T>.Add(result, Vector128.Sum(vector.GetUpper()));
+            return result;
         }
 
         /// <summary>Converts the given vector to a scalar containing the value of the first element.</summary>
@@ -3693,6 +2786,7 @@ namespace System.Runtime.Intrinsics
         /// <returns>A scalar <typeparamref name="T" /> containing the value of the first element.</returns>
         /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static T ToScalar<T>(this Vector256<T> vector)
             where T : struct
         {
@@ -3701,9 +2795,11 @@ namespace System.Runtime.Intrinsics
         }
 
         /// <summary>Tries to copy a <see cref="Vector256{T}" /> to a given span.</summary>
+        /// <typeparam name="T">The type of the input vector.</typeparam>
         /// <param name="vector">The vector to copy.</param>
         /// <param name="destination">The span to which <paramref name="vector" /> is copied.</param>
         /// <returns><c>true</c> if <paramref name="vector" /> was successfully copied to <paramref name="destination" />; otherwise, <c>false</c> if the length of <paramref name="destination" /> is less than <see cref="Vector256{T}.Count" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> and <paramref name="destination" /> (<typeparamref name="T" />) is not supported.</exception>
         public static bool TryCopyTo<T>(this Vector256<T> vector, Span<T> destination)
             where T : struct
         {
@@ -3714,7 +2810,8 @@ namespace System.Runtime.Intrinsics
                 return false;
             }
 
-            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination)), vector);
+            ref byte address = ref Unsafe.As<T, byte>(ref MemoryMarshal.GetReference(destination));
+            Unsafe.WriteUnaligned(ref address, vector);
             return true;
         }
 
@@ -3722,47 +2819,47 @@ namespace System.Runtime.Intrinsics
         /// <param name="source">The vector whose elements are to be widened.</param>
         /// <returns>A pair of vectors that contain the widened lower and upper halves of <paramref name="source" />.</returns>
         [CLSCompliant(false)]
-        public static unsafe (Vector256<ushort> Lower, Vector256<ushort> Upper) Widen(Vector256<byte> source)
-            => (WidenLower(source), WidenUpper(source));
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector256<ushort> Lower, Vector256<ushort> Upper) Widen(Vector256<byte> source) => (WidenLower(source), WidenUpper(source));
 
         /// <summary>Widens a <see cref="Vector256{Int16}" /> into two <see cref="Vector256{Int32}" />.</summary>
         /// <param name="source">The vector whose elements are to be widened.</param>
         /// <returns>A pair of vectors that contain the widened lower and upper halves of <paramref name="source" />.</returns>
-        public static unsafe (Vector256<int> Lower, Vector256<int> Upper) Widen(Vector256<short> source)
-            => (WidenLower(source), WidenUpper(source));
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector256<int> Lower, Vector256<int> Upper) Widen(Vector256<short> source) => (WidenLower(source), WidenUpper(source));
 
         /// <summary>Widens a <see cref="Vector256{Int32}" /> into two <see cref="Vector256{Int64}" />.</summary>
         /// <param name="source">The vector whose elements are to be widened.</param>
         /// <returns>A pair of vectors that contain the widened lower and upper halves of <paramref name="source" />.</returns>
-        public static unsafe (Vector256<long> Lower, Vector256<long> Upper) Widen(Vector256<int> source)
-            => (WidenLower(source), WidenUpper(source));
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector256<long> Lower, Vector256<long> Upper) Widen(Vector256<int> source) => (WidenLower(source), WidenUpper(source));
 
         /// <summary>Widens a <see cref="Vector256{SByte}" /> into two <see cref="Vector256{Int16}" />.</summary>
         /// <param name="source">The vector whose elements are to be widened.</param>
         /// <returns>A pair of vectors that contain the widened lower and upper halves of <paramref name="source" />.</returns>
         [CLSCompliant(false)]
-        public static unsafe (Vector256<short> Lower, Vector256<short> Upper) Widen(Vector256<sbyte> source)
-            => (WidenLower(source), WidenUpper(source));
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector256<short> Lower, Vector256<short> Upper) Widen(Vector256<sbyte> source) => (WidenLower(source), WidenUpper(source));
 
         /// <summary>Widens a <see cref="Vector256{Single}" /> into two <see cref="Vector256{Double}" />.</summary>
         /// <param name="source">The vector whose elements are to be widened.</param>
         /// <returns>A pair of vectors that contain the widened lower and upper halves of <paramref name="source" />.</returns>
-        public static unsafe (Vector256<double> Lower, Vector256<double> Upper) Widen(Vector256<float> source)
-            => (WidenLower(source), WidenUpper(source));
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector256<double> Lower, Vector256<double> Upper) Widen(Vector256<float> source) => (WidenLower(source), WidenUpper(source));
 
         /// <summary>Widens a <see cref="Vector256{UInt16}" /> into two <see cref="Vector256{UInt32}" />.</summary>
         /// <param name="source">The vector whose elements are to be widened.</param>
         /// <returns>A pair of vectors that contain the widened lower and upper halves of <paramref name="source" />.</returns>
         [CLSCompliant(false)]
-        public static unsafe (Vector256<uint> Lower, Vector256<uint> Upper) Widen(Vector256<ushort> source)
-            => (WidenLower(source), WidenUpper(source));
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector256<uint> Lower, Vector256<uint> Upper) Widen(Vector256<ushort> source) => (WidenLower(source), WidenUpper(source));
 
         /// <summary>Widens a <see cref="Vector256{UInt32}" /> into two <see cref="Vector256{UInt64}" />.</summary>
         /// <param name="source">The vector whose elements are to be widened.</param>
         /// <returns>A pair of vectors that contain the widened lower and upper halves of <paramref name="source" />.</returns>
         [CLSCompliant(false)]
-        public static unsafe (Vector256<ulong> Lower, Vector256<ulong> Upper) Widen(Vector256<uint> source)
-            => (WidenLower(source), WidenUpper(source));
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static (Vector256<ulong> Lower, Vector256<ulong> Upper) Widen(Vector256<uint> source) => (WidenLower(source), WidenUpper(source));
 
         /// <summary>Creates a new <see cref="Vector256{T}" /> with the element at the specified index set to the specified value and the remaining elements set to the same value as that in the given vector.</summary>
         /// <typeparam name="T">The type of the input vector.</typeparam>
@@ -3770,8 +2867,8 @@ namespace System.Runtime.Intrinsics
         /// <param name="index">The index of the element to set.</param>
         /// <param name="value">The value to set the element to.</param>
         /// <returns>A <see cref="Vector256{T}" /> with the value of the element at <paramref name="index" /> set to <paramref name="value" /> and the remaining elements set to the same value as that in <paramref name="vector" />.</returns>
-        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         /// <exception cref="ArgumentOutOfRangeException"><paramref name="index" /> was less than zero or greater than the number of elements.</exception>
+        /// <exception cref="NotSupportedException">The type of <paramref name="vector" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         public static Vector256<T> WithElement<T>(this Vector256<T> vector, int index, T value)
             where T : struct
@@ -3805,20 +2902,16 @@ namespace System.Runtime.Intrinsics
                 // All integral types generate the same instruction, so just pick one rather than handling each T separately
                 return Avx2.InsertVector128(vector.AsByte(), value.AsByte(), 0).As<byte, T>();
             }
-
-            if (Avx.IsSupported)
+            else if (Avx.IsSupported)
             {
                 // All floating-point types generate the same instruction, so just pick one rather than handling each T separately
                 // We also just fallback to this for integral types if AVX2 isn't supported, since that is still faster than software
                 return Avx.InsertVector128(vector.AsSingle(), value.AsSingle(), 0).As<float, T>();
             }
-
-            return SoftwareFallback(vector, value);
-
-            static Vector256<T> SoftwareFallback(Vector256<T> vector, Vector128<T> value)
+            else
             {
                 Vector256<T> result = vector;
-                Unsafe.As<Vector256<T>, Vector128<T>>(ref result) = value;
+                result.SetLowerUnsafe(value);
                 return result;
             }
         }
@@ -3840,41 +2933,44 @@ namespace System.Runtime.Intrinsics
                 // All integral types generate the same instruction, so just pick one rather than handling each T separately
                 return Avx2.InsertVector128(vector.AsByte(), value.AsByte(), 1).As<byte, T>();
             }
-
-            if (Avx.IsSupported)
+            else if (Avx.IsSupported)
             {
                 // All floating-point types generate the same instruction, so just pick one rather than handling each T separately
                 // We also just fallback to this for integral types if AVX2 isn't supported, since that is still faster than software
                 return Avx.InsertVector128(vector.AsSingle(), value.AsSingle(), 1).As<float, T>();
             }
-
-            return SoftwareFallback(vector, value);
-
-            static Vector256<T> SoftwareFallback(Vector256<T> vector, Vector128<T> value)
+            else
             {
                 Vector256<T> result = vector;
-                ref Vector128<T> lower = ref Unsafe.As<Vector256<T>, Vector128<T>>(ref result);
-                Unsafe.Add(ref lower, 1) = value;
+                result.SetUpperUnsafe(value);
                 return result;
             }
         }
 
         /// <summary>Computes the exclusive-or of two vectors.</summary>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <param name="left">The vector to exclusive-or with <paramref name="right" />.</param>
         /// <param name="right">The vector to exclusive-or with <paramref name="left" />.</param>
-        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
         /// <returns>The exclusive-or of <paramref name="left" /> and <paramref name="right" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="left" /> and <paramref name="right" /> (<typeparamref name="T" />) is not supported.</exception>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> Xor<T>(Vector256<T> left, Vector256<T> right)
-            where T : struct => left ^ right;
+            where T : struct
+        {
+            return Create(
+                Vector128.Xor(left.GetLower(), right.GetLower()),
+                Vector128.Xor(left.GetUpper(), right.GetUpper())
+            );
+        }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static T GetElementUnsafe<T>(in this Vector256<T> vector, int index)
             where T : struct
         {
             Debug.Assert((index >= 0) && (index < Vector256<T>.Count));
-            return Unsafe.Add(ref Unsafe.As<Vector256<T>, T>(ref Unsafe.AsRef(in vector)), index);
+            ref T address = ref Unsafe.As<Vector256<T>, T>(ref Unsafe.AsRef(in vector));
+            return Unsafe.Add(ref address, index);
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
@@ -3882,203 +2978,190 @@ namespace System.Runtime.Intrinsics
             where T : struct
         {
             Debug.Assert((index >= 0) && (index < Vector256<T>.Count));
-            Unsafe.Add(ref Unsafe.As<Vector256<T>, T>(ref Unsafe.AsRef(in vector)), index) = value;
+            ref T address = ref Unsafe.As<Vector256<T>, T>(ref Unsafe.AsRef(in vector));
+            Unsafe.Add(ref address, index) = value;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static void SetLowerUnsafe<T>(in this Vector256<T> vector, Vector128<T> value)
+            where T : struct
+        {
+            Unsafe.AsRef(in vector._lower) = value;
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static void SetUpperUnsafe<T>(in this Vector256<T> vector, Vector128<T> value)
+            where T : struct
+        {
+            Unsafe.AsRef(in vector._upper) = value;
         }
 
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static Vector256<ushort> WidenLower(Vector256<byte> source)
         {
-            Unsafe.SkipInit(out Vector256<ushort> lower);
-
-            for (int i = 0; i < Vector256<ushort>.Count; i++)
-            {
-                ushort value = source.GetElementUnsafe(i);
-                lower.SetElementUnsafe(i, value);
-            }
+            Vector128<byte> lower = source.GetLower();
 
-            return lower;
+            return Create(
+                Vector128.WidenLower(lower),
+                Vector128.WidenUpper(lower)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<int> WidenLower(Vector256<short> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<int> WidenLower(Vector256<short> source)
         {
-            Unsafe.SkipInit(out Vector256<int> lower);
-
-            for (int i = 0; i < Vector256<int>.Count; i++)
-            {
-                int value = source.GetElementUnsafe(i);
-                lower.SetElementUnsafe(i, value);
-            }
+            Vector128<short> lower = source.GetLower();
 
-            return lower;
+            return Create(
+                Vector128.WidenLower(lower),
+                Vector128.WidenUpper(lower)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<long> WidenLower(Vector256<int> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<long> WidenLower(Vector256<int> source)
         {
-            Unsafe.SkipInit(out Vector256<long> lower);
-
-            for (int i = 0; i < Vector256<long>.Count; i++)
-            {
-                long value = source.GetElementUnsafe(i);
-                lower.SetElementUnsafe(i, value);
-            }
+            Vector128<int> lower = source.GetLower();
 
-            return lower;
+            return Create(
+                Vector128.WidenLower(lower),
+                Vector128.WidenUpper(lower)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<short> WidenLower(Vector256<sbyte> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<short> WidenLower(Vector256<sbyte> source)
         {
-            Unsafe.SkipInit(out Vector256<short> lower);
-
-            for (int i = 0; i < Vector256<short>.Count; i++)
-            {
-                short value = source.GetElementUnsafe(i);
-                lower.SetElementUnsafe(i, value);
-            }
+            Vector128<sbyte> lower = source.GetLower();
 
-            return lower;
+            return Create(
+                Vector128.WidenLower(lower),
+                Vector128.WidenUpper(lower)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<double> WidenLower(Vector256<float> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<double> WidenLower(Vector256<float> source)
         {
-            Unsafe.SkipInit(out Vector256<double> lower);
-
-            for (int i = 0; i < Vector256<double>.Count; i++)
-            {
-                double value = source.GetElementUnsafe(i);
-                lower.SetElementUnsafe(i, value);
-            }
+            Vector128<float> lower = source.GetLower();
 
-            return lower;
+            return Create(
+                Vector128.WidenLower(lower),
+                Vector128.WidenUpper(lower)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<uint> WidenLower(Vector256<ushort> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<uint> WidenLower(Vector256<ushort> source)
         {
-            Unsafe.SkipInit(out Vector256<uint> lower);
-
-            for (int i = 0; i < Vector256<uint>.Count; i++)
-            {
-                uint value = source.GetElementUnsafe(i);
-                lower.SetElementUnsafe(i, value);
-            }
+            Vector128<ushort> lower = source.GetLower();
 
-            return lower;
+            return Create(
+                Vector128.WidenLower(lower),
+                Vector128.WidenUpper(lower)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<ulong> WidenLower(Vector256<uint> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<ulong> WidenLower(Vector256<uint> source)
         {
-            Unsafe.SkipInit(out Vector256<ulong> lower);
-
-            for (int i = 0; i < Vector256<ulong>.Count; i++)
-            {
-                ulong value = source.GetElementUnsafe(i);
-                lower.SetElementUnsafe(i, value);
-            }
+            Vector128<uint> lower = source.GetLower();
 
-            return lower;
+            return Create(
+                Vector128.WidenLower(lower),
+                Vector128.WidenUpper(lower)
+            );
         }
 
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         internal static Vector256<ushort> WidenUpper(Vector256<byte> source)
         {
-            Unsafe.SkipInit(out Vector256<ushort> upper);
-
-            for (int i = Vector256<ushort>.Count; i < Vector256<byte>.Count; i++)
-            {
-                ushort value = source.GetElementUnsafe(i);
-                upper.SetElementUnsafe(i - Vector256<ushort>.Count, value);
-            }
+            Vector128<byte> upper = source.GetUpper();
 
-            return upper;
+            return Create(
+                Vector128.WidenLower(upper),
+                Vector128.WidenUpper(upper)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<int> WidenUpper(Vector256<short> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<int> WidenUpper(Vector256<short> source)
         {
-            Unsafe.SkipInit(out Vector256<int> upper);
-
-            for (int i = Vector256<int>.Count; i < Vector256<short>.Count; i++)
-            {
-                int value = source.GetElementUnsafe(i);
-                upper.SetElementUnsafe(i - Vector256<int>.Count, value);
-            }
+            Vector128<short> upper = source.GetUpper();
 
-            return upper;
+            return Create(
+                Vector128.WidenLower(upper),
+                Vector128.WidenUpper(upper)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<long> WidenUpper(Vector256<int> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<long> WidenUpper(Vector256<int> source)
         {
-            Unsafe.SkipInit(out Vector256<long> upper);
-
-            for (int i = Vector256<long>.Count; i < Vector256<int>.Count; i++)
-            {
-                long value = source.GetElementUnsafe(i);
-                upper.SetElementUnsafe(i - Vector256<long>.Count, value);
-            }
+            Vector128<int> upper = source.GetUpper();
 
-            return upper;
+            return Create(
+                Vector128.WidenLower(upper),
+                Vector128.WidenUpper(upper)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<short> WidenUpper(Vector256<sbyte> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<short> WidenUpper(Vector256<sbyte> source)
         {
-            Unsafe.SkipInit(out Vector256<short> upper);
-
-            for (int i = Vector256<short>.Count; i < Vector256<sbyte>.Count; i++)
-            {
-                short value = source.GetElementUnsafe(i);
-                upper.SetElementUnsafe(i - Vector256<short>.Count, value);
-            }
+            Vector128<sbyte> upper = source.GetUpper();
 
-            return upper;
+            return Create(
+                Vector128.WidenLower(upper),
+                Vector128.WidenUpper(upper)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<double> WidenUpper(Vector256<float> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<double> WidenUpper(Vector256<float> source)
         {
-            Unsafe.SkipInit(out Vector256<double> upper);
-
-            for (int i = Vector256<double>.Count; i < Vector256<float>.Count; i++)
-            {
-                double value = source.GetElementUnsafe(i);
-                upper.SetElementUnsafe(i - Vector256<double>.Count, value);
-            }
+            Vector128<float> upper = source.GetUpper();
 
-            return upper;
+            return Create(
+                Vector128.WidenLower(upper),
+                Vector128.WidenUpper(upper)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<uint> WidenUpper(Vector256<ushort> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<uint> WidenUpper(Vector256<ushort> source)
         {
-            Unsafe.SkipInit(out Vector256<uint> upper);
-
-            for (int i = Vector256<uint>.Count; i < Vector256<ushort>.Count; i++)
-            {
-                uint value = source.GetElementUnsafe(i);
-                upper.SetElementUnsafe(i - Vector256<uint>.Count, value);
-            }
+            Vector128<ushort> upper = source.GetUpper();
 
-            return upper;
+            return Create(
+                Vector128.WidenLower(upper),
+                Vector128.WidenUpper(upper)
+            );
         }
 
         [Intrinsic]
-        internal static unsafe Vector256<ulong> WidenUpper(Vector256<uint> source)
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        internal static Vector256<ulong> WidenUpper(Vector256<uint> source)
         {
-            Unsafe.SkipInit(out Vector256<ulong> upper);
-
-            for (int i = Vector256<ulong>.Count; i < Vector256<uint>.Count; i++)
-            {
-                ulong value = source.GetElementUnsafe(i);
-                upper.SetElementUnsafe(i - Vector256<ulong>.Count, value);
-            }
+            Vector128<uint> upper = source.GetUpper();
 
-            return upper;
+            return Create(
+                Vector128.WidenLower(upper),
+                Vector128.WidenUpper(upper)
+            );
         }
     }
 }
index dac4eac..136593e 100644 (file)
@@ -4,10 +4,8 @@
 using System.Diagnostics;
 using System.Diagnostics.CodeAnalysis;
 using System.Globalization;
-using System.Numerics;
 using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics.X86;
 using System.Text;
 
 namespace System.Runtime.Intrinsics
@@ -30,22 +28,19 @@ namespace System.Runtime.Intrinsics
     public readonly struct Vector256<T> : IEquatable<Vector256<T>>
         where T : struct
     {
-        // These fields exist to ensure the alignment is 8, rather than 1.
-        // This also allows the debug view to work https://github.com/dotnet/runtime/issues/9495)
-        private readonly ulong _00;
-        private readonly ulong _01;
-        private readonly ulong _02;
-        private readonly ulong _03;
+        internal readonly Vector128<T> _lower;
+        internal readonly Vector128<T> _upper;
 
         /// <summary>Gets a new <see cref="Vector256{T}" /> with all bits set to 1.</summary>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
         public static Vector256<T> AllBitsSet
         {
             [Intrinsic]
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
             get
             {
-                ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-                return Vector256.Create(0xFFFFFFFF).As<uint, T>();
+                Vector128<T> vector = Vector128<T>.AllBitsSet;
+                return Vector256.Create(vector, vector);
             }
         }
 
@@ -54,10 +49,10 @@ namespace System.Runtime.Intrinsics
         public static int Count
         {
             [Intrinsic]
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
             get
             {
-                ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-                return Vector256.Size / Unsafe.SizeOf<T>();
+                return Vector128<T>.Count * 2;
             }
         }
 
@@ -66,18 +61,21 @@ namespace System.Runtime.Intrinsics
         public static bool IsSupported
         {
             [MethodImpl(MethodImplOptions.AggressiveInlining)]
-            get => (typeof(T) == typeof(byte)) ||
-                   (typeof(T) == typeof(double)) ||
-                   (typeof(T) == typeof(short)) ||
-                   (typeof(T) == typeof(int)) ||
-                   (typeof(T) == typeof(long)) ||
-                   (typeof(T) == typeof(nint)) ||
-                   (typeof(T) == typeof(nuint)) ||
-                   (typeof(T) == typeof(sbyte)) ||
-                   (typeof(T) == typeof(float)) ||
-                   (typeof(T) == typeof(ushort)) ||
-                   (typeof(T) == typeof(uint)) ||
-                   (typeof(T) == typeof(ulong));
+            get
+            {
+                return (typeof(T) == typeof(byte)) ||
+                       (typeof(T) == typeof(double)) ||
+                       (typeof(T) == typeof(short)) ||
+                       (typeof(T) == typeof(int)) ||
+                       (typeof(T) == typeof(long)) ||
+                       (typeof(T) == typeof(nint)) ||
+                       (typeof(T) == typeof(nuint)) ||
+                       (typeof(T) == typeof(sbyte)) ||
+                       (typeof(T) == typeof(float)) ||
+                       (typeof(T) == typeof(ushort)) ||
+                       (typeof(T) == typeof(uint)) ||
+                       (typeof(T) == typeof(ulong));
+            }
         }
 
         /// <summary>Gets a new <see cref="Vector256{T}" /> with all elements initialized to zero.</summary>
@@ -85,6 +83,7 @@ namespace System.Runtime.Intrinsics
         public static Vector256<T> Zero
         {
             [Intrinsic]
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
             get
             {
                 ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
@@ -107,24 +106,27 @@ namespace System.Runtime.Intrinsics
             }
         }
 
-        public T this[int index] => this.GetElement(index);
+        public T this[int index]
+        {
+            [MethodImpl(MethodImplOptions.AggressiveInlining)]
+            get
+            {
+                return this.GetElement(index);
+            }
+        }
 
         /// <summary>Adds two vectors to compute their sum.</summary>
         /// <param name="left">The vector to add with <paramref name="right" />.</param>
         /// <param name="right">The vector to add with <paramref name="left" />.</param>
         /// <returns>The sum of <paramref name="left" /> and <paramref name="right" />.</returns>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<T> operator +(Vector256<T> left, Vector256<T> right)
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Count; index++)
-            {
-                T value = Scalar<T>.Add(left.GetElementUnsafe(index), right.GetElementUnsafe(index));
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Vector256.Create(
+                left.GetLower() + right.GetLower(),
+                left.GetUpper() + right.GetUpper()
+            );
         }
 
         /// <summary>Computes the bitwise-and of two vectors.</summary>
@@ -132,17 +134,13 @@ namespace System.Runtime.Intrinsics
         /// <param name="right">The vector to bitwise-and with <paramref name="left" />.</param>
         /// <returns>The bitwise-and of <paramref name="left" /> and <paramref name="right"/>.</returns>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<T> operator &(Vector256<T> left, Vector256<T> right)
         {
-            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            Unsafe.AsRef(in result._00) = left._00 & right._00;
-            Unsafe.AsRef(in result._01) = left._01 & right._01;
-            Unsafe.AsRef(in result._02) = left._02 & right._02;
-            Unsafe.AsRef(in result._03) = left._03 & right._03;
-
-            return result;
+            return Vector256.Create(
+                left.GetLower() & right.GetLower(),
+                left.GetUpper() & right.GetUpper()
+            );
         }
 
         /// <summary>Computes the bitwise-or of two vectors.</summary>
@@ -150,17 +148,13 @@ namespace System.Runtime.Intrinsics
         /// <param name="right">The vector to bitwise-or with <paramref name="left" />.</param>
         /// <returns>The bitwise-or of <paramref name="left" /> and <paramref name="right"/>.</returns>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<T> operator |(Vector256<T> left, Vector256<T> right)
         {
-            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            Unsafe.AsRef(in result._00) = left._00 | right._00;
-            Unsafe.AsRef(in result._01) = left._01 | right._01;
-            Unsafe.AsRef(in result._02) = left._02 | right._02;
-            Unsafe.AsRef(in result._03) = left._03 | right._03;
-
-            return result;
+            return Vector256.Create(
+                left.GetLower() | right.GetLower(),
+                left.GetUpper() | right.GetUpper()
+            );
         }
 
         /// <summary>Divides two vectors to compute their quotient.</summary>
@@ -168,17 +162,13 @@ namespace System.Runtime.Intrinsics
         /// <param name="right">The vector that will divide <paramref name="left" />.</param>
         /// <returns>The quotient of <paramref name="left" /> divided by <paramref name="right" />.</returns>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<T> operator /(Vector256<T> left, Vector256<T> right)
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Count; index++)
-            {
-                T value = Scalar<T>.Divide(left.GetElementUnsafe(index), right.GetElementUnsafe(index));
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Vector256.Create(
+                left.GetLower() / right.GetLower(),
+                left.GetUpper() / right.GetUpper()
+            );
         }
 
         /// <summary>Compares two vectors to determine if all elements are equal.</summary>
@@ -186,16 +176,11 @@ namespace System.Runtime.Intrinsics
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
         /// <returns><c>true</c> if all elements in <paramref name="left" /> were equal to the corresponding element in <paramref name="right" />.</returns>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool operator ==(Vector256<T> left, Vector256<T> right)
         {
-            for (int index = 0; index < Count; index++)
-            {
-                if (!Scalar<T>.Equals(left.GetElementUnsafe(index), right.GetElementUnsafe(index)))
-                {
-                    return false;
-                }
-            }
-            return true;
+            return (left.GetLower() == right.GetLower())
+                && (left.GetUpper() == right.GetUpper());
         }
 
         /// <summary>Computes the exclusive-or of two vectors.</summary>
@@ -203,17 +188,13 @@ namespace System.Runtime.Intrinsics
         /// <param name="right">The vector to exclusive-or with <paramref name="left" />.</param>
         /// <returns>The exclusive-or of <paramref name="left" /> and <paramref name="right" />.</returns>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<T> operator ^(Vector256<T> left, Vector256<T> right)
         {
-            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            Unsafe.AsRef(in result._00) = left._00 ^ right._00;
-            Unsafe.AsRef(in result._01) = left._01 ^ right._01;
-            Unsafe.AsRef(in result._02) = left._02 ^ right._02;
-            Unsafe.AsRef(in result._03) = left._03 ^ right._03;
-
-            return result;
+            return Vector256.Create(
+                left.GetLower() ^ right.GetLower(),
+                left.GetUpper() ^ right.GetUpper()
+            );
         }
 
         /// <summary>Compares two vectors to determine if any elements are not equal.</summary>
@@ -223,24 +204,23 @@ namespace System.Runtime.Intrinsics
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static bool operator !=(Vector256<T> left, Vector256<T> right)
-            => !(left == right);
+        {
+            return (left.GetLower() != right.GetLower())
+                || (left.GetUpper() != right.GetUpper());
+        }
 
         /// <summary>Multiplies two vectors to compute their element-wise product.</summary>
         /// <param name="left">The vector to multiply with <paramref name="right" />.</param>
         /// <param name="right">The vector to multiply with <paramref name="left" />.</param>
         /// <returns>The element-wise product of <paramref name="left" /> and <paramref name="right" />.</returns>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<T> operator *(Vector256<T> left, Vector256<T> right)
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Count; index++)
-            {
-                T value = Scalar<T>.Multiply(left.GetElementUnsafe(index), right.GetElementUnsafe(index));
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Vector256.Create(
+                left.GetLower() * right.GetLower(),
+                left.GetUpper() * right.GetUpper()
+            );
         }
 
         /// <summary>Multiplies a vector by a scalar to compute their product.</summary>
@@ -248,17 +228,13 @@ namespace System.Runtime.Intrinsics
         /// <param name="right">The scalar to multiply with <paramref name="left" />.</param>
         /// <returns>The product of <paramref name="left" /> and <paramref name="right" />.</returns>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> operator *(Vector256<T> left, T right)
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Count; index++)
-            {
-                T value = Scalar<T>.Multiply(left.GetElementUnsafe(index), right);
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Vector256.Create(
+                left.GetLower() * right,
+                left.GetUpper() * right
+            );
         }
 
         /// <summary>Multiplies a vector by a scalar to compute their product.</summary>
@@ -268,31 +244,38 @@ namespace System.Runtime.Intrinsics
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static Vector256<T> operator *(T left, Vector256<T> right)
-            => right * left;
+        {
+            return Vector256.Create(
+                left * right.GetLower(),
+                left * right.GetUpper()
+            );
+        }
 
         /// <summary>Computes the ones-complement of a vector.</summary>
         /// <param name="vector">The vector whose ones-complement is to be computed.</param>
         /// <returns>A vector whose elements are the ones-complement of the corresponding elements in <paramref name="vector" />.</returns>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector256<T> operator ~(Vector256<T> vector) => AllBitsSet ^ vector;
+        public static Vector256<T> operator ~(Vector256<T> vector)
+        {
+            return Vector256.Create(
+                ~vector.GetLower(),
+                ~vector.GetUpper()
+            );
+        }
 
         /// <summary>Subtracts two vectors to compute their difference.</summary>
         /// <param name="left">The vector from which <paramref name="right" /> will be subtracted.</param>
         /// <param name="right">The vector to subtract from <paramref name="left" />.</param>
         /// <returns>The difference of <paramref name="left" /> and <paramref name="right" />.</returns>
         [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static unsafe Vector256<T> operator -(Vector256<T> left, Vector256<T> right)
         {
-            Unsafe.SkipInit(out Vector256<T> result);
-
-            for (int index = 0; index < Count; index++)
-            {
-                T value = Scalar<T>.Subtract(left.GetElementUnsafe(index), right.GetElementUnsafe(index));
-                result.SetElementUnsafe(index, value);
-            }
-
-            return result;
+            return Vector256.Create(
+                left.GetLower() - right.GetLower(),
+                left.GetUpper() - right.GetUpper()
+            );
         }
 
         /// <summary>Computes the unary negation of a vector.</summary>
@@ -300,7 +283,13 @@ namespace System.Runtime.Intrinsics
         /// <returns>A vector whose elements are the unary negation of the corresponding elements in <paramref name="vector" />.</returns>
         [Intrinsic]
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        public static Vector256<T> operator -(Vector256<T> vector) => Zero - vector;
+        public static Vector256<T> operator -(Vector256<T> vector)
+        {
+            return Vector256.Create(
+                -vector.GetLower(),
+                -vector.GetUpper()
+            );
+        }
 
         /// <summary>Returns a given vector unchanged.</summary>
         /// <param name="value">The vector.</param>
@@ -313,8 +302,8 @@ namespace System.Runtime.Intrinsics
         /// <param name="obj">The object to compare with the current instance.</param>
         /// <returns><c>true</c> if <paramref name="obj" /> is a <see cref="Vector256{T}" /> and is equal to the current instance; otherwise, <c>false</c>.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
-        public override bool Equals([NotNullWhen(true)] object? obj)
-            => (obj is Vector256<T> other) && Equals(other);
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public override bool Equals([NotNullWhen(true)] object? obj) => (obj is Vector256<T> other) && Equals(other);
 
         /// <summary>Determines whether the specified <see cref="Vector256{T}" /> is equal to the current instance.</summary>
         /// <param name="other">The <see cref="Vector256{T}" /> to compare with the current instance.</param>
@@ -338,19 +327,10 @@ namespace System.Runtime.Intrinsics
                     return this == other;
                 }
             }
-
-            return SoftwareFallback(in this, other);
-
-            static bool SoftwareFallback(in Vector256<T> self, Vector256<T> other)
+            else
             {
-                for (int index = 0; index < Count; index++)
-                {
-                    if (!Scalar<T>.ObjectEquals(self.GetElementUnsafe(index), other.GetElementUnsafe(index)))
-                    {
-                        return false;
-                    }
-                }
-                return true;
+                return this.GetLower().Equals(other.GetLower())
+                    && this.GetUpper().Equals(other.GetUpper());
             }
         }
 
@@ -373,8 +353,8 @@ namespace System.Runtime.Intrinsics
         /// <summary>Converts the current instance to an equivalent string representation.</summary>
         /// <returns>An equivalent string representation of the current instance.</returns>
         /// <exception cref="NotSupportedException">The type of the current instance (<typeparamref name="T" />) is not supported.</exception>
-        public override string ToString()
-            => ToString("G", CultureInfo.InvariantCulture);
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public override string ToString() => ToString("G", CultureInfo.InvariantCulture);
 
         private string ToString([StringSyntax(StringSyntaxAttribute.NumericFormat)] string? format, IFormatProvider? formatProvider)
         {
index 69e7bc8..4f1dada 100644 (file)
@@ -37,6 +37,10 @@ namespace System.Runtime.Intrinsics
             {
                 return vector;
             }
+            else if (typeof(T) == typeof(nuint))
+            {
+                return vector;
+            }
             else if (typeof(T) == typeof(ushort))
             {
                 return vector;
@@ -510,6 +514,14 @@ namespace System.Runtime.Intrinsics
             {
                 return Create((long)(object)value).As<long, T>();
             }
+            else if (typeof(T) == typeof(nint))
+            {
+                return Create((nint)(object)value).As<nint, T>();
+            }
+            else if (typeof(T) == typeof(nuint))
+            {
+                return Create((nuint)(object)value).As<nuint, T>();
+            }
             else if (typeof(T) == typeof(sbyte))
             {
                 return Create((sbyte)(object)value).As<sbyte, T>();
index 19f10ac..1f0e031 100644 (file)
@@ -51,6 +51,12 @@ namespace System
     internal static class ThrowHelper
     {
         [DoesNotReturn]
+        internal static void ThrowAccessViolationException()
+        {
+            throw new AccessViolationException();
+        }
+
+        [DoesNotReturn]
         internal static void ThrowArrayTypeMismatchException()
         {
             throw new ArrayTypeMismatchException();
@@ -633,7 +639,8 @@ namespace System
         // If 'T' is allowed, no-ops. JIT will elide the method entirely if 'T'
         // is supported and we're on an optimized release build.
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static void ThrowForUnsupportedNumericsVectorBaseType<T>() where T : struct
+        internal static void ThrowForUnsupportedNumericsVectorBaseType<T>()
+            where T : struct
         {
             if (!Vector<T>.IsSupported)
             {
@@ -645,7 +652,8 @@ namespace System
         // If 'T' is allowed, no-ops. JIT will elide the method entirely if 'T'
         // is supported and we're on an optimized release build.
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static void ThrowForUnsupportedIntrinsicsVector64BaseType<T>() where T : struct
+        internal static void ThrowForUnsupportedIntrinsicsVector64BaseType<T>()
+            where T : struct
         {
             if (!Vector64<T>.IsSupported)
             {
@@ -657,7 +665,8 @@ namespace System
         // If 'T' is allowed, no-ops. JIT will elide the method entirely if 'T'
         // is supported and we're on an optimized release build.
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static void ThrowForUnsupportedIntrinsicsVector128BaseType<T>() where T : struct
+        internal static void ThrowForUnsupportedIntrinsicsVector128BaseType<T>()
+            where T : struct
         {
             if (!Vector128<T>.IsSupported)
             {
@@ -669,7 +678,8 @@ namespace System
         // If 'T' is allowed, no-ops. JIT will elide the method entirely if 'T'
         // is supported and we're on an optimized release build.
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
-        internal static void ThrowForUnsupportedIntrinsicsVector256BaseType<T>() where T : struct
+        internal static void ThrowForUnsupportedIntrinsicsVector256BaseType<T>()
+            where T : struct
         {
             if (!Vector256<T>.IsSupported)
             {
index 63e6b52..a33bd00 100644 (file)
@@ -590,10 +590,18 @@ emit_sum_vector (MonoCompile *cfg, MonoClass *klass, MonoMethodSignature *fsig,
        case MONO_TYPE_U2:
                instc0 = INTRINS_SSE_PHADDW;
                break;
+#if TARGET_SIZEOF_VOID_P == 4
+       case MONO_TYPE_I:
+       case MONO_TYPE_U:
+#endif
        case MONO_TYPE_I4:
        case MONO_TYPE_U4:
                instc0 = INTRINS_SSE_PHADDD;
                break;
+#if TARGET_SIZEOF_VOID_P == 8
+       case MONO_TYPE_I:
+       case MONO_TYPE_U:
+#endif
        case MONO_TYPE_I8:
        case MONO_TYPE_U8: {
                // Ssse3 doesn't have support for HorizontalAdd on i64
@@ -1274,10 +1282,18 @@ emit_sri_vector (MonoCompile *cfg, MonoMethod *cmethod, MonoMethodSignature *fsi
                        return emit_vector_create_elementwise (cfg, fsig, fsig->ret, etype, args);
                break;
        }
-       case SN_CreateScalar:
+       case SN_CreateScalar: {
+               MonoType *etype = get_vector_t_elem_type (fsig->ret);
+               if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype))
+                       return NULL;
                return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR, -1, arg0_type, fsig, args);
-       case SN_CreateScalarUnsafe:
+       }
+       case SN_CreateScalarUnsafe: {
+               MonoType *etype = get_vector_t_elem_type (fsig->ret);
+               if (!MONO_TYPE_IS_VECTOR_PRIMITIVE (etype))
+                       return NULL;
                return emit_simd_ins_for_sig (cfg, klass, OP_CREATE_SCALAR_UNSAFE, -1, arg0_type, fsig, args);
+       }
        case SN_Dot: {
 #ifdef TARGET_ARM64
                if (!is_element_type_primitive (fsig->params [0]))
@@ -3671,10 +3687,18 @@ emit_x86_intrinsics (
                        case MONO_TYPE_U2:
                                op = is_imm ? INTRINS_SSE_PSRLI_W : INTRINS_SSE_PSRL_W;
                                break;
+#if TARGET_SIZEOF_VOID_P == 4
+                       case MONO_TYPE_I:
+                       case MONO_TYPE_U:
+#endif
                        case MONO_TYPE_I4:
                        case MONO_TYPE_U4:
                                op = is_imm ? INTRINS_SSE_PSRLI_D : INTRINS_SSE_PSRL_D;
                                break;
+#if TARGET_SIZEOF_VOID_P == 8
+                       case MONO_TYPE_I:
+                       case MONO_TYPE_U:
+#endif
                        case MONO_TYPE_I8:
                        case MONO_TYPE_U8:
                                op = is_imm ? INTRINS_SSE_PSRLI_Q : INTRINS_SSE_PSRL_Q;
@@ -3707,10 +3731,18 @@ emit_x86_intrinsics (
                        case MONO_TYPE_U2:
                                op = is_imm ? INTRINS_SSE_PSLLI_W : INTRINS_SSE_PSLL_W;
                                break;
+#if TARGET_SIZEOF_VOID_P == 4
+                       case MONO_TYPE_I:
+                       case MONO_TYPE_U:
+#endif
                        case MONO_TYPE_I4:
                        case MONO_TYPE_U4:
                                op = is_imm ? INTRINS_SSE_PSLLI_D : INTRINS_SSE_PSLL_D;
                                break;
+#if TARGET_SIZEOF_VOID_P == 8
+                       case MONO_TYPE_I:
+                       case MONO_TYPE_U:
+#endif
                        case MONO_TYPE_I8:
                        case MONO_TYPE_U8:
                                op = is_imm ? INTRINS_SSE_PSLLI_Q : INTRINS_SSE_PSLL_Q;
@@ -3754,8 +3786,16 @@ emit_x86_intrinsics (
                case SN_LoadScalarVector128: {
                        int op = 0;
                        switch (arg0_type) {
+#if TARGET_SIZEOF_VOID_P == 4
+                       case MONO_TYPE_I:
+                       case MONO_TYPE_U:
+#endif
                        case MONO_TYPE_I4:
                        case MONO_TYPE_U4: op = OP_SSE2_MOVD; break;
+#if TARGET_SIZEOF_VOID_P == 8
+                       case MONO_TYPE_I:
+                       case MONO_TYPE_U:
+#endif
                        case MONO_TYPE_I8:
                        case MONO_TYPE_U8: op = OP_SSE2_MOVQ; break;
                        case MONO_TYPE_R8: op = OP_SSE2_MOVUPD; break;
@@ -3845,17 +3885,19 @@ emit_x86_intrinsics (
                        int op = 0;
                        switch (arg0_type) {
                        case MONO_TYPE_U1: op = OP_XEXTRACT_I1; break;
-                       case MONO_TYPE_U4: case MONO_TYPE_I4: op = OP_XEXTRACT_I4; break;
-                       case MONO_TYPE_U8: case MONO_TYPE_I8: op = OP_XEXTRACT_I8; break;
-                       case MONO_TYPE_R4: op = OP_XEXTRACT_R4; break;
+#if TARGET_SIZEOF_VOID_P == 4
                        case MONO_TYPE_I:
                        case MONO_TYPE_U:
+#endif
+                       case MONO_TYPE_U4:
+                       case MONO_TYPE_I4: op = OP_XEXTRACT_I4; break;
 #if TARGET_SIZEOF_VOID_P == 8
-                               op = OP_XEXTRACT_I8;
-#else
-                               op = OP_XEXTRACT_I4;
+                       case MONO_TYPE_I:
+                       case MONO_TYPE_U:
 #endif
-                               break;
+                       case MONO_TYPE_U8:
+                       case MONO_TYPE_I8: op = OP_XEXTRACT_I8; break;
+                       case MONO_TYPE_R4: op = OP_XEXTRACT_R4; break;
                        default: g_assert_not_reached(); break;
                        }
                        return emit_simd_ins_for_sig (cfg, klass, op, 0, arg0_type, fsig, args);