Implement the last of the approved cross platform hardware intrinsics, except shuffle...
author Tanner Gooding <tagoo@outlook.com>
Thu, 13 Jan 2022 00:00:04 +0000 (16:00 -0800)
committer GitHub <noreply@github.com>
Thu, 13 Jan 2022 00:00:04 +0000 (16:00 -0800)
* Exposing Sum<T> for Vector64/128/256<T>

* Adding support for ShiftLeft, ShiftRightArithmetic, and ShiftRightLogical to Vector<T> and Vector64/128/256<T>

* Adding support for Load, LoadAligned, LoadAlignedNonTemporal, and LoadUnsafe to Vector64/128/256<T>

* Adding support for Store, StoreAligned, StoreAlignedNonTemporal, and StoreUnsafe to Vector64/128/256<T>

* Adding support for ExtractMostSignificantBits to Vector64/128/256<T>

* Adding tests covering Vector64/128/256<T>.Sum

* Adding tests covering Vector64/128/256<T>.ShiftLeft, ShiftRightArithmetic, and ShiftRightLogical

* Moving System.Runtime.InteropServices.UnmanagedType down to System.Runtime so the `unmanaged` constraint can be used

* Adding tests covering Vector64/128/256<T>.Load, LoadAligned, LoadAlignedNonTemporal, and LoadUnsafe

* Fixing a few issues in the source and tests to ensure the right paths are being taken

* Adding tests covering Vector64/128/256<T>.Store, StoreAligned, StoreAlignedNonTemporal, and StoreUnsafe

* Adding tests covering Vector64/128/256<T>.ExtractMostSignificantBits

* Ensure AlignedAlloc is matched by AlignedFree

* Fixing a couple test issues and the handling of Scalar.ExtractMostSignificantBit for nint/nuint

* Applying formatting patch

* Ensure gtNewOperNode uses TYP_INT when dealing with the shiftCount

* Fixing a couple ARM64 node types

* Ensure the shift intrinsics use impPopStack().val on ARM64

* Responding to PR feedback

25 files changed:
src/coreclr/jit/compiler.h
src/coreclr/jit/gentree.cpp
src/coreclr/jit/hwintrinsicarm64.cpp
src/coreclr/jit/hwintrinsiclistarm64.h
src/coreclr/jit/hwintrinsiclistxarch.h
src/coreclr/jit/hwintrinsicxarch.cpp
src/coreclr/jit/simdashwintrinsic.cpp
src/coreclr/jit/simdashwintrinsiclistarm64.h
src/coreclr/jit/simdashwintrinsiclistxarch.h
src/libraries/System.Numerics.Vectors/ref/System.Numerics.Vectors.cs
src/libraries/System.Private.CoreLib/src/System/Numerics/Vector.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Scalar.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector128_1.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector256_1.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64.cs
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/Vector64_1.cs
src/libraries/System.Runtime.InteropServices/ref/System.Runtime.InteropServices.Forwards.cs
src/libraries/System.Runtime.InteropServices/ref/System.Runtime.InteropServices.cs
src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector128Tests.cs
src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector256Tests.cs
src/libraries/System.Runtime.Intrinsics/tests/Vectors/Vector64Tests.cs
src/libraries/System.Runtime/ref/System.Runtime.cs

index 2b7ec09..f884594 100644 (file)
@@ -3300,6 +3300,9 @@ public:
     GenTree* gtNewSimdSqrtNode(
         var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);
 
+    GenTree* gtNewSimdSumNode(
+        var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic);
+
     GenTree* gtNewSimdUnOpNode(genTreeOps  op,
                                var_types   type,
                                GenTree*    op1,
index 3a61e63..70000ed 100644 (file)
@@ -18074,7 +18074,15 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps  op,
     assert(op1->TypeIs(type, simdBaseType, genActualType(simdBaseType)));
 
     assert(op2 != nullptr);
-    assert(op2->TypeIs(type, simdBaseType, genActualType(simdBaseType)));
+
+    if ((op == GT_LSH) || (op == GT_RSH) || (op == GT_RSZ))
+    {
+        assert(op2->TypeIs(TYP_INT));
+    }
+    else
+    {
+        assert(op2->TypeIs(type, simdBaseType, genActualType(simdBaseType)));
+    }
 
     NamedIntrinsic       intrinsic = NI_Illegal;
     CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSIMD(type, simdBaseJitType);
@@ -18201,6 +18209,67 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps  op,
             break;
         }
 
+        case GT_LSH:
+        case GT_RSH:
+        case GT_RSZ:
+        {
+            assert(!varTypeIsByte(simdBaseType));
+            assert(!varTypeIsFloating(simdBaseType));
+            assert((op != GT_RSH) || !varTypeIsUnsigned(simdBaseType));
+
+            // "Over shifting" is platform-specific behavior, so we match the C# behavior.
+            // This requires masking the shift count with (sizeof(T) * 8) - 1, which ensures the
+            // shift cannot exceed the number of bits available in `T`. This is roughly equivalent
+            // to x % (sizeof(T) * 8), but that is "more expensive" and only the same for unsigned
+            // inputs, whereas we have a signed input and so negative values would differ.
+
+            unsigned shiftCountMask = (genTypeSize(simdBaseType) * 8) - 1;
+
+            if (op2->IsCnsIntOrI())
+            {
+                op2->AsIntCon()->gtIconVal &= shiftCountMask;
+            }
+            else
+            {
+                op2 = gtNewOperNode(GT_AND, TYP_INT, op2, gtNewIconNode(shiftCountMask));
+                op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD16, op2, NI_SSE2_ConvertScalarToVector128Int32, CORINFO_TYPE_INT,
+                                               16, isSimdAsHWIntrinsic);
+            }
+
+            if (simdSize == 32)
+            {
+                assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
+
+                if (op == GT_LSH)
+                {
+                    intrinsic = NI_AVX2_ShiftLeftLogical;
+                }
+                else if (op == GT_RSH)
+                {
+                    intrinsic = NI_AVX2_ShiftRightArithmetic;
+                }
+                else
+                {
+                    assert(op == GT_RSZ);
+                    intrinsic = NI_AVX2_ShiftRightLogical;
+                }
+            }
+            else if (op == GT_LSH)
+            {
+                intrinsic = NI_SSE2_ShiftLeftLogical;
+            }
+            else if (op == GT_RSH)
+            {
+                intrinsic = NI_SSE2_ShiftRightArithmetic;
+            }
+            else
+            {
+                assert(op == GT_RSZ);
+                intrinsic = NI_SSE2_ShiftRightLogical;
+            }
+            break;
+        }
+
         case GT_MUL:
         {
             GenTree** broadcastOp = nullptr;
@@ -18469,6 +18538,98 @@ GenTree* Compiler::gtNewSimdBinOpNode(genTreeOps  op,
             break;
         }
 
+        case GT_LSH:
+        case GT_RSH:
+        case GT_RSZ:
+        {
+            assert(!varTypeIsFloating(simdBaseType));
+            assert((op != GT_RSH) || !varTypeIsUnsigned(simdBaseType));
+
+            // "Over shifting" is platform-specific behavior, so we match the C# behavior.
+            // This requires masking the shift count with (sizeof(T) * 8) - 1, which ensures the
+            // shift cannot exceed the number of bits available in `T`. This is roughly equivalent
+            // to x % (sizeof(T) * 8), but that is "more expensive" and only the same for unsigned
+            // inputs, whereas we have a signed input and so negative values would differ.
+
+            unsigned shiftCountMask = (genTypeSize(simdBaseType) * 8) - 1;
+
+            if (op2->IsCnsIntOrI())
+            {
+                op2->AsIntCon()->gtIconVal &= shiftCountMask;
+
+                if ((simdSize == 8) && varTypeIsLong(simdBaseType))
+                {
+                    if (op == GT_LSH)
+                    {
+                        intrinsic = NI_AdvSimd_ShiftLeftLogicalScalar;
+                    }
+                    else if (op == GT_RSH)
+                    {
+                        intrinsic = NI_AdvSimd_ShiftRightArithmeticScalar;
+                    }
+                    else
+                    {
+                        assert(op == GT_RSZ);
+                        intrinsic = NI_AdvSimd_ShiftRightLogicalScalar;
+                    }
+                }
+                else if (op == GT_LSH)
+                {
+                    intrinsic = NI_AdvSimd_ShiftLeftLogical;
+                }
+                else if (op == GT_RSH)
+                {
+                    intrinsic = NI_AdvSimd_ShiftRightArithmetic;
+                }
+                else
+                {
+                    assert(op == GT_RSZ);
+                    intrinsic = NI_AdvSimd_ShiftRightLogical;
+                }
+            }
+            else
+            {
+                op2 = gtNewOperNode(GT_AND, TYP_INT, op2, gtNewIconNode(shiftCountMask));
+
+                if (op != GT_LSH)
+                {
+                    op2 = gtNewOperNode(GT_NEG, TYP_INT, op2);
+                }
+
+                op2 = gtNewSimdCreateBroadcastNode(type, op2, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
+
+                if ((simdSize == 8) && varTypeIsLong(simdBaseType))
+                {
+                    if (op == GT_LSH)
+                    {
+                        intrinsic = NI_AdvSimd_ShiftLogicalScalar;
+                    }
+                    else if (op == GT_RSH)
+                    {
+                        intrinsic = NI_AdvSimd_ShiftArithmeticScalar;
+                    }
+                    else
+                    {
+                        intrinsic = NI_AdvSimd_ShiftLogicalScalar;
+                    }
+                }
+                else if (op == GT_LSH)
+                {
+                    intrinsic = NI_AdvSimd_ShiftLogical;
+                }
+                else if (op == GT_RSH)
+                {
+                    intrinsic = NI_AdvSimd_ShiftArithmetic;
+                }
+                else
+                {
+                    assert(op == GT_RSZ);
+                    intrinsic = NI_AdvSimd_ShiftLogical;
+                }
+            }
+            break;
+        }
+
         case GT_MUL:
         {
             assert(!varTypeIsLong(simdBaseType));
@@ -20596,6 +20757,123 @@ GenTree* Compiler::gtNewSimdSqrtNode(
     return gtNewSimdHWIntrinsicNode(type, op1, intrinsic, simdBaseJitType, simdSize, isSimdAsHWIntrinsic);
 }
 
+GenTree* Compiler::gtNewSimdSumNode(
+    var_types type, GenTree* op1, CorInfoType simdBaseJitType, unsigned simdSize, bool isSimdAsHWIntrinsic)
+{
+    assert(IsBaselineSimdIsaSupportedDebugOnly());
+
+    var_types simdType = getSIMDTypeForSize(simdSize);
+    assert(varTypeIsSIMD(simdType));
+
+    assert(op1 != nullptr);
+    assert(op1->TypeIs(simdType));
+
+    var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
+    assert(varTypeIsArithmetic(simdBaseType));
+
+    NamedIntrinsic       intrinsic = NI_Illegal;
+    GenTree*             tmp       = nullptr;
+    CORINFO_CLASS_HANDLE clsHnd    = gtGetStructHandleForSIMD(simdType, simdBaseJitType);
+
+#if defined(TARGET_XARCH)
+    assert(!varTypeIsByte(simdBaseType) && !varTypeIsLong(simdBaseType));
+
+    // HorizontalAdd combines pairs so we need log2(vectorLength) passes to sum all elements together.
+    unsigned vectorLength = getSIMDVectorLength(simdSize, simdBaseType);
+    int      haddCount    = genLog2(vectorLength);
+
+    if (simdSize == 32)
+    {
+        // Minus 1 because for the last pass we split the vector to low / high and add them together.
+        haddCount -= 1;
+
+        if (varTypeIsFloating(simdBaseType))
+        {
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX));
+            intrinsic = NI_AVX_HorizontalAdd;
+        }
+        else
+        {
+            assert(compIsaSupportedDebugOnly(InstructionSet_AVX2));
+            intrinsic = NI_AVX2_HorizontalAdd;
+        }
+    }
+    else if (varTypeIsFloating(simdBaseType))
+    {
+        assert(compIsaSupportedDebugOnly(InstructionSet_SSE3));
+        intrinsic = NI_SSE3_HorizontalAdd;
+    }
+    else
+    {
+        assert(compIsaSupportedDebugOnly(InstructionSet_SSSE3));
+        intrinsic = NI_SSSE3_HorizontalAdd;
+    }
+
+    for (int i = 0; i < haddCount; i++)
+    {
+        op1 = impCloneExpr(op1, &tmp, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone op1 for vector sum"));
+        op1 = gtNewSimdAsHWIntrinsicNode(simdType, op1, tmp, intrinsic, simdBaseJitType, simdSize);
+    }
+
+    if (simdSize == 32)
+    {
+        intrinsic = (simdBaseType == TYP_FLOAT) ? NI_SSE_Add : NI_SSE2_Add;
+
+        op1 = impCloneExpr(op1, &tmp, clsHnd, (unsigned)CHECK_SPILL_ALL, nullptr DEBUGARG("Clone op1 for vector sum"));
+        op1 = gtNewSimdAsHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(0x01, TYP_INT), NI_AVX_ExtractVector128,
+                                         simdBaseJitType, simdSize);
+
+        tmp = gtNewSimdAsHWIntrinsicNode(simdType, tmp, NI_Vector256_GetLower, simdBaseJitType, simdSize);
+        op1 = gtNewSimdAsHWIntrinsicNode(TYP_SIMD16, op1, tmp, intrinsic, simdBaseJitType, 16);
+    }
+
+    return gtNewSimdAsHWIntrinsicNode(type, op1, NI_Vector128_ToScalar, simdBaseJitType, simdSize);
+#elif defined(TARGET_ARM64)
+    switch (simdBaseType)
+    {
+        case TYP_BYTE:
+        case TYP_UBYTE:
+        case TYP_SHORT:
+        case TYP_USHORT:
+        case TYP_INT:
+        case TYP_UINT:
+        {
+            tmp = gtNewSimdAsHWIntrinsicNode(simdType, op1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, simdSize);
+            return gtNewSimdAsHWIntrinsicNode(type, tmp, NI_Vector64_ToScalar, simdBaseJitType, 8);
+        }
+        case TYP_FLOAT:
+        {
+            unsigned vectorLength = getSIMDVectorLength(simdSize, simdBaseType);
+            int      haddCount    = genLog2(vectorLength);
+
+            for (int i = 0; i < haddCount; i++)
+            {
+                op1 = impCloneExpr(op1, &tmp, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                   nullptr DEBUGARG("Clone op1 for vector sum"));
+                op1 = gtNewSimdAsHWIntrinsicNode(simdType, op1, tmp, NI_AdvSimd_Arm64_AddPairwise, simdBaseJitType,
+                                                 simdSize);
+            }
+
+            return gtNewSimdAsHWIntrinsicNode(type, op1, NI_Vector128_ToScalar, simdBaseJitType, simdSize);
+        }
+        case TYP_DOUBLE:
+        case TYP_LONG:
+        case TYP_ULONG:
+        {
+            op1 = gtNewSimdAsHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_Arm64_AddPairwiseScalar, simdBaseJitType,
+                                             simdSize);
+            return gtNewSimdAsHWIntrinsicNode(type, op1, NI_Vector64_ToScalar, simdBaseJitType, 8);
+        }
+        default:
+        {
+            unreached();
+        }
+    }
+#else
+#error Unsupported platform
+#endif // !TARGET_XARCH && !TARGET_ARM64
+}
+
 GenTree* Compiler::gtNewSimdUnOpNode(genTreeOps  op,
                                      var_types   type,
                                      GenTree*    op1,
index 69902ac..0c1b01e 100644 (file)
@@ -332,7 +332,7 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
 
     assert(numArgs >= 0);
 
-    const var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
+    var_types simdBaseType = JitType2PreciseVarType(simdBaseJitType);
     assert(varTypeIsArithmetic(simdBaseType));
 
     GenTree* retNode = nullptr;
@@ -652,6 +652,189 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_Vector64_ExtractMostSignificantBits:
+        case NI_Vector128_ExtractMostSignificantBits:
+        {
+            assert(sig->numArgs == 1);
+
+            // ARM64 doesn't have a single instruction that performs the behavior so we'll emulate it instead.
+            // To do this, we effectively perform the following steps:
+            // 1. tmp = input & 0x80         ; and the input to clear all but the most significant bit
+            // 2. tmp = tmp >> index         ; right shift each element by its index
+            // 3. tmp = sum(tmp)             ; sum the elements together
+
+            // For byte/sbyte, we also need to handle the fact that we can only shift by up to 8
+            // but for Vector128, we have 16 elements to handle. In that scenario, we will simply
+            // extract both scalars, and combine them via: (upper << 8) | lower
+
+            var_types simdType = getSIMDTypeForSize(simdSize);
+
+            op1 = impSIMDPopStack(simdType);
+
+            GenTree*    vectorCreateOp1  = nullptr;
+            GenTree*    vectorCreateOp2  = nullptr;
+            CorInfoType vectorCreateType = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_ULONG : CORINFO_TYPE_LONG;
+
+            switch (simdBaseType)
+            {
+                case TYP_BYTE:
+                case TYP_UBYTE:
+                {
+                    op2             = gtNewIconNode(0x80);
+                    vectorCreateOp1 = gtNewLconNode(0x00FFFEFDFCFBFAF9);
+
+                    if (simdSize == 16)
+                    {
+                        vectorCreateOp2 = gtNewLconNode(0x00FFFEFDFCFBFAF9);
+                    }
+                    break;
+                }
+
+                case TYP_SHORT:
+                case TYP_USHORT:
+                {
+                    op2             = gtNewIconNode(0x8000);
+                    vectorCreateOp1 = gtNewLconNode(0xFFF4FFF3FFF2FFF1);
+
+                    if (simdSize == 16)
+                    {
+                        vectorCreateOp2 = gtNewLconNode(0xFFF8FFF7FFF6FFF5);
+                    }
+                    break;
+                }
+
+                case TYP_INT:
+                case TYP_UINT:
+                {
+                    op2             = gtNewIconNode(0x80000000);
+                    vectorCreateOp1 = gtNewLconNode(0xFFFFFFE2FFFFFFE1);
+
+                    if (simdSize == 16)
+                    {
+                        vectorCreateOp2 = gtNewLconNode(0xFFFFFFE4FFFFFFE3);
+                    }
+                    break;
+                }
+
+                case TYP_LONG:
+                case TYP_ULONG:
+                {
+                    op2             = gtNewLconNode(0x8000000000000000);
+                    vectorCreateOp1 = gtNewLconNode(0xFFFFFFFFFFFFFFC1);
+
+                    if (simdSize == 16)
+                    {
+                        vectorCreateOp2 = gtNewLconNode(0xFFFFFFFFFFFFFFC2);
+                    }
+                    break;
+                }
+
+                case TYP_FLOAT:
+                {
+                    op2             = gtNewIconNode(0x80000000);
+                    simdBaseType    = TYP_INT;
+                    simdBaseJitType = CORINFO_TYPE_INT;
+                    vectorCreateOp1 = gtNewLconNode(0xFFFFFFE2FFFFFFE1);
+
+                    if (simdSize == 16)
+                    {
+                        vectorCreateOp2 = gtNewLconNode(0xFFFFFFE4FFFFFFE3);
+                    }
+                    break;
+                }
+
+                case TYP_DOUBLE:
+                {
+                    op2             = gtNewLconNode(0x8000000000000000);
+                    simdBaseType    = TYP_LONG;
+                    simdBaseJitType = CORINFO_TYPE_LONG;
+                    vectorCreateOp1 = gtNewLconNode(0xFFFFFFFFFFFFFFC1);
+
+                    if (simdSize == 16)
+                    {
+                        vectorCreateOp2 = gtNewLconNode(0xFFFFFFFFFFFFFFC2);
+                    }
+                    break;
+                }
+
+                default:
+                {
+                    unreached();
+                }
+            }
+
+            if (simdSize == 16)
+            {
+                op3 = gtNewSimdHWIntrinsicNode(simdType, vectorCreateOp1, vectorCreateOp2, NI_Vector128_Create,
+                                               vectorCreateType, simdSize);
+            }
+            else
+            {
+                op3 =
+                    gtNewSimdHWIntrinsicNode(simdType, vectorCreateOp1, NI_Vector64_Create, vectorCreateType, simdSize);
+            }
+
+            op1 = gtNewSimdHWIntrinsicNode(simdType, op1, op2, NI_AdvSimd_And, simdBaseJitType, simdSize,
+                                           /* isSimdAsHWIntrinsic */ false);
+
+            op1 = gtNewSimdHWIntrinsicNode(simdType, op1, op3, NI_AdvSimd_ShiftLogical, simdBaseJitType, simdSize,
+                                           /* isSimdAsHWIntrinsic */ false);
+
+            if (varTypeIsByte(simdBaseType) && (simdSize == 16))
+            {
+                CORINFO_CLASS_HANDLE clsHnd = gtGetStructHandleForSIMD(simdType, simdBaseJitType);
+
+                op1 = impCloneExpr(op1, &op2, clsHnd, (unsigned)CHECK_SPILL_ALL,
+                                   nullptr DEBUGARG("Clone op1 for vector extractmostsignificantbits"));
+
+                op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_Vector128_GetLower, simdBaseJitType, simdSize,
+                                               /* isSimdAsHWIntrinsic */ false);
+                op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, 8,
+                                               /* isSimdAsHWIntrinsic */ false);
+                op1 = gtNewSimdHWIntrinsicNode(simdBaseType, op1, NI_Vector64_ToScalar, simdBaseJitType, 8,
+                                               /* isSimdAsHWIntrinsic */ false);
+                op1 = gtNewCastNode(TYP_INT, op1, /* isUnsigned */ true, simdBaseType);
+
+                GenTree* zero  = gtNewSimdHWIntrinsicNode(retType, NI_Vector128_get_Zero, simdBaseJitType, simdSize);
+                ssize_t  index = 8 / genTypeSize(simdBaseType);
+
+                op2 = gtNewSimdHWIntrinsicNode(simdType, op2, zero, gtNewIconNode(index), NI_AdvSimd_ExtractVector128,
+                                               simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+                op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op2, NI_Vector128_GetLower, simdBaseJitType, simdSize,
+                                               /* isSimdAsHWIntrinsic */ false);
+                op2 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op2, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType, 8,
+                                               /* isSimdAsHWIntrinsic */ false);
+                op2 = gtNewSimdHWIntrinsicNode(simdBaseType, op2, NI_Vector64_ToScalar, simdBaseJitType, 8,
+                                               /* isSimdAsHWIntrinsic */ false);
+                op2 = gtNewCastNode(TYP_INT, op2, /* isUnsigned */ true, simdBaseType);
+
+                op2     = gtNewOperNode(GT_LSH, TYP_INT, op2, gtNewIconNode(8));
+                retNode = gtNewOperNode(GT_OR, TYP_INT, op1, op2);
+            }
+            else
+            {
+                if (!varTypeIsLong(simdBaseType))
+                {
+                    op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType,
+                                                   simdSize, /* isSimdAsHWIntrinsic */ false);
+                }
+                else if (simdSize == 16)
+                {
+                    op1 = gtNewSimdHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_Arm64_AddPairwiseScalar, simdBaseJitType,
+                                                   simdSize, /* isSimdAsHWIntrinsic */ false);
+                }
+
+                retNode = gtNewSimdHWIntrinsicNode(simdBaseType, op1, NI_Vector64_ToScalar, simdBaseJitType, 8,
+                                                   /* isSimdAsHWIntrinsic */ false);
+
+                if ((simdBaseType != TYP_INT) && (simdBaseType != TYP_UINT))
+                {
+                    retNode = gtNewCastNode(TYP_INT, retNode, /* isUnsigned */ true, simdBaseType);
+                }
+            }
+            break;
+        }
+
         case NI_Vector64_Floor:
         case NI_Vector128_Floor:
         {
@@ -874,6 +1057,164 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_Vector64_Load:
+        case NI_Vector128_Load:
+        {
+            assert(sig->numArgs == 1);
+
+            op1 = impPopStack().val;
+
+            if (op1->OperIs(GT_CAST))
+            {
+                // Although the API specifies a pointer, if what we have is a BYREF, that's what
+                // we really want, so throw away the cast.
+                if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
+                {
+                    op1 = op1->gtGetOp1();
+                }
+            }
+
+            NamedIntrinsic loadIntrinsic = NI_Illegal;
+
+            if (simdSize == 16)
+            {
+                loadIntrinsic = NI_AdvSimd_LoadVector128;
+            }
+            else
+            {
+                loadIntrinsic = NI_AdvSimd_LoadVector64;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, loadIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector64_LoadAligned:
+        case NI_Vector128_LoadAligned:
+        {
+            assert(sig->numArgs == 1);
+
+            if (!opts.MinOpts())
+            {
+                // ARM64 doesn't have aligned loads, but aligned loads are only validated to be
+                // aligned during minopts, so only skip the intrinsic handling if we're minopts
+                break;
+            }
+
+            op1 = impPopStack().val;
+
+            if (op1->OperIs(GT_CAST))
+            {
+                // Although the API specifies a pointer, if what we have is a BYREF, that's what
+                // we really want, so throw away the cast.
+                if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
+                {
+                    op1 = op1->gtGetOp1();
+                }
+            }
+
+            NamedIntrinsic loadIntrinsic = NI_Illegal;
+
+            if (simdSize == 16)
+            {
+                loadIntrinsic = NI_AdvSimd_LoadVector128;
+            }
+            else
+            {
+                loadIntrinsic = NI_AdvSimd_LoadVector64;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, loadIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector64_LoadAlignedNonTemporal:
+        case NI_Vector128_LoadAlignedNonTemporal:
+        {
+            assert(sig->numArgs == 1);
+
+            if (!opts.MinOpts())
+            {
+                // ARM64 doesn't have aligned loads, but aligned loads are only validated to be
+                // aligned during minopts, so only skip the intrinsic handling if we're minopts
+                break;
+            }
+
+            op1 = impPopStack().val;
+
+            if (op1->OperIs(GT_CAST))
+            {
+                // Although the API specifies a pointer, if what we have is a BYREF, that's what
+                // we really want, so throw away the cast.
+                if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
+                {
+                    op1 = op1->gtGetOp1();
+                }
+            }
+
+            // ARM64 has non-temporal loads (LDNP) but we don't currently support them
+
+            NamedIntrinsic loadIntrinsic = NI_Illegal;
+
+            if (simdSize == 16)
+            {
+                loadIntrinsic = NI_AdvSimd_LoadVector128;
+            }
+            else
+            {
+                loadIntrinsic = NI_AdvSimd_LoadVector64;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, loadIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector64_LoadUnsafe:
+        case NI_Vector128_LoadUnsafe:
+        {
+            if (sig->numArgs == 2)
+            {
+                op2 = impPopStack().val;
+            }
+            else
+            {
+                assert(sig->numArgs == 1);
+            }
+
+            op1 = impPopStack().val;
+
+            if (op1->OperIs(GT_CAST))
+            {
+                // Although the API specifies a pointer, if what we have is a BYREF, that's what
+                // we really want, so throw away the cast.
+                if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
+                {
+                    op1 = op1->gtGetOp1();
+                }
+            }
+
+            if (sig->numArgs == 2)
+            {
+                op3 = gtNewIconNode(genTypeSize(simdBaseType), op2->TypeGet());
+                op2 = gtNewOperNode(GT_MUL, op2->TypeGet(), op2, op3);
+                op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, op2);
+            }
+
+            NamedIntrinsic loadIntrinsic = NI_Illegal;
+
+            if (simdSize == 16)
+            {
+                loadIntrinsic = NI_AdvSimd_LoadVector128;
+            }
+            else
+            {
+                loadIntrinsic = NI_AdvSimd_LoadVector64;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, loadIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
         case NI_Vector64_Max:
         case NI_Vector128_Max:
         {
@@ -1001,6 +1342,45 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_Vector64_ShiftLeft:
+        case NI_Vector128_ShiftLeft:
+        {
+            assert(sig->numArgs == 2);
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize,
+                                         /* isSimdAsHWIntrinsic */ false);
+            break;
+        }
+
+        case NI_Vector64_ShiftRightArithmetic:
+        case NI_Vector128_ShiftRightArithmetic:
+        {
+            assert(sig->numArgs == 2);
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            retNode = gtNewSimdBinOpNode(GT_RSH, retType, op1, op2, simdBaseJitType, simdSize,
+                                         /* isSimdAsHWIntrinsic */ false);
+            break;
+        }
+
+        case NI_Vector64_ShiftRightLogical:
+        case NI_Vector128_ShiftRightLogical:
+        {
+            assert(sig->numArgs == 2);
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize,
+                                         /* isSimdAsHWIntrinsic */ false);
+            break;
+        }
+
         case NI_Vector64_Sqrt:
         case NI_Vector128_Sqrt:
         {
@@ -1014,6 +1394,94 @@ GenTree* Compiler::impSpecialIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_Vector64_Store:
+        case NI_Vector128_Store:
+        {
+            assert(sig->numArgs == 2);
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op2, op1, NI_AdvSimd_Store, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector64_StoreAligned:
+        case NI_Vector128_StoreAligned:
+        {
+            assert(sig->numArgs == 2);
+
+            if (!opts.MinOpts())
+            {
+                // ARM64 doesn't have aligned stores, but aligned stores are only validated to be
+                // aligned during minopts, so only skip the intrinsic handling if we're minopts
+                break;
+            }
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op2, op1, NI_AdvSimd_Store, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector64_StoreAlignedNonTemporal:
+        case NI_Vector128_StoreAlignedNonTemporal:
+        {
+            assert(sig->numArgs == 2);
+
+            if (!opts.MinOpts())
+            {
+                // ARM64 doesn't have aligned stores, but aligned stores are only validated to be
+                // aligned during minopts, so only skip the intrinsic handling if we're minopts
+                break;
+            }
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            // ARM64 has non-temporal stores (STNP) but we don't currently support them
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op2, op1, NI_AdvSimd_Store, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector64_StoreUnsafe:
+        case NI_Vector128_StoreUnsafe:
+        {
+            if (sig->numArgs == 3)
+            {
+                op3 = impPopStack().val;
+            }
+            else
+            {
+                assert(sig->numArgs == 2);
+            }
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            if (sig->numArgs == 3)
+            {
+                op3 = gtNewIconNode(genTypeSize(simdBaseType), op2->TypeGet());
+                op2 = gtNewOperNode(GT_MUL, op2->TypeGet(), op2, op3);
+                op2 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, op2);
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op2, op1, NI_AdvSimd_Store, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector64_Sum:
+        case NI_Vector128_Sum:
+        {
+            assert(sig->numArgs == 1);
+
+            op1     = impSIMDPopStack(retType);
+            retNode = gtNewSimdSumNode(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+            break;
+        }
+
         case NI_Vector64_WidenLower:
         case NI_Vector128_WidenLower:
         {
index 9360ea2..4b2e3fa 100644 (file)
@@ -46,6 +46,7 @@ HARDWARE_INTRINSIC(Vector64,      Dot,
 HARDWARE_INTRINSIC(Vector64,      Equals,                                                            8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      EqualsAll,                                                         8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      EqualsAny,                                                         8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      ExtractMostSignificantBits,                                        8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      Floor,                                                             8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      get_AllBitsSet,                                                    8,      0,     {INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},        HW_Category_Helper,                HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Vector64,      get_Count,                                                         8,      0,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
@@ -63,6 +64,10 @@ HARDWARE_INTRINSIC(Vector64,      LessThanAny,
 HARDWARE_INTRINSIC(Vector64,      LessThanOrEqual,                                                   8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      LessThanOrEqualAll,                                                8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      LessThanOrEqualAny,                                                8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      Load,                                                              8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      LoadAligned,                                                       8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      LoadAlignedNonTemporal,                                            8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      LoadUnsafe,                                                        8,     -1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      Max,                                                               8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      Min,                                                               8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      Multiply,                                                          8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
@@ -81,8 +86,16 @@ HARDWARE_INTRINSIC(Vector64,      op_OnesComplement,
 HARDWARE_INTRINSIC(Vector64,      op_Subtraction,                                                    8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      op_UnaryNegation,                                                  8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      op_UnaryPlus,                                                      8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector64,      Subtract,                                                          8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      ShiftLeft,                                                         8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      ShiftRightArithmetic,                                              8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      ShiftRightLogical,                                                 8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      Sqrt,                                                              8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      Store,                                                             8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      StoreAligned,                                                      8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      StoreAlignedNonTemporal,                                           8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      StoreUnsafe,                                                       8,     -1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      Subtract,                                                          8,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector64,      Sum,                                                               8,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector64,      ToScalar,                                                          8,      1,     {INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},         HW_Category_SIMD,                  HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector64,      ToVector128,                                                       8,      1,     {INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov},         HW_Category_SIMD,                  HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector64,      ToVector128Unsafe,                                                 8,      1,     {INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov,            INS_mov},         HW_Category_SIMD,                  HW_Flag_SpecialCodeGen)
@@ -131,6 +144,7 @@ HARDWARE_INTRINSIC(Vector128,     Dot,
 HARDWARE_INTRINSIC(Vector128,     Equals,                                                           16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     EqualsAll,                                                        16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     EqualsAny,                                                        16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     ExtractMostSignificantBits,                                       16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     Floor,                                                            16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     get_AllBitsSet,                                                   16,      0,     {INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni,           INS_mvni},        HW_Category_Helper,                HW_Flag_SpecialCodeGen|HW_Flag_SpecialImport)
 HARDWARE_INTRINSIC(Vector128,     get_Count,                                                        16,      0,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_NoCodeGen|HW_Flag_SpecialImport)
@@ -150,6 +164,10 @@ HARDWARE_INTRINSIC(Vector128,     LessThanAny,
 HARDWARE_INTRINSIC(Vector128,     LessThanOrEqual,                                                  16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     LessThanOrEqualAll,                                               16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     LessThanOrEqualAny,                                               16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     Load,                                                             16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     LoadAligned,                                                      16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     LoadAlignedNonTemporal,                                           16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     LoadUnsafe,                                                       16,     -1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     Max,                                                              16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     Min,                                                              16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     Multiply,                                                         16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
@@ -168,8 +186,16 @@ HARDWARE_INTRINSIC(Vector128,     op_OnesComplement,
 HARDWARE_INTRINSIC(Vector128,     op_Subtraction,                                                   16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     op_UnaryNegation,                                                 16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     op_UnaryPlus,                                                     16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128,     Subtract,                                                         16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     ShiftLeft,                                                        16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     ShiftRightArithmetic,                                             16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     ShiftRightLogical,                                                16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     Sqrt,                                                             16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     Store,                                                            16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     StoreAligned,                                                     16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     StoreAlignedNonTemporal,                                          16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     StoreUnsafe,                                                      16,     -1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     Subtract,                                                         16,      2,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,     Sum,                                                              16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,     ToScalar,                                                         16,      1,     {INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_smov,           INS_umov,           INS_umov,           INS_umov,           INS_dup,            INS_dup},         HW_Category_SIMD,                  HW_Flag_BaseTypeFromFirstArg|HW_Flag_SIMDScalar|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(Vector128,     WidenLower,                                                       16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector128,     WidenUpper,                                                       16,      1,     {INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},     HW_Category_Helper,                HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
index 827a5a8..95ab714 100644 (file)
@@ -63,6 +63,7 @@ HARDWARE_INTRINSIC(Vector128,       Dot,
 HARDWARE_INTRINSIC(Vector128,       Equals,                                     16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       EqualsAll,                                  16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       EqualsAny,                                  16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       ExtractMostSignificantBits,                 16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       Floor,                                      16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       get_AllBitsSet,                             16,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmpps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       get_Count,                                  16,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
@@ -80,6 +81,10 @@ HARDWARE_INTRINSIC(Vector128,       LessThanAny,
 HARDWARE_INTRINSIC(Vector128,       LessThanOrEqual,                            16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       LessThanOrEqualAll,                         16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       LessThanOrEqualAny,                         16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       Load,                                       16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       LoadAligned,                                16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       LoadAlignedNonTemporal,                     16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       LoadUnsafe,                                 16,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       Max,                                        16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       Min,                                        16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       Multiply,                                   16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
@@ -98,8 +103,16 @@ HARDWARE_INTRINSIC(Vector128,       op_OnesComplement,
 HARDWARE_INTRINSIC(Vector128,       op_Subtraction,                             16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       op_UnaryNegation,                           16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       op_UnaryPlus,                               16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128,       Subtract,                                   16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       ShiftLeft,                                  16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       ShiftRightArithmetic,                       16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       ShiftRightLogical,                          16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       Sqrt,                                       16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       Store,                                      16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       StoreAligned,                               16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       StoreAlignedNonTemporal,                    16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       StoreUnsafe,                                16,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       Subtract,                                   16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector128,       Sum,                                        16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector128,       ToScalar,                                   16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       ToVector256,                                16,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       ToVector256Unsafe,                          16,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
@@ -146,6 +159,7 @@ HARDWARE_INTRINSIC(Vector256,       Dot,
 HARDWARE_INTRINSIC(Vector256,       Equals,                                     32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       EqualsAll,                                  32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       EqualsAny,                                  32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       ExtractMostSignificantBits,                 32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       Floor,                                      32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       get_AllBitsSet,                             32,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmpps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       get_Count,                                  32,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
@@ -164,6 +178,10 @@ HARDWARE_INTRINSIC(Vector256,       LessThanAny,
 HARDWARE_INTRINSIC(Vector256,       LessThanOrEqual,                            32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       LessThanOrEqualAll,                         32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       LessThanOrEqualAny,                         32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       Load,                                       32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       LoadAligned,                                32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       LoadAlignedNonTemporal,                     32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       LoadUnsafe,                                 32,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       Max,                                        32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       Min,                                        32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       Multiply,                                   32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
@@ -182,8 +200,16 @@ HARDWARE_INTRINSIC(Vector256,       op_OnesComplement,
 HARDWARE_INTRINSIC(Vector256,       op_Subtraction,                             32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       op_UnaryNegation,                           32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       op_UnaryPlus,                               32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector256,       Subtract,                                   32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       ShiftLeft,                                  32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       ShiftRightArithmetic,                       32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       ShiftRightLogical,                          32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       Sqrt,                                       32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       Store,                                      32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       StoreAligned,                               32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       StoreAlignedNonTemporal,                    32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       StoreUnsafe,                                32,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       Subtract,                                   32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
+HARDWARE_INTRINSIC(Vector256,       Sum,                                        32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       ToScalar,                                   32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movss,              INS_movsdsse2},         HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       WidenLower,                                 32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector256,       WidenUpper,                                 32,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen|HW_Flag_BaseTypeFromFirstArg)
index a0eceba..e6f4995 100644 (file)
@@ -1035,6 +1035,39 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_Vector128_ExtractMostSignificantBits:
+        case NI_Vector256_ExtractMostSignificantBits:
+        {
+            assert(sig->numArgs == 1);
+
+            if ((simdSize != 32) || varTypeIsFloating(simdBaseType) || compExactlyDependsOn(InstructionSet_AVX2))
+            {
+                var_types simdType = getSIMDTypeForSize(simdSize);
+
+                op1 = impSIMDPopStack(simdType);
+
+                NamedIntrinsic moveMaskIntrinsic = NI_Illegal;
+
+                if (simdBaseType == TYP_FLOAT)
+                {
+                    moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_SSE_MoveMask;
+                }
+                else if (simdBaseType == TYP_DOUBLE)
+                {
+                    moveMaskIntrinsic = (simdSize == 32) ? NI_AVX_MoveMask : NI_SSE2_MoveMask;
+                }
+                else
+                {
+                    moveMaskIntrinsic = (simdSize == 32) ? NI_AVX2_MoveMask : NI_SSE2_MoveMask;
+                    simdBaseJitType   = varTypeIsUnsigned(simdBaseType) ? CORINFO_TYPE_UBYTE : CORINFO_TYPE_BYTE;
+                }
+
+                retNode = gtNewSimdHWIntrinsicNode(retType, op1, moveMaskIntrinsic, simdBaseJitType, simdSize,
+                                                   /* isSimdAsHWIntrinsic */ false);
+            }
+            break;
+        }
+
         case NI_Vector128_Floor:
         case NI_Vector256_Floor:
         {
@@ -1326,6 +1359,183 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_Vector128_Load:
+        case NI_Vector256_Load:
+        {
+            assert(sig->numArgs == 1);
+
+            op1 = impPopStack().val;
+
+            if (op1->OperIs(GT_CAST))
+            {
+                // Although the API specifies a pointer, if what we have is a BYREF, that's what
+                // we really want, so throw away the cast.
+                if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
+                {
+                    op1 = op1->gtGetOp1();
+                }
+            }
+
+            NamedIntrinsic loadIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                loadIntrinsic = NI_AVX_LoadVector256;
+            }
+            else if (simdBaseType != TYP_FLOAT)
+            {
+                loadIntrinsic = NI_SSE2_LoadVector128;
+            }
+            else
+            {
+                loadIntrinsic = NI_SSE_LoadVector128;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, loadIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector128_LoadAligned:
+        case NI_Vector256_LoadAligned:
+        {
+            assert(sig->numArgs == 1);
+
+            op1 = impPopStack().val;
+
+            if (op1->OperIs(GT_CAST))
+            {
+                // Although the API specifies a pointer, if what we have is a BYREF, that's what
+                // we really want, so throw away the cast.
+                if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
+                {
+                    op1 = op1->gtGetOp1();
+                }
+            }
+
+            NamedIntrinsic loadIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                loadIntrinsic = NI_AVX_LoadAlignedVector256;
+            }
+            else if (simdBaseType != TYP_FLOAT)
+            {
+                loadIntrinsic = NI_SSE2_LoadAlignedVector128;
+            }
+            else
+            {
+                loadIntrinsic = NI_SSE_LoadAlignedVector128;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, loadIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector128_LoadAlignedNonTemporal:
+        case NI_Vector256_LoadAlignedNonTemporal:
+        {
+            assert(sig->numArgs == 1);
+
+            if ((simdSize == 16) && !compOpportunisticallyDependsOn(InstructionSet_SSE41))
+            {
+                // Vector128 requires at least SSE4.1
+                break;
+            }
+            else if ((simdSize == 32) && !compOpportunisticallyDependsOn(InstructionSet_AVX2))
+            {
+                // Vector256 requires at least AVX2
+                break;
+            }
+
+            op1 = impPopStack().val;
+
+            if (op1->OperIs(GT_CAST))
+            {
+                // Although the API specifies a pointer, if what we have is a BYREF, that's what
+                // we really want, so throw away the cast.
+                if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
+                {
+                    op1 = op1->gtGetOp1();
+                }
+            }
+
+            NamedIntrinsic loadIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                loadIntrinsic = NI_AVX2_LoadAlignedVector256NonTemporal;
+            }
+            else
+            {
+                loadIntrinsic = NI_SSE41_LoadAlignedVector128NonTemporal;
+            }
+
+            // float and double don't have actual instructions for non-temporal loads
+            // so we'll just use the equivalent integer instruction instead.
+
+            if (simdBaseType == TYP_FLOAT)
+            {
+                simdBaseJitType = CORINFO_TYPE_INT;
+            }
+            else if (simdBaseType == TYP_DOUBLE)
+            {
+                simdBaseJitType = CORINFO_TYPE_LONG;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, loadIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector128_LoadUnsafe:
+        case NI_Vector256_LoadUnsafe:
+        {
+            if (sig->numArgs == 2)
+            {
+                op2 = impPopStack().val;
+            }
+            else
+            {
+                assert(sig->numArgs == 1);
+            }
+
+            op1 = impPopStack().val;
+
+            if (op1->OperIs(GT_CAST))
+            {
+                // Although the API specifies a pointer, if what we have is a BYREF, that's what
+                // we really want, so throw away the cast.
+                if (op1->gtGetOp1()->TypeGet() == TYP_BYREF)
+                {
+                    op1 = op1->gtGetOp1();
+                }
+            }
+
+            if (sig->numArgs == 2)
+            {
+                op3 = gtNewIconNode(genTypeSize(simdBaseType), op2->TypeGet());
+                op2 = gtNewOperNode(GT_MUL, op2->TypeGet(), op2, op3);
+                op1 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, op2);
+            }
+
+            NamedIntrinsic loadIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                loadIntrinsic = NI_AVX_LoadVector256;
+            }
+            else if (simdBaseType != TYP_FLOAT)
+            {
+                loadIntrinsic = NI_SSE2_LoadVector128;
+            }
+            else
+            {
+                loadIntrinsic = NI_SSE_LoadVector128;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op1, loadIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
         case NI_Vector128_Max:
         case NI_Vector256_Max:
         {
@@ -1487,6 +1697,72 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_Vector128_ShiftLeft:
+        case NI_Vector256_ShiftLeft:
+        {
+            assert(sig->numArgs == 2);
+
+            if (varTypeIsByte(simdBaseType))
+            {
+                // byte and sbyte would require more work to support
+                break;
+            }
+
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
+            {
+                op2 = impPopStack().val;
+                op1 = impSIMDPopStack(retType);
+
+                retNode = gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize,
+                                             /* isSimdAsHWIntrinsic */ false);
+            }
+            break;
+        }
+
+        case NI_Vector128_ShiftRightArithmetic:
+        case NI_Vector256_ShiftRightArithmetic:
+        {
+            assert(sig->numArgs == 2);
+
+            if (varTypeIsByte(simdBaseType) || varTypeIsLong(simdBaseType))
+            {
+                // byte, sbyte, long, and ulong would require more work to support
+                break;
+            }
+
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
+            {
+                op2 = impPopStack().val;
+                op1 = impSIMDPopStack(retType);
+
+                retNode = gtNewSimdBinOpNode(GT_RSH, retType, op1, op2, simdBaseJitType, simdSize,
+                                             /* isSimdAsHWIntrinsic */ false);
+            }
+            break;
+        }
+
+        case NI_Vector128_ShiftRightLogical:
+        case NI_Vector256_ShiftRightLogical:
+        {
+            assert(sig->numArgs == 2);
+
+            if (varTypeIsByte(simdBaseType))
+            {
+                // byte and sbyte would require more work to support
+                break;
+            }
+
+            if ((simdSize != 32) || compExactlyDependsOn(InstructionSet_AVX2))
+            {
+                op2 = impPopStack().val;
+                op1 = impSIMDPopStack(retType);
+
+                retNode = gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize,
+                                             /* isSimdAsHWIntrinsic */ false);
+            }
+            break;
+        }
+
         case NI_Vector128_Sqrt:
         case NI_Vector256_Sqrt:
         {
@@ -1500,6 +1776,157 @@ GenTree* Compiler::impBaseIntrinsic(NamedIntrinsic        intrinsic,
             break;
         }
 
+        case NI_Vector128_Store:
+        case NI_Vector256_Store:
+        {
+            assert(sig->numArgs == 2);
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            NamedIntrinsic storeIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                storeIntrinsic = NI_AVX_Store;
+            }
+            else if (simdBaseType != TYP_FLOAT)
+            {
+                storeIntrinsic = NI_SSE2_Store;
+            }
+            else
+            {
+                storeIntrinsic = NI_SSE_Store;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op2, op1, storeIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector128_StoreAligned:
+        case NI_Vector256_StoreAligned:
+        {
+            assert(sig->numArgs == 2);
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            NamedIntrinsic storeIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                storeIntrinsic = NI_AVX_StoreAligned;
+            }
+            else if (simdBaseType != TYP_FLOAT)
+            {
+                storeIntrinsic = NI_SSE2_StoreAligned;
+            }
+            else
+            {
+                storeIntrinsic = NI_SSE_StoreAligned;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op2, op1, storeIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector128_StoreAlignedNonTemporal:
+        case NI_Vector256_StoreAlignedNonTemporal:
+        {
+            assert(sig->numArgs == 2);
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            NamedIntrinsic storeIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                storeIntrinsic = NI_AVX_StoreAlignedNonTemporal;
+            }
+            else if (simdBaseType != TYP_FLOAT)
+            {
+                storeIntrinsic = NI_SSE2_StoreAlignedNonTemporal;
+            }
+            else
+            {
+                storeIntrinsic = NI_SSE_StoreAlignedNonTemporal;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op2, op1, storeIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector128_StoreUnsafe:
+        case NI_Vector256_StoreUnsafe:
+        {
+            if (sig->numArgs == 3)
+            {
+                op3 = impPopStack().val;
+            }
+            else
+            {
+                assert(sig->numArgs == 2);
+            }
+
+            op2 = impPopStack().val;
+            op1 = impSIMDPopStack(retType);
+
+            if (sig->numArgs == 3)
+            {
+                op3 = gtNewIconNode(genTypeSize(simdBaseType), op2->TypeGet());
+                op2 = gtNewOperNode(GT_MUL, op2->TypeGet(), op2, op3);
+                op2 = gtNewOperNode(GT_ADD, op1->TypeGet(), op1, op2);
+            }
+
+            NamedIntrinsic storeIntrinsic = NI_Illegal;
+
+            if (simdSize == 32)
+            {
+                storeIntrinsic = NI_AVX_Store;
+            }
+            else if (simdBaseType != TYP_FLOAT)
+            {
+                storeIntrinsic = NI_SSE2_Store;
+            }
+            else
+            {
+                storeIntrinsic = NI_SSE_Store;
+            }
+
+            retNode = gtNewSimdHWIntrinsicNode(retType, op2, op1, storeIntrinsic, simdBaseJitType, simdSize);
+            break;
+        }
+
+        case NI_Vector128_Sum:
+        case NI_Vector256_Sum:
+        {
+            assert(sig->numArgs == 1);
+
+            if (varTypeIsFloating(simdBaseType))
+            {
+                if (!compOpportunisticallyDependsOn(InstructionSet_SSE3))
+                {
+                    // Floating-point types require SSE3.HorizontalAdd
+                    break;
+                }
+            }
+            else if (!compOpportunisticallyDependsOn(InstructionSet_SSSE3))
+            {
+                // Integral types require SSSE3.HorizontalAdd
+                break;
+            }
+            else if (varTypeIsByte(simdBaseType) || varTypeIsLong(simdBaseType))
+            {
+                // byte, sbyte, long, and ulong all would require more work to support
+                break;
+            }
+
+            op1     = impSIMDPopStack(retType);
+            retNode = gtNewSimdSumNode(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ false);
+            break;
+        }
+
         case NI_Vector128_ToScalar:
         case NI_Vector256_ToScalar:
         {
index 73a4863..32a1ce5 100644 (file)
@@ -509,9 +509,6 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
 
         case NI_VectorT128_Sum:
         {
-            // TODO-XArch-CQ: We could support this all the way down to SSE2 and that might be
-            // worthwhile so we can accelerate cases like byte/sbyte and long/ulong
-
             if (varTypeIsFloating(simdBaseType))
             {
                 if (!compOpportunisticallyDependsOn(InstructionSet_SSE3))
@@ -723,60 +720,9 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                 }
 
                 case NI_VectorT128_Sum:
-                {
-                    GenTree* tmp;
-                    unsigned vectorLength = getSIMDVectorLength(simdSize, simdBaseType);
-                    int      haddCount    = genLog2(vectorLength);
-
-                    NamedIntrinsic horizontalAdd =
-                        varTypeIsFloating(simdBaseType) ? NI_SSE3_HorizontalAdd : NI_SSSE3_HorizontalAdd;
-
-                    for (int i = 0; i < haddCount; i++)
-                    {
-                        op1 = impCloneExpr(op1, &tmp, clsHnd, (unsigned)CHECK_SPILL_ALL,
-                                           nullptr DEBUGARG("Clone op1 for Vector<T>.Sum"));
-                        op1 = gtNewSimdAsHWIntrinsicNode(simdType, op1, tmp, horizontalAdd, simdBaseJitType, simdSize);
-                    }
-
-                    return gtNewSimdAsHWIntrinsicNode(retType, op1, NI_Vector128_ToScalar, simdBaseJitType, simdSize);
-                }
-
                 case NI_VectorT256_Sum:
                 {
-                    // HorizontalAdd combines pairs so we need log2(vectorLength) passes to sum all elements together.
-                    unsigned vectorLength = getSIMDVectorLength(simdSize, simdBaseType);
-                    int haddCount = genLog2(vectorLength) - 1; // Minus 1 because for the last pass we split the vector
-                                                               // to low / high and add them together.
-                    GenTree*       tmp;
-                    NamedIntrinsic horizontalAdd = NI_AVX2_HorizontalAdd;
-                    NamedIntrinsic add           = NI_SSE2_Add;
-
-                    if (simdBaseType == TYP_DOUBLE)
-                    {
-                        horizontalAdd = NI_AVX_HorizontalAdd;
-                    }
-                    else if (simdBaseType == TYP_FLOAT)
-                    {
-                        horizontalAdd = NI_AVX_HorizontalAdd;
-                        add           = NI_SSE_Add;
-                    }
-
-                    for (int i = 0; i < haddCount; i++)
-                    {
-                        op1 = impCloneExpr(op1, &tmp, clsHnd, (unsigned)CHECK_SPILL_ALL,
-                                           nullptr DEBUGARG("Clone op1 for Vector<T>.Sum"));
-                        op1 = gtNewSimdAsHWIntrinsicNode(simdType, op1, tmp, horizontalAdd, simdBaseJitType, simdSize);
-                    }
-
-                    op1 = impCloneExpr(op1, &tmp, clsHnd, (unsigned)CHECK_SPILL_ALL,
-                                       nullptr DEBUGARG("Clone op1 for Vector<T>.Sum"));
-                    op1 = gtNewSimdAsHWIntrinsicNode(TYP_SIMD16, op1, gtNewIconNode(0x01, TYP_INT),
-                                                     NI_AVX_ExtractVector128, simdBaseJitType, simdSize);
-
-                    tmp = gtNewSimdAsHWIntrinsicNode(simdType, tmp, NI_Vector256_GetLower, simdBaseJitType, simdSize);
-                    op1 = gtNewSimdAsHWIntrinsicNode(TYP_SIMD16, op1, tmp, add, simdBaseJitType, 16);
-
-                    return gtNewSimdAsHWIntrinsicNode(retType, op1, NI_Vector128_ToScalar, simdBaseJitType, 16);
+                    return gtNewSimdSumNode(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true);
                 }
 
                 case NI_VectorT128_WidenLower:
@@ -842,50 +788,7 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
 
                 case NI_VectorT128_Sum:
                 {
-                    GenTree* tmp;
-
-                    switch (simdBaseType)
-                    {
-                        case TYP_BYTE:
-                        case TYP_UBYTE:
-                        case TYP_SHORT:
-                        case TYP_USHORT:
-                        case TYP_INT:
-                        case TYP_UINT:
-                        {
-                            tmp = gtNewSimdAsHWIntrinsicNode(simdType, op1, NI_AdvSimd_Arm64_AddAcross, simdBaseJitType,
-                                                             simdSize);
-                            return gtNewSimdAsHWIntrinsicNode(retType, tmp, NI_Vector64_ToScalar, simdBaseJitType, 8);
-                        }
-                        case TYP_FLOAT:
-                        {
-                            unsigned vectorLength = getSIMDVectorLength(simdSize, simdBaseType);
-                            int      haddCount    = genLog2(vectorLength);
-
-                            for (int i = 0; i < haddCount; i++)
-                            {
-                                op1 = impCloneExpr(op1, &tmp, clsHnd, (unsigned)CHECK_SPILL_ALL,
-                                                   nullptr DEBUGARG("Clone op1 for Vector<T>.Sum"));
-                                op1 = gtNewSimdAsHWIntrinsicNode(simdType, op1, tmp, NI_AdvSimd_Arm64_AddPairwise,
-                                                                 simdBaseJitType, simdSize);
-                            }
-
-                            return gtNewSimdAsHWIntrinsicNode(retType, op1, NI_Vector128_ToScalar, simdBaseJitType,
-                                                              simdSize);
-                        }
-                        case TYP_DOUBLE:
-                        case TYP_LONG:
-                        case TYP_ULONG:
-                        {
-                            op1 = gtNewSimdAsHWIntrinsicNode(TYP_SIMD8, op1, NI_AdvSimd_Arm64_AddPairwiseScalar,
-                                                             simdBaseJitType, simdSize);
-                            return gtNewSimdAsHWIntrinsicNode(retType, op1, NI_Vector64_ToScalar, simdBaseJitType, 8);
-                        }
-                        default:
-                        {
-                            unreached();
-                        }
-                    }
+                    return gtNewSimdSumNode(retType, op1, simdBaseJitType, simdSize, /* isSimdAsHWIntrinsic */ true);
                 }
 
                 case NI_VectorT128_WidenLower:
@@ -1046,6 +949,27 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                                               /* isSimdAsHWIntrinsic */ true);
                 }
 
+                case NI_VectorT128_ShiftLeft:
+                case NI_VectorT256_ShiftLeft:
+                {
+                    return gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize,
+                                              /* isSimdAsHWIntrinsic */ true);
+                }
+
+                case NI_VectorT128_ShiftRightArithmetic:
+                case NI_VectorT256_ShiftRightArithmetic:
+                {
+                    return gtNewSimdBinOpNode(GT_RSH, retType, op1, op2, simdBaseJitType, simdSize,
+                                              /* isSimdAsHWIntrinsic */ true);
+                }
+
+                case NI_VectorT128_ShiftRightLogical:
+                case NI_VectorT256_ShiftRightLogical:
+                {
+                    return gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize,
+                                              /* isSimdAsHWIntrinsic */ true);
+                }
+
 #elif defined(TARGET_ARM64)
                 case NI_Vector2_CreateBroadcast:
                 case NI_Vector3_CreateBroadcast:
@@ -1089,6 +1013,24 @@ GenTree* Compiler::impSimdAsHWIntrinsicSpecial(NamedIntrinsic       intrinsic,
                     return gtNewSimdBinOpNode(GT_MUL, retType, op1, op2, simdBaseJitType, simdSize,
                                               /* isSimdAsHWIntrinsic */ true);
                 }
+
+                case NI_VectorT128_ShiftLeft:
+                {
+                    return gtNewSimdBinOpNode(GT_LSH, retType, op1, op2, simdBaseJitType, simdSize,
+                                              /* isSimdAsHWIntrinsic */ true);
+                }
+
+                case NI_VectorT128_ShiftRightArithmetic:
+                {
+                    return gtNewSimdBinOpNode(GT_RSH, retType, op1, op2, simdBaseJitType, simdSize,
+                                              /* isSimdAsHWIntrinsic */ true);
+                }
+
+                case NI_VectorT128_ShiftRightLogical:
+                {
+                    return gtNewSimdBinOpNode(GT_RSZ, retType, op1, op2, simdBaseJitType, simdSize,
+                                              /* isSimdAsHWIntrinsic */ true);
+                }
 #else
 #error Unsupported platform
 #endif // !TARGET_XARCH && !TARGET_ARM64
index 82cf2fb..382f783 100644 (file)
@@ -138,6 +138,9 @@ SIMD_AS_HWINTRINSIC_ID(VectorT128,  op_Explicit,
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  op_Inequality,                                          2,         {NI_Vector128_op_Inequality,                    NI_Vector128_op_Inequality,                     NI_Vector128_op_Inequality,                     NI_Vector128_op_Inequality,                     NI_Vector128_op_Inequality,                     NI_Vector128_op_Inequality,                     NI_Vector128_op_Inequality,                     NI_Vector128_op_Inequality,                     NI_Vector128_op_Inequality,                     NI_Vector128_op_Inequality},                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  op_Multiply,                                            2,         {NI_VectorT128_op_Multiply,                     NI_VectorT128_op_Multiply,                      NI_VectorT128_op_Multiply,                      NI_VectorT128_op_Multiply,                      NI_VectorT128_op_Multiply,                      NI_VectorT128_op_Multiply,                      NI_Illegal,                                     NI_Illegal,                                     NI_VectorT128_op_Multiply,                      NI_VectorT128_op_Multiply},                     SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  op_Subtraction,                                         2,         {NI_AdvSimd_Subtract,                           NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Subtract,                            NI_AdvSimd_Arm64_Subtract},                     SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  ShiftLeft,                                              2,         {NI_VectorT128_ShiftLeft,                       NI_VectorT128_ShiftLeft,                        NI_VectorT128_ShiftLeft,                        NI_VectorT128_ShiftLeft,                        NI_VectorT128_ShiftLeft,                        NI_VectorT128_ShiftLeft,                        NI_VectorT128_ShiftLeft,                        NI_VectorT128_ShiftLeft,                        NI_Illegal,                                     NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  ShiftRightArithmetic,                                   2,         {NI_VectorT128_ShiftRightArithmetic,            NI_Illegal,                                     NI_VectorT128_ShiftRightArithmetic,             NI_Illegal,                                     NI_VectorT128_ShiftRightArithmetic,             NI_Illegal,                                     NI_VectorT128_ShiftRightArithmetic,             NI_Illegal,                                     NI_Illegal,                                     NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  ShiftRightLogical,                                      2,         {NI_VectorT128_ShiftRightLogical,               NI_VectorT128_ShiftRightLogical,                NI_VectorT128_ShiftRightLogical,                NI_VectorT128_ShiftRightLogical,                NI_VectorT128_ShiftRightLogical,                NI_VectorT128_ShiftRightLogical,                NI_VectorT128_ShiftRightLogical,                NI_VectorT128_ShiftRightLogical,                NI_Illegal,                                     NI_Illegal},                                    SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  SquareRoot,                                             1,         {NI_Illegal,                                    NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_Illegal,                                     NI_AdvSimd_Arm64_Sqrt,                          NI_AdvSimd_Arm64_Sqrt},                         SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  Sum,                                                    1,         {NI_VectorT128_Sum,                             NI_VectorT128_Sum,                              NI_VectorT128_Sum,                              NI_VectorT128_Sum,                              NI_VectorT128_Sum,                              NI_VectorT128_Sum,                              NI_VectorT128_Sum,                              NI_VectorT128_Sum,                              NI_VectorT128_Sum,                              NI_VectorT128_Sum},                             SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  WidenLower,                                             1,         {NI_VectorT128_WidenLower,                      NI_VectorT128_WidenLower,                       NI_VectorT128_WidenLower,                       NI_VectorT128_WidenLower,                       NI_VectorT128_WidenLower,                       NI_VectorT128_WidenLower,                       NI_VectorT128_WidenLower,                       NI_VectorT128_WidenLower,                       NI_VectorT128_WidenLower,                       NI_VectorT128_WidenLower},                      SimdAsHWIntrinsicFlag::None)
index e3147ae..e407b6d 100644 (file)
@@ -138,6 +138,9 @@ SIMD_AS_HWINTRINSIC_ID(VectorT128,  op_Explicit,
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  op_Inequality,                                          2,         {NI_Vector128_op_Inequality,                NI_Vector128_op_Inequality,                 NI_Vector128_op_Inequality,                 NI_Vector128_op_Inequality,                 NI_Vector128_op_Inequality,                 NI_Vector128_op_Inequality,                 NI_Vector128_op_Inequality,                 NI_Vector128_op_Inequality,                 NI_Vector128_op_Inequality,                 NI_Vector128_op_Inequality},                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  op_Multiply,                                            2,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT128_op_Multiply,                  NI_VectorT128_op_Multiply,                  NI_VectorT128_op_Multiply,                  NI_VectorT128_op_Multiply,                  NI_Illegal,                                 NI_Illegal,                                 NI_VectorT128_op_Multiply,                  NI_VectorT128_op_Multiply},                 SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  op_Subtraction,                                         2,         {NI_SSE2_Subtract,                          NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE2_Subtract,                           NI_SSE_Subtract,                            NI_SSE2_Subtract},                          SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  ShiftLeft,                                              2,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT128_ShiftLeft,                    NI_VectorT128_ShiftLeft,                    NI_VectorT128_ShiftLeft,                    NI_VectorT128_ShiftLeft,                    NI_VectorT128_ShiftLeft,                    NI_VectorT128_ShiftLeft,                    NI_Illegal,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  ShiftRightArithmetic,                                   2,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT128_ShiftRightArithmetic,         NI_Illegal,                                 NI_VectorT128_ShiftRightArithmetic,         NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT128,  ShiftRightLogical,                                      2,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT128_ShiftRightLogical,            NI_VectorT128_ShiftRightLogical,            NI_VectorT128_ShiftRightLogical,            NI_VectorT128_ShiftRightLogical,            NI_VectorT128_ShiftRightLogical,            NI_VectorT128_ShiftRightLogical,            NI_Illegal,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  SquareRoot,                                             1,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_SSE_Sqrt,                                NI_SSE2_Sqrt},                              SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  Sum,                                                    1,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT128_Sum,                          NI_VectorT128_Sum,                          NI_VectorT128_Sum,                          NI_VectorT128_Sum,                          NI_Illegal,                                 NI_Illegal,                                 NI_VectorT128_Sum,                          NI_VectorT128_Sum},                         SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT128,  WidenLower,                                             1,         {NI_VectorT128_WidenLower,                  NI_VectorT128_WidenLower,                   NI_VectorT128_WidenLower,                   NI_VectorT128_WidenLower,                   NI_VectorT128_WidenLower,                   NI_VectorT128_WidenLower,                   NI_VectorT128_WidenLower,                   NI_VectorT128_WidenLower,                   NI_VectorT128_WidenLower,                   NI_VectorT128_WidenLower},                  SimdAsHWIntrinsicFlag::None)
@@ -186,6 +189,9 @@ SIMD_AS_HWINTRINSIC_ID(VectorT256,  op_Explicit,
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  op_Inequality,                                          2,         {NI_Vector256_op_Inequality,                NI_Vector256_op_Inequality,                 NI_Vector256_op_Inequality,                 NI_Vector256_op_Inequality,                 NI_Vector256_op_Inequality,                 NI_Vector256_op_Inequality,                 NI_Vector256_op_Inequality,                 NI_Vector256_op_Inequality,                 NI_Vector256_op_Inequality,                 NI_Vector256_op_Inequality},                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  op_Multiply,                                            2,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT256_op_Multiply,                  NI_VectorT256_op_Multiply,                  NI_VectorT256_op_Multiply,                  NI_VectorT256_op_Multiply,                  NI_Illegal,                                 NI_Illegal,                                 NI_VectorT256_op_Multiply,                  NI_VectorT256_op_Multiply},                 SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  op_Subtraction,                                         2,         {NI_AVX2_Subtract,                          NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX2_Subtract,                           NI_AVX_Subtract,                            NI_AVX_Subtract},                           SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256,  ShiftLeft,                                              2,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT256_ShiftLeft,                    NI_VectorT256_ShiftLeft,                    NI_VectorT256_ShiftLeft,                    NI_VectorT256_ShiftLeft,                    NI_VectorT256_ShiftLeft,                    NI_VectorT256_ShiftLeft,                    NI_Illegal,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256,  ShiftRightArithmetic,                                   2,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT256_ShiftRightArithmetic,         NI_Illegal,                                 NI_VectorT256_ShiftRightArithmetic,         NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
+SIMD_AS_HWINTRINSIC_ID(VectorT256,  ShiftRightLogical,                                      2,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT256_ShiftRightLogical,            NI_VectorT256_ShiftRightLogical,            NI_VectorT256_ShiftRightLogical,            NI_VectorT256_ShiftRightLogical,            NI_VectorT256_ShiftRightLogical,            NI_VectorT256_ShiftRightLogical,            NI_Illegal,                                 NI_Illegal},                                SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  SquareRoot,                                             1,         {NI_Illegal,                                NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_Illegal,                                 NI_AVX_Sqrt,                                NI_AVX_Sqrt},                               SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  Sum,                                                    1,         {NI_Illegal,                                NI_Illegal,                                 NI_VectorT256_Sum,                          NI_VectorT256_Sum,                          NI_VectorT256_Sum,                          NI_VectorT256_Sum,                          NI_Illegal,                                 NI_Illegal,                                 NI_VectorT256_Sum,                          NI_VectorT256_Sum},                         SimdAsHWIntrinsicFlag::None)
 SIMD_AS_HWINTRINSIC_ID(VectorT256,  WidenLower,                                             1,         {NI_VectorT256_WidenLower,                  NI_VectorT256_WidenLower,                   NI_VectorT256_WidenLower,                   NI_VectorT256_WidenLower,                   NI_VectorT256_WidenLower,                   NI_VectorT256_WidenLower,                   NI_VectorT256_WidenLower,                   NI_VectorT256_WidenLower,                   NI_VectorT256_WidenLower,                   NI_VectorT256_WidenLower},                  SimdAsHWIntrinsicFlag::None)
index 394aee0..502a557 100644 (file)
@@ -290,6 +290,42 @@ namespace System.Numerics
         public static System.Numerics.Vector<System.UInt32> Narrow(System.Numerics.Vector<System.UInt64> low, System.Numerics.Vector<System.UInt64> high) { throw null; }
         public static System.Numerics.Vector<T> Negate<T>(System.Numerics.Vector<T> value) where T : struct { throw null; }
         public static System.Numerics.Vector<T> OnesComplement<T>(System.Numerics.Vector<T> value) where T : struct { throw null; }
+        public static System.Numerics.Vector<System.Byte> ShiftLeft(System.Numerics.Vector<System.Byte> value, int shiftCount) { throw null; } // ShiftLeft: ten overloads, one per integral element type; every body here is just 'throw null'
+        public static System.Numerics.Vector<System.Int16> ShiftLeft(System.Numerics.Vector<System.Int16> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<System.Int32> ShiftLeft(System.Numerics.Vector<System.Int32> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<System.Int64> ShiftLeft(System.Numerics.Vector<System.Int64> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<nint> ShiftLeft(System.Numerics.Vector<nint> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<nuint> ShiftLeft(System.Numerics.Vector<nuint> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<System.SByte> ShiftLeft(System.Numerics.Vector<System.SByte> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<System.UInt16> ShiftLeft(System.Numerics.Vector<System.UInt16> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<System.UInt32> ShiftLeft(System.Numerics.Vector<System.UInt32> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<System.UInt64> ShiftLeft(System.Numerics.Vector<System.UInt64> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<System.Int16> ShiftRightArithmetic(System.Numerics.Vector<System.Int16> value, int shiftCount) { throw null; } // ShiftRightArithmetic: declared only for the signed element types (Int16, Int32, Int64, nint, SByte)
+        public static System.Numerics.Vector<System.Int32> ShiftRightArithmetic(System.Numerics.Vector<System.Int32> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<System.Int64> ShiftRightArithmetic(System.Numerics.Vector<System.Int64> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<nint> ShiftRightArithmetic(System.Numerics.Vector<nint> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<System.SByte> ShiftRightArithmetic(System.Numerics.Vector<System.SByte> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<System.Byte> ShiftRightLogical(System.Numerics.Vector<System.Byte> value, int shiftCount) { throw null; } // ShiftRightLogical: ten overloads, signed and unsigned element types alike
+        public static System.Numerics.Vector<System.Int16> ShiftRightLogical(System.Numerics.Vector<System.Int16> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<System.Int32> ShiftRightLogical(System.Numerics.Vector<System.Int32> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<System.Int64> ShiftRightLogical(System.Numerics.Vector<System.Int64> value, int shiftCount) { throw null; }
+        public static System.Numerics.Vector<nint> ShiftRightLogical(System.Numerics.Vector<nint> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<nuint> ShiftRightLogical(System.Numerics.Vector<nuint> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<System.SByte> ShiftRightLogical(System.Numerics.Vector<System.SByte> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<System.UInt16> ShiftRightLogical(System.Numerics.Vector<System.UInt16> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<System.UInt32> ShiftRightLogical(System.Numerics.Vector<System.UInt32> value, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Numerics.Vector<System.UInt64> ShiftRightLogical(System.Numerics.Vector<System.UInt64> value, int shiftCount) { throw null; }
         public static System.Numerics.Vector<T> SquareRoot<T>(System.Numerics.Vector<T> value) where T : struct { throw null; }
         public static System.Numerics.Vector<T> Subtract<T>(System.Numerics.Vector<T> left, System.Numerics.Vector<T> right) where T : struct { throw null; }
         [System.CLSCompliantAttribute(false)]
index 86ad06f..712e909 100644 (file)
@@ -1154,6 +1154,492 @@ namespace System.Numerics
         public static Vector<T> OnesComplement<T>(Vector<T> value)
             where T : struct => ~value;
 
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<byte> ShiftLeft(Vector<byte> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<byte> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<byte>.Count; index++)
+            {
+                var element = Scalar<byte>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<short> ShiftLeft(Vector<short> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<short> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<short>.Count; index++)
+            {
+                var element = Scalar<short>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<int> ShiftLeft(Vector<int> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<int> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<int>.Count; index++)
+            {
+                var element = Scalar<int>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<long> ShiftLeft(Vector<long> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<long> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<long>.Count; index++)
+            {
+                var element = Scalar<long>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<nint> ShiftLeft(Vector<nint> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<nint> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<nint>.Count; index++)
+            {
+                var element = Scalar<nint>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<nuint> ShiftLeft(Vector<nuint> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<nuint> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<nuint>.Count; index++)
+            {
+                var element = Scalar<nuint>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<sbyte> ShiftLeft(Vector<sbyte> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<sbyte> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<sbyte>.Count; index++)
+            {
+                var element = Scalar<sbyte>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<ushort> ShiftLeft(Vector<ushort> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<ushort> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<ushort>.Count; index++)
+            {
+                var element = Scalar<ushort>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<uint> ShiftLeft(Vector<uint> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<uint> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<uint>.Count; index++)
+            {
+                var element = Scalar<uint>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector left by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<ulong> ShiftLeft(Vector<ulong> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<ulong> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<ulong>.Count; index++)
+            {
+                var element = Scalar<ulong>.ShiftLeft(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<short> ShiftRightArithmetic(Vector<short> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<short> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<short>.Count; index++)
+            {
+                var element = Scalar<short>.ShiftRightArithmetic(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<int> ShiftRightArithmetic(Vector<int> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<int> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<int>.Count; index++)
+            {
+                var element = Scalar<int>.ShiftRightArithmetic(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<long> ShiftRightArithmetic(Vector<long> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<long> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<long>.Count; index++)
+            {
+                var element = Scalar<long>.ShiftRightArithmetic(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<nint> ShiftRightArithmetic(Vector<nint> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<nint> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<nint>.Count; index++)
+            {
+                var element = Scalar<nint>.ShiftRightArithmetic(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<sbyte> ShiftRightArithmetic(Vector<sbyte> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<sbyte> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<sbyte>.Count; index++)
+            {
+                var element = Scalar<sbyte>.ShiftRightArithmetic(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<byte> ShiftRightLogical(Vector<byte> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<byte> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<byte>.Count; index++)
+            {
+                var element = Scalar<byte>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<short> ShiftRightLogical(Vector<short> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<short> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<short>.Count; index++)
+            {
+                var element = Scalar<short>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<int> ShiftRightLogical(Vector<int> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<int> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<int>.Count; index++)
+            {
+                var element = Scalar<int>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<long> ShiftRightLogical(Vector<long> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<long> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<long>.Count; index++)
+            {
+                var element = Scalar<long>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<nint> ShiftRightLogical(Vector<nint> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<nint> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<nint>.Count; index++)
+            {
+                var element = Scalar<nint>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<nuint> ShiftRightLogical(Vector<nuint> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<nuint> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<nuint>.Count; index++)
+            {
+                var element = Scalar<nuint>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<sbyte> ShiftRightLogical(Vector<sbyte> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<sbyte> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<sbyte>.Count; index++)
+            {
+                var element = Scalar<sbyte>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<ushort> ShiftRightLogical(Vector<ushort> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<ushort> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<ushort>.Count; index++)
+            {
+                var element = Scalar<ushort>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<uint> ShiftRightLogical(Vector<uint> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<uint> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<uint>.Count; index++)
+            {
+                var element = Scalar<uint>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts each element of a vector right by the specified amount.</summary>
+        /// <param name="value">The vector whose elements are to be shifted.</param>
+        /// <param name="shiftCount">The number of bits by which to shift each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector<ulong> ShiftRightLogical(Vector<ulong> value, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector<ulong> result); // no zero-init: the loop below writes each index in [0, Count)
+
+            for (int index = 0; index < Vector<ulong>.Count; index++)
+            {
+                var element = Scalar<ulong>.ShiftRightLogical(value.GetElementUnsafe(index), shiftCount);
+                result.SetElementUnsafe(index, element);
+            }
+
+            return result;
+        }
+
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
         /// <param name="value">The vector whose square root is to be computed.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
index db5d684..602b2b7 100644 (file)
@@ -356,6 +356,91 @@ namespace System.Runtime.Intrinsics
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static uint ExtractMostSignificantBit(T value) // returns 0 or 1: the highest bit of value's representation, picked by a type test on T
+        {
+            if (typeof(T) == typeof(byte))
+            {
+                uint bits = (byte)(object)value;
+                return bits >> 7;
+            }
+            else if (typeof(T) == typeof(double))
+            {
+                ulong bits = BitConverter.DoubleToUInt64Bits((double)(object)value); // operate on the raw 64-bit pattern rather than the numeric value
+                return (uint)(bits >> 63);
+            }
+            else if (typeof(T) == typeof(short))
+            {
+                uint bits = (ushort)(short)(object)value; // go through ushort so the widening to uint does not sign-extend
+                return bits >> 15;
+            }
+            else if (typeof(T) == typeof(int))
+            {
+                uint bits = (uint)(int)(object)value;
+                return bits >> 31;
+            }
+            else if (typeof(T) == typeof(long))
+            {
+                ulong bits = (ulong)(long)(object)value;
+                return (uint)(bits >> 63);
+            }
+            else if (typeof(T) == typeof(nint))
+            {
+                if (Environment.Is64BitProcess) // native-sized integer: top bit is bit 63 in a 64-bit process, bit 31 otherwise
+                {
+                    ulong bits = (ulong)(nint)(object)value;
+                    return (uint)(bits >> 63);
+                }
+                else
+                {
+                    uint bits = (uint)(nint)(object)value;
+                    return bits >> 31;
+                }
+            }
+            else if (typeof(T) == typeof(nuint))
+            {
+                if (Environment.Is64BitProcess) // same width split as the nint branch
+                {
+                    ulong bits = (ulong)(nuint)(object)value;
+                    return (uint)(bits >> 63);
+                }
+                else
+                {
+                    uint bits = (uint)(nuint)(object)value;
+                    return bits >> 31;
+                }
+            }
+            else if (typeof(T) == typeof(sbyte))
+            {
+                uint bits = (byte)(sbyte)(object)value; // go through byte so the widening to uint does not sign-extend
+                return bits >> 7;
+            }
+            else if (typeof(T) == typeof(float))
+            {
+                uint bits = BitConverter.SingleToUInt32Bits((float)(object)value); // raw 32-bit pattern of the float
+                return bits >> 31;
+            }
+            else if (typeof(T) == typeof(ushort))
+            {
+                uint bits = (ushort)(object)value;
+                return bits >> 15;
+            }
+            else if (typeof(T) == typeof(uint))
+            {
+                uint bits = (uint)(object)value;
+                return bits >> 31;
+            }
+            else if (typeof(T) == typeof(ulong))
+            {
+                ulong bits = (ulong)(object)value;
+                return (uint)(bits >> 63);
+            }
+            else
+            {
+                throw new NotSupportedException(SR.Arg_TypeNotSupported); // T is none of the twelve element types handled above
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static T Floor(T value)
         {
             if (typeof(T) == typeof(double))
@@ -658,6 +743,133 @@ namespace System.Runtime.Intrinsics
         }
 
         [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static T ShiftLeft(T value, int shiftCount)
+        {
+            if (typeof(T) == typeof(byte))
+            {
+                return (T)(object)(byte)((byte)(object)value << (shiftCount & 7));
+            }
+            else if (typeof(T) == typeof(short))
+            {
+                return (T)(object)(short)((short)(object)value << (shiftCount & 15));
+            }
+            else if (typeof(T) == typeof(int))
+            {
+                return (T)(object)(int)((int)(object)value << shiftCount);
+            }
+            else if (typeof(T) == typeof(long))
+            {
+                return (T)(object)(long)((long)(object)value << shiftCount);
+            }
+            else if (typeof(T) == typeof(nint))
+            {
+                return (T)(object)(nint)((nint)(object)value << shiftCount);
+            }
+            else if (typeof(T) == typeof(nuint))
+            {
+                return (T)(object)(nuint)((nuint)(object)value << shiftCount);
+            }
+            else if (typeof(T) == typeof(sbyte))
+            {
+                return (T)(object)(sbyte)((sbyte)(object)value << (shiftCount & 7));
+            }
+            else if (typeof(T) == typeof(ushort))
+            {
+                return (T)(object)(ushort)((ushort)(object)value << (shiftCount & 15));
+            }
+            else if (typeof(T) == typeof(uint))
+            {
+                return (T)(object)(uint)((uint)(object)value << shiftCount);
+            }
+            else if (typeof(T) == typeof(ulong))
+            {
+                return (T)(object)(ulong)((ulong)(object)value << shiftCount);
+            }
+            else
+            {
+                throw new NotSupportedException(SR.Arg_TypeNotSupported);
+            }
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static T ShiftRightArithmetic(T value, int shiftCount)
+        {
+            // Only signed element types are handled; each one returns from its own guard.
+            // 8-bit and 16-bit elements mask the count with 7 / 15; wider elements pass it through unmasked.
+            if (typeof(T) == typeof(sbyte))
+            {
+                return (T)(object)(sbyte)((sbyte)(object)value >> (shiftCount & 7));
+            }
+
+            if (typeof(T) == typeof(short))
+            {
+                return (T)(object)(short)((short)(object)value >> (shiftCount & 15));
+            }
+
+            if (typeof(T) == typeof(int))
+            {
+                return (T)(object)(int)((int)(object)value >> shiftCount);
+            }
+
+            if (typeof(T) == typeof(long))
+            {
+                return (T)(object)(long)((long)(object)value >> shiftCount);
+            }
+
+            if (typeof(T) == typeof(nint))
+            {
+                return (T)(object)(nint)((nint)(object)value >> shiftCount);
+            }
+
+            // No guard matched: this element type has no arithmetic right-shift implementation here.
+            throw new NotSupportedException(SR.Arg_TypeNotSupported);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static T ShiftRightLogical(T value, int shiftCount)
+        {
+            // Signed elements are first cast to the unsigned type of the same width, shifted,
+            // and then cast back. 8-bit and 16-bit elements mask the count with 7 / 15.
+            if (typeof(T) == typeof(byte))
+            {
+                return (T)(object)(byte)((byte)(object)value >> (shiftCount & 7));
+            }
+
+            if (typeof(T) == typeof(sbyte))
+            {
+                return (T)(object)(sbyte)((byte)(sbyte)(object)value >> (shiftCount & 7));
+            }
+
+            if (typeof(T) == typeof(short))
+            {
+                return (T)(object)(short)((ushort)(short)(object)value >> (shiftCount & 15));
+            }
+
+            if (typeof(T) == typeof(ushort))
+            {
+                return (T)(object)(ushort)((ushort)(object)value >> (shiftCount & 15));
+            }
+
+            if (typeof(T) == typeof(int))
+            {
+                return (T)(object)(int)((uint)(int)(object)value >> shiftCount);
+            }
+
+            if (typeof(T) == typeof(uint))
+            {
+                return (T)(object)(uint)((uint)(object)value >> shiftCount);
+            }
+
+            if (typeof(T) == typeof(long))
+            {
+                return (T)(object)(long)((ulong)(long)(object)value >> shiftCount);
+            }
+
+            if (typeof(T) == typeof(ulong))
+            {
+                return (T)(object)(ulong)((ulong)(object)value >> shiftCount);
+            }
+
+            if (typeof(T) == typeof(nint))
+            {
+                return (T)(object)(nint)((nuint)(nint)(object)value >> shiftCount);
+            }
+
+            if (typeof(T) == typeof(nuint))
+            {
+                return (T)(object)(nuint)((nuint)(object)value >> shiftCount);
+            }
+
+            // No guard matched: this element type has no logical right-shift implementation here.
+            throw new NotSupportedException(SR.Arg_TypeNotSupported);
+        }
+
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
         public static T Sqrt(T value)
         {
             if (typeof(T) == typeof(byte))
index 1acc80f..5a0c3fa 100644 (file)
@@ -34,6 +34,12 @@ namespace System.Runtime.Intrinsics
     {
         internal const int Size = 16;
 
+#if TARGET_ARM
+        internal const int Alignment = 8; // address multiple checked by LoadAligned/LoadAlignedNonTemporal when TARGET_ARM is defined
+#else
+        internal const int Alignment = 16; // address multiple checked by LoadAligned/LoadAlignedNonTemporal on every other target
+#endif
+
         /// <summary>Gets a value that indicates whether 128-bit vector operations are subject to hardware acceleration through JIT intrinsic support.</summary>
         /// <value><see langword="true" /> if 128-bit vector operations are subject to hardware acceleration; otherwise, <see langword="false" />.</value>
         /// <remarks>128-bit vector operations are subject to hardware acceleration on systems that support Single Instruction, Multiple Data (SIMD) instructions for 128-bit vectors and the RyuJIT just-in-time compiler is used to compile managed code.</remarks>
@@ -2330,6 +2336,28 @@ namespace System.Runtime.Intrinsics
         public static bool EqualsAny<T>(Vector128<T> left, Vector128<T> right)
             where T : struct => Equals(left, right).As<T, ulong>() != Vector128<ulong>.Zero;
 
+        /// <summary>Collects the most significant bit of every element in a vector into a single packed value.</summary>
+        /// <param name="vector">The vector whose elements should have their most significant bit extracted.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The packed most significant bits extracted from the elements in <paramref name="vector" />; bit <c>i</c> comes from element <c>i</c>.</returns>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static uint ExtractMostSignificantBits<T>(this Vector128<T> vector)
+            where T : struct
+        {
+            uint packed = 0;
+
+            for (int lane = 0; lane < Vector128<T>.Count; lane++)
+            {
+                // Each element contributes one bit, placed at the position matching its index.
+                uint msb = Scalar<T>.ExtractMostSignificantBit(vector.GetElementUnsafe(lane));
+                packed |= msb << lane;
+            }
+
+            return packed;
+        }
+
         /// <summary>Computes the floor of each element in a vector.</summary>
         /// <param name="vector">The vector that will have its floor computed.</param>
         /// <returns>A vector whose elements are the floor of the elements in <paramref name="vector" />.</returns>
@@ -2575,6 +2603,89 @@ namespace System.Runtime.Intrinsics
         public static bool LessThanOrEqualAny<T>(Vector128<T> left, Vector128<T> right)
             where T : struct => LessThanOrEqual(left, right).As<T, ulong>() != Vector128<ulong>.Zero;
 
+        /// <summary>Loads a vector from the given source.</summary>
+        /// <param name="source">The source from which the vector will be loaded. No particular alignment is required.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe Vector128<T> Load<T>(T* source)
+            where T : unmanaged
+        {
+            // Validate the element type the same way LoadAligned and LoadUnsafe do.
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            // This overload makes no alignment promise, so read through ReadUnaligned
+            // instead of dereferencing a Vector128<T> pointer directly.
+            return Unsafe.ReadUnaligned<Vector128<T>>(source);
+        }
+
+        /// <summary>Loads a vector from a source address that meets the vector alignment requirement.</summary>
+        /// <param name="source">The aligned source from which the vector will be loaded.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        /// <exception cref="AccessViolationException"><paramref name="source" /> is not a multiple of the required alignment.</exception>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe Vector128<T> LoadAligned<T>(T* source)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            nuint address = (nuint)source;
+            bool isAligned = (address % Alignment) == 0;
+
+            if (!isAligned)
+            {
+                throw new AccessViolationException();
+            }
+
+            return *(Vector128<T>*)source;
+        }
+
+        /// <summary>Loads a vector from a source address that meets the vector alignment requirement.</summary>
+        /// <param name="source">The aligned source from which the vector will be loaded.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        /// <remarks>This method may bypass the cache on certain platforms.</remarks>
+        /// <exception cref="AccessViolationException"><paramref name="source" /> is not a multiple of the required alignment.</exception>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe Vector128<T> LoadAlignedNonTemporal<T>(T* source)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            // This body performs an ordinary aligned read once the address check passes.
+            if (((nuint)source % Alignment) == 0)
+            {
+                return *(Vector128<T>*)source;
+            }
+
+            throw new AccessViolationException();
+        }
+
+        /// <summary>Loads a vector starting at the given reference.</summary>
+        /// <param name="source">The source from which the vector will be loaded.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<T> LoadUnsafe<T>(ref T source)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            // View the first element as raw bytes so the whole vector can be read without an alignment assumption.
+            ref byte address = ref Unsafe.As<T, byte>(ref source);
+            return Unsafe.ReadUnaligned<Vector128<T>>(ref address);
+        }
+
+        /// <summary>Loads a vector starting at the given reference advanced by an element offset.</summary>
+        /// <param name="source">The source to which <paramref name="elementOffset" /> will be added before loading the vector.</param>
+        /// <param name="elementOffset">The element offset from <paramref name="source" /> from which the vector will be loaded.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" /> plus <paramref name="elementOffset" />.</returns>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<T> LoadUnsafe<T>(ref T source, nuint elementOffset)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            // The offset counts elements of T, not bytes.
+            ref T first = ref Unsafe.Add(ref source, (nint)elementOffset);
+            ref byte address = ref Unsafe.As<T, byte>(ref first);
+            return Unsafe.ReadUnaligned<Vector128<T>>(ref address);
+        }
+
         /// <summary>Computes the maximum of two vectors on a per-element basis.</summary>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
@@ -2835,6 +2946,492 @@ namespace System.Runtime.Intrinsics
         public static Vector128<T> OnesComplement<T>(Vector128<T> vector)
             where T : struct => ~vector;
 
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<byte> ShiftLeft(Vector128<byte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<byte> shifted);
+
+            for (int lane = 0; lane < Vector128<byte>.Count; lane++)
+            {
+                byte source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<byte>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<short> ShiftLeft(Vector128<short> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<short> shifted);
+
+            for (int lane = 0; lane < Vector128<short>.Count; lane++)
+            {
+                short source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<short>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<int> ShiftLeft(Vector128<int> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<int> shifted);
+
+            for (int lane = 0; lane < Vector128<int>.Count; lane++)
+            {
+                int source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<int>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<long> ShiftLeft(Vector128<long> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<long> shifted);
+
+            for (int lane = 0; lane < Vector128<long>.Count; lane++)
+            {
+                long source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<long>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<nint> ShiftLeft(Vector128<nint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<nint> shifted);
+
+            for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+            {
+                nint source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<nint>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<nuint> ShiftLeft(Vector128<nuint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<nuint> shifted);
+
+            for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+            {
+                nuint source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<nuint>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<sbyte> ShiftLeft(Vector128<sbyte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<sbyte> shifted);
+
+            for (int lane = 0; lane < Vector128<sbyte>.Count; lane++)
+            {
+                sbyte source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<sbyte>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<ushort> ShiftLeft(Vector128<ushort> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<ushort> shifted);
+
+            for (int lane = 0; lane < Vector128<ushort>.Count; lane++)
+            {
+                ushort source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<ushort>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<uint> ShiftLeft(Vector128<uint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<uint> shifted);
+
+            for (int lane = 0; lane < Vector128<uint>.Count; lane++)
+            {
+                uint source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<uint>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<ulong> ShiftLeft(Vector128<ulong> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<ulong> shifted);
+
+            for (int lane = 0; lane < Vector128<ulong>.Count; lane++)
+            {
+                ulong source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<ulong>.ShiftLeft(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<short> ShiftRightArithmetic(Vector128<short> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<short> shifted);
+
+            for (int lane = 0; lane < Vector128<short>.Count; lane++)
+            {
+                short source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<short>.ShiftRightArithmetic(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<int> ShiftRightArithmetic(Vector128<int> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<int> shifted);
+
+            for (int lane = 0; lane < Vector128<int>.Count; lane++)
+            {
+                int source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<int>.ShiftRightArithmetic(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<long> ShiftRightArithmetic(Vector128<long> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<long> shifted);
+
+            for (int lane = 0; lane < Vector128<long>.Count; lane++)
+            {
+                long source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<long>.ShiftRightArithmetic(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<nint> ShiftRightArithmetic(Vector128<nint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<nint> shifted);
+
+            for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+            {
+                nint source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<nint>.ShiftRightArithmetic(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<sbyte> ShiftRightArithmetic(Vector128<sbyte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<sbyte> shifted);
+
+            for (int lane = 0; lane < Vector128<sbyte>.Count; lane++)
+            {
+                sbyte source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<sbyte>.ShiftRightArithmetic(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<byte> ShiftRightLogical(Vector128<byte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<byte> shifted);
+
+            for (int lane = 0; lane < Vector128<byte>.Count; lane++)
+            {
+                byte source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<byte>.ShiftRightLogical(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<short> ShiftRightLogical(Vector128<short> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<short> shifted);
+
+            for (int lane = 0; lane < Vector128<short>.Count; lane++)
+            {
+                short source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<short>.ShiftRightLogical(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<int> ShiftRightLogical(Vector128<int> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<int> shifted);
+
+            for (int lane = 0; lane < Vector128<int>.Count; lane++)
+            {
+                int source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<int>.ShiftRightLogical(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<long> ShiftRightLogical(Vector128<long> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<long> shifted);
+
+            for (int lane = 0; lane < Vector128<long>.Count; lane++)
+            {
+                long source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<long>.ShiftRightLogical(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<nint> ShiftRightLogical(Vector128<nint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<nint> shifted);
+
+            for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+            {
+                nint source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<nint>.ShiftRightLogical(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count applied to each element.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<nuint> ShiftRightLogical(Vector128<nuint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector128<nuint> shifted);
+
+            for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+            {
+                nuint source = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<nuint>.ShiftRightLogical(source, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Logically shifts every element of a vector right by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<sbyte> ShiftRightLogical(Vector128<sbyte> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector128<sbyte> result);
+
+            for (int lane = 0; lane < Vector128<sbyte>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<sbyte>.ShiftRightLogical(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Logically shifts every element of a vector right by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<ushort> ShiftRightLogical(Vector128<ushort> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector128<ushort> result);
+
+            for (int lane = 0; lane < Vector128<ushort>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<ushort>.ShiftRightLogical(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Logically shifts every element of a vector right by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<uint> ShiftRightLogical(Vector128<uint> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector128<uint> result);
+
+            for (int lane = 0; lane < Vector128<uint>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<uint>.ShiftRightLogical(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Logically shifts every element of a vector right by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector128<ulong> ShiftRightLogical(Vector128<ulong> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector128<ulong> result);
+
+            for (int lane = 0; lane < Vector128<ulong>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<ulong>.ShiftRightLogical(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
         /// <param name="vector">The vector whose square root is to be computed.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
@@ -2854,6 +3451,89 @@ namespace System.Runtime.Intrinsics
             return result;
         }
 
+        /// <summary>Stores a vector at the given destination.</summary>
+        /// <param name="source">The vector that will be stored.</param>
+        /// <param name="destination">The destination at which <paramref name="source" /> will be stored.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> (<typeparamref name="T" />) is not supported.</exception>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void Store<T>(this Vector128<T> source, T* destination)
+            where T : unmanaged
+        {
+            // Validate the element type exactly like StoreAligned, StoreAlignedNonTemporal and StoreUnsafe do:
+            // the unmanaged constraint alone still admits element types that Vector128<T> does not support.
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            *(Vector128<T>*)destination = source;
+        }
+
+        /// <summary>Writes a vector to a destination address that must satisfy the vector alignment requirement.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">The aligned address that receives <paramref name="source" />.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <exception cref="AccessViolationException"><paramref name="destination" /> is not a multiple of the required alignment.</exception>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void StoreAligned<T>(this Vector128<T> source, T* destination)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            // Only addresses that are an exact multiple of the alignment are accepted.
+            bool isAligned = ((nuint)destination % Alignment) == 0;
+
+            if (!isAligned)
+            {
+                throw new AccessViolationException();
+            }
+
+            *(Vector128<T>*)destination = source;
+        }
+
+        /// <summary>Writes a vector to a destination address that must satisfy the vector alignment requirement.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">The aligned address that receives <paramref name="source" />.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <exception cref="AccessViolationException"><paramref name="destination" /> is not a multiple of the required alignment.</exception>
+        /// <remarks>This method may bypass the cache on certain platforms.</remarks>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void StoreAlignedNonTemporal<T>(this Vector128<T> source, T* destination)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            // Only addresses that are an exact multiple of the alignment are accepted.
+            bool isAligned = ((nuint)destination % Alignment) == 0;
+
+            if (!isAligned)
+            {
+                throw new AccessViolationException();
+            }
+
+            // This managed implementation performs an ordinary store.
+            *(Vector128<T>*)destination = source;
+        }
+
+        /// <summary>Writes a vector to the location referenced by <paramref name="destination" />.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">A reference to the first element of the location that receives <paramref name="source" />.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void StoreUnsafe<T>(this Vector128<T> source, ref T destination)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            // Reinterpret the element reference as raw bytes so the whole vector can be written unaligned.
+            ref byte address = ref Unsafe.As<T, byte>(ref destination);
+            Unsafe.WriteUnaligned<Vector128<T>>(ref address, source);
+        }
+
+        /// <summary>Writes a vector to the location found by offsetting <paramref name="destination" /> by a number of elements.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">The reference to which <paramref name="elementOffset" /> is added before the vector is written.</param>
+        /// <param name="elementOffset">The offset, in elements, from <paramref name="destination" /> at which the vector is written.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void StoreUnsafe<T>(this Vector128<T> source, ref T destination, nuint elementOffset)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector128BaseType<T>();
+
+            // Advance by whole elements first, then view the target as raw bytes for the unaligned write.
+            ref T target = ref Unsafe.Add(ref destination, (nint)elementOffset);
+            ref byte address = ref Unsafe.As<T, byte>(ref target);
+            Unsafe.WriteUnaligned<Vector128<T>>(ref address, source);
+        }
+
         /// <summary>Subtracts two vectors to compute their difference.</summary>
         /// <param name="left">The vector from which <paramref name="right" /> will be subtracted.</param>
         /// <param name="right">The vector to subtract from <paramref name="left" />.</param>
@@ -2864,6 +3544,24 @@ namespace System.Runtime.Intrinsics
         public static Vector128<T> Subtract<T>(Vector128<T> left, Vector128<T> right)
             where T : struct => left - right;
 
+        /// <summary>Adds together all of the elements of a vector.</summary>
+        /// <param name="vector">The vector whose elements are added together.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The total of every element in <paramref name="vector" />.</returns>
+        [Intrinsic]
+        public static T Sum<T>(Vector128<T> vector)
+            where T : struct
+        {
+            // Start from default(T) and fold each element in, in index order.
+            T total = default;
+
+            for (int lane = 0; lane < Vector128<T>.Count; lane++)
+            {
+                T current = vector.GetElementUnsafe(lane);
+                total = Scalar<T>.Add(total, current);
+            }
+
+            return total;
+        }
+
         /// <summary>Converts the given vector to a scalar containing the value of the first element.</summary>
         /// <typeparam name="T">The type of the input vector.</typeparam>
         /// <param name="vector">The vector to get the first element from.</param>
index c1f81ef..935e1f4 100644 (file)
@@ -69,6 +69,8 @@ namespace System.Runtime.Intrinsics
                    (typeof(T) == typeof(short)) ||
                    (typeof(T) == typeof(int)) ||
                    (typeof(T) == typeof(long)) ||
+                   (typeof(T) == typeof(nint)) ||
+                   (typeof(T) == typeof(nuint)) ||
                    (typeof(T) == typeof(sbyte)) ||
                    (typeof(T) == typeof(float)) ||
                    (typeof(T) == typeof(ushort)) ||
index f51cea7..701c527 100644 (file)
@@ -33,6 +33,14 @@ namespace System.Runtime.Intrinsics
     {
         internal const int Size = 32;
 
+        // Byte alignment that LoadAligned and LoadAlignedNonTemporal require of their source address.
+        // The value is selected per target: 8 on ARM, 16 on ARM64, and 32 (equal to Size) everywhere else.
+        // NOTE(review): presumably the smaller ARM/ARM64 values reflect the widest native vector on those targets - confirm.
+#if TARGET_ARM
+        internal const int Alignment = 8;
+#elif TARGET_ARM64
+        internal const int Alignment = 16;
+#else
+        internal const int Alignment = 32;
+#endif
+
         /// <summary>Gets a value that indicates whether 256-bit vector operations are subject to hardware acceleration through JIT intrinsic support.</summary>
         /// <value><see langword="true" /> if 256-bit vector operations are subject to hardware acceleration; otherwise, <see langword="false" />.</value>
         /// <remarks>256-bit vector operations are subject to hardware acceleration on systems that support Single Instruction, Multiple Data (SIMD) instructions for 256-bit vectors and the RyuJIT just-in-time compiler is used to compile managed code.</remarks>
@@ -2448,6 +2456,28 @@ namespace System.Runtime.Intrinsics
         public static bool EqualsAny<T>(Vector256<T> left, Vector256<T> right)
             where T : struct => Equals(left, right).As<T, ulong>() != Vector256<ulong>.Zero;
 
+        /// <summary>Collects the most significant bit of every element in a vector into a single bit mask.</summary>
+        /// <param name="vector">The vector whose elements supply the most significant bits.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>A mask in which bit <c>N</c> holds the most significant bit of element <c>N</c> of <paramref name="vector" />.</returns>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static uint ExtractMostSignificantBits<T>(this Vector256<T> vector)
+            where T : struct
+        {
+            uint packed = 0;
+
+            for (int lane = 0; lane < Vector256<T>.Count; lane++)
+            {
+                // Move the extracted bit to the position matching its element index before merging it in.
+                uint bit = Scalar<T>.ExtractMostSignificantBit(vector.GetElementUnsafe(lane));
+                packed |= bit << lane;
+            }
+
+            return packed;
+        }
+
         /// <summary>Computes the floor of each element in a vector.</summary>
         /// <param name="vector">The vector that will have its floor computed.</param>
         /// <returns>A vector whose elements are the floor of the elements in <paramref name="vector" />.</returns>
@@ -2711,6 +2741,89 @@ namespace System.Runtime.Intrinsics
         public static bool LessThanOrEqualAny<T>(Vector256<T> left, Vector256<T> right)
             where T : struct => LessThanOrEqual(left, right).As<T, ulong>() != Vector256<ulong>.Zero;
 
+        /// <summary>Loads a vector from the given source.</summary>
+        /// <param name="source">The source from which the vector will be loaded.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        /// <exception cref="NotSupportedException">The type of <paramref name="source" /> (<typeparamref name="T" />) is not supported.</exception>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe Vector256<T> Load<T>(T* source)
+            where T : unmanaged
+        {
+            // Validate the element type exactly like LoadAligned, LoadAlignedNonTemporal and LoadUnsafe do:
+            // the unmanaged constraint alone still admits element types that Vector256<T> does not support.
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            return *(Vector256<T>*)source;
+        }
+
+        /// <summary>Reads a vector from a source address that must satisfy the vector alignment requirement.</summary>
+        /// <param name="source">The aligned address from which the vector is read.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector read from <paramref name="source" />.</returns>
+        /// <exception cref="AccessViolationException"><paramref name="source" /> is not a multiple of the required alignment.</exception>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe Vector256<T> LoadAligned<T>(T* source)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // Only addresses that are an exact multiple of the alignment are accepted.
+            bool isAligned = ((nuint)source % Alignment) == 0;
+
+            if (!isAligned)
+            {
+                throw new AccessViolationException();
+            }
+
+            return *(Vector256<T>*)source;
+        }
+
+        /// <summary>Reads a vector from a source address that must satisfy the vector alignment requirement.</summary>
+        /// <param name="source">The aligned address from which the vector is read.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector read from <paramref name="source" />.</returns>
+        /// <exception cref="AccessViolationException"><paramref name="source" /> is not a multiple of the required alignment.</exception>
+        /// <remarks>This method may bypass the cache on certain platforms.</remarks>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe Vector256<T> LoadAlignedNonTemporal<T>(T* source)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // Only addresses that are an exact multiple of the alignment are accepted.
+            bool isAligned = ((nuint)source % Alignment) == 0;
+
+            if (!isAligned)
+            {
+                throw new AccessViolationException();
+            }
+
+            // This managed implementation performs an ordinary load.
+            return *(Vector256<T>*)source;
+        }
+
+        /// <summary>Reads a vector from the location referenced by <paramref name="source" />.</summary>
+        /// <param name="source">A reference to the first element of the location from which the vector is read.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector read from <paramref name="source" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<T> LoadUnsafe<T>(ref T source)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // Reinterpret the element reference as raw bytes so the whole vector can be read unaligned.
+            ref byte address = ref Unsafe.As<T, byte>(ref source);
+            return Unsafe.ReadUnaligned<Vector256<T>>(ref address);
+        }
+
+        /// <summary>Reads a vector from the location found by offsetting <paramref name="source" /> by a number of elements.</summary>
+        /// <param name="source">The reference to which <paramref name="elementOffset" /> is added before the vector is read.</param>
+        /// <param name="elementOffset">The offset, in elements, from <paramref name="source" /> at which the vector is read.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector read from <paramref name="source" /> plus <paramref name="elementOffset" />.</returns>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<T> LoadUnsafe<T>(ref T source, nuint elementOffset)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // Advance by whole elements first, then view the start as raw bytes for the unaligned read.
+            ref T start = ref Unsafe.Add(ref source, (nint)elementOffset);
+            ref byte address = ref Unsafe.As<T, byte>(ref start);
+            return Unsafe.ReadUnaligned<Vector256<T>>(ref address);
+        }
+
         /// <summary>Computes the maximum of two vectors on a per-element basis.</summary>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
@@ -2971,6 +3084,492 @@ namespace System.Runtime.Intrinsics
         public static Vector256<T> OnesComplement<T>(Vector256<T> vector)
             where T : struct => ~vector;
 
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<byte> ShiftLeft(Vector256<byte> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<byte> result);
+
+            for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<byte>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<short> ShiftLeft(Vector256<short> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<short> result);
+
+            for (int lane = 0; lane < Vector256<short>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<short>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<int> ShiftLeft(Vector256<int> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<int> result);
+
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<int>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<long> ShiftLeft(Vector256<long> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<long> result);
+
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<long>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nint> ShiftLeft(Vector256<nint> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<nint> result);
+
+            for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<nint>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nuint> ShiftLeft(Vector256<nuint> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<nuint> result);
+
+            for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<nuint>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<sbyte> ShiftLeft(Vector256<sbyte> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<sbyte> result);
+
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<sbyte>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ushort> ShiftLeft(Vector256<ushort> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<ushort> result);
+
+            for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<ushort>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<uint> ShiftLeft(Vector256<uint> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<uint> result);
+
+            for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<uint>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Shifts every element of a vector left by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ulong> ShiftLeft(Vector256<ulong> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<ulong> result);
+
+            for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<ulong>.ShiftLeft(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Arithmetically shifts every element of a vector right by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<short> ShiftRightArithmetic(Vector256<short> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<short> result);
+
+            for (int lane = 0; lane < Vector256<short>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<short>.ShiftRightArithmetic(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Arithmetically shifts every element of a vector right by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<int> ShiftRightArithmetic(Vector256<int> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<int> result);
+
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<int>.ShiftRightArithmetic(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Arithmetically shifts every element of a vector right by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<long> ShiftRightArithmetic(Vector256<long> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<long> result);
+
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<long>.ShiftRightArithmetic(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Arithmetically shifts every element of a vector right by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nint> ShiftRightArithmetic(Vector256<nint> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<nint> result);
+
+            for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<nint>.ShiftRightArithmetic(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Arithmetically shifts every element of a vector right by the same number of bits.</summary>
+        /// <param name="vector">The vector whose elements will be shifted.</param>
+        /// <param name="shiftCount">The number of bit positions to shift each element by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<sbyte> ShiftRightArithmetic(Vector256<sbyte> vector, int shiftCount)
+        {
+            // Every element is assigned by the loop below, so zero-initializing the result first would be wasted work.
+            Unsafe.SkipInit(out Vector256<sbyte> result);
+
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                result.SetElementUnsafe(lane, Scalar<sbyte>.ShiftRightArithmetic(vector.GetElementUnsafe(lane), shiftCount));
+            }
+
+            return result;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<byte> ShiftRightLogical(Vector256<byte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<byte> shifted);
+
+            for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+            {
+                byte value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<byte>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<short> ShiftRightLogical(Vector256<short> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<short> shifted);
+
+            for (int lane = 0; lane < Vector256<short>.Count; lane++)
+            {
+                short value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<short>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<int> ShiftRightLogical(Vector256<int> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<int> shifted);
+
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                int value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<int>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<long> ShiftRightLogical(Vector256<long> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<long> shifted);
+
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                long value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<long>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nint> ShiftRightLogical(Vector256<nint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<nint> shifted);
+
+            for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+            {
+                nint value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<nint>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<nuint> ShiftRightLogical(Vector256<nuint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<nuint> shifted);
+
+            for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+            {
+                nuint value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<nuint>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<sbyte> ShiftRightLogical(Vector256<sbyte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<sbyte> shifted);
+
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                sbyte value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<sbyte>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ushort> ShiftRightLogical(Vector256<ushort> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<ushort> shifted);
+
+            for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+            {
+                ushort value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<ushort>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<uint> ShiftRightLogical(Vector256<uint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<uint> shifted);
+
+            for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+            {
+                uint value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<uint>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift of every element in a vector by the given bit count.</summary>
+        /// <param name="vector">The vector supplying the elements to shift.</param>
+        /// <param name="shiftCount">How many bits each element is shifted by.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector256<ulong> ShiftRightLogical(Vector256<ulong> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector256<ulong> shifted);
+
+            for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+            {
+                ulong value = vector.GetElementUnsafe(lane);
+                shifted.SetElementUnsafe(lane, Scalar<ulong>.ShiftRightLogical(value, shiftCount));
+            }
+
+            return shifted;
+        }
+
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
         /// <param name="vector">The vector whose square root is to be computed.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
@@ -2990,6 +3589,89 @@ namespace System.Runtime.Intrinsics
             return result;
         }
 
+        /// <summary>Stores a vector at the given destination.</summary>
+        /// <param name="source">The vector that will be stored.</param>
+        /// <param name="destination">The destination at which <paramref name="source" /> will be stored. No alignment check is performed on it.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void Store<T>(this Vector256<T> source, T* destination)
+            where T : unmanaged
+        {
+            // Validate T exactly as StoreAligned/StoreUnsafe do, so every Store* overload
+            // rejects the same set of element types.
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // This overload has no alignment requirement, so write through the unaligned
+            // helper instead of dereferencing a Vector256<T> pointer directly.
+            Unsafe.WriteUnaligned<Vector256<T>>(ref *(byte*)destination, source);
+        }
+
+        /// <summary>Writes a vector to a destination address that must satisfy the vector alignment.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">The aligned address that receives <paramref name="source" />.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void StoreAligned<T>(this Vector256<T> source, T* destination)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // An address that is not a multiple of Alignment is rejected before anything is written.
+            bool isAligned = ((nuint)destination % Alignment) == 0;
+
+            if (!isAligned)
+            {
+                throw new AccessViolationException();
+            }
+
+            *(Vector256<T>*)destination = source;
+        }
+
+        /// <summary>Writes a vector to a destination address that must satisfy the vector alignment.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">The aligned address that receives <paramref name="source" />.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <remarks>This method may bypass the cache on certain platforms.</remarks>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void StoreAlignedNonTemporal<T>(this Vector256<T> source, T* destination)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // Only an address that is a multiple of Alignment is written to.
+            if (((nuint)destination % Alignment) == 0)
+            {
+                *(Vector256<T>*)destination = source;
+                return;
+            }
+
+            throw new AccessViolationException();
+        }
+
+        /// <summary>Writes a vector to the location referenced by <paramref name="destination" />.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">A reference to the first element of the location that receives <paramref name="source" />.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void StoreUnsafe<T>(this Vector256<T> source, ref T destination)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // Reinterpret the element reference as bytes so the whole vector goes out in one unaligned write.
+            ref byte address = ref Unsafe.As<T, byte>(ref destination);
+            Unsafe.WriteUnaligned<Vector256<T>>(ref address, source);
+        }
+
+        /// <summary>Writes a vector to the location found by advancing <paramref name="destination" /> by an element offset.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">The base reference to which <paramref name="elementOffset" /> is added before the vector is written.</param>
+        /// <param name="elementOffset">The offset from <paramref name="destination" />, counted in elements of <typeparamref name="T" />, at which the vector is written.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void StoreUnsafe<T>(this Vector256<T> source, ref T destination, nuint elementOffset)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector256BaseType<T>();
+
+            // Advance by whole elements first, then reinterpret as bytes for the unaligned write.
+            ref T target = ref Unsafe.Add(ref destination, (nint)elementOffset);
+            Unsafe.WriteUnaligned<Vector256<T>>(ref Unsafe.As<T, byte>(ref target), source);
+        }
+
         /// <summary>Subtracts two vectors to compute their difference.</summary>
         /// <param name="left">The vector from which <paramref name="right" /> will be subtracted.</param>
         /// <param name="right">The vector to subtract from <paramref name="left" />.</param>
@@ -3000,6 +3682,24 @@ namespace System.Runtime.Intrinsics
         public static Vector256<T> Subtract<T>(Vector256<T> left, Vector256<T> right)
             where T : struct => left - right;
 
+        /// <summary>Computes the sum of all elements in a vector.</summary>
+        /// <param name="vector">The vector whose elements will be summed.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The sum of all elements in <paramref name="vector" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static T Sum<T>(Vector256<T> vector)
+            where T : struct
+        {
+            // Start from default(T) and fold each element in, lowest index first.
+            T sum = default;
+
+            for (int index = 0; index < Vector256<T>.Count; index++)
+            {
+                sum = Scalar<T>.Add(sum, vector.GetElementUnsafe(index));
+            }
+
+            return sum;
+        }
+
         /// <summary>Converts the given vector to a scalar containing the value of the first element.</summary>
         /// <typeparam name="T">The type of the input vector.</typeparam>
         /// <param name="vector">The vector to get the first element from.</param>
index 37478d8..8c865cf 100644 (file)
@@ -71,6 +71,8 @@ namespace System.Runtime.Intrinsics
                    (typeof(T) == typeof(short)) ||
                    (typeof(T) == typeof(int)) ||
                    (typeof(T) == typeof(long)) ||
+                   (typeof(T) == typeof(nint)) ||
+                   (typeof(T) == typeof(nuint)) ||
                    (typeof(T) == typeof(sbyte)) ||
                    (typeof(T) == typeof(float)) ||
                    (typeof(T) == typeof(ushort)) ||
index cd7ef0a..59fe828 100644 (file)
@@ -15,6 +15,8 @@ namespace System.Runtime.Intrinsics
     {
         internal const int Size = 8;
 
+        // Byte multiple that LoadAligned and LoadAlignedNonTemporal require of their source address.
+        internal const int Alignment = 8;
+
         /// <summary>Gets a value that indicates whether 64-bit vector operations are subject to hardware acceleration through JIT intrinsic support.</summary>
         /// <value><see langword="true" /> if 64-bit vector operations are subject to hardware acceleration; otherwise, <see langword="false" />.</value>
         /// <remarks>64-bit vector operations are subject to hardware acceleration on systems that support Single Instruction, Multiple Data (SIMD) instructions for 64-bit vectors and the RyuJIT just-in-time compiler is used to compile managed code.</remarks>
@@ -1568,6 +1570,28 @@ namespace System.Runtime.Intrinsics
         public static bool EqualsAny<T>(Vector64<T> left, Vector64<T> right)
             where T : struct => Equals(left, right).As<T, ulong>() != Vector64<ulong>.Zero;
 
+        /// <summary>Collects the most significant bit of every element in a vector into a single value.</summary>
+        /// <param name="vector">The vector whose elements each contribute their most significant bit.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The packed most significant bits extracted from the elements in <paramref name="vector" />.</returns>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static uint ExtractMostSignificantBits<T>(this Vector64<T> vector)
+            where T : struct
+        {
+            uint packed = 0;
+
+            for (int lane = 0; lane < Vector64<T>.Count; lane++)
+            {
+                // The value obtained for element `lane` is moved up by `lane` bit positions before merging.
+                uint bit = Scalar<T>.ExtractMostSignificantBit(vector.GetElementUnsafe(lane));
+                packed |= bit << lane;
+            }
+
+            return packed;
+        }
+
         /// <summary>Computes the floor of each element in a vector.</summary>
         /// <param name="vector">The vector that will have its floor computed.</param>
         /// <returns>A vector whose elements are the floor of the elements in <paramref name="vector" />.</returns>
@@ -1783,6 +1807,89 @@ namespace System.Runtime.Intrinsics
         public static bool LessThanOrEqualAny<T>(Vector64<T> left, Vector64<T> right)
             where T : struct => LessThanOrEqual(left, right).As<T, ulong>() != Vector64<ulong>.Zero;
 
+        /// <summary>Loads a vector from the given source.</summary>
+        /// <param name="source">The source from which the vector will be loaded. No alignment check is performed on it.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe Vector64<T> Load<T>(T* source)
+            where T : unmanaged
+        {
+            // Validate T exactly as LoadAligned/LoadUnsafe do, so every Load* overload
+            // rejects the same set of element types.
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType<T>();
+
+            // This overload has no alignment requirement, so read through the unaligned
+            // helper instead of dereferencing a Vector64<T> pointer directly.
+            return Unsafe.ReadUnaligned<Vector64<T>>(ref *(byte*)source);
+        }
+
+        /// <summary>Reads a vector from a source address that must satisfy the vector alignment.</summary>
+        /// <param name="source">The aligned address the vector is read from.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe Vector64<T> LoadAligned<T>(T* source)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType<T>();
+
+            // An address that is not a multiple of Alignment is rejected before anything is read.
+            bool isAligned = ((nuint)source % Alignment) == 0;
+
+            if (!isAligned)
+            {
+                throw new AccessViolationException();
+            }
+
+            return *(Vector64<T>*)source;
+        }
+
+        /// <summary>Reads a vector from a source address that must satisfy the vector alignment.</summary>
+        /// <param name="source">The aligned address the vector is read from.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        /// <remarks>This method may bypass the cache on certain platforms.</remarks>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe Vector64<T> LoadAlignedNonTemporal<T>(T* source)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType<T>();
+
+            // Only an address that is a multiple of Alignment is read from.
+            if (((nuint)source % Alignment) == 0)
+            {
+                return *(Vector64<T>*)source;
+            }
+
+            throw new AccessViolationException();
+        }
+
+        /// <summary>Reads a vector from the location referenced by <paramref name="source" />.</summary>
+        /// <param name="source">A reference to the first element of the location the vector is read from.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<T> LoadUnsafe<T>(ref T source)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType<T>();
+
+            // Reinterpret the element reference as bytes so the whole vector comes in through one unaligned read.
+            ref byte address = ref Unsafe.As<T, byte>(ref source);
+            return Unsafe.ReadUnaligned<Vector64<T>>(ref address);
+        }
+
+        /// <summary>Reads a vector from the location found by advancing <paramref name="source" /> by an element offset.</summary>
+        /// <param name="source">The base reference to which <paramref name="elementOffset" /> is added before the vector is read.</param>
+        /// <param name="elementOffset">The offset from <paramref name="source" />, counted in elements of <typeparamref name="T" />, at which the vector is read.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The vector loaded from <paramref name="source" /> plus <paramref name="elementOffset" />.</returns>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<T> LoadUnsafe<T>(ref T source, nuint elementOffset)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType<T>();
+
+            // Advance by whole elements first, then reinterpret as bytes for the unaligned read.
+            ref T start = ref Unsafe.Add(ref source, (nint)elementOffset);
+            return Unsafe.ReadUnaligned<Vector64<T>>(ref Unsafe.As<T, byte>(ref start));
+        }
+
         /// <summary>Computes the maximum of two vectors on a per-element basis.</summary>
         /// <param name="left">The vector to compare with <paramref name="right" />.</param>
         /// <param name="right">The vector to compare with <paramref name="left" />.</param>
@@ -2043,6 +2150,492 @@ namespace System.Runtime.Intrinsics
         public static Vector64<T> OnesComplement<T>(Vector64<T> vector)
             where T : struct => ~vector;
 
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<byte> ShiftLeft(Vector64<byte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<byte> shifted);
+
+            for (int i = 0; i < Vector64<byte>.Count; i++)
+            {
+                byte lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<byte>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<short> ShiftLeft(Vector64<short> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<short> shifted);
+
+            for (int i = 0; i < Vector64<short>.Count; i++)
+            {
+                short lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<short>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<int> ShiftLeft(Vector64<int> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<int> shifted);
+
+            for (int i = 0; i < Vector64<int>.Count; i++)
+            {
+                int lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<int>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<long> ShiftLeft(Vector64<long> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<long> shifted);
+
+            for (int i = 0; i < Vector64<long>.Count; i++)
+            {
+                long lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<long>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<nint> ShiftLeft(Vector64<nint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<nint> shifted);
+
+            for (int i = 0; i < Vector64<nint>.Count; i++)
+            {
+                nint lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<nint>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<nuint> ShiftLeft(Vector64<nuint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<nuint> shifted);
+
+            for (int i = 0; i < Vector64<nuint>.Count; i++)
+            {
+                nuint lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<nuint>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<sbyte> ShiftLeft(Vector64<sbyte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<sbyte> shifted);
+
+            for (int i = 0; i < Vector64<sbyte>.Count; i++)
+            {
+                sbyte lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<sbyte>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<ushort> ShiftLeft(Vector64<ushort> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<ushort> shifted);
+
+            for (int i = 0; i < Vector64<ushort>.Count; i++)
+            {
+                ushort lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<ushort>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<uint> ShiftLeft(Vector64<uint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<uint> shifted);
+
+            for (int i = 0; i < Vector64<uint>.Count; i++)
+            {
+                uint lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<uint>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a left shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted left by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<ulong> ShiftLeft(Vector64<ulong> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<ulong> shifted);
+
+            for (int i = 0; i < Vector64<ulong>.Count; i++)
+            {
+                ulong lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<ulong>.ShiftLeft(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<short> ShiftRightArithmetic(Vector64<short> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<short> shifted);
+
+            for (int i = 0; i < Vector64<short>.Count; i++)
+            {
+                short lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<short>.ShiftRightArithmetic(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<int> ShiftRightArithmetic(Vector64<int> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<int> shifted);
+
+            for (int i = 0; i < Vector64<int>.Count; i++)
+            {
+                int lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<int>.ShiftRightArithmetic(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<long> ShiftRightArithmetic(Vector64<long> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<long> shifted);
+
+            for (int i = 0; i < Vector64<long>.Count; i++)
+            {
+                long lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<long>.ShiftRightArithmetic(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<nint> ShiftRightArithmetic(Vector64<nint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<nint> shifted);
+
+            for (int i = 0; i < Vector64<nint>.Count; i++)
+            {
+                nint lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<nint>.ShiftRightArithmetic(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs an arithmetic right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<sbyte> ShiftRightArithmetic(Vector64<sbyte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<sbyte> shifted);
+
+            for (int i = 0; i < Vector64<sbyte>.Count; i++)
+            {
+                sbyte lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<sbyte>.ShiftRightArithmetic(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<byte> ShiftRightLogical(Vector64<byte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<byte> shifted);
+
+            for (int i = 0; i < Vector64<byte>.Count; i++)
+            {
+                byte lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<byte>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<short> ShiftRightLogical(Vector64<short> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<short> shifted);
+
+            for (int i = 0; i < Vector64<short>.Count; i++)
+            {
+                short lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<short>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<int> ShiftRightLogical(Vector64<int> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<int> shifted);
+
+            for (int i = 0; i < Vector64<int>.Count; i++)
+            {
+                int lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<int>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<long> ShiftRightLogical(Vector64<long> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<long> shifted);
+
+            for (int i = 0; i < Vector64<long>.Count; i++)
+            {
+                long lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<long>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<nint> ShiftRightLogical(Vector64<nint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<nint> shifted);
+
+            for (int i = 0; i < Vector64<nint>.Count; i++)
+            {
+                nint lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<nint>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<nuint> ShiftRightLogical(Vector64<nuint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<nuint> shifted);
+
+            for (int i = 0; i < Vector64<nuint>.Count; i++)
+            {
+                nuint lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<nuint>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<sbyte> ShiftRightLogical(Vector64<sbyte> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<sbyte> shifted);
+
+            for (int i = 0; i < Vector64<sbyte>.Count; i++)
+            {
+                sbyte lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<sbyte>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<ushort> ShiftRightLogical(Vector64<ushort> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<ushort> shifted);
+
+            for (int i = 0; i < Vector64<ushort>.Count; i++)
+            {
+                ushort lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<ushort>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<uint> ShiftRightLogical(Vector64<uint> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<uint> shifted);
+
+            for (int i = 0; i < Vector64<uint>.Count; i++)
+            {
+                uint lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<uint>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
+        /// <summary>Performs a logical right shift on every element of a vector by the given bit count.</summary>
+        /// <param name="vector">The vector containing the elements to shift.</param>
+        /// <param name="shiftCount">The bit count by which every element is shifted.</param>
+        /// <returns>A vector whose elements were shifted right by <paramref name="shiftCount" />.</returns>
+        [Intrinsic]
+        [CLSCompliantAttribute(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static Vector64<ulong> ShiftRightLogical(Vector64<ulong> vector, int shiftCount)
+        {
+            Unsafe.SkipInit(out Vector64<ulong> shifted);
+
+            for (int i = 0; i < Vector64<ulong>.Count; i++)
+            {
+                ulong lane = vector.GetElementUnsafe(i);
+                shifted.SetElementUnsafe(i, Scalar<ulong>.ShiftRightLogical(lane, shiftCount));
+            }
+
+            return shifted;
+        }
+
         /// <summary>Computes the square root of a vector on a per-element basis.</summary>
         /// <param name="vector">The vector whose square root is to be computed.</param>
         /// <typeparam name="T">The type of the elements in the vector.</typeparam>
@@ -2062,6 +2655,89 @@ namespace System.Runtime.Intrinsics
             return result;
         }
 
+        /// <summary>Stores a vector at the given destination.</summary>
+        /// <param name="source">The vector that will be stored.</param>
+        /// <param name="destination">The destination at which <paramref name="source" /> will be stored; no particular alignment is required.</param>
+        /// <typeparam name="T">The type of the elements in the vector; unsupported element types are rejected by the StoreUnsafe call.</typeparam>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void Store<T>(this Vector64<T> source, T* destination)
+            where T : unmanaged
+        {
+            source.StoreUnsafe(ref *destination); // shares StoreUnsafe's element-type validation and unaligned write
+        }
+
+        /// <summary>Writes a vector to a destination address that must be aligned.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">The address written to; an <see cref="AccessViolationException" /> is thrown when it is not a multiple of <c>Alignment</c>.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void StoreAligned<T>(this Vector64<T> source, T* destination)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType<T>();
+
+            bool isAligned = ((nuint)destination % Alignment) == 0;
+            if (!isAligned)
+            {
+                throw new AccessViolationException();
+            }
+            *(Vector64<T>*)destination = source;
+        }
+
+        /// <summary>Writes a vector to a destination address that must be aligned.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">The address written to; an <see cref="AccessViolationException" /> is thrown when it is not a multiple of <c>Alignment</c>.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <remarks>This method may bypass the cache on certain platforms.</remarks>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static unsafe void StoreAlignedNonTemporal<T>(this Vector64<T> source, T* destination)
+            where T : unmanaged
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType<T>();
+            if (((nuint)destination % Alignment) == 0)
+            {
+                *(Vector64<T>*)destination = source;
+                return;
+            }
+
+            throw new AccessViolationException();
+        }
+
+        /// <summary>Writes a vector starting at the given destination reference.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">A reference to the first element that <paramref name="source" /> is written over.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        [Intrinsic]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void StoreUnsafe<T>(this Vector64<T> source, ref T destination)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType<T>();
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref destination), source);
+        }
+
+        /// <summary>Writes a vector starting at the given destination reference advanced by an element offset.</summary>
+        /// <param name="source">The vector to write.</param>
+        /// <param name="destination">The reference that <paramref name="elementOffset" /> is added to before the vector is written.</param>
+        /// <param name="elementOffset">The offset, counted in elements of <typeparamref name="T" />, applied to <paramref name="destination" />.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        [Intrinsic]
+        [CLSCompliant(false)]
+        [MethodImpl(MethodImplOptions.AggressiveInlining)]
+        public static void StoreUnsafe<T>(this Vector64<T> source, ref T destination, nuint elementOffset)
+            where T : struct
+        {
+            ThrowHelper.ThrowForUnsupportedIntrinsicsVector64BaseType<T>();
+            ref T address = ref Unsafe.Add(ref destination, (nint)elementOffset);
+            Unsafe.WriteUnaligned(ref Unsafe.As<T, byte>(ref address), source);
+        }
+
         /// <summary>Subtracts two vectors to compute their difference.</summary>
         /// <param name="left">The vector from which <paramref name="right" /> will be subtracted.</param>
         /// <param name="right">The vector to subtract from <paramref name="left" />.</param>
@@ -2072,6 +2748,24 @@ namespace System.Runtime.Intrinsics
         public static Vector64<T> Subtract<T>(Vector64<T> left, Vector64<T> right)
             where T : struct => left - right;
 
+        /// <summary>Adds together every element of a vector.</summary>
+        /// <param name="vector">The vector whose elements are accumulated.</param>
+        /// <typeparam name="T">The type of the elements in the vector.</typeparam>
+        /// <returns>The total of all elements in <paramref name="vector" />, accumulated from the first element to the last.</returns>
+        [Intrinsic]
+        public static T Sum<T>(Vector64<T> vector)
+            where T : struct
+        {
+            T total = default;
+            for (int i = 0; i < Vector64<T>.Count; i++)
+            {
+                T lane = vector.GetElementUnsafe(i);
+                total = Scalar<T>.Add(total, lane);
+            }
+
+            return total;
+        }
+
         /// <summary>Converts the given vector to a scalar containing the value of the first element.</summary>
         /// <typeparam name="T">The type of the input vector.</typeparam>
         /// <param name="vector">The vector to get the first element from.</param>
index 4130c42..b43a1cf 100644 (file)
@@ -52,6 +52,8 @@ namespace System.Runtime.Intrinsics
                    (typeof(T) == typeof(short)) ||
                    (typeof(T) == typeof(int)) ||
                    (typeof(T) == typeof(long)) ||
+                   (typeof(T) == typeof(nint)) ||
+                   (typeof(T) == typeof(nuint)) ||
                    (typeof(T) == typeof(sbyte)) ||
                    (typeof(T) == typeof(float)) ||
                    (typeof(T) == typeof(ushort)) ||
index 4cf2e0e..d1fedae 100644 (file)
@@ -11,4 +11,5 @@
 [assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Runtime.InteropServices.InAttribute))]
 [assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Runtime.InteropServices.SafeBuffer))]
 [assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Runtime.InteropServices.SafeHandle))]
+[assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Runtime.InteropServices.UnmanagedType))]
 [assembly: System.Runtime.CompilerServices.TypeForwardedTo(typeof(System.Reflection.Missing))]
index 6fa5eb1..901b763 100644 (file)
@@ -999,61 +999,6 @@ namespace System.Runtime.InteropServices
         public UnmanagedFunctionPointerAttribute(System.Runtime.InteropServices.CallingConvention callingConvention) { }
         public System.Runtime.InteropServices.CallingConvention CallingConvention { get { throw null; } }
     }
-    public enum UnmanagedType
-    {
-        Bool = 2,
-        I1 = 3,
-        U1 = 4,
-        I2 = 5,
-        U2 = 6,
-        I4 = 7,
-        U4 = 8,
-        I8 = 9,
-        U8 = 10,
-        R4 = 11,
-        R8 = 12,
-        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
-        [System.ObsoleteAttribute("Marshalling as Currency may be unavailable in future releases.")]
-        Currency = 15,
-        BStr = 19,
-        LPStr = 20,
-        LPWStr = 21,
-        LPTStr = 22,
-        ByValTStr = 23,
-        IUnknown = 25,
-        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
-        IDispatch = 26,
-        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
-        Struct = 27,
-        Interface = 28,
-        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
-        SafeArray = 29,
-        ByValArray = 30,
-        SysInt = 31,
-        SysUInt = 32,
-        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
-        [System.ObsoleteAttribute("Marshalling as VBByRefString may be unavailable in future releases.")]
-        VBByRefStr = 34,
-        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
-        [System.ObsoleteAttribute("Marshalling as AnsiBStr may be unavailable in future releases.")]
-        AnsiBStr = 35,
-        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
-        [System.ObsoleteAttribute("Marshalling as TBstr may be unavailable in future releases.")]
-        TBStr = 36,
-        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
-        VariantBool = 37,
-        FunctionPtr = 38,
-        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
-        [System.ObsoleteAttribute("Marshalling arbitrary types may be unavailable in future releases. Specify the type you wish to marshal as.")]
-        AsAny = 40,
-        LPArray = 42,
-        LPStruct = 43,
-        CustomMarshaler = 44,
-        Error = 45,
-        IInspectable = 46,
-        HString = 47,
-        LPUTF8Str = 48,
-    }
     [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
     public enum VarEnum
     {
index bc79e1e..68f2b60 100644 (file)
@@ -146,6 +146,8 @@ namespace System.Runtime.Intrinsics
         public static bool EqualsAll<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
         public static bool EqualsAny<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> Equals<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static uint ExtractMostSignificantBits<T>(this System.Runtime.Intrinsics.Vector128<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Double> Floor(System.Runtime.Intrinsics.Vector128<System.Double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<System.Single> Floor(System.Runtime.Intrinsics.Vector128<System.Single> vector) { throw null; }
         public static T GetElement<T>(this System.Runtime.Intrinsics.Vector128<T> vector, int index) where T : struct { throw null; }
@@ -163,6 +165,15 @@ namespace System.Runtime.Intrinsics
         public static bool LessThanOrEqualAny<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> LessThanOrEqual<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> LessThan<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe System.Runtime.Intrinsics.Vector128<T> Load<T>(T* source) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe System.Runtime.Intrinsics.Vector128<T> LoadAligned<T>(T* source) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe System.Runtime.Intrinsics.Vector128<T> LoadAlignedNonTemporal<T>(T* source) where T : unmanaged { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<T> LoadUnsafe<T>(ref T source) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<T> LoadUnsafe<T>(ref T source, nuint elementOffset) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> Max<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> Min<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> Multiply<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
@@ -181,8 +192,54 @@ namespace System.Runtime.Intrinsics
         public static System.Runtime.Intrinsics.Vector128<System.UInt32> Narrow(System.Runtime.Intrinsics.Vector128<System.UInt64> lower, System.Runtime.Intrinsics.Vector128<System.UInt64> upper) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> Negate<T>(System.Runtime.Intrinsics.Vector128<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> OnesComplement<T>(System.Runtime.Intrinsics.Vector128<T> vector) where T : struct { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Byte> ShiftLeft(System.Runtime.Intrinsics.Vector128<System.Byte> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Int16> ShiftLeft(System.Runtime.Intrinsics.Vector128<System.Int16> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Int32> ShiftLeft(System.Runtime.Intrinsics.Vector128<System.Int32> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Int64> ShiftLeft(System.Runtime.Intrinsics.Vector128<System.Int64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<nint> ShiftLeft(System.Runtime.Intrinsics.Vector128<nint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<nuint> ShiftLeft(System.Runtime.Intrinsics.Vector128<nuint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<System.SByte> ShiftLeft(System.Runtime.Intrinsics.Vector128<System.SByte> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<System.UInt16> ShiftLeft(System.Runtime.Intrinsics.Vector128<System.UInt16> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<System.UInt32> ShiftLeft(System.Runtime.Intrinsics.Vector128<System.UInt32> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<System.UInt64> ShiftLeft(System.Runtime.Intrinsics.Vector128<System.UInt64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Int16> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector128<System.Int16> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Int32> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector128<System.Int32> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Int64> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector128<System.Int64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<nint> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector128<nint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<System.SByte> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector128<System.SByte> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Byte> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<System.Byte> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Int16> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<System.Int16> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Int32> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<System.Int32> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<System.Int64> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<System.Int64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<nint> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<nint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<nuint> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<nuint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<System.SByte> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<System.SByte> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<System.UInt16> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<System.UInt16> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<System.UInt32> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<System.UInt32> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector128<System.UInt64> ShiftRightLogical(System.Runtime.Intrinsics.Vector128<System.UInt64> vector, int shiftCount) { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> Sqrt<T>(System.Runtime.Intrinsics.Vector128<T> vector) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe void Store<T>(this System.Runtime.Intrinsics.Vector128<T> source, T* destination) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe void StoreAligned<T>(this System.Runtime.Intrinsics.Vector128<T> source, T* destination) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe void StoreAlignedNonTemporal<T>(this System.Runtime.Intrinsics.Vector128<T> source, T* destination) where T : unmanaged { throw null; }
+        public static void StoreUnsafe<T>(this System.Runtime.Intrinsics.Vector128<T> source, ref T destination) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static void StoreUnsafe<T>(this System.Runtime.Intrinsics.Vector128<T> source, ref T destination, nuint elementOffset) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> Subtract<T>(System.Runtime.Intrinsics.Vector128<T> left, System.Runtime.Intrinsics.Vector128<T> right) where T : struct { throw null; }
+        public static T Sum<T>(System.Runtime.Intrinsics.Vector128<T> vector) where T : struct { throw null; }
         public static T ToScalar<T>(this System.Runtime.Intrinsics.Vector128<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> ToVector256Unsafe<T>(this System.Runtime.Intrinsics.Vector128<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> ToVector256<T>(this System.Runtime.Intrinsics.Vector128<T> vector) where T : struct { throw null; }
@@ -363,6 +420,8 @@ namespace System.Runtime.Intrinsics
         public static bool EqualsAll<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
         public static bool EqualsAny<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> Equals<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static uint ExtractMostSignificantBits<T>(this System.Runtime.Intrinsics.Vector256<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Double> Floor(System.Runtime.Intrinsics.Vector256<System.Double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<System.Single> Floor(System.Runtime.Intrinsics.Vector256<System.Single> vector) { throw null; }
         public static T GetElement<T>(this System.Runtime.Intrinsics.Vector256<T> vector, int index) where T : struct { throw null; }
@@ -380,6 +439,15 @@ namespace System.Runtime.Intrinsics
         public static bool LessThanOrEqualAny<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> LessThanOrEqual<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> LessThan<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe System.Runtime.Intrinsics.Vector256<T> Load<T>(T* source) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe System.Runtime.Intrinsics.Vector256<T> LoadAligned<T>(T* source) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe System.Runtime.Intrinsics.Vector256<T> LoadAlignedNonTemporal<T>(T* source) where T : unmanaged { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<T> LoadUnsafe<T>(ref T source) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<T> LoadUnsafe<T>(ref T source, nuint elementOffset) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> Max<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> Min<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> Multiply<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
@@ -398,8 +466,54 @@ namespace System.Runtime.Intrinsics
         public static System.Runtime.Intrinsics.Vector256<System.UInt32> Narrow(System.Runtime.Intrinsics.Vector256<System.UInt64> lower, System.Runtime.Intrinsics.Vector256<System.UInt64> upper) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> Negate<T>(System.Runtime.Intrinsics.Vector256<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> OnesComplement<T>(System.Runtime.Intrinsics.Vector256<T> vector) where T : struct { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Byte> ShiftLeft(System.Runtime.Intrinsics.Vector256<System.Byte> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Int16> ShiftLeft(System.Runtime.Intrinsics.Vector256<System.Int16> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Int32> ShiftLeft(System.Runtime.Intrinsics.Vector256<System.Int32> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Int64> ShiftLeft(System.Runtime.Intrinsics.Vector256<System.Int64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<nint> ShiftLeft(System.Runtime.Intrinsics.Vector256<nint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<nuint> ShiftLeft(System.Runtime.Intrinsics.Vector256<nuint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<System.SByte> ShiftLeft(System.Runtime.Intrinsics.Vector256<System.SByte> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<System.UInt16> ShiftLeft(System.Runtime.Intrinsics.Vector256<System.UInt16> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<System.UInt32> ShiftLeft(System.Runtime.Intrinsics.Vector256<System.UInt32> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<System.UInt64> ShiftLeft(System.Runtime.Intrinsics.Vector256<System.UInt64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Int16> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256<System.Int16> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Int32> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256<System.Int32> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Int64> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256<System.Int64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<nint> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256<nint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<System.SByte> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector256<System.SByte> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Byte> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<System.Byte> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Int16> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<System.Int16> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Int32> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<System.Int32> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<System.Int64> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<System.Int64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<nint> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<nint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<nuint> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<nuint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<System.SByte> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<System.SByte> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<System.UInt16> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<System.UInt16> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<System.UInt32> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<System.UInt32> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector256<System.UInt64> ShiftRightLogical(System.Runtime.Intrinsics.Vector256<System.UInt64> vector, int shiftCount) { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> Sqrt<T>(System.Runtime.Intrinsics.Vector256<T> vector) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe void Store<T>(this System.Runtime.Intrinsics.Vector256<T> source, T* destination) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe void StoreAligned<T>(this System.Runtime.Intrinsics.Vector256<T> source, T* destination) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe void StoreAlignedNonTemporal<T>(this System.Runtime.Intrinsics.Vector256<T> source, T* destination) where T : unmanaged { throw null; }
+        public static void StoreUnsafe<T>(this System.Runtime.Intrinsics.Vector256<T> source, ref T destination) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static void StoreUnsafe<T>(this System.Runtime.Intrinsics.Vector256<T> source, ref T destination, nuint elementOffset) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector256<T> Subtract<T>(System.Runtime.Intrinsics.Vector256<T> left, System.Runtime.Intrinsics.Vector256<T> right) where T : struct { throw null; }
+        public static T Sum<T>(System.Runtime.Intrinsics.Vector256<T> vector) where T : struct { throw null; }
         public static T ToScalar<T>(this System.Runtime.Intrinsics.Vector256<T> vector) where T : struct { throw null; }
         public static bool TryCopyTo<T>(this System.Runtime.Intrinsics.Vector256<T> vector, System.Span<T> destination) where T : struct { throw null; }
         [System.CLSCompliantAttribute(false)]
@@ -554,6 +668,8 @@ namespace System.Runtime.Intrinsics
         public static bool EqualsAll<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
         public static bool EqualsAny<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector64<T> Equals<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static uint ExtractMostSignificantBits<T>(this System.Runtime.Intrinsics.Vector64<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Double> Floor(System.Runtime.Intrinsics.Vector64<System.Double> vector) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<System.Single> Floor(System.Runtime.Intrinsics.Vector64<System.Single> vector) { throw null; }
         public static T GetElement<T>(this System.Runtime.Intrinsics.Vector64<T> vector, int index) where T : struct { throw null; }
@@ -569,6 +685,15 @@ namespace System.Runtime.Intrinsics
         public static bool LessThanOrEqualAny<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector64<T> LessThanOrEqual<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector64<T> LessThan<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe System.Runtime.Intrinsics.Vector64<T> Load<T>(T* source) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe System.Runtime.Intrinsics.Vector64<T> LoadAligned<T>(T* source) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe System.Runtime.Intrinsics.Vector64<T> LoadAlignedNonTemporal<T>(T* source) where T : unmanaged { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<T> LoadUnsafe<T>(ref T source) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<T> LoadUnsafe<T>(ref T source, nuint elementOffset) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector64<T> Max<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector64<T> Min<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector64<T> Multiply<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
@@ -587,8 +712,54 @@ namespace System.Runtime.Intrinsics
         // Narrow (UInt64 overload): takes `lower` and `upper` Vector64<UInt64> inputs and returns a Vector64<UInt32>.
         // Negate and OnesComplement are unary over Vector64<T> (T : struct). Bodies are `throw null` placeholders.
         public static System.Runtime.Intrinsics.Vector64<System.UInt32> Narrow(System.Runtime.Intrinsics.Vector64<System.UInt64> lower, System.Runtime.Intrinsics.Vector64<System.UInt64> upper) { throw null; }
         public static System.Runtime.Intrinsics.Vector64<T> Negate<T>(System.Runtime.Intrinsics.Vector64<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector64<T> OnesComplement<T>(System.Runtime.Intrinsics.Vector64<T> vector) where T : struct { throw null; }
+        // ShiftLeft: non-generic overloads, one per integer element type
+        // (Byte, Int16, Int32, Int64, nint, nuint, SByte, UInt16, UInt32, UInt64).
+        // Each takes the vector plus an `int shiftCount` and returns a vector of the same element type.
+        // The nuint, SByte, UInt16, UInt32 and UInt64 overloads are marked CLSCompliant(false).
+        // Bodies are `throw null` placeholders.
+        public static System.Runtime.Intrinsics.Vector64<System.Byte> ShiftLeft(System.Runtime.Intrinsics.Vector64<System.Byte> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<System.Int16> ShiftLeft(System.Runtime.Intrinsics.Vector64<System.Int16> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<System.Int32> ShiftLeft(System.Runtime.Intrinsics.Vector64<System.Int32> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<System.Int64> ShiftLeft(System.Runtime.Intrinsics.Vector64<System.Int64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<nint> ShiftLeft(System.Runtime.Intrinsics.Vector64<nint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<nuint> ShiftLeft(System.Runtime.Intrinsics.Vector64<nuint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<System.SByte> ShiftLeft(System.Runtime.Intrinsics.Vector64<System.SByte> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<System.UInt16> ShiftLeft(System.Runtime.Intrinsics.Vector64<System.UInt16> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<System.UInt32> ShiftLeft(System.Runtime.Intrinsics.Vector64<System.UInt32> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<System.UInt64> ShiftLeft(System.Runtime.Intrinsics.Vector64<System.UInt64> vector, int shiftCount) { throw null; }
+        // ShiftRightArithmetic: declared here only for the signed element types Int16, Int32, Int64, nint and SByte.
+        // Each takes the vector plus an `int shiftCount`; only the SByte overload is marked CLSCompliant(false).
+        // Bodies are `throw null` placeholders.
+        public static System.Runtime.Intrinsics.Vector64<System.Int16> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector64<System.Int16> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<System.Int32> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector64<System.Int32> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<System.Int64> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector64<System.Int64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<nint> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector64<nint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<System.SByte> ShiftRightArithmetic(System.Runtime.Intrinsics.Vector64<System.SByte> vector, int shiftCount) { throw null; }
+        // ShiftRightLogical: non-generic overloads for the same ten integer element types as ShiftLeft
+        // (Byte, Int16, Int32, Int64, nint, nuint, SByte, UInt16, UInt32, UInt64).
+        // Each takes the vector plus an `int shiftCount`; the nuint, SByte, UInt16, UInt32 and UInt64
+        // overloads are marked CLSCompliant(false). Bodies are `throw null` placeholders.
+        public static System.Runtime.Intrinsics.Vector64<System.Byte> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<System.Byte> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<System.Int16> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<System.Int16> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<System.Int32> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<System.Int32> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<System.Int64> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<System.Int64> vector, int shiftCount) { throw null; }
+        public static System.Runtime.Intrinsics.Vector64<nint> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<nint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<nuint> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<nuint> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<System.SByte> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<System.SByte> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<System.UInt16> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<System.UInt16> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<System.UInt32> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<System.UInt32> vector, int shiftCount) { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static System.Runtime.Intrinsics.Vector64<System.UInt64> ShiftRightLogical(System.Runtime.Intrinsics.Vector64<System.UInt64> vector, int shiftCount) { throw null; }
         // Sqrt: unary over Vector64<T> (T : struct), returning a Vector64<T>. Body is a `throw null` placeholder.
         public static System.Runtime.Intrinsics.Vector64<T> Sqrt<T>(System.Runtime.Intrinsics.Vector64<T> vector) where T : struct { throw null; }
+        // Store family for Vector64<T>, all declared as extension methods on the source vector (`this`) and returning void.
+        // The pointer-taking overloads (Store, StoreAligned, StoreAlignedNonTemporal) are `unsafe`,
+        // constrain T to `unmanaged`, and are marked CLSCompliant(false).
+        // StoreUnsafe takes a `ref T destination` (T : struct); its second overload adds a `nuint elementOffset`
+        // and is the only StoreUnsafe overload marked CLSCompliant(false). Bodies are `throw null` placeholders.
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe void Store<T>(this System.Runtime.Intrinsics.Vector64<T> source, T* destination) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe void StoreAligned<T>(this System.Runtime.Intrinsics.Vector64<T> source, T* destination) where T : unmanaged { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static unsafe void StoreAlignedNonTemporal<T>(this System.Runtime.Intrinsics.Vector64<T> source, T* destination) where T : unmanaged { throw null; }
+        public static void StoreUnsafe<T>(this System.Runtime.Intrinsics.Vector64<T> source, ref T destination) where T : struct { throw null; }
+        [System.CLSCompliantAttribute(false)]
+        public static void StoreUnsafe<T>(this System.Runtime.Intrinsics.Vector64<T> source, ref T destination, nuint elementOffset) where T : struct { throw null; }
         // Subtract takes two Vector64<T> operands; Sum and ToScalar take one Vector64<T> and return a single T.
         // ToVector128Unsafe and ToVector128 are extension methods returning a Vector128<T>.
         // All are generic over T : struct, and every body is a `throw null` placeholder.
         public static System.Runtime.Intrinsics.Vector64<T> Subtract<T>(System.Runtime.Intrinsics.Vector64<T> left, System.Runtime.Intrinsics.Vector64<T> right) where T : struct { throw null; }
+        public static T Sum<T>(System.Runtime.Intrinsics.Vector64<T> vector) where T : struct { throw null; }
         public static T ToScalar<T>(this System.Runtime.Intrinsics.Vector64<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> ToVector128Unsafe<T>(this System.Runtime.Intrinsics.Vector64<T> vector) where T : struct { throw null; }
         public static System.Runtime.Intrinsics.Vector128<T> ToVector128<T>(this System.Runtime.Intrinsics.Vector64<T> vector) where T : struct { throw null; }
index 7fa5060..e6a39a0 100644 (file)
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Linq;
+using System.Runtime.InteropServices;
 using Xunit;
 
 namespace System.Runtime.Intrinsics.Tests.Vectors
 {
     public sealed class Vector128Tests
     {
+        [Fact]
+        public unsafe void Vector128ByteExtractMostSignificantBitsTest()
+        {
+            // Sixteen byte lanes alternating 0x01 (top bit clear) and 0x80 (top bit set).
+            Vector128<byte> input = Vector128.Create(
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80,
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80
+            );
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            // The expected mask has exactly the odd-numbered bits set, mirroring the 0x80 lanes.
+            Assert.Equal(0b10101010_10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128DoubleExtractMostSignificantBitsTest()
+        {
+            // Lane 0 is +1.0 and lane 1 is -0.0; the assertion expects only bit 1 in the mask.
+            Vector128<double> input = Vector128.Create(+1.0, -0.0);
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128Int16ExtractMostSignificantBitsTest()
+        {
+            // Eight 16-bit lanes alternating 0x0001 and 0x8000, then reinterpreted as short via AsInt16().
+            Vector128<short> input = Vector128.Create(
+                0x0001, 0x8000, 0x0001, 0x8000,
+                0x0001, 0x8000, 0x0001, 0x8000
+            ).AsInt16();
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128Int32ExtractMostSignificantBitsTest()
+        {
+            // Four uint lanes alternating 0x00000001 and 0x80000000, reinterpreted as int via AsInt32().
+            Vector128<int> input = Vector128.Create(
+                0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U
+            ).AsInt32();
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b1010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128Int64ExtractMostSignificantBitsTest()
+        {
+            // Two ulong lanes (top bit clear, then top bit set), reinterpreted as long via AsInt64().
+            Vector128<long> input = Vector128.Create(0x0000000000000001UL, 0x8000000000000000UL).AsInt64();
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128NIntExtractMostSignificantBitsTest()
+        {
+            // Input and expected mask depend on process bitness; the check itself is shared.
+            Vector128<nint> input;
+            uint expectedMask;
+
+            if (Environment.Is64BitProcess)
+            {
+                // Built from two 64-bit lanes.
+                input = Vector128.Create(0x0000000000000001UL, 0x8000000000000000UL).AsNInt();
+                expectedMask = 0b10u;
+            }
+            else
+            {
+                // Built from four 32-bit lanes.
+                input = Vector128.Create(0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U).AsNInt();
+                expectedMask = 0b1010u;
+            }
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+            Assert.Equal(expectedMask, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128NUIntExtractMostSignificantBitsTest()
+        {
+            // Input and expected mask depend on process bitness; the check itself is shared.
+            Vector128<nuint> input;
+            uint expectedMask;
+
+            if (Environment.Is64BitProcess)
+            {
+                // Built from two 64-bit lanes.
+                input = Vector128.Create(0x0000000000000001UL, 0x8000000000000000UL).AsNUInt();
+                expectedMask = 0b10u;
+            }
+            else
+            {
+                // Built from four 32-bit lanes.
+                input = Vector128.Create(0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U).AsNUInt();
+                expectedMask = 0b1010u;
+            }
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+            Assert.Equal(expectedMask, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128SByteExtractMostSignificantBitsTest()
+        {
+            // Sixteen 8-bit lanes alternating 0x01 and 0x80, reinterpreted as sbyte via AsSByte().
+            Vector128<sbyte> input = Vector128.Create(
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80,
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80
+            ).AsSByte();
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010_10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128SingleExtractMostSignificantBitsTest()
+        {
+            // Four float lanes alternating +1.0f and -0.0f; the assertion expects bits 1 and 3 in the mask.
+            Vector128<float> input = Vector128.Create(+1.0f, -0.0f, +1.0f, -0.0f);
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b1010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt16ExtractMostSignificantBitsTest()
+        {
+            // Eight ushort lanes alternating 0x0001 (top bit clear) and 0x8000 (top bit set).
+            Vector128<ushort> input = Vector128.Create(
+                0x0001, 0x8000, 0x0001, 0x8000,
+                0x0001, 0x8000, 0x0001, 0x8000
+            );
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt32ExtractMostSignificantBitsTest()
+        {
+            // Four uint lanes alternating 0x00000001 (top bit clear) and 0x80000000 (top bit set).
+            Vector128<uint> input = Vector128.Create(
+                0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U
+            );
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b1010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt64ExtractMostSignificantBitsTest()
+        {
+            // Two ulong lanes: top bit clear, then top bit set.
+            Vector128<ulong> input = Vector128.Create(0x0000000000000001UL, 0x8000000000000000UL);
+
+            uint mask = Vector128.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector128ByteLoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            byte* source = stackalloc byte[16] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+            Vector128<byte> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<byte>.Count; lane++)
+            {
+                Assert.Equal((byte)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128DoubleLoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            double* source = stackalloc double[2] { 0, 1 };
+
+            Vector128<double> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<double>.Count; lane++)
+            {
+                Assert.Equal((double)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int16LoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            short* source = stackalloc short[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector128<short> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<short>.Count; lane++)
+            {
+                Assert.Equal((short)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int32LoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            int* source = stackalloc int[4] { 0, 1, 2, 3 };
+
+            Vector128<int> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<int>.Count; lane++)
+            {
+                Assert.Equal((int)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int64LoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            long* source = stackalloc long[2] { 0, 1 };
+
+            Vector128<long> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<long>.Count; lane++)
+            {
+                Assert.Equal((long)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NIntLoadTest()
+        {
+            // The buffer length follows process bitness: four elements when 32-bit, two when 64-bit.
+            if (!Environment.Is64BitProcess)
+            {
+                nint* source = stackalloc nint[4] { 0, 1, 2, 3 };
+
+                Vector128<nint> loaded = Vector128.Load(source);
+
+                for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+                {
+                    Assert.Equal((nint)lane, loaded.GetElement(lane));
+                }
+            }
+            else
+            {
+                nint* source = stackalloc nint[2] { 0, 1 };
+
+                Vector128<nint> loaded = Vector128.Load(source);
+
+                for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+                {
+                    Assert.Equal((nint)lane, loaded.GetElement(lane));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NUIntLoadTest()
+        {
+            // The buffer length follows process bitness: four elements when 32-bit, two when 64-bit.
+            if (!Environment.Is64BitProcess)
+            {
+                nuint* source = stackalloc nuint[4] { 0, 1, 2, 3 };
+
+                Vector128<nuint> loaded = Vector128.Load(source);
+
+                for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+                {
+                    Assert.Equal((nuint)lane, loaded.GetElement(lane));
+                }
+            }
+            else
+            {
+                nuint* source = stackalloc nuint[2] { 0, 1 };
+
+                Vector128<nuint> loaded = Vector128.Load(source);
+
+                for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+                {
+                    Assert.Equal((nuint)lane, loaded.GetElement(lane));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SByteLoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            sbyte* source = stackalloc sbyte[16] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+            Vector128<sbyte> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<sbyte>.Count; lane++)
+            {
+                Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SingleLoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            float* source = stackalloc float[4] { 0, 1, 2, 3 };
+
+            Vector128<float> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<float>.Count; lane++)
+            {
+                Assert.Equal((float)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt16LoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            ushort* source = stackalloc ushort[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector128<ushort> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<ushort>.Count; lane++)
+            {
+                Assert.Equal((ushort)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt32LoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            uint* source = stackalloc uint[4] { 0, 1, 2, 3 };
+
+            Vector128<uint> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<uint>.Count; lane++)
+            {
+                Assert.Equal((uint)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt64LoadTest()
+        {
+            // Stack buffer in which every element equals its own index.
+            ulong* source = stackalloc ulong[2] { 0, 1 };
+
+            Vector128<ulong> loaded = Vector128.Load(source);
+
+            // Every lane must hold its own index.
+            for (int lane = 0; lane < Vector128<ulong>.Count; lane++)
+            {
+                Assert.Equal((ulong)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128ByteLoadAlignedTest()
+        {
+            byte* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (byte*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Fill slots 0..15 so that each element equals its own index.
+                for (int slot = 0; slot < 16; slot++)
+                {
+                    buffer[slot] = (byte)slot;
+                }
+
+                Vector128<byte> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<byte>.Count; lane++)
+                {
+                    Assert.Equal((byte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128DoubleLoadAlignedTest()
+        {
+            double* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (double*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each element equals its own index.
+                buffer[0] = 0;
+                buffer[1] = 1;
+
+                Vector128<double> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<double>.Count; lane++)
+                {
+                    Assert.Equal((double)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int16LoadAlignedTest()
+        {
+            short* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (short*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Fill slots 0..7 so that each element equals its own index.
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    buffer[slot] = (short)slot;
+                }
+
+                Vector128<short> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<short>.Count; lane++)
+                {
+                    Assert.Equal((short)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int32LoadAlignedTest()
+        {
+            int* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (int*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Fill slots 0..3 so that each element equals its own index.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = slot;
+                }
+
+                Vector128<int> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<int>.Count; lane++)
+                {
+                    Assert.Equal((int)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int64LoadAlignedTest()
+        {
+            long* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (long*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each element equals its own index.
+                buffer[0] = 0;
+                buffer[1] = 1;
+
+                Vector128<long> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<long>.Count; lane++)
+                {
+                    Assert.Equal((long)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NIntLoadAlignedTest()
+        {
+            nint* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (nint*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Two slots are written in a 64-bit process, four otherwise; each equals its own index.
+                int slotCount = Environment.Is64BitProcess ? 2 : 4;
+
+                for (int slot = 0; slot < slotCount; slot++)
+                {
+                    buffer[slot] = slot;
+                }
+
+                Vector128<nint> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+                {
+                    Assert.Equal((nint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NUIntLoadAlignedTest()
+        {
+            nuint* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (nuint*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Two slots are written in a 64-bit process, four otherwise; each equals its own index.
+                int slotCount = Environment.Is64BitProcess ? 2 : 4;
+
+                for (int slot = 0; slot < slotCount; slot++)
+                {
+                    buffer[slot] = (nuint)slot;
+                }
+
+                Vector128<nuint> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+                {
+                    Assert.Equal((nuint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SByteLoadAlignedTest()
+        {
+            sbyte* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Fill slots 0..15 so that each element equals its own index.
+                for (int slot = 0; slot < 16; slot++)
+                {
+                    buffer[slot] = (sbyte)slot;
+                }
+
+                Vector128<sbyte> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<sbyte>.Count; lane++)
+                {
+                    Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SingleLoadAlignedTest()
+        {
+            float* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (float*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Fill slots 0..3 so that each element equals its own index.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = slot;
+                }
+
+                Vector128<float> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<float>.Count; lane++)
+                {
+                    Assert.Equal((float)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt16LoadAlignedTest()
+        {
+            ushort* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (ushort*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Fill slots 0..7 so that each element equals its own index.
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    buffer[slot] = (ushort)slot;
+                }
+
+                Vector128<ushort> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<ushort>.Count; lane++)
+                {
+                    Assert.Equal((ushort)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt32LoadAlignedTest()
+        {
+            uint* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (uint*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Fill slots 0..3 so that each element equals its own index.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = (uint)slot;
+                }
+
+                Vector128<uint> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<uint>.Count; lane++)
+                {
+                    Assert.Equal((uint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt64LoadAlignedTest()
+        {
+            ulong* buffer = null;
+
+            try
+            {
+                // 16 bytes requested at 16-byte alignment.
+                buffer = (ulong*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each element equals its own index.
+                buffer[0] = 0;
+                buffer[1] = 1;
+
+                Vector128<ulong> loaded = Vector128.LoadAligned(buffer);
+
+                for (int lane = 0; lane < Vector128<ulong>.Count; lane++)
+                {
+                    Assert.Equal((ulong)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Released on every exit path, including a failed assertion.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the sixteen byte values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128ByteLoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            byte* buffer = null;
+
+            try
+            {
+                buffer = (byte*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0 through 15.
+                for (int slot = 0; slot < 16; slot++)
+                {
+                    buffer[slot] = (byte)slot;
+                }
+
+                Vector128<byte> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<byte>.Count; lane++)
+                {
+                    byte expected = (byte)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the two double values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128DoubleLoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            double* buffer = null;
+
+            try
+            {
+                buffer = (double*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0, 1.
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    buffer[slot] = (double)slot;
+                }
+
+                Vector128<double> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<double>.Count; lane++)
+                {
+                    double expected = (double)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the eight short values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128Int16LoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            short* buffer = null;
+
+            try
+            {
+                buffer = (short*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0 through 7.
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    buffer[slot] = (short)slot;
+                }
+
+                Vector128<short> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<short>.Count; lane++)
+                {
+                    short expected = (short)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the four int values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128Int32LoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            int* buffer = null;
+
+            try
+            {
+                buffer = (int*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0, 1, 2, 3.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = slot;
+                }
+
+                Vector128<int> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<int>.Count; lane++)
+                {
+                    int expected = lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the two long values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128Int64LoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            long* buffer = null;
+
+            try
+            {
+                buffer = (long*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0, 1.
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    buffer[slot] = (long)slot;
+                }
+
+                Vector128<long> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<long>.Count; lane++)
+                {
+                    long expected = (long)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the nint values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128NIntLoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            nint* buffer = null;
+
+            try
+            {
+                buffer = (nint*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Two slots are written in a 64-bit process and four otherwise.
+                int slotCount = Environment.Is64BitProcess ? 2 : 4;
+
+                for (int slot = 0; slot < slotCount; slot++)
+                {
+                    buffer[slot] = (nint)slot;
+                }
+
+                Vector128<nint> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+                {
+                    nint expected = (nint)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the nuint values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128NUIntLoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            nuint* buffer = null;
+
+            try
+            {
+                buffer = (nuint*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Two slots are written in a 64-bit process and four otherwise.
+                int slotCount = Environment.Is64BitProcess ? 2 : 4;
+
+                for (int slot = 0; slot < slotCount; slot++)
+                {
+                    buffer[slot] = (nuint)slot;
+                }
+
+                Vector128<nuint> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+                {
+                    nuint expected = (nuint)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the sixteen sbyte values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128SByteLoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            sbyte* buffer = null;
+
+            try
+            {
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0 through 15.
+                for (int slot = 0; slot < 16; slot++)
+                {
+                    buffer[slot] = (sbyte)slot;
+                }
+
+                Vector128<sbyte> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<sbyte>.Count; lane++)
+                {
+                    sbyte expected = (sbyte)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the four float values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128SingleLoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            float* buffer = null;
+
+            try
+            {
+                buffer = (float*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0, 1, 2, 3.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = (float)slot;
+                }
+
+                Vector128<float> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<float>.Count; lane++)
+                {
+                    float expected = (float)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the eight ushort values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128UInt16LoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            ushort* buffer = null;
+
+            try
+            {
+                buffer = (ushort*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0 through 7.
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    buffer[slot] = (ushort)slot;
+                }
+
+                Vector128<ushort> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<ushort>.Count; lane++)
+                {
+                    ushort expected = (ushort)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the four uint values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128UInt32LoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            uint* buffer = null;
+
+            try
+            {
+                buffer = (uint*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0, 1, 2, 3.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = (uint)slot;
+                }
+
+                Vector128<uint> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<uint>.Count; lane++)
+                {
+                    uint expected = (uint)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadAlignedNonTemporal returns the two ulong values written to a 16-byte-aligned buffer, lane by lane.
+        [Fact]
+        public unsafe void Vector128UInt64LoadAlignedNonTemporalTest()
+        {
+            // Starts as null so the finally block always has a defined pointer to hand to AlignedFree.
+            ulong* buffer = null;
+
+            try
+            {
+                buffer = (ulong*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Each slot stores its own position: 0, 1.
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    buffer[slot] = (ulong)slot;
+                }
+
+                Vector128<ulong> loaded = Vector128.LoadAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector128<ulong>.Count; lane++)
+                {
+                    ulong expected = (ulong)lane;
+                    Assert.Equal(expected, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of sixteen stack-allocated bytes 0..15, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128ByteLoadUnsafeTest()
+        {
+            byte* source = stackalloc byte[16] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+            Vector128<byte> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<byte>.Count; lane++)
+            {
+                byte expected = (byte)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of two stack-allocated doubles 0..1, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128DoubleLoadUnsafeTest()
+        {
+            double* source = stackalloc double[2] { 0, 1 };
+
+            Vector128<double> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<double>.Count; lane++)
+            {
+                double expected = (double)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of eight stack-allocated shorts 0..7, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128Int16LoadUnsafeTest()
+        {
+            short* source = stackalloc short[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector128<short> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<short>.Count; lane++)
+            {
+                short expected = (short)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of four stack-allocated ints 0..3, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128Int32LoadUnsafeTest()
+        {
+            int* source = stackalloc int[4] { 0, 1, 2, 3 };
+
+            Vector128<int> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<int>.Count; lane++)
+            {
+                int expected = lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of two stack-allocated longs 0..1, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128Int64LoadUnsafeTest()
+        {
+            long* source = stackalloc long[2] { 0, 1 };
+
+            Vector128<long> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<long>.Count; lane++)
+            {
+                long expected = (long)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first stack-allocated nint, yields the values 0, 1, ... lane by lane.
+        [Fact]
+        public unsafe void Vector128NIntLoadUnsafeTest()
+        {
+            Vector128<nint> loaded;
+
+            // The source buffer holds two elements in a 64-bit process and four otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                nint* source = stackalloc nint[2] { 0, 1 };
+                loaded = Vector128.LoadUnsafe(ref source[0]);
+            }
+            else
+            {
+                nint* source = stackalloc nint[4] { 0, 1, 2, 3 };
+                loaded = Vector128.LoadUnsafe(ref source[0]);
+            }
+
+            // The per-lane expectation is the same in both cases, so it is checked once here.
+            for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+            {
+                nint expected = (nint)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first stack-allocated nuint, yields the values 0, 1, ... lane by lane.
+        [Fact]
+        public unsafe void Vector128NUIntLoadUnsafeTest()
+        {
+            Vector128<nuint> loaded;
+
+            // The source buffer holds two elements in a 64-bit process and four otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                nuint* source = stackalloc nuint[2] { 0, 1 };
+                loaded = Vector128.LoadUnsafe(ref source[0]);
+            }
+            else
+            {
+                nuint* source = stackalloc nuint[4] { 0, 1, 2, 3 };
+                loaded = Vector128.LoadUnsafe(ref source[0]);
+            }
+
+            // The per-lane expectation is the same in both cases, so it is checked once here.
+            for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+            {
+                nuint expected = (nuint)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of sixteen stack-allocated sbytes 0..15, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128SByteLoadUnsafeTest()
+        {
+            sbyte* source = stackalloc sbyte[16] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+            Vector128<sbyte> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<sbyte>.Count; lane++)
+            {
+                sbyte expected = (sbyte)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of four stack-allocated floats 0..3, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128SingleLoadUnsafeTest()
+        {
+            float* source = stackalloc float[4] { 0, 1, 2, 3 };
+
+            Vector128<float> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<float>.Count; lane++)
+            {
+                float expected = (float)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of eight stack-allocated ushorts 0..7, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128UInt16LoadUnsafeTest()
+        {
+            ushort* source = stackalloc ushort[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector128<ushort> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<ushort>.Count; lane++)
+            {
+                ushort expected = (ushort)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of four stack-allocated uints 0..3, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128UInt32LoadUnsafeTest()
+        {
+            uint* source = stackalloc uint[4] { 0, 1, 2, 3 };
+
+            Vector128<uint> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<uint>.Count; lane++)
+            {
+                uint expected = (uint)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe, given a reference to the first of two stack-allocated ulongs 0..1, yields them lane by lane.
+        [Fact]
+        public unsafe void Vector128UInt64LoadUnsafeTest()
+        {
+            ulong* source = stackalloc ulong[2] { 0, 1 };
+
+            Vector128<ulong> loaded = Vector128.LoadUnsafe(ref source[0]);
+
+            for (int lane = 0; lane < Vector128<ulong>.Count; lane++)
+            {
+                ulong expected = (ulong)lane;
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first byte: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128ByteLoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has sixteen values to read.
+            byte* source = stackalloc byte[16 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+
+            Vector128<byte> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<byte>.Count; lane++)
+            {
+                byte expected = (byte)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first double: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128DoubleLoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has two values to read.
+            double* source = stackalloc double[2 + 1] { 0, 1, 2 };
+
+            Vector128<double> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<double>.Count; lane++)
+            {
+                double expected = (double)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first short: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128Int16LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has eight values to read.
+            short* source = stackalloc short[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            Vector128<short> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<short>.Count; lane++)
+            {
+                short expected = (short)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first int: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128Int32LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has four values to read.
+            int* source = stackalloc int[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector128<int> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<int>.Count; lane++)
+            {
+                int expected = (int)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first long: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128Int64LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has two values to read.
+            long* source = stackalloc long[2 + 1] { 0, 1, 2 };
+
+            Vector128<long> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<long>.Count; lane++)
+            {
+                long expected = (long)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first nint: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128NIntLoadUnsafeIndexTest()
+        {
+            Vector128<nint> loaded;
+
+            // The source buffer holds three elements in a 64-bit process and five otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                nint* source = stackalloc nint[2 + 1] { 0, 1, 2 };
+                loaded = Vector128.LoadUnsafe(ref source[0], 1);
+            }
+            else
+            {
+                nint* source = stackalloc nint[4 + 1] { 0, 1, 2, 3, 4 };
+                loaded = Vector128.LoadUnsafe(ref source[0], 1);
+            }
+
+            // The per-lane expectation is the same in both cases, so it is checked once here.
+            for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+            {
+                nint expected = (nint)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first nuint: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128NUIntLoadUnsafeIndexTest()
+        {
+            Vector128<nuint> loaded;
+
+            // The source buffer holds three elements in a 64-bit process and five otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                nuint* source = stackalloc nuint[2 + 1] { 0, 1, 2 };
+                loaded = Vector128.LoadUnsafe(ref source[0], 1);
+            }
+            else
+            {
+                nuint* source = stackalloc nuint[4 + 1] { 0, 1, 2, 3, 4 };
+                loaded = Vector128.LoadUnsafe(ref source[0], 1);
+            }
+
+            // The per-lane expectation is the same in both cases, so it is checked once here.
+            for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+            {
+                nuint expected = (nuint)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first sbyte: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128SByteLoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has sixteen values to read.
+            sbyte* source = stackalloc sbyte[16 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+
+            Vector128<sbyte> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<sbyte>.Count; lane++)
+            {
+                sbyte expected = (sbyte)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first float: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128SingleLoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has four values to read.
+            float* source = stackalloc float[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector128<float> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<float>.Count; lane++)
+            {
+                float expected = (float)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first ushort: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128UInt16LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has eight values to read.
+            ushort* source = stackalloc ushort[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            Vector128<ushort> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<ushort>.Count; lane++)
+            {
+                ushort expected = (ushort)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first uint: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128UInt32LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has four values to read.
+            uint* source = stackalloc uint[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector128<uint> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<uint>.Count; lane++)
+            {
+                uint expected = (uint)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.LoadUnsafe with an offset argument of 1 skips the first ulong: lane i is expected to hold i + 1.
+        [Fact]
+        public unsafe void Vector128UInt64LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector, so the offset load still has two values to read.
+            ulong* source = stackalloc ulong[2 + 1] { 0, 1, 2 };
+
+            Vector128<ulong> loaded = Vector128.LoadUnsafe(ref source[0], 1);
+
+            for (int lane = 0; lane < Vector128<ulong>.Count; lane++)
+            {
+                ulong expected = (ulong)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every byte lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128ByteShiftLeftTest()
+        {
+            Vector128<byte> input = Vector128.Create((byte)0x01);
+            Vector128<byte> shifted = Vector128.ShiftLeft(input, 4);
+
+            byte expected = (byte)0x10;
+
+            for (int lane = 0; lane < Vector128<byte>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every short lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128Int16ShiftLeftTest()
+        {
+            Vector128<short> input = Vector128.Create((short)0x01);
+            Vector128<short> shifted = Vector128.ShiftLeft(input, 4);
+
+            short expected = (short)0x10;
+
+            for (int lane = 0; lane < Vector128<short>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every int lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128Int32ShiftLeftTest()
+        {
+            Vector128<int> input = Vector128.Create((int)0x01);
+            Vector128<int> shifted = Vector128.ShiftLeft(input, 4);
+
+            int expected = (int)0x10;
+
+            for (int lane = 0; lane < Vector128<int>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every long lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128Int64ShiftLeftTest()
+        {
+            Vector128<long> input = Vector128.Create((long)0x01);
+            Vector128<long> shifted = Vector128.ShiftLeft(input, 4);
+
+            long expected = (long)0x10;
+
+            for (int lane = 0; lane < Vector128<long>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every nint lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128NIntShiftLeftTest()
+        {
+            Vector128<nint> input = Vector128.Create((nint)0x01);
+            Vector128<nint> shifted = Vector128.ShiftLeft(input, 4);
+
+            nint expected = (nint)0x10;
+
+            for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every nuint lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128NUIntShiftLeftTest()
+        {
+            Vector128<nuint> input = Vector128.Create((nuint)0x01);
+            Vector128<nuint> shifted = Vector128.ShiftLeft(input, 4);
+
+            nuint expected = (nuint)0x10;
+
+            for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every sbyte lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128SByteShiftLeftTest()
+        {
+            Vector128<sbyte> input = Vector128.Create((sbyte)0x01);
+            Vector128<sbyte> shifted = Vector128.ShiftLeft(input, 4);
+
+            sbyte expected = (sbyte)0x10;
+
+            for (int lane = 0; lane < Vector128<sbyte>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every ushort lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128UInt16ShiftLeftTest()
+        {
+            Vector128<ushort> input = Vector128.Create((ushort)0x01);
+            Vector128<ushort> shifted = Vector128.ShiftLeft(input, 4);
+
+            ushort expected = (ushort)0x10;
+
+            for (int lane = 0; lane < Vector128<ushort>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every uint lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128UInt32ShiftLeftTest()
+        {
+            Vector128<uint> input = Vector128.Create((uint)0x01);
+            Vector128<uint> shifted = Vector128.ShiftLeft(input, 4);
+
+            uint expected = (uint)0x10;
+
+            for (int lane = 0; lane < Vector128<uint>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftLeft by 4 turns every ulong lane holding 0x01 into 0x10.
+        [Fact]
+        public void Vector128UInt64ShiftLeftTest()
+        {
+            Vector128<ulong> input = Vector128.Create((ulong)0x01);
+            Vector128<ulong> shifted = Vector128.ShiftLeft(input, 4);
+
+            ulong expected = (ulong)0x10;
+
+            for (int lane = 0; lane < Vector128<ulong>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftRightArithmetic by 4 turns every short lane holding 0x8000 into 0xF800.
+        [Fact]
+        public void Vector128Int16ShiftRightArithmeticTest()
+        {
+            Vector128<short> input = Vector128.Create(unchecked((short)0x8000));
+            Vector128<short> shifted = Vector128.ShiftRightArithmetic(input, 4);
+
+            short expected = unchecked((short)0xF800);
+
+            for (int lane = 0; lane < Vector128<short>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftRightArithmetic by 4 turns every int lane holding 0x80000000 into 0xF8000000.
+        [Fact]
+        public void Vector128Int32ShiftRightArithmeticTest()
+        {
+            Vector128<int> input = Vector128.Create(unchecked((int)0x80000000));
+            Vector128<int> shifted = Vector128.ShiftRightArithmetic(input, 4);
+
+            int expected = unchecked((int)0xF8000000);
+
+            for (int lane = 0; lane < Vector128<int>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftRightArithmetic by 4 turns every long lane holding 0x8000000000000000 into 0xF800000000000000.
+        [Fact]
+        public void Vector128Int64ShiftRightArithmeticTest()
+        {
+            Vector128<long> input = Vector128.Create(unchecked((long)0x8000000000000000));
+            Vector128<long> shifted = Vector128.ShiftRightArithmetic(input, 4);
+
+            long expected = unchecked((long)0xF800000000000000);
+
+            for (int lane = 0; lane < Vector128<long>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        // Checks that Vector128.ShiftRightArithmetic by 4 on nint lanes turns the input constant into the matching 0xF8... constant.
+        [Fact]
+        public void Vector128NIntShiftRightArithmeticTest()
+        {
+            Vector128<nint> input;
+            nint expected;
+
+            // The input and expected constants are 64 bits wide in a 64-bit process and 32 bits wide otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                input = Vector128.Create(unchecked((nint)0x8000000000000000));
+                expected = unchecked((nint)0xF800000000000000);
+            }
+            else
+            {
+                input = Vector128.Create(unchecked((nint)0x80000000));
+                expected = unchecked((nint)0xF8000000);
+            }
+
+            Vector128<nint> shifted = Vector128.ShiftRightArithmetic(input, 4);
+
+            for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128SByteShiftRightArithmeticTest()
+        {
+            // Only the top bit is set in each lane; the arithmetic shift by 4 is expected
+            // to fill the vacated high bits with copies of it (0x80 -> 0xF8).
+            sbyte expected = unchecked((sbyte)0xF8);
+            Vector128<sbyte> input = Vector128.Create(unchecked((sbyte)0x80));
+            Vector128<sbyte> shifted = Vector128.ShiftRightArithmetic(input, 4);
+            int laneCount = Vector128<sbyte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128ByteShiftRightLogicalTest()
+        {
+            // Logical shift right by 4 moves the single set bit down: 0x80 -> 0x08.
+            Vector128<byte> input = Vector128.Create((byte)0x80);
+            Vector128<byte> shifted = Vector128.ShiftRightLogical(input, 4);
+            int laneCount = Vector128<byte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((byte)0x08, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128Int16ShiftRightLogicalTest()
+        {
+            // The input has only its top bit set; a logical shift must zero-fill,
+            // so the expected lane value is the positive 0x0800.
+            short expected = (short)0x0800;
+            Vector128<short> input = Vector128.Create(unchecked((short)0x8000));
+            Vector128<short> shifted = Vector128.ShiftRightLogical(input, 4);
+            int laneCount = Vector128<short>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128Int32ShiftRightLogicalTest()
+        {
+            // The input has only its top bit set; a logical shift must zero-fill,
+            // so the expected lane value is the positive 0x08000000.
+            int expected = (int)0x08000000;
+            Vector128<int> input = Vector128.Create(unchecked((int)0x80000000));
+            Vector128<int> shifted = Vector128.ShiftRightLogical(input, 4);
+            int laneCount = Vector128<int>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128Int64ShiftRightLogicalTest()
+        {
+            // The input has only its top bit set; a logical shift must zero-fill,
+            // so the expected lane value is the positive 0x0800000000000000.
+            long expected = (long)0x0800000000000000;
+            Vector128<long> input = Vector128.Create(unchecked((long)0x8000000000000000));
+            Vector128<long> shifted = Vector128.ShiftRightLogical(input, 4);
+            int laneCount = Vector128<long>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128NIntShiftRightLogicalTest()
+        {
+            // The test uses 64-bit-wide constants in a 64-bit process and 32-bit-wide
+            // constants otherwise; the logical shift is expected to zero-fill from the top.
+            bool is64Bit = Environment.Is64BitProcess;
+            nint seed = is64Bit ? unchecked((nint)0x8000000000000000) : unchecked((nint)0x80000000);
+            nint expected = is64Bit ? unchecked((nint)0x0800000000000000) : unchecked((nint)0x08000000);
+
+            Vector128<nint> input = Vector128.Create(seed);
+            Vector128<nint> shifted = Vector128.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector128<nint>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128NUIntShiftRightLogicalTest()
+        {
+            // The test uses 64-bit-wide constants in a 64-bit process and 32-bit-wide
+            // constants otherwise; the logical shift is expected to zero-fill from the top.
+            bool is64Bit = Environment.Is64BitProcess;
+            nuint seed = is64Bit ? unchecked((nuint)0x8000000000000000) : unchecked((nuint)0x80000000);
+            nuint expected = is64Bit ? unchecked((nuint)0x0800000000000000) : unchecked((nuint)0x08000000);
+
+            Vector128<nuint> input = Vector128.Create(seed);
+            Vector128<nuint> shifted = Vector128.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector128<nuint>.Count; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128SByteShiftRightLogicalTest()
+        {
+            // The input has only its top bit set; a logical shift must zero-fill,
+            // so the expected lane value is the positive 0x08.
+            sbyte expected = (sbyte)0x08;
+            Vector128<sbyte> input = Vector128.Create(unchecked((sbyte)0x80));
+            Vector128<sbyte> shifted = Vector128.ShiftRightLogical(input, 4);
+            int laneCount = Vector128<sbyte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128UInt16ShiftRightLogicalTest()
+        {
+            // Logical shift right by 4 moves the single set bit down: 0x8000 -> 0x0800.
+            Vector128<ushort> input = Vector128.Create(unchecked((ushort)0x8000));
+            Vector128<ushort> shifted = Vector128.ShiftRightLogical(input, 4);
+            int laneCount = Vector128<ushort>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((ushort)0x0800, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128UInt32ShiftRightLogicalTest()
+        {
+            // Logical shift right by 4 moves the single set bit down: 0x80000000 -> 0x08000000.
+            Vector128<uint> input = Vector128.Create(0x80000000);
+            Vector128<uint> shifted = Vector128.ShiftRightLogical(input, 4);
+            int laneCount = Vector128<uint>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((uint)0x08000000, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector128UInt64ShiftRightLogicalTest()
+        {
+            // Logical shift right by 4 moves the single set bit down by one hex digit.
+            Vector128<ulong> input = Vector128.Create(0x8000000000000000);
+            Vector128<ulong> shifted = Vector128.ShiftRightLogical(input, 4);
+            int laneCount = Vector128<ulong>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((ulong)0x0800000000000000, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128ByteStoreTest()
+        {
+            // Stack buffer pre-filled with 0..15; Store should overwrite every element with 1.
+            byte* destination = stackalloc byte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+            Vector128<byte> ones = Vector128.Create((byte)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<byte>.Count; i++)
+            {
+                Assert.Equal((byte)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128DoubleStoreTest()
+        {
+            // Stack buffer pre-filled with 0..1; Store should overwrite every element with 1.
+            double* destination = stackalloc double[] { 0, 1 };
+            Vector128<double> ones = Vector128.Create((double)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<double>.Count; i++)
+            {
+                Assert.Equal((double)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int16StoreTest()
+        {
+            // Stack buffer pre-filled with 0..7; Store should overwrite every element with 1.
+            short* destination = stackalloc short[] { 0, 1, 2, 3, 4, 5, 6, 7 };
+            Vector128<short> ones = Vector128.Create((short)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<short>.Count; i++)
+            {
+                Assert.Equal((short)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int32StoreTest()
+        {
+            // Stack buffer pre-filled with 0..3; Store should overwrite every element with 1.
+            int* destination = stackalloc int[] { 0, 1, 2, 3 };
+            Vector128<int> ones = Vector128.Create((int)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<int>.Count; i++)
+            {
+                Assert.Equal((int)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int64StoreTest()
+        {
+            // Stack buffer pre-filled with 0..1; Store should overwrite every element with 1.
+            long* destination = stackalloc long[] { 0, 1 };
+            Vector128<long> ones = Vector128.Create((long)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<long>.Count; i++)
+            {
+                Assert.Equal((long)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NIntStoreTest()
+        {
+            Vector128<nint> ones = Vector128.Create((nint)0x1);
+
+            // The buffer holds two elements in a 64-bit process and four otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                nint* destination = stackalloc nint[] { 0, 1 };
+
+                ones.Store(destination);
+
+                for (int i = 0; i < Vector128<nint>.Count; i++)
+                {
+                    Assert.Equal((nint)0x1, destination[i]);
+                }
+            }
+            else
+            {
+                nint* destination = stackalloc nint[] { 0, 1, 2, 3 };
+
+                ones.Store(destination);
+
+                for (int i = 0; i < Vector128<nint>.Count; i++)
+                {
+                    Assert.Equal((nint)0x1, destination[i]);
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NUIntStoreTest()
+        {
+            Vector128<nuint> ones = Vector128.Create((nuint)0x1);
+
+            // The buffer holds two elements in a 64-bit process and four otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                nuint* destination = stackalloc nuint[] { 0, 1 };
+
+                ones.Store(destination);
+
+                for (int i = 0; i < Vector128<nuint>.Count; i++)
+                {
+                    Assert.Equal((nuint)0x1, destination[i]);
+                }
+            }
+            else
+            {
+                nuint* destination = stackalloc nuint[] { 0, 1, 2, 3 };
+
+                ones.Store(destination);
+
+                for (int i = 0; i < Vector128<nuint>.Count; i++)
+                {
+                    Assert.Equal((nuint)0x1, destination[i]);
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SByteStoreTest()
+        {
+            // Stack buffer pre-filled with 0..15; Store should overwrite every element with 1.
+            sbyte* destination = stackalloc sbyte[] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+            Vector128<sbyte> ones = Vector128.Create((sbyte)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<sbyte>.Count; i++)
+            {
+                Assert.Equal((sbyte)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SingleStoreTest()
+        {
+            // Stack buffer pre-filled with 0..3; Store should overwrite every element with 1.
+            float* destination = stackalloc float[] { 0, 1, 2, 3 };
+            Vector128<float> ones = Vector128.Create((float)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<float>.Count; i++)
+            {
+                Assert.Equal((float)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt16StoreTest()
+        {
+            // Stack buffer pre-filled with 0..7; Store should overwrite every element with 1.
+            ushort* destination = stackalloc ushort[] { 0, 1, 2, 3, 4, 5, 6, 7 };
+            Vector128<ushort> ones = Vector128.Create((ushort)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<ushort>.Count; i++)
+            {
+                Assert.Equal((ushort)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt32StoreTest()
+        {
+            // Stack buffer pre-filled with 0..3; Store should overwrite every element with 1.
+            uint* destination = stackalloc uint[] { 0, 1, 2, 3 };
+            Vector128<uint> ones = Vector128.Create((uint)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<uint>.Count; i++)
+            {
+                Assert.Equal((uint)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt64StoreTest()
+        {
+            // Stack buffer pre-filled with 0..1; Store should overwrite every element with 1.
+            ulong* destination = stackalloc ulong[] { 0, 1 };
+            Vector128<ulong> ones = Vector128.Create((ulong)0x1);
+
+            ones.Store(destination);
+
+            for (int i = 0; i < Vector128<ulong>.Count; i++)
+            {
+                Assert.Equal((ulong)0x1, destination[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128ByteStoreAlignedTest()
+        {
+            const int ElementCount = 16;
+            byte* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (byte*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..15 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (byte)i;
+                }
+
+                Vector128.Create((byte)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<byte>.Count; i++)
+                {
+                    Assert.Equal((byte)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128DoubleStoreAlignedTest()
+        {
+            const int ElementCount = 2;
+            double* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (double*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..1 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((double)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<double>.Count; i++)
+                {
+                    Assert.Equal((double)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int16StoreAlignedTest()
+        {
+            const int ElementCount = 8;
+            short* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (short*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..7 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (short)i;
+                }
+
+                Vector128.Create((short)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<short>.Count; i++)
+                {
+                    Assert.Equal((short)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int32StoreAlignedTest()
+        {
+            const int ElementCount = 4;
+            int* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (int*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..3 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((int)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<int>.Count; i++)
+                {
+                    Assert.Equal((int)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int64StoreAlignedTest()
+        {
+            const int ElementCount = 2;
+            long* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (long*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..1 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((long)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<long>.Count; i++)
+                {
+                    Assert.Equal((long)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NIntStoreAlignedTest()
+        {
+            nint* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (nint*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Two elements are pre-filled in a 64-bit process, four otherwise.
+                int elementCount = Environment.Is64BitProcess ? 2 : 4;
+
+                for (int i = 0; i < elementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((nint)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<nint>.Count; i++)
+                {
+                    Assert.Equal((nint)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NUIntStoreAlignedTest()
+        {
+            nuint* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (nuint*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Two elements are pre-filled in a 64-bit process, four otherwise.
+                int elementCount = Environment.Is64BitProcess ? 2 : 4;
+
+                for (int i = 0; i < elementCount; i++)
+                {
+                    buffer[i] = (nuint)i;
+                }
+
+                Vector128.Create((nuint)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<nuint>.Count; i++)
+                {
+                    Assert.Equal((nuint)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SByteStoreAlignedTest()
+        {
+            const int ElementCount = 16;
+            sbyte* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..15 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (sbyte)i;
+                }
+
+                Vector128.Create((sbyte)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<sbyte>.Count; i++)
+                {
+                    Assert.Equal((sbyte)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SingleStoreAlignedTest()
+        {
+            const int ElementCount = 4;
+            float* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (float*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..3 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((float)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<float>.Count; i++)
+                {
+                    Assert.Equal((float)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt16StoreAlignedTest()
+        {
+            const int ElementCount = 8;
+            ushort* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (ushort*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..7 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (ushort)i;
+                }
+
+                Vector128.Create((ushort)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<ushort>.Count; i++)
+                {
+                    Assert.Equal((ushort)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt32StoreAlignedTest()
+        {
+            const int ElementCount = 4;
+            uint* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (uint*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..3 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (uint)i;
+                }
+
+                Vector128.Create((uint)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<uint>.Count; i++)
+                {
+                    Assert.Equal((uint)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt64StoreAlignedTest()
+        {
+            const int ElementCount = 2;
+            ulong* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (ulong*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..1 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (ulong)i;
+                }
+
+                Vector128.Create((ulong)0x1).StoreAligned(buffer);
+
+                for (int i = 0; i < Vector128<ulong>.Count; i++)
+                {
+                    Assert.Equal((ulong)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128ByteStoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 16;
+            byte* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (byte*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..15 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (byte)i;
+                }
+
+                Vector128.Create((byte)0x1).StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<byte>.Count; i++)
+                {
+                    Assert.Equal((byte)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128DoubleStoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 2;
+            double* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (double*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..1 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((double)0x1).StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<double>.Count; i++)
+                {
+                    Assert.Equal((double)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int16StoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 8;
+            short* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (short*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..7 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (short)i;
+                }
+
+                Vector128.Create((short)0x1).StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<short>.Count; i++)
+                {
+                    Assert.Equal((short)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int32StoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 4;
+            int* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (int*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..3 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((int)0x1).StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<int>.Count; i++)
+                {
+                    Assert.Equal((int)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int64StoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 2;
+            long* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (long*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..1 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((long)0x1).StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<long>.Count; i++)
+                {
+                    Assert.Equal((long)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NIntStoreAlignedNonTemporalTest()
+        {
+            nint* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (nint*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Two elements are pre-filled in a 64-bit process, four otherwise.
+                int elementCount = Environment.Is64BitProcess ? 2 : 4;
+
+                for (int i = 0; i < elementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((nint)0x1).StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<nint>.Count; i++)
+                {
+                    Assert.Equal((nint)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NUIntStoreAlignedNonTemporalTest()
+        {
+            nuint* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (nuint*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Two elements are pre-filled in a 64-bit process, four otherwise.
+                int elementCount = Environment.Is64BitProcess ? 2 : 4;
+
+                for (int i = 0; i < elementCount; i++)
+                {
+                    buffer[i] = (nuint)i;
+                }
+
+                Vector128.Create((nuint)0x1).StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<nuint>.Count; i++)
+                {
+                    Assert.Equal((nuint)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SByteStoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 16;
+            sbyte* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..15 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (sbyte)i;
+                }
+
+                Vector128.Create((sbyte)0x1).StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<sbyte>.Count; i++)
+                {
+                    Assert.Equal((sbyte)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SingleStoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 4;
+            float* buffer = null;
+
+            try
+            {
+                // 16 bytes requested with 16-byte alignment.
+                buffer = (float*)NativeMemory.AlignedAlloc(16, 16);
+
+                // Pre-fill with 0..3 before the store overwrites every element.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector128.Create((float)0x1).StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<float>.Count; i++)
+                {
+                    Assert.Equal((float)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs on every exit path, so the allocation is always paired with a free.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt16StoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 8;
+            ushort* buffer = null;
+
+            try
+            {
+                buffer = (ushort*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Seed with ascending values that the store below is expected to overwrite.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (ushort)i;
+                }
+
+                Vector128<ushort> ones = Vector128.Create((ushort)0x1);
+                ones.StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<ushort>.Count; i++)
+                {
+                    Assert.Equal((ushort)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs even when allocation failed and buffer is still null.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt32StoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 4;
+            uint* buffer = null;
+
+            try
+            {
+                buffer = (uint*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Seed with ascending values that the store below is expected to overwrite.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (uint)i;
+                }
+
+                Vector128<uint> ones = Vector128.Create((uint)0x1);
+                ones.StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<uint>.Count; i++)
+                {
+                    Assert.Equal((uint)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs even when allocation failed and buffer is still null.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt64StoreAlignedNonTemporalTest()
+        {
+            const int ElementCount = 2;
+            ulong* buffer = null;
+
+            try
+            {
+                buffer = (ulong*)NativeMemory.AlignedAlloc(byteCount: 16, alignment: 16);
+
+                // Seed with ascending values that the store below is expected to overwrite.
+                for (int i = 0; i < ElementCount; i++)
+                {
+                    buffer[i] = (ulong)i;
+                }
+
+                Vector128<ulong> ones = Vector128.Create((ulong)0x1);
+                ones.StoreAlignedNonTemporal(buffer);
+
+                for (int i = 0; i < Vector128<ulong>.Count; i++)
+                {
+                    Assert.Equal((ulong)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                // Runs even when allocation failed and buffer is still null.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128ByteStoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            byte* buffer = stackalloc byte[16] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+            Vector128<byte> ones = Vector128.Create((byte)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<byte>.Count; i++)
+            {
+                Assert.Equal((byte)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128DoubleStoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            double* buffer = stackalloc double[2] { 0, 1 };
+
+            Vector128<double> ones = Vector128.Create((double)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<double>.Count; i++)
+            {
+                Assert.Equal((double)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int16StoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            short* buffer = stackalloc short[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector128<short> ones = Vector128.Create((short)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<short>.Count; i++)
+            {
+                Assert.Equal((short)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int32StoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            int* buffer = stackalloc int[4] { 0, 1, 2, 3 };
+
+            Vector128<int> ones = Vector128.Create((int)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<int>.Count; i++)
+            {
+                Assert.Equal((int)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int64StoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            long* buffer = stackalloc long[2] { 0, 1 };
+
+            Vector128<long> ones = Vector128.Create((long)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<long>.Count; i++)
+            {
+                Assert.Equal((long)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NIntStoreUnsafeTest()
+        {
+            Vector128<nint> ones = Vector128.Create((nint)0x1);
+
+            if (!Environment.Is64BitProcess)
+            {
+                // 32-bit process: the destination holds four nint elements.
+                nint* buffer = stackalloc nint[4] { 0, 1, 2, 3 };
+
+                ones.StoreUnsafe(ref buffer[0]);
+
+                for (int i = 0; i < Vector128<nint>.Count; i++)
+                {
+                    Assert.Equal((nint)0x1, buffer[i]);
+                }
+            }
+            else
+            {
+                // 64-bit process: the destination holds two nint elements.
+                nint* buffer = stackalloc nint[2] { 0, 1 };
+
+                ones.StoreUnsafe(ref buffer[0]);
+
+                for (int i = 0; i < Vector128<nint>.Count; i++)
+                {
+                    Assert.Equal((nint)0x1, buffer[i]);
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NUIntStoreUnsafeTest()
+        {
+            Vector128<nuint> ones = Vector128.Create((nuint)0x1);
+
+            if (!Environment.Is64BitProcess)
+            {
+                // 32-bit process: the destination holds four nuint elements.
+                nuint* buffer = stackalloc nuint[4] { 0, 1, 2, 3 };
+
+                ones.StoreUnsafe(ref buffer[0]);
+
+                for (int i = 0; i < Vector128<nuint>.Count; i++)
+                {
+                    Assert.Equal((nuint)0x1, buffer[i]);
+                }
+            }
+            else
+            {
+                // 64-bit process: the destination holds two nuint elements.
+                nuint* buffer = stackalloc nuint[2] { 0, 1 };
+
+                ones.StoreUnsafe(ref buffer[0]);
+
+                for (int i = 0; i < Vector128<nuint>.Count; i++)
+                {
+                    Assert.Equal((nuint)0x1, buffer[i]);
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SByteStoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            sbyte* buffer = stackalloc sbyte[16] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
+
+            Vector128<sbyte> ones = Vector128.Create((sbyte)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<sbyte>.Count; i++)
+            {
+                Assert.Equal((sbyte)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SingleStoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            float* buffer = stackalloc float[4] { 0, 1, 2, 3 };
+
+            Vector128<float> ones = Vector128.Create((float)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<float>.Count; i++)
+            {
+                Assert.Equal((float)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt16StoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            ushort* buffer = stackalloc ushort[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector128<ushort> ones = Vector128.Create((ushort)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<ushort>.Count; i++)
+            {
+                Assert.Equal((ushort)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt32StoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            uint* buffer = stackalloc uint[4] { 0, 1, 2, 3 };
+
+            Vector128<uint> ones = Vector128.Create((uint)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<uint>.Count; i++)
+            {
+                Assert.Equal((uint)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt64StoreUnsafeTest()
+        {
+            // Ascending seed values; the store is expected to replace each of them with 1.
+            ulong* buffer = stackalloc ulong[2] { 0, 1 };
+
+            Vector128<ulong> ones = Vector128.Create((ulong)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            for (int i = 0; i < Vector128<ulong>.Count; i++)
+            {
+                Assert.Equal((ulong)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128ByteStoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            byte* buffer = stackalloc byte[16 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+
+            Vector128<byte> ones = Vector128.Create((byte)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<byte>.Count; i++)
+            {
+                Assert.Equal((byte)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128DoubleStoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            double* buffer = stackalloc double[2 + 1] { 0, 1, 2 };
+
+            Vector128<double> ones = Vector128.Create((double)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<double>.Count; i++)
+            {
+                Assert.Equal((double)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int16StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            short* buffer = stackalloc short[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            Vector128<short> ones = Vector128.Create((short)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<short>.Count; i++)
+            {
+                Assert.Equal((short)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int32StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            int* buffer = stackalloc int[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector128<int> ones = Vector128.Create((int)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<int>.Count; i++)
+            {
+                Assert.Equal((int)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128Int64StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            long* buffer = stackalloc long[2 + 1] { 0, 1, 2 };
+
+            Vector128<long> ones = Vector128.Create((long)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<long>.Count; i++)
+            {
+                Assert.Equal((long)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NIntStoreUnsafeIndexTest()
+        {
+            Vector128<nint> ones = Vector128.Create((nint)0x1);
+
+            if (!Environment.Is64BitProcess)
+            {
+                // 32-bit process: four nint elements plus one for the element offset of 1.
+                nint* buffer = stackalloc nint[4 + 1] { 0, 1, 2, 3, 4 };
+
+                ones.StoreUnsafe(ref buffer[0], 1);
+
+                for (int i = 1; i <= Vector128<nint>.Count; i++)
+                {
+                    Assert.Equal((nint)0x1, buffer[i]);
+                }
+            }
+            else
+            {
+                // 64-bit process: two nint elements plus one for the element offset of 1.
+                nint* buffer = stackalloc nint[2 + 1] { 0, 1, 2 };
+
+                ones.StoreUnsafe(ref buffer[0], 1);
+
+                for (int i = 1; i <= Vector128<nint>.Count; i++)
+                {
+                    Assert.Equal((nint)0x1, buffer[i]);
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128NUIntStoreUnsafeIndexTest()
+        {
+            Vector128<nuint> ones = Vector128.Create((nuint)0x1);
+
+            if (!Environment.Is64BitProcess)
+            {
+                // 32-bit process: four nuint elements plus one for the element offset of 1.
+                nuint* buffer = stackalloc nuint[4 + 1] { 0, 1, 2, 3, 4 };
+
+                ones.StoreUnsafe(ref buffer[0], 1);
+
+                for (int i = 1; i <= Vector128<nuint>.Count; i++)
+                {
+                    Assert.Equal((nuint)0x1, buffer[i]);
+                }
+            }
+            else
+            {
+                // 64-bit process: two nuint elements plus one for the element offset of 1.
+                nuint* buffer = stackalloc nuint[2 + 1] { 0, 1, 2 };
+
+                ones.StoreUnsafe(ref buffer[0], 1);
+
+                for (int i = 1; i <= Vector128<nuint>.Count; i++)
+                {
+                    Assert.Equal((nuint)0x1, buffer[i]);
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SByteStoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            sbyte* buffer = stackalloc sbyte[16 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16 };
+
+            Vector128<sbyte> ones = Vector128.Create((sbyte)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<sbyte>.Count; i++)
+            {
+                Assert.Equal((sbyte)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128SingleStoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            float* buffer = stackalloc float[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector128<float> ones = Vector128.Create((float)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<float>.Count; i++)
+            {
+                Assert.Equal((float)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt16StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            ushort* buffer = stackalloc ushort[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            Vector128<ushort> ones = Vector128.Create((ushort)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<ushort>.Count; i++)
+            {
+                Assert.Equal((ushort)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt32StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            uint* buffer = stackalloc uint[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector128<uint> ones = Vector128.Create((uint)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<uint>.Count; i++)
+            {
+                Assert.Equal((uint)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector128UInt64StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store starts at element offset 1.
+            ulong* buffer = stackalloc ulong[2 + 1] { 0, 1, 2 };
+
+            Vector128<ulong> ones = Vector128.Create((ulong)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // Only the elements from offset 1 onwards are checked.
+            for (int i = 1; i <= Vector128<ulong>.Count; i++)
+            {
+                Assert.Equal((ulong)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public void Vector128ByteSumTest()
+        {
+            // Every element is 1; the expected sum is 16.
+            byte total = Vector128.Sum(Vector128.Create((byte)0x01));
+            Assert.Equal((byte)16, total);
+        }
+
+        [Fact]
+        public void Vector128DoubleSumTest()
+        {
+            // Every element is 1; the expected sum is 2.
+            double total = Vector128.Sum(Vector128.Create((double)0x01));
+            Assert.Equal(2.0, total);
+        }
+
+        [Fact]
+        public void Vector128Int16SumTest()
+        {
+            // Every element is 1; the expected sum is 8.
+            short total = Vector128.Sum(Vector128.Create((short)0x01));
+            Assert.Equal((short)8, total);
+        }
+
+        [Fact]
+        public void Vector128Int32SumTest()
+        {
+            // Every element is 1; the expected sum is 4.
+            int total = Vector128.Sum(Vector128.Create((int)0x01));
+            Assert.Equal((int)4, total);
+        }
+
+        [Fact]
+        public void Vector128Int64SumTest()
+        {
+            // Every element is 1; the expected sum is 2.
+            long total = Vector128.Sum(Vector128.Create((long)0x01));
+            Assert.Equal((long)2, total);
+        }
+
+        [Fact]
+        public void Vector128NIntSumTest()
+        {
+            // Every element is 1; the expected sum is 2 in a 64-bit process and 4 otherwise.
+            nint expected = Environment.Is64BitProcess ? (nint)2 : (nint)4;
+            nint total = Vector128.Sum(Vector128.Create((nint)0x01));
+
+            Assert.Equal(expected, total);
+        }
+
+        [Fact]
+        public void Vector128NUIntSumTest()
+        {
+            // Every element is 1; the expected sum is 2 in a 64-bit process and 4 otherwise.
+            nuint expected = Environment.Is64BitProcess ? (nuint)2 : (nuint)4;
+            nuint total = Vector128.Sum(Vector128.Create((nuint)0x01));
+
+            Assert.Equal(expected, total);
+        }
+
+        [Fact]
+        public void Vector128SByteSumTest()
+        {
+            // Every element is 1; the expected sum is 16.
+            sbyte total = Vector128.Sum(Vector128.Create((sbyte)0x01));
+            Assert.Equal((sbyte)16, total);
+        }
+
+        [Fact]
+        public void Vector128SingleSumTest()
+        {
+            // Every element is 1; the expected sum is 4.
+            float total = Vector128.Sum(Vector128.Create((float)0x01));
+            Assert.Equal(4.0f, total);
+        }
+
+        [Fact]
+        public void Vector128UInt16SumTest()
+        {
+            // Every element is 1; the expected sum is 8.
+            ushort total = Vector128.Sum(Vector128.Create((ushort)0x01));
+            Assert.Equal((ushort)8, total);
+        }
+
+        [Fact]
+        public void Vector128UInt32SumTest()
+        {
+            // Every element is 1; the expected sum is 4.
+            uint total = Vector128.Sum(Vector128.Create((uint)0x01));
+            Assert.Equal((uint)4, total);
+        }
+
+        [Fact]
+        public void Vector128UInt64SumTest()
+        {
+            // Every element is 1; the expected sum is 2.
+            ulong total = Vector128.Sum(Vector128.Create((ulong)0x01));
+            Assert.Equal((ulong)2, total);
+        }
+
         [Theory]
         [InlineData(0, 0, 0, 0)]
         [InlineData(1, 1, 1, 1)]
index e14349c..13727ba 100644 (file)
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Linq;
+using System.Runtime.InteropServices;
 using Xunit;
 
 namespace System.Runtime.Intrinsics.Tests.Vectors
 {
     public sealed class Vector256Tests
     {
+        [Fact]
+        public unsafe void Vector256ByteExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between 0x01 (top bit clear) and 0x80 (top bit set).
+            Vector256<byte> input = Vector256.Create(
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80,
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80,
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80,
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80
+            );
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010_10101010_10101010_10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256DoubleExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between +1.0 and -0.0.
+            Vector256<double> input = Vector256.Create(+1.0, -0.0, +1.0, -0.0);
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b1010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256Int16ExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between 0x0001 (top bit clear) and 0x8000 (top bit set).
+            Vector256<short> input = Vector256.Create(
+                0x0001, 0x8000, 0x0001, 0x8000, 0x0001, 0x8000, 0x0001, 0x8000,
+                0x0001, 0x8000, 0x0001, 0x8000, 0x0001, 0x8000, 0x0001, 0x8000
+            ).AsInt16();
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010_10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256Int32ExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between 0x00000001 (top bit clear) and 0x80000000 (top bit set).
+            Vector256<int> input = Vector256.Create(
+                0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U,
+                0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U
+            ).AsInt32();
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256Int64ExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between a value with the top bit clear and one with it set.
+            Vector256<long> input = Vector256.Create(
+                0x0000000000000001UL, 0x8000000000000000UL,
+                0x0000000000000001UL, 0x8000000000000000UL
+            ).AsInt64();
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b1010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256NIntExtractMostSignificantBitsTest()
+        {
+            if (!Environment.Is64BitProcess)
+            {
+                // 32-bit process: built from eight 32-bit elements.
+                Vector256<nint> input = Vector256.Create(
+                    0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U,
+                    0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U
+                ).AsNInt();
+
+                uint mask = Vector256.ExtractMostSignificantBits(input);
+
+                Assert.Equal(0b10101010u, mask);
+            }
+            else
+            {
+                // 64-bit process: built from four 64-bit elements.
+                Vector256<nint> input = Vector256.Create(
+                    0x0000000000000001UL, 0x8000000000000000UL,
+                    0x0000000000000001UL, 0x8000000000000000UL
+                ).AsNInt();
+
+                uint mask = Vector256.ExtractMostSignificantBits(input);
+
+                Assert.Equal(0b1010u, mask);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NUIntExtractMostSignificantBitsTest()
+        {
+            if (!Environment.Is64BitProcess)
+            {
+                // 32-bit process: built from eight 32-bit elements.
+                Vector256<nuint> input = Vector256.Create(
+                    0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U,
+                    0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U
+                ).AsNUInt();
+
+                uint mask = Vector256.ExtractMostSignificantBits(input);
+
+                Assert.Equal(0b10101010u, mask);
+            }
+            else
+            {
+                // 64-bit process: built from four 64-bit elements.
+                Vector256<nuint> input = Vector256.Create(
+                    0x0000000000000001UL, 0x8000000000000000UL,
+                    0x0000000000000001UL, 0x8000000000000000UL
+                ).AsNUInt();
+
+                uint mask = Vector256.ExtractMostSignificantBits(input);
+
+                Assert.Equal(0b1010u, mask);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SByteExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between 0x01 (top bit clear) and 0x80 (top bit set).
+            Vector256<sbyte> input = Vector256.Create(
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80,
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80,
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80,
+                0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80
+            ).AsSByte();
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010_10101010_10101010_10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256SingleExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between +1.0f and -0.0f.
+            Vector256<float> input = Vector256.Create(
+                +1.0f, -0.0f, +1.0f, -0.0f,
+                +1.0f, -0.0f, +1.0f, -0.0f
+            );
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt16ExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between 0x0001 (top bit clear) and 0x8000 (top bit set).
+            Vector256<ushort> input = Vector256.Create(
+                0x0001, 0x8000, 0x0001, 0x8000, 0x0001, 0x8000, 0x0001, 0x8000,
+                0x0001, 0x8000, 0x0001, 0x8000, 0x0001, 0x8000, 0x0001, 0x8000
+            );
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010_10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt32ExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between 0x00000001 (top bit clear) and 0x80000000 (top bit set).
+            Vector256<uint> input = Vector256.Create(
+                0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U,
+                0x00000001U, 0x80000000U, 0x00000001U, 0x80000000U
+            );
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt64ExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between a value with the top bit clear and one with it set.
+            Vector256<ulong> input = Vector256.Create(
+                0x0000000000000001UL, 0x8000000000000000UL,
+                0x0000000000000001UL, 0x8000000000000000UL
+            );
+
+            uint mask = Vector256.ExtractMostSignificantBits(input);
+
+            Assert.Equal(0b1010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector256ByteLoadTest()
+        {
+            // Each source element holds its own index, so the loaded vector can be checked per element.
+            byte* source = stackalloc byte[32] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16, 17, 18, 19, 20, 21, 22, 23,
+                24, 25, 26, 27, 28, 29, 30, 31
+            };
+
+            Vector256<byte> loaded = Vector256.Load(source);
+
+            for (int i = 0; i < Vector256<byte>.Count; i++)
+            {
+                Assert.Equal((byte)i, loaded.GetElement(i));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256DoubleLoadTest()
+        {
+            // Each source element holds its own index, so the loaded vector can be checked per element.
+            double* source = stackalloc double[4] { 0, 1, 2, 3 };
+
+            Vector256<double> loaded = Vector256.Load(source);
+
+            for (int i = 0; i < Vector256<double>.Count; i++)
+            {
+                Assert.Equal((double)i, loaded.GetElement(i));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int16LoadTest()
+        {
+            // Each source element holds its own index, so the loaded vector can be checked per element.
+            short* source = stackalloc short[16] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15
+            };
+
+            Vector256<short> loaded = Vector256.Load(source);
+
+            for (int i = 0; i < Vector256<short>.Count; i++)
+            {
+                Assert.Equal((short)i, loaded.GetElement(i));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int32LoadTest()
+        {
+            // Eight ints on the stack, each holding its own index.
+            int* buffer = stackalloc int[8];
+
+            for (int i = 0; i < 8; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<int> loaded = Vector256.Load(buffer);
+
+            // Every lane must equal the index it was loaded from.
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                Assert.Equal(lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int64LoadTest()
+        {
+            // Four longs on the stack, each holding its own index.
+            long* buffer = stackalloc long[4];
+
+            for (int i = 0; i < 4; i++)
+            {
+                buffer[i] = (long)i;
+            }
+
+            Vector256<long> loaded = Vector256.Load(buffer);
+
+            // Every lane must equal the index it was loaded from.
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                Assert.Equal((long)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NIntLoadTest()
+        {
+            // The buffer holds 4 elements in a 64-bit process and 8 otherwise.
+            int length = Environment.Is64BitProcess ? 4 : 8;
+            nint* buffer = stackalloc nint[length];
+
+            for (int i = 0; i < length; i++)
+            {
+                buffer[i] = (nint)i;
+            }
+
+            Vector256<nint> loaded = Vector256.Load(buffer);
+
+            // Every lane must equal the index it was loaded from.
+            for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+            {
+                Assert.Equal((nint)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NUIntLoadTest()
+        {
+            // The buffer holds 4 elements in a 64-bit process and 8 otherwise.
+            int length = Environment.Is64BitProcess ? 4 : 8;
+            nuint* buffer = stackalloc nuint[length];
+
+            for (int i = 0; i < length; i++)
+            {
+                buffer[i] = (nuint)i;
+            }
+
+            Vector256<nuint> loaded = Vector256.Load(buffer);
+
+            // Every lane must equal the index it was loaded from.
+            for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+            {
+                Assert.Equal((nuint)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SByteLoadTest()
+        {
+            // Thirty-two sbytes on the stack, each holding its own index.
+            sbyte* buffer = stackalloc sbyte[32];
+
+            for (int i = 0; i < 32; i++)
+            {
+                buffer[i] = (sbyte)i;
+            }
+
+            Vector256<sbyte> loaded = Vector256.Load(buffer);
+
+            // Every lane must equal the index it was loaded from.
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SingleLoadTest()
+        {
+            // Eight floats on the stack, each holding its own index.
+            float* buffer = stackalloc float[8];
+
+            for (int i = 0; i < 8; i++)
+            {
+                buffer[i] = (float)i;
+            }
+
+            Vector256<float> loaded = Vector256.Load(buffer);
+
+            // Every lane must equal the index it was loaded from.
+            for (int lane = 0; lane < Vector256<float>.Count; lane++)
+            {
+                Assert.Equal((float)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt16LoadTest()
+        {
+            // Sixteen ushorts on the stack, each holding its own index.
+            ushort* buffer = stackalloc ushort[16];
+
+            for (int i = 0; i < 16; i++)
+            {
+                buffer[i] = (ushort)i;
+            }
+
+            Vector256<ushort> loaded = Vector256.Load(buffer);
+
+            // Every lane must equal the index it was loaded from.
+            for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+            {
+                Assert.Equal((ushort)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt32LoadTest()
+        {
+            // Eight uints on the stack, each holding its own index.
+            uint* buffer = stackalloc uint[8];
+
+            for (int i = 0; i < 8; i++)
+            {
+                buffer[i] = (uint)i;
+            }
+
+            Vector256<uint> loaded = Vector256.Load(buffer);
+
+            // Every lane must equal the index it was loaded from.
+            for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+            {
+                Assert.Equal((uint)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt64LoadTest()
+        {
+            // Four ulongs on the stack, each holding its own index.
+            ulong* buffer = stackalloc ulong[4];
+
+            for (int i = 0; i < 4; i++)
+            {
+                buffer[i] = (ulong)i;
+            }
+
+            Vector256<ulong> loaded = Vector256.Load(buffer);
+
+            // Every lane must equal the index it was loaded from.
+            for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+            {
+                Assert.Equal((ulong)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256ByteLoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            byte* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (byte*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 32; i++)
+                {
+                    buffer[i] = (byte)i;
+                }
+
+                Vector256<byte> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+                {
+                    Assert.Equal((byte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256DoubleLoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            double* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (double*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = (double)i;
+                }
+
+                Vector256<double> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<double>.Count; lane++)
+                {
+                    Assert.Equal((double)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int16LoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            short* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (short*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 16; i++)
+                {
+                    buffer[i] = (short)i;
+                }
+
+                Vector256<short> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<short>.Count; lane++)
+                {
+                    Assert.Equal((short)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int32LoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            int* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (int*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<int> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<int>.Count; lane++)
+                {
+                    Assert.Equal(lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int64LoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            long* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (long*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = (long)i;
+                }
+
+                Vector256<long> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<long>.Count; lane++)
+                {
+                    Assert.Equal((long)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NIntLoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            nint* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (nint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // 4 elements are written in a 64-bit process, 8 otherwise.
+                int length = Environment.Is64BitProcess ? 4 : 8;
+
+                for (int i = 0; i < length; i++)
+                {
+                    buffer[i] = (nint)i;
+                }
+
+                Vector256<nint> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal((nint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NUIntLoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            nuint* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (nuint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // 4 elements are written in a 64-bit process, 8 otherwise.
+                int length = Environment.Is64BitProcess ? 4 : 8;
+
+                for (int i = 0; i < length; i++)
+                {
+                    buffer[i] = (nuint)i;
+                }
+
+                Vector256<nuint> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal((nuint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SByteLoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            sbyte* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 32; i++)
+                {
+                    buffer[i] = (sbyte)i;
+                }
+
+                Vector256<sbyte> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+                {
+                    Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SingleLoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            float* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (float*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = (float)i;
+                }
+
+                Vector256<float> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<float>.Count; lane++)
+                {
+                    Assert.Equal((float)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt16LoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            ushort* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (ushort*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 16; i++)
+                {
+                    buffer[i] = (ushort)i;
+                }
+
+                Vector256<ushort> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+                {
+                    Assert.Equal((ushort)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt32LoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            uint* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (uint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = (uint)i;
+                }
+
+                Vector256<uint> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+                {
+                    Assert.Equal((uint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt64LoadAlignedTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            ulong* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (ulong*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = (ulong)i;
+                }
+
+                Vector256<ulong> loaded = Vector256.LoadAligned(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+                {
+                    Assert.Equal((ulong)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256ByteLoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            byte* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (byte*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 32; i++)
+                {
+                    buffer[i] = (byte)i;
+                }
+
+                Vector256<byte> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+                {
+                    Assert.Equal((byte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256DoubleLoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            double* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (double*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = (double)i;
+                }
+
+                Vector256<double> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<double>.Count; lane++)
+                {
+                    Assert.Equal((double)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int16LoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            short* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (short*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 16; i++)
+                {
+                    buffer[i] = (short)i;
+                }
+
+                Vector256<short> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<short>.Count; lane++)
+                {
+                    Assert.Equal((short)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int32LoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            int* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (int*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<int> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<int>.Count; lane++)
+                {
+                    Assert.Equal(lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int64LoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            long* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (long*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = (long)i;
+                }
+
+                Vector256<long> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<long>.Count; lane++)
+                {
+                    Assert.Equal((long)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NIntLoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            nint* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (nint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // 4 elements are written in a 64-bit process, 8 otherwise.
+                int length = Environment.Is64BitProcess ? 4 : 8;
+
+                for (int i = 0; i < length; i++)
+                {
+                    buffer[i] = (nint)i;
+                }
+
+                Vector256<nint> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal((nint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NUIntLoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            nuint* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (nuint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // 4 elements are written in a 64-bit process, 8 otherwise.
+                int length = Environment.Is64BitProcess ? 4 : 8;
+
+                for (int i = 0; i < length; i++)
+                {
+                    buffer[i] = (nuint)i;
+                }
+
+                Vector256<nuint> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal((nuint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SByteLoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            sbyte* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 32; i++)
+                {
+                    buffer[i] = (sbyte)i;
+                }
+
+                Vector256<sbyte> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+                {
+                    Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SingleLoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            float* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (float*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = (float)i;
+                }
+
+                Vector256<float> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<float>.Count; lane++)
+                {
+                    Assert.Equal((float)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt16LoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            ushort* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (ushort*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 16; i++)
+                {
+                    buffer[i] = (ushort)i;
+                }
+
+                Vector256<ushort> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+                {
+                    Assert.Equal((ushort)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt32LoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always release it.
+            uint* buffer = null;
+
+            try
+            {
+                // 32 bytes requested with 32-byte alignment.
+                buffer = (uint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = (uint)i;
+                }
+
+                Vector256<uint> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane must equal the index it was loaded from.
+                for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+                {
+                    Assert.Equal((uint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt64LoadAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always hand it to AlignedFree.
+            ulong* buffer = null;
+
+            try
+            {
+                buffer = (ulong*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Fill the 4 elements with their own indices (0..3).
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = (ulong)i;
+                }
+
+                Vector256<ulong> loaded = Vector256.LoadAlignedNonTemporal(buffer);
+
+                // Every lane is expected to hold its own index.
+                for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+                {
+                    Assert.Equal((ulong)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256ByteLoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..31.
+            byte* source = stackalloc byte[32] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16, 17, 18, 19, 20, 21, 22, 23,
+                24, 25, 26, 27, 28, 29, 30, 31,
+            };
+
+            Vector256<byte> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+            {
+                Assert.Equal((byte)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256DoubleLoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..3.
+            double* source = stackalloc double[4] { 0, 1, 2, 3 };
+
+            Vector256<double> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<double>.Count; lane++)
+            {
+                Assert.Equal((double)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int16LoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..15.
+            short* source = stackalloc short[16] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+            };
+
+            Vector256<short> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<short>.Count; lane++)
+            {
+                Assert.Equal((short)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int32LoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..7.
+            int* source = stackalloc int[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector256<int> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                Assert.Equal((int)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int64LoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..3.
+            long* source = stackalloc long[4] { 0, 1, 2, 3 };
+
+            Vector256<long> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                Assert.Equal((long)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NIntLoadUnsafeTest()
+        {
+            // The buffer length differs by process bitness: 4 elements on 64-bit, 8 otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                nint* source = stackalloc nint[4] { 0, 1, 2, 3 };
+
+                Vector256<nint> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+                // Each lane must equal its own index.
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal((nint)lane, loaded.GetElement(lane));
+                }
+            }
+            else
+            {
+                nint* source = stackalloc nint[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+                Vector256<nint> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+                // Each lane must equal its own index.
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal((nint)lane, loaded.GetElement(lane));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NUIntLoadUnsafeTest()
+        {
+            // The buffer length differs by process bitness: 4 elements on 64-bit, 8 otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                nuint* source = stackalloc nuint[4] { 0, 1, 2, 3 };
+
+                Vector256<nuint> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+                // Each lane must equal its own index.
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal((nuint)lane, loaded.GetElement(lane));
+                }
+            }
+            else
+            {
+                nuint* source = stackalloc nuint[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+                Vector256<nuint> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+                // Each lane must equal its own index.
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal((nuint)lane, loaded.GetElement(lane));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SByteLoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..31.
+            sbyte* source = stackalloc sbyte[32] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16, 17, 18, 19, 20, 21, 22, 23,
+                24, 25, 26, 27, 28, 29, 30, 31,
+            };
+
+            Vector256<sbyte> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SingleLoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..7.
+            float* source = stackalloc float[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector256<float> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<float>.Count; lane++)
+            {
+                Assert.Equal((float)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt16LoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..15.
+            ushort* source = stackalloc ushort[16] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+            };
+
+            Vector256<ushort> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+            {
+                Assert.Equal((ushort)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt32LoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..7.
+            uint* source = stackalloc uint[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector256<uint> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+            {
+                Assert.Equal((uint)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt64LoadUnsafeTest()
+        {
+            // Stack buffer holding the ascending sequence 0..3.
+            ulong* source = stackalloc ulong[4] { 0, 1, 2, 3 };
+
+            Vector256<ulong> loaded = Vector256.LoadUnsafe(ref source[0]);
+
+            // Each lane must equal its own index.
+            for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+            {
+                Assert.Equal((ulong)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256ByteLoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            byte* source = stackalloc byte[32 + 1] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16, 17, 18, 19, 20, 21, 22, 23,
+                24, 25, 26, 27, 28, 29, 30, 31,
+                32,
+            };
+
+            Vector256<byte> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+            {
+                Assert.Equal((byte)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256DoubleLoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            double* source = stackalloc double[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector256<double> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<double>.Count; lane++)
+            {
+                Assert.Equal((double)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int16LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            short* source = stackalloc short[16 + 1] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16,
+            };
+
+            Vector256<short> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<short>.Count; lane++)
+            {
+                Assert.Equal((short)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int32LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            int* source = stackalloc int[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            Vector256<int> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                Assert.Equal((int)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int64LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            long* source = stackalloc long[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector256<long> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                Assert.Equal((long)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NIntLoadUnsafeIndexTest()
+        {
+            // The buffer length differs by process bitness; both variants carry one extra
+            // element so the load can start at element 1.
+            if (Environment.Is64BitProcess)
+            {
+                nint* source = stackalloc nint[4 + 1] { 0, 1, 2, 3, 4 };
+
+                Vector256<nint> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+                // Each lane must equal its index plus one.
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal((nint)(lane + 1), loaded.GetElement(lane));
+                }
+            }
+            else
+            {
+                nint* source = stackalloc nint[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+                Vector256<nint> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+                // Each lane must equal its index plus one.
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal((nint)(lane + 1), loaded.GetElement(lane));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NUIntLoadUnsafeIndexTest()
+        {
+            // The buffer length differs by process bitness; both variants carry one extra
+            // element so the load can start at element 1.
+            if (Environment.Is64BitProcess)
+            {
+                nuint* source = stackalloc nuint[4 + 1] { 0, 1, 2, 3, 4 };
+
+                Vector256<nuint> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+                // Each lane must equal its index plus one.
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal((nuint)(lane + 1), loaded.GetElement(lane));
+                }
+            }
+            else
+            {
+                nuint* source = stackalloc nuint[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+                Vector256<nuint> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+                // Each lane must equal its index plus one.
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal((nuint)(lane + 1), loaded.GetElement(lane));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SByteLoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            sbyte* source = stackalloc sbyte[32 + 1] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16, 17, 18, 19, 20, 21, 22, 23,
+                24, 25, 26, 27, 28, 29, 30, 31,
+                32,
+            };
+
+            Vector256<sbyte> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                Assert.Equal((sbyte)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SingleLoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            float* source = stackalloc float[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            Vector256<float> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<float>.Count; lane++)
+            {
+                Assert.Equal((float)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt16LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            ushort* source = stackalloc ushort[16 + 1] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16,
+            };
+
+            Vector256<ushort> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+            {
+                Assert.Equal((ushort)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt32LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            uint* source = stackalloc uint[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            Vector256<uint> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+            {
+                Assert.Equal((uint)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt64LoadUnsafeIndexTest()
+        {
+            // One element more than a full vector so the load can start at element 1.
+            ulong* source = stackalloc ulong[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector256<ulong> loaded = Vector256.LoadUnsafe(ref source[0], 1);
+
+            // With an element offset of 1, each lane must equal its index plus one.
+            for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+            {
+                Assert.Equal((ulong)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256ByteShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<byte> input = Vector256.Create((byte)0x01);
+            Vector256<byte> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+            {
+                Assert.Equal((byte)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256Int16ShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<short> input = Vector256.Create((short)0x01);
+            Vector256<short> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<short>.Count; lane++)
+            {
+                Assert.Equal((short)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256Int32ShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<int> input = Vector256.Create((int)0x01);
+            Vector256<int> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                Assert.Equal((int)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256Int64ShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<long> input = Vector256.Create((long)0x01);
+            Vector256<long> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                Assert.Equal((long)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256NIntShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<nint> input = Vector256.Create((nint)0x01);
+            Vector256<nint> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+            {
+                Assert.Equal((nint)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256NUIntShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<nuint> input = Vector256.Create((nuint)0x01);
+            Vector256<nuint> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+            {
+                Assert.Equal((nuint)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256SByteShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<sbyte> input = Vector256.Create((sbyte)0x01);
+            Vector256<sbyte> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                Assert.Equal((sbyte)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256UInt16ShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<ushort> input = Vector256.Create((ushort)0x01);
+            Vector256<ushort> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+            {
+                Assert.Equal((ushort)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256UInt32ShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<uint> input = Vector256.Create((uint)0x01);
+            Vector256<uint> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+            {
+                Assert.Equal((uint)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256UInt64ShiftLeftTest()
+        {
+            // 0x01 shifted left by four bits must give 0x10 in every lane.
+            Vector256<ulong> input = Vector256.Create((ulong)0x01);
+            Vector256<ulong> shifted = Vector256.ShiftLeft(input, 4);
+
+            for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+            {
+                Assert.Equal((ulong)0x10, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256Int16ShiftRightArithmeticTest()
+        {
+            // Only the sign bit is set; an arithmetic shift by four must copy it into
+            // the vacated high bits, giving 0xF800.
+            Vector256<short> input = Vector256.Create(unchecked((short)0x8000));
+            Vector256<short> shifted = Vector256.ShiftRightArithmetic(input, 4);
+
+            for (int lane = 0; lane < Vector256<short>.Count; lane++)
+            {
+                Assert.Equal(unchecked((short)0xF800), shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256Int32ShiftRightArithmeticTest()
+        {
+            // Only the sign bit is set; an arithmetic shift by four must copy it into
+            // the vacated high bits, giving 0xF8000000.
+            Vector256<int> input = Vector256.Create(unchecked((int)0x80000000));
+            Vector256<int> shifted = Vector256.ShiftRightArithmetic(input, 4);
+
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                Assert.Equal(unchecked((int)0xF8000000), shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256Int64ShiftRightArithmeticTest()
+        {
+            // Only the sign bit is set; an arithmetic shift by four must copy it into
+            // the vacated high bits, giving 0xF800000000000000.
+            Vector256<long> input = Vector256.Create(unchecked((long)0x8000000000000000));
+            Vector256<long> shifted = Vector256.ShiftRightArithmetic(input, 4);
+
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                Assert.Equal(unchecked((long)0xF800000000000000), shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256NIntShiftRightArithmeticTest()
+        {
+            // The input and expected constants depend on process bitness: 64-bit patterns
+            // in a 64-bit process, 32-bit patterns otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                Vector256<nint> input = Vector256.Create(unchecked((nint)0x8000000000000000));
+                Vector256<nint> shifted = Vector256.ShiftRightArithmetic(input, 4);
+
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal(unchecked((nint)0xF800000000000000), shifted.GetElement(lane));
+                }
+            }
+            else
+            {
+                Vector256<nint> input = Vector256.Create(unchecked((nint)0x80000000));
+                Vector256<nint> shifted = Vector256.ShiftRightArithmetic(input, 4);
+
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal(unchecked((nint)0xF8000000), shifted.GetElement(lane));
+                }
+            }
+        }
+
+        [Fact]
+        public void Vector256SByteShiftRightArithmeticTest()
+        {
+            // Only the sign bit is set; an arithmetic shift by four must copy it into
+            // the vacated high bits, giving 0xF8.
+            Vector256<sbyte> input = Vector256.Create(unchecked((sbyte)0x80));
+            Vector256<sbyte> shifted = Vector256.ShiftRightArithmetic(input, 4);
+
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                Assert.Equal(unchecked((sbyte)0xF8), shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256ByteShiftRightLogicalTest()
+        {
+            // Only the top bit is set; a logical shift by four must zero-fill, giving 0x08.
+            Vector256<byte> input = Vector256.Create((byte)0x80);
+            Vector256<byte> shifted = Vector256.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+            {
+                Assert.Equal((byte)0x08, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256Int16ShiftRightLogicalTest()
+        {
+            // Only the top bit is set; a logical shift by four must zero-fill, giving 0x0800.
+            Vector256<short> input = Vector256.Create(unchecked((short)0x8000));
+            Vector256<short> shifted = Vector256.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector256<short>.Count; lane++)
+            {
+                Assert.Equal((short)0x0800, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256Int32ShiftRightLogicalTest()
+        {
+            // Only the top bit is set; a logical shift by four must zero-fill, giving 0x08000000.
+            Vector256<int> input = Vector256.Create(unchecked((int)0x80000000));
+            Vector256<int> shifted = Vector256.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                Assert.Equal((int)0x08000000, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256Int64ShiftRightLogicalTest()
+        {
+            // Only the top bit is set; a logical shift by four must zero-fill,
+            // giving 0x0800000000000000.
+            Vector256<long> input = Vector256.Create(unchecked((long)0x8000000000000000));
+            Vector256<long> shifted = Vector256.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                Assert.Equal((long)0x0800000000000000, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256NIntShiftRightLogicalTest()
+        {
+            // The input and expected constants depend on process bitness: 64-bit patterns
+            // in a 64-bit process, 32-bit patterns otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                Vector256<nint> input = Vector256.Create(unchecked((nint)0x8000000000000000));
+                Vector256<nint> shifted = Vector256.ShiftRightLogical(input, 4);
+
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal(unchecked((nint)0x0800000000000000), shifted.GetElement(lane));
+                }
+            }
+            else
+            {
+                Vector256<nint> input = Vector256.Create(unchecked((nint)0x80000000));
+                Vector256<nint> shifted = Vector256.ShiftRightLogical(input, 4);
+
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal(unchecked((nint)0x08000000), shifted.GetElement(lane));
+                }
+            }
+        }
+
+        [Fact]
+        public void Vector256NUIntShiftRightLogicalTest()
+        {
+            // The input and expected constants depend on process bitness: 64-bit patterns
+            // in a 64-bit process, 32-bit patterns otherwise.
+            if (Environment.Is64BitProcess)
+            {
+                Vector256<nuint> input = Vector256.Create(unchecked((nuint)0x8000000000000000));
+                Vector256<nuint> shifted = Vector256.ShiftRightLogical(input, 4);
+
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal(unchecked((nuint)0x0800000000000000), shifted.GetElement(lane));
+                }
+            }
+            else
+            {
+                Vector256<nuint> input = Vector256.Create(unchecked((nuint)0x80000000));
+                Vector256<nuint> shifted = Vector256.ShiftRightLogical(input, 4);
+
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal(unchecked((nuint)0x08000000), shifted.GetElement(lane));
+                }
+            }
+        }
+
+        [Fact]
+        public void Vector256SByteShiftRightLogicalTest()
+        {
+            // Only the top bit is set; a logical shift by four must zero-fill, giving 0x08.
+            Vector256<sbyte> input = Vector256.Create(unchecked((sbyte)0x80));
+            Vector256<sbyte> shifted = Vector256.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                Assert.Equal((sbyte)0x08, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256UInt16ShiftRightLogicalTest()
+        {
+            // Only the top bit is set; a logical shift by four must zero-fill, giving 0x0800.
+            Vector256<ushort> input = Vector256.Create(unchecked((ushort)0x8000));
+            Vector256<ushort> shifted = Vector256.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+            {
+                Assert.Equal((ushort)0x0800, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256UInt32ShiftRightLogicalTest()
+        {
+            // Only the top bit is set; a logical shift by four must zero-fill, giving 0x08000000.
+            Vector256<uint> input = Vector256.Create(0x80000000);
+            Vector256<uint> shifted = Vector256.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+            {
+                Assert.Equal((uint)0x08000000, shifted.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public void Vector256UInt64ShiftRightLogicalTest()
+        {
+            // Only the top bit is set; a logical shift by four must zero-fill,
+            // giving 0x0800000000000000.
+            Vector256<ulong> input = Vector256.Create(0x8000000000000000);
+            Vector256<ulong> shifted = Vector256.ShiftRightLogical(input, 4);
+
+            for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+            {
+                Assert.Equal((ulong)0x0800000000000000, shifted.GetElement(lane));
+            }
+        }
+
+        // Store of a broadcast byte 0x1 must overwrite every element of a stack buffer seeded with 0..31.
+        [Fact]
+        public unsafe void Vector256ByteStoreTest()
+        {
+            const byte expected = 0x1;
+
+            byte* destination = stackalloc byte[32] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16, 17, 18, 19, 20, 21, 22, 23,
+                24, 25, 26, 27, 28, 29, 30, 31,
+            };
+
+            Vector256<byte> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // Store of a broadcast double 1 must overwrite every element of a stack buffer seeded with 0..3.
+        [Fact]
+        public unsafe void Vector256DoubleStoreTest()
+        {
+            const double expected = 0x1;
+
+            double* destination = stackalloc double[4] { 0, 1, 2, 3 };
+
+            Vector256<double> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<double>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // Store of a broadcast short 0x1 must overwrite every element of a stack buffer seeded with 0..15.
+        [Fact]
+        public unsafe void Vector256Int16StoreTest()
+        {
+            const short expected = 0x1;
+
+            short* destination = stackalloc short[16] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+            };
+
+            Vector256<short> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<short>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // Store of a broadcast int 0x1 must overwrite every element of a stack buffer seeded with 0..7.
+        [Fact]
+        public unsafe void Vector256Int32StoreTest()
+        {
+            const int expected = 0x1;
+
+            int* destination = stackalloc int[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector256<int> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<int>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // Store of a broadcast long 0x1 must overwrite every element of a stack buffer seeded with 0..3.
+        [Fact]
+        public unsafe void Vector256Int64StoreTest()
+        {
+            const long expected = 0x1;
+
+            long* destination = stackalloc long[4] { 0, 1, 2, 3 };
+
+            Vector256<long> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<long>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // Store of a broadcast nint 0x1 must overwrite every element of a stack buffer.
+        // The buffer holds 4 elements in a 64-bit process and 8 elements otherwise.
+        [Fact]
+        public unsafe void Vector256NIntStoreTest()
+        {
+            nint expected = 0x1;
+
+            if (!Environment.Is64BitProcess)
+            {
+                nint* destination = stackalloc nint[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+                Vector256<nint> source = Vector256.Create(expected);
+                source.Store(destination);
+
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal(expected, destination[lane]);
+                }
+            }
+            else
+            {
+                nint* destination = stackalloc nint[4] { 0, 1, 2, 3 };
+
+                Vector256<nint> source = Vector256.Create(expected);
+                source.Store(destination);
+
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal(expected, destination[lane]);
+                }
+            }
+        }
+
+        // Store of a broadcast nuint 0x1 must overwrite every element of a stack buffer.
+        // The buffer holds 4 elements in a 64-bit process and 8 elements otherwise.
+        [Fact]
+        public unsafe void Vector256NUIntStoreTest()
+        {
+            nuint expected = 0x1;
+
+            if (!Environment.Is64BitProcess)
+            {
+                nuint* destination = stackalloc nuint[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+                Vector256<nuint> source = Vector256.Create(expected);
+                source.Store(destination);
+
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal(expected, destination[lane]);
+                }
+            }
+            else
+            {
+                nuint* destination = stackalloc nuint[4] { 0, 1, 2, 3 };
+
+                Vector256<nuint> source = Vector256.Create(expected);
+                source.Store(destination);
+
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal(expected, destination[lane]);
+                }
+            }
+        }
+
+        // Store of a broadcast sbyte 0x1 must overwrite every element of a stack buffer seeded with 0..31.
+        [Fact]
+        public unsafe void Vector256SByteStoreTest()
+        {
+            const sbyte expected = 0x1;
+
+            sbyte* destination = stackalloc sbyte[32] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+                16, 17, 18, 19, 20, 21, 22, 23,
+                24, 25, 26, 27, 28, 29, 30, 31,
+            };
+
+            Vector256<sbyte> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // Store of a broadcast float 1 must overwrite every element of a stack buffer seeded with 0..7.
+        [Fact]
+        public unsafe void Vector256SingleStoreTest()
+        {
+            const float expected = 0x1;
+
+            float* destination = stackalloc float[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector256<float> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<float>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // Store of a broadcast ushort 0x1 must overwrite every element of a stack buffer seeded with 0..15.
+        [Fact]
+        public unsafe void Vector256UInt16StoreTest()
+        {
+            const ushort expected = 0x1;
+
+            ushort* destination = stackalloc ushort[16] {
+                0, 1, 2, 3, 4, 5, 6, 7,
+                8, 9, 10, 11, 12, 13, 14, 15,
+            };
+
+            Vector256<ushort> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // Store of a broadcast uint 0x1 must overwrite every element of a stack buffer seeded with 0..7.
+        [Fact]
+        public unsafe void Vector256UInt32StoreTest()
+        {
+            const uint expected = 0x1;
+
+            uint* destination = stackalloc uint[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector256<uint> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // Store of a broadcast ulong 0x1 must overwrite every element of a stack buffer seeded with 0..3.
+        [Fact]
+        public unsafe void Vector256UInt64StoreTest()
+        {
+            const ulong expected = 0x1;
+
+            ulong* destination = stackalloc ulong[4] { 0, 1, 2, 3 };
+
+            Vector256<ulong> source = Vector256.Create(expected);
+            source.Store(destination);
+
+            for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+            {
+                Assert.Equal(expected, destination[lane]);
+            }
+        }
+
+        // StoreAligned of a broadcast byte 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256ByteStoreAlignedTest()
+        {
+            const byte expected = 0x1;
+            byte* buffer = null;
+
+            try
+            {
+                buffer = (byte*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..31 before storing.
+                for (int i = 0; i < 32; i++)
+                {
+                    buffer[i] = (byte)i;
+                }
+
+                Vector256<byte> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast double 1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256DoubleStoreAlignedTest()
+        {
+            const double expected = 0x1;
+            double* buffer = null;
+
+            try
+            {
+                buffer = (double*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..3 before storing.
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<double> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<double>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast short 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256Int16StoreAlignedTest()
+        {
+            const short expected = 0x1;
+            short* buffer = null;
+
+            try
+            {
+                buffer = (short*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..15 before storing.
+                for (int i = 0; i < 16; i++)
+                {
+                    buffer[i] = (short)i;
+                }
+
+                Vector256<short> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<short>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast int 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256Int32StoreAlignedTest()
+        {
+            const int expected = 0x1;
+            int* buffer = null;
+
+            try
+            {
+                buffer = (int*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..7 before storing.
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<int> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<int>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast long 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256Int64StoreAlignedTest()
+        {
+            const long expected = 0x1;
+            long* buffer = null;
+
+            try
+            {
+                buffer = (long*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..3 before storing.
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<long> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<long>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast nint 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        // 4 elements are seeded in a 64-bit process and 8 elements otherwise.
+        [Fact]
+        public unsafe void Vector256NIntStoreAlignedTest()
+        {
+            nint expected = 0x1;
+            nint* buffer = null;
+
+            try
+            {
+                buffer = (nint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                int seedCount = Environment.Is64BitProcess ? 4 : 8;
+
+                // Seed the buffer with 0..seedCount-1 before storing.
+                for (int i = 0; i < seedCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<nint> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast nuint 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        // 4 elements are seeded in a 64-bit process and 8 elements otherwise.
+        [Fact]
+        public unsafe void Vector256NUIntStoreAlignedTest()
+        {
+            nuint expected = 0x1;
+            nuint* buffer = null;
+
+            try
+            {
+                buffer = (nuint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                int seedCount = Environment.Is64BitProcess ? 4 : 8;
+
+                // Seed the buffer with 0..seedCount-1 before storing.
+                for (int i = 0; i < seedCount; i++)
+                {
+                    buffer[i] = (nuint)i;
+                }
+
+                Vector256<nuint> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast sbyte 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256SByteStoreAlignedTest()
+        {
+            const sbyte expected = 0x1;
+            sbyte* buffer = null;
+
+            try
+            {
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..31 before storing.
+                for (int i = 0; i < 32; i++)
+                {
+                    buffer[i] = (sbyte)i;
+                }
+
+                Vector256<sbyte> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast float 1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256SingleStoreAlignedTest()
+        {
+            const float expected = 0x1;
+            float* buffer = null;
+
+            try
+            {
+                buffer = (float*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..7 before storing.
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<float> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<float>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast ushort 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256UInt16StoreAlignedTest()
+        {
+            const ushort expected = 0x1;
+            ushort* buffer = null;
+
+            try
+            {
+                buffer = (ushort*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..15 before storing.
+                for (int i = 0; i < 16; i++)
+                {
+                    buffer[i] = (ushort)i;
+                }
+
+                Vector256<ushort> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<ushort>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast uint 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256UInt32StoreAlignedTest()
+        {
+            const uint expected = 0x1;
+            uint* buffer = null;
+
+            try
+            {
+                buffer = (uint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..7 before storing.
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = (uint)i;
+                }
+
+                Vector256<uint> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<uint>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAligned of a broadcast ulong 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256UInt64StoreAlignedTest()
+        {
+            const ulong expected = 0x1;
+            ulong* buffer = null;
+
+            try
+            {
+                buffer = (ulong*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..3 before storing.
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = (ulong)i;
+                }
+
+                Vector256<ulong> source = Vector256.Create(expected);
+                source.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector256<ulong>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAlignedNonTemporal of a broadcast byte 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256ByteStoreAlignedNonTemporalTest()
+        {
+            const byte expected = 0x1;
+            byte* buffer = null;
+
+            try
+            {
+                buffer = (byte*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..31 before storing.
+                for (int i = 0; i < 32; i++)
+                {
+                    buffer[i] = (byte)i;
+                }
+
+                Vector256<byte> source = Vector256.Create(expected);
+                source.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector256<byte>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAlignedNonTemporal of a broadcast double 1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256DoubleStoreAlignedNonTemporalTest()
+        {
+            const double expected = 0x1;
+            double* buffer = null;
+
+            try
+            {
+                buffer = (double*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..3 before storing.
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<double> source = Vector256.Create(expected);
+                source.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector256<double>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAlignedNonTemporal of a broadcast short 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256Int16StoreAlignedNonTemporalTest()
+        {
+            const short expected = 0x1;
+            short* buffer = null;
+
+            try
+            {
+                buffer = (short*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..15 before storing.
+                for (int i = 0; i < 16; i++)
+                {
+                    buffer[i] = (short)i;
+                }
+
+                Vector256<short> source = Vector256.Create(expected);
+                source.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector256<short>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAlignedNonTemporal of a broadcast int 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256Int32StoreAlignedNonTemporalTest()
+        {
+            const int expected = 0x1;
+            int* buffer = null;
+
+            try
+            {
+                buffer = (int*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..7 before storing.
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<int> source = Vector256.Create(expected);
+                source.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector256<int>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAlignedNonTemporal of a broadcast long 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256Int64StoreAlignedNonTemporalTest()
+        {
+            const long expected = 0x1;
+            long* buffer = null;
+
+            try
+            {
+                buffer = (long*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..3 before storing.
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<long> source = Vector256.Create(expected);
+                source.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector256<long>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAlignedNonTemporal of a broadcast nint 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        // 4 elements are seeded in a 64-bit process and 8 elements otherwise.
+        [Fact]
+        public unsafe void Vector256NIntStoreAlignedNonTemporalTest()
+        {
+            nint expected = 0x1;
+            nint* buffer = null;
+
+            try
+            {
+                buffer = (nint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                int seedCount = Environment.Is64BitProcess ? 4 : 8;
+
+                // Seed the buffer with 0..seedCount-1 before storing.
+                for (int i = 0; i < seedCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<nint> source = Vector256.Create(expected);
+                source.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector256<nint>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAlignedNonTemporal of a broadcast nuint 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        // 4 elements are seeded in a 64-bit process and 8 elements otherwise.
+        [Fact]
+        public unsafe void Vector256NUIntStoreAlignedNonTemporalTest()
+        {
+            nuint expected = 0x1;
+            nuint* buffer = null;
+
+            try
+            {
+                buffer = (nuint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                int seedCount = Environment.Is64BitProcess ? 4 : 8;
+
+                // Seed the buffer with 0..seedCount-1 before storing.
+                for (int i = 0; i < seedCount; i++)
+                {
+                    buffer[i] = (nuint)i;
+                }
+
+                Vector256<nuint> source = Vector256.Create(expected);
+                source.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector256<nuint>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAlignedNonTemporal of a broadcast sbyte 0x1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256SByteStoreAlignedNonTemporalTest()
+        {
+            const sbyte expected = 0x1;
+            sbyte* buffer = null;
+
+            try
+            {
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..31 before storing.
+                for (int i = 0; i < 32; i++)
+                {
+                    buffer[i] = (sbyte)i;
+                }
+
+                Vector256<sbyte> source = Vector256.Create(expected);
+                source.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector256<sbyte>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        // StoreAlignedNonTemporal of a broadcast float 1 must overwrite every element of a
+        // 32-byte buffer obtained from NativeMemory.AlignedAlloc with alignment 32.
+        [Fact]
+        public unsafe void Vector256SingleStoreAlignedNonTemporalTest()
+        {
+            const float expected = 0x1;
+            float* buffer = null;
+
+            try
+            {
+                buffer = (float*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed the buffer with 0..7 before storing.
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector256<float> source = Vector256.Create(expected);
+                source.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector256<float>.Count; lane++)
+                {
+                    Assert.Equal(expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // buffer is still null here if the allocation itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt16StoreAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always pass it to AlignedFree.
+            ushort* buffer = null;
+
+            try
+            {
+                // 32 bytes with 32-byte alignment: room for exactly one Vector256<ushort>.
+                buffer = (ushort*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed each element with its own index so the store below visibly overwrites it.
+                for (int i = 0; i < 16; i++)
+                {
+                    buffer[i] = (ushort)i;
+                }
+
+                Vector256<ushort> ones = Vector256.Create((ushort)0x1);
+                ones.StoreAlignedNonTemporal(buffer);
+
+                // Every element must now hold the broadcast value.
+                for (int i = 0; i < Vector256<ushort>.Count; i++)
+                {
+                    Assert.Equal((ushort)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt32StoreAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always pass it to AlignedFree.
+            uint* buffer = null;
+
+            try
+            {
+                // 32 bytes with 32-byte alignment: room for exactly one Vector256<uint>.
+                buffer = (uint*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed each element with its own index so the store below visibly overwrites it.
+                for (int i = 0; i < 8; i++)
+                {
+                    buffer[i] = (uint)i;
+                }
+
+                Vector256<uint> ones = Vector256.Create((uint)0x1);
+                ones.StoreAlignedNonTemporal(buffer);
+
+                // Every element must now hold the broadcast value.
+                for (int i = 0; i < Vector256<uint>.Count; i++)
+                {
+                    Assert.Equal((uint)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt64StoreAlignedNonTemporalTest()
+        {
+            // Declared outside the try so the finally block can always pass it to AlignedFree.
+            ulong* buffer = null;
+
+            try
+            {
+                // 32 bytes with 32-byte alignment: room for exactly one Vector256<ulong>.
+                buffer = (ulong*)NativeMemory.AlignedAlloc(byteCount: 32, alignment: 32);
+
+                // Seed each element with its own index so the store below visibly overwrites it.
+                for (int i = 0; i < 4; i++)
+                {
+                    buffer[i] = (ulong)i;
+                }
+
+                Vector256<ulong> ones = Vector256.Create((ulong)0x1);
+                ones.StoreAlignedNonTemporal(buffer);
+
+                // Every element must now hold the broadcast value.
+                for (int i = 0; i < Vector256<ulong>.Count; i++)
+                {
+                    Assert.Equal((ulong)0x1, buffer[i]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256ByteStoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            byte* buffer = stackalloc byte[32];
+
+            for (int i = 0; i < 32; i++)
+            {
+                buffer[i] = (byte)i;
+            }
+
+            Vector256<byte> ones = Vector256.Create((byte)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<byte>.Count; i++)
+            {
+                Assert.Equal((byte)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256DoubleStoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            double* buffer = stackalloc double[4];
+
+            for (int i = 0; i < 4; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<double> ones = Vector256.Create((double)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<double>.Count; i++)
+            {
+                Assert.Equal((double)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int16StoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            short* buffer = stackalloc short[16];
+
+            for (int i = 0; i < 16; i++)
+            {
+                buffer[i] = (short)i;
+            }
+
+            Vector256<short> ones = Vector256.Create((short)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<short>.Count; i++)
+            {
+                Assert.Equal((short)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int32StoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            int* buffer = stackalloc int[8];
+
+            for (int i = 0; i < 8; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<int> ones = Vector256.Create((int)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<int>.Count; i++)
+            {
+                Assert.Equal((int)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int64StoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            long* buffer = stackalloc long[4];
+
+            for (int i = 0; i < 4; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<long> ones = Vector256.Create((long)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<long>.Count; i++)
+            {
+                Assert.Equal((long)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NIntStoreUnsafeTest()
+        {
+            // The buffer holds 4 elements in a 64-bit process and 8 otherwise.
+            int length = Environment.Is64BitProcess ? 4 : 8;
+            nint* buffer = stackalloc nint[length];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < length; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<nint> ones = Vector256.Create((nint)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<nint>.Count; i++)
+            {
+                Assert.Equal((nint)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NUIntStoreUnsafeTest()
+        {
+            // The buffer holds 4 elements in a 64-bit process and 8 otherwise.
+            int length = Environment.Is64BitProcess ? 4 : 8;
+            nuint* buffer = stackalloc nuint[length];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < length; i++)
+            {
+                buffer[i] = (nuint)i;
+            }
+
+            Vector256<nuint> ones = Vector256.Create((nuint)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<nuint>.Count; i++)
+            {
+                Assert.Equal((nuint)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SByteStoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            sbyte* buffer = stackalloc sbyte[32];
+
+            for (int i = 0; i < 32; i++)
+            {
+                buffer[i] = (sbyte)i;
+            }
+
+            Vector256<sbyte> ones = Vector256.Create((sbyte)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<sbyte>.Count; i++)
+            {
+                Assert.Equal((sbyte)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SingleStoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            float* buffer = stackalloc float[8];
+
+            for (int i = 0; i < 8; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<float> ones = Vector256.Create((float)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<float>.Count; i++)
+            {
+                Assert.Equal((float)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt16StoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            ushort* buffer = stackalloc ushort[16];
+
+            for (int i = 0; i < 16; i++)
+            {
+                buffer[i] = (ushort)i;
+            }
+
+            Vector256<ushort> ones = Vector256.Create((ushort)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<ushort>.Count; i++)
+            {
+                Assert.Equal((ushort)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt32StoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            uint* buffer = stackalloc uint[8];
+
+            for (int i = 0; i < 8; i++)
+            {
+                buffer[i] = (uint)i;
+            }
+
+            Vector256<uint> ones = Vector256.Create((uint)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<uint>.Count; i++)
+            {
+                Assert.Equal((uint)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt64StoreUnsafeTest()
+        {
+            // Seed each element with its own index so the store below visibly overwrites it.
+            ulong* buffer = stackalloc ulong[4];
+
+            for (int i = 0; i < 4; i++)
+            {
+                buffer[i] = (ulong)i;
+            }
+
+            Vector256<ulong> ones = Vector256.Create((ulong)0x1);
+            ones.StoreUnsafe(ref buffer[0]);
+
+            // Every element must now hold the broadcast value.
+            for (int i = 0; i < Vector256<ulong>.Count; i++)
+            {
+                Assert.Equal((ulong)0x1, buffer[i]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256ByteStoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            byte* buffer = stackalloc byte[32 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 32 + 1; i++)
+            {
+                buffer[i] = (byte)i;
+            }
+
+            Vector256<byte> ones = Vector256.Create((byte)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<byte>.Count; i++)
+            {
+                Assert.Equal((byte)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256DoubleStoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            double* buffer = stackalloc double[4 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 4 + 1; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<double> ones = Vector256.Create((double)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<double>.Count; i++)
+            {
+                Assert.Equal((double)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int16StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            short* buffer = stackalloc short[16 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 16 + 1; i++)
+            {
+                buffer[i] = (short)i;
+            }
+
+            Vector256<short> ones = Vector256.Create((short)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<short>.Count; i++)
+            {
+                Assert.Equal((short)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int32StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            int* buffer = stackalloc int[8 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 8 + 1; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<int> ones = Vector256.Create((int)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<int>.Count; i++)
+            {
+                Assert.Equal((int)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256Int64StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            long* buffer = stackalloc long[4 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 4 + 1; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<long> ones = Vector256.Create((long)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<long>.Count; i++)
+            {
+                Assert.Equal((long)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NIntStoreUnsafeIndexTest()
+        {
+            // 4 vector elements in a 64-bit process and 8 otherwise, plus one because the store is made at element offset 1.
+            int length = (Environment.Is64BitProcess ? 4 : 8) + 1;
+            nint* buffer = stackalloc nint[length];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < length; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<nint> ones = Vector256.Create((nint)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<nint>.Count; i++)
+            {
+                Assert.Equal((nint)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256NUIntStoreUnsafeIndexTest()
+        {
+            // 4 vector elements in a 64-bit process and 8 otherwise, plus one because the store is made at element offset 1.
+            int length = (Environment.Is64BitProcess ? 4 : 8) + 1;
+            nuint* buffer = stackalloc nuint[length];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < length; i++)
+            {
+                buffer[i] = (nuint)i;
+            }
+
+            Vector256<nuint> ones = Vector256.Create((nuint)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<nuint>.Count; i++)
+            {
+                Assert.Equal((nuint)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SByteStoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            sbyte* buffer = stackalloc sbyte[32 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 32 + 1; i++)
+            {
+                buffer[i] = (sbyte)i;
+            }
+
+            Vector256<sbyte> ones = Vector256.Create((sbyte)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<sbyte>.Count; i++)
+            {
+                Assert.Equal((sbyte)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256SingleStoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            float* buffer = stackalloc float[8 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 8 + 1; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector256<float> ones = Vector256.Create((float)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<float>.Count; i++)
+            {
+                Assert.Equal((float)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt16StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            ushort* buffer = stackalloc ushort[16 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 16 + 1; i++)
+            {
+                buffer[i] = (ushort)i;
+            }
+
+            Vector256<ushort> ones = Vector256.Create((ushort)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<ushort>.Count; i++)
+            {
+                Assert.Equal((ushort)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt32StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            uint* buffer = stackalloc uint[8 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 8 + 1; i++)
+            {
+                buffer[i] = (uint)i;
+            }
+
+            Vector256<uint> ones = Vector256.Create((uint)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<uint>.Count; i++)
+            {
+                Assert.Equal((uint)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector256UInt64StoreUnsafeIndexTest()
+        {
+            // One element more than a full vector, because the store is made at element offset 1.
+            ulong* buffer = stackalloc ulong[4 + 1];
+
+            // Seed each element with its own index so the store below visibly overwrites it.
+            for (int i = 0; i < 4 + 1; i++)
+            {
+                buffer[i] = (ulong)i;
+            }
+
+            Vector256<ulong> ones = Vector256.Create((ulong)0x1);
+            ones.StoreUnsafe(ref buffer[0], 1);
+
+            // The stored elements are checked starting one past the beginning of the buffer.
+            for (int i = 0; i < Vector256<ulong>.Count; i++)
+            {
+                Assert.Equal((ulong)0x1, buffer[i + 1]);
+            }
+        }
+
+        [Fact]
+        public void Vector256ByteSumTest()
+        {
+            // Every element is 1, so the sum equals the element count (32).
+            byte sum = Vector256.Sum(Vector256.Create((byte)0x01));
+            Assert.Equal((byte)32, sum);
+        }
+
+        [Fact]
+        public void Vector256DoubleSumTest()
+        {
+            // Every element is 1, so the sum equals the element count (4).
+            double sum = Vector256.Sum(Vector256.Create((double)0x01));
+            Assert.Equal(4.0, sum);
+        }
+
+        [Fact]
+        public void Vector256Int16SumTest()
+        {
+            // Every element is 1, so the sum equals the element count (16).
+            short sum = Vector256.Sum(Vector256.Create((short)0x01));
+            Assert.Equal((short)16, sum);
+        }
+
+        [Fact]
+        public void Vector256Int32SumTest()
+        {
+            // Every element is 1, so the sum equals the element count (8).
+            int sum = Vector256.Sum(Vector256.Create((int)0x01));
+            Assert.Equal((int)8, sum);
+        }
+
+        [Fact]
+        public void Vector256Int64SumTest()
+        {
+            // Every element is 1, so the sum equals the element count (4).
+            long sum = Vector256.Sum(Vector256.Create((long)0x01));
+            Assert.Equal((long)4, sum);
+        }
+
+        [Fact]
+        public void Vector256NIntSumTest()
+        {
+            Vector256<nint> ones = Vector256.Create((nint)0x01);
+
+            // Every element is 1; the expected total is 4 in a 64-bit process and 8 otherwise.
+            nint expected = Environment.Is64BitProcess ? (nint)4 : (nint)8;
+            Assert.Equal(expected, Vector256.Sum(ones));
+        }
+
+        [Fact]
+        public void Vector256NUIntSumTest()
+        {
+            Vector256<nuint> ones = Vector256.Create((nuint)0x01);
+
+            // Every element is 1; the expected total is 4 in a 64-bit process and 8 otherwise.
+            nuint expected = Environment.Is64BitProcess ? (nuint)4 : (nuint)8;
+            Assert.Equal(expected, Vector256.Sum(ones));
+        }
+
+        [Fact]
+        public void Vector256SByteSumTest()
+        {
+            // Every element is 1, so the sum equals the element count (32).
+            sbyte sum = Vector256.Sum(Vector256.Create((sbyte)0x01));
+            Assert.Equal((sbyte)32, sum);
+        }
+
+        [Fact]
+        public void Vector256SingleSumTest()
+        {
+            // Every element is 1, so the sum equals the element count (8).
+            float sum = Vector256.Sum(Vector256.Create((float)0x01));
+            Assert.Equal(8.0f, sum);
+        }
+
+        [Fact]
+        public void Vector256UInt16SumTest()
+        {
+            // Every element is 1, so the sum equals the element count (16).
+            ushort sum = Vector256.Sum(Vector256.Create((ushort)0x01));
+            Assert.Equal((ushort)16, sum);
+        }
+
+        [Fact]
+        public void Vector256UInt32SumTest()
+        {
+            // Every element is 1, so the sum equals the element count (8).
+            uint sum = Vector256.Sum(Vector256.Create((uint)0x01));
+            Assert.Equal((uint)8, sum);
+        }
+
+        [Fact]
+        public void Vector256UInt64SumTest()
+        {
+            // Every element is 1, so the sum equals the element count (4).
+            ulong sum = Vector256.Sum(Vector256.Create((ulong)0x01));
+            Assert.Equal((ulong)4, sum);
+        }
+
         [Theory]
         [InlineData(0, 0, 0, 0, 0, 0, 0, 0)]
         [InlineData(1, 1, 1, 1, 1, 1, 1, 1)]
index 49cd5b6..30297c8 100644 (file)
 // Licensed to the .NET Foundation under one or more agreements.
 // The .NET Foundation licenses this file to you under the MIT license.
 
-using System.Linq;
+using System.Runtime.InteropServices;
 using Xunit;
 
 namespace System.Runtime.Intrinsics.Tests.Vectors
 {
     public sealed class Vector64Tests
     {
+        [Fact]
+        public unsafe void Vector64ByteExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between top bit clear (0x01) and top bit set (0x80), starting with clear.
+            Vector64<byte> input = Vector64.Create(0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80);
+
+            // Only the odd-numbered elements contribute a set bit to the mask.
+            uint mask = Vector64.ExtractMostSignificantBits(input);
+            Assert.Equal(0b10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector64DoubleExtractMostSignificantBitsTest()
+        {
+            // A positive value yields an empty mask.
+            Vector64<double> positiveOne = Vector64.Create(+1.0);
+            Assert.Equal(0b0u, Vector64.ExtractMostSignificantBits(positiveOne));
+
+            // Negative zero is expected to set bit 0 of the mask.
+            Vector64<double> negativeZero = Vector64.Create(-0.0);
+            Assert.Equal(0b1u, Vector64.ExtractMostSignificantBits(negativeZero));
+        }
+
+        [Fact]
+        public unsafe void Vector64Int16ExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between top bit clear (0x0001) and top bit set (0x8000), then are reinterpreted as Int16.
+            Vector64<short> input = Vector64.Create(0x0001, 0x8000, 0x0001, 0x8000).AsInt16();
+
+            // Only the odd-numbered elements contribute a set bit to the mask.
+            uint mask = Vector64.ExtractMostSignificantBits(input);
+            Assert.Equal(0b1010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector64Int32ExtractMostSignificantBitsTest()
+        {
+            // Element 0 has its top bit clear, element 1 has it set; the pair is reinterpreted as Int32.
+            Vector64<int> input = Vector64.Create(0x00000001U, 0x80000000U).AsInt32();
+
+            uint mask = Vector64.ExtractMostSignificantBits(input);
+            Assert.Equal(0b10u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector64Int64ExtractMostSignificantBitsTest()
+        {
+            // Top bit clear: the mask is expected to be empty.
+            Vector64<long> topBitClear = Vector64.Create(0x0000000000000001UL).AsInt64();
+            Assert.Equal(0b0u, Vector64.ExtractMostSignificantBits(topBitClear));
+
+            // Top bit set: bit 0 of the mask is expected.
+            Vector64<long> topBitSet = Vector64.Create(0x8000000000000000UL).AsInt64();
+            Assert.Equal(0b1u, Vector64.ExtractMostSignificantBits(topBitSet));
+        }
+
+        [Fact]
+        public unsafe void Vector64NIntExtractMostSignificantBitsTest()
+        {
+            if (!Environment.Is64BitProcess)
+            {
+                // Outside a 64-bit process the input is built from two 32-bit values; only the second has its top bit set.
+                Vector64<nint> pair = Vector64.Create(0x00000001U, 0x80000000U).AsNInt();
+                Assert.Equal(0b10u, Vector64.ExtractMostSignificantBits(pair));
+                return;
+            }
+
+            // In a 64-bit process the input is built from one 64-bit value: first with the top bit clear...
+            Vector64<nint> topBitClear = Vector64.Create(0x0000000000000001UL).AsNInt();
+            Assert.Equal(0b0u, Vector64.ExtractMostSignificantBits(topBitClear));
+
+            // ...then with the top bit set.
+            Vector64<nint> topBitSet = Vector64.Create(0x8000000000000000UL).AsNInt();
+            Assert.Equal(0b1u, Vector64.ExtractMostSignificantBits(topBitSet));
+        }
+
+        [Fact]
+        public unsafe void Vector64NUIntExtractMostSignificantBitsTest()
+        {
+            if (!Environment.Is64BitProcess)
+            {
+                // Outside a 64-bit process the input is built from two 32-bit values; only the second has its top bit set.
+                Vector64<nuint> pair = Vector64.Create(0x00000001U, 0x80000000U).AsNUInt();
+                Assert.Equal(0b10u, Vector64.ExtractMostSignificantBits(pair));
+                return;
+            }
+
+            // In a 64-bit process the input is built from one 64-bit value: first with the top bit clear...
+            Vector64<nuint> topBitClear = Vector64.Create(0x0000000000000001UL).AsNUInt();
+            Assert.Equal(0b0u, Vector64.ExtractMostSignificantBits(topBitClear));
+
+            // ...then with the top bit set.
+            Vector64<nuint> topBitSet = Vector64.Create(0x8000000000000000UL).AsNUInt();
+            Assert.Equal(0b1u, Vector64.ExtractMostSignificantBits(topBitSet));
+        }
+
+        [Fact]
+        public unsafe void Vector64SByteExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between top bit clear (0x01) and top bit set (0x80), then are reinterpreted as SByte.
+            Vector64<sbyte> input = Vector64.Create(0x01, 0x80, 0x01, 0x80, 0x01, 0x80, 0x01, 0x80).AsSByte();
+
+            // Only the odd-numbered elements contribute a set bit to the mask.
+            uint mask = Vector64.ExtractMostSignificantBits(input);
+            Assert.Equal(0b10101010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector64SingleExtractMostSignificantBitsTest()
+        {
+            // Element 0 is positive; element 1 is negative zero and is expected to set bit 1 of the mask.
+            Vector64<float> input = Vector64.Create(+1.0f, -0.0f);
+
+            uint mask = Vector64.ExtractMostSignificantBits(input);
+            Assert.Equal(0b10u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt16ExtractMostSignificantBitsTest()
+        {
+            // Elements alternate between top bit clear (0x0001) and top bit set (0x8000), starting with clear.
+            Vector64<ushort> input = Vector64.Create(0x0001, 0x8000, 0x0001, 0x8000);
+
+            // Only the odd-numbered elements contribute a set bit to the mask.
+            uint mask = Vector64.ExtractMostSignificantBits(input);
+            Assert.Equal(0b1010u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt32ExtractMostSignificantBitsTest()
+        {
+            // Element 0 has its top bit clear, element 1 has it set.
+            Vector64<uint> input = Vector64.Create(0x00000001U, 0x80000000U);
+
+            uint mask = Vector64.ExtractMostSignificantBits(input);
+            Assert.Equal(0b10u, mask);
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt64ExtractMostSignificantBitsTest()
+        {
+            // Top bit clear: the mask is expected to be empty.
+            Vector64<ulong> topBitClear = Vector64.Create(0x0000000000000001UL);
+            Assert.Equal(0b0u, Vector64.ExtractMostSignificantBits(topBitClear));
+
+            // Top bit set: bit 0 of the mask is expected.
+            Vector64<ulong> topBitSet = Vector64.Create(0x8000000000000000UL);
+            Assert.Equal(0b1u, Vector64.ExtractMostSignificantBits(topBitSet));
+        }
+
+        [Fact]
+        public unsafe void Vector64ByteLoadTest()
+        {
+            // Fill the source with 0..7 so each loaded element can be checked against its index.
+            byte* source = stackalloc byte[8];
+
+            for (int i = 0; i < 8; i++)
+            {
+                source[i] = (byte)i;
+            }
+
+            Vector64<byte> loaded = Vector64.Load(source);
+
+            for (int i = 0; i < Vector64<byte>.Count; i++)
+            {
+                Assert.Equal((byte)i, loaded.GetElement(i));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64DoubleLoadTest()
+        {
+            // A single-element source holding 0, so the loaded element can be checked against its index.
+            double* source = stackalloc double[1];
+            source[0] = 0;
+
+            Vector64<double> loaded = Vector64.Load(source);
+
+            for (int i = 0; i < Vector64<double>.Count; i++)
+            {
+                Assert.Equal((double)i, loaded.GetElement(i));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int16LoadTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            short* source = stackalloc short[4] { 0, 1, 2, 3 };
+            Vector64<short> loaded = Vector64.Load(source);
+
+            int laneCount = Vector64<short>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((short)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int32LoadTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            int* source = stackalloc int[2] { 0, 1 };
+            Vector64<int> loaded = Vector64.Load(source);
+
+            int laneCount = Vector64<int>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal(lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int64LoadTest()
+        {
+            // The buffer backs a single lane. Seed it with a non-zero value (index + 1) so that a load
+            // producing an all-zero vector fails the assertion instead of passing vacuously.
+            long* value = stackalloc long[1] {
+                1,
+            };
+
+            Vector64<long> vector = Vector64.Load(value);
+
+            for (int index = 0; index < Vector64<long>.Count; index++)
+            {
+                Assert.Equal((long)(index + 1), vector.GetElement(index));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NIntLoadTest()
+        {
+            // Lanes are seeded with index + 1 so that no lane is zero. The 64-bit branch has a single
+            // lane, and a zero seed there could not tell a real load from an all-zero vector.
+            if (Environment.Is64BitProcess)
+            {
+                nint* value = stackalloc nint[1] {
+                    1,
+                };
+
+                Vector64<nint> vector = Vector64.Load(value);
+
+                for (int index = 0; index < Vector64<nint>.Count; index++)
+                {
+                    Assert.Equal((nint)(index + 1), vector.GetElement(index));
+                }
+            }
+            else
+            {
+                nint* value = stackalloc nint[2] {
+                    1,
+                    2,
+                };
+
+                Vector64<nint> vector = Vector64.Load(value);
+
+                for (int index = 0; index < Vector64<nint>.Count; index++)
+                {
+                    Assert.Equal((nint)(index + 1), vector.GetElement(index));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NUIntLoadTest()
+        {
+            // Lanes are seeded with index + 1 so that no lane is zero. The 64-bit branch has a single
+            // lane, and a zero seed there could not tell a real load from an all-zero vector.
+            if (Environment.Is64BitProcess)
+            {
+                nuint* value = stackalloc nuint[1] {
+                    1,
+                };
+
+                Vector64<nuint> vector = Vector64.Load(value);
+
+                for (int index = 0; index < Vector64<nuint>.Count; index++)
+                {
+                    Assert.Equal((nuint)(index + 1), vector.GetElement(index));
+                }
+            }
+            else
+            {
+                nuint* value = stackalloc nuint[2] {
+                    1,
+                    2,
+                };
+
+                Vector64<nuint> vector = Vector64.Load(value);
+
+                for (int index = 0; index < Vector64<nuint>.Count; index++)
+                {
+                    Assert.Equal((nuint)(index + 1), vector.GetElement(index));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SByteLoadTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            sbyte* source = stackalloc sbyte[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+            Vector64<sbyte> loaded = Vector64.Load(source);
+
+            int laneCount = Vector64<sbyte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SingleLoadTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            float* source = stackalloc float[2] { 0, 1 };
+            Vector64<float> loaded = Vector64.Load(source);
+
+            int laneCount = Vector64<float>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((float)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt16LoadTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            ushort* source = stackalloc ushort[4] { 0, 1, 2, 3 };
+            Vector64<ushort> loaded = Vector64.Load(source);
+
+            int laneCount = Vector64<ushort>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((ushort)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt32LoadTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            uint* source = stackalloc uint[2] { 0, 1 };
+            Vector64<uint> loaded = Vector64.Load(source);
+
+            int laneCount = Vector64<uint>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((uint)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt64LoadTest()
+        {
+            // The buffer backs a single lane. Seed it with a non-zero value (index + 1) so that a load
+            // producing an all-zero vector fails the assertion instead of passing vacuously.
+            ulong* value = stackalloc ulong[1] {
+                1,
+            };
+
+            Vector64<ulong> vector = Vector64.Load(value);
+
+            for (int index = 0; index < Vector64<ulong>.Count; index++)
+            {
+                Assert.Equal((ulong)(index + 1), vector.GetElement(index));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64ByteLoadAlignedTest()
+        {
+            byte* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (byte*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    buffer[slot] = (byte)slot;
+                }
+
+                Vector64<byte> loaded = Vector64.LoadAligned(buffer);
+                int laneCount = Vector64<byte>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((byte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64DoubleLoadAlignedTest()
+        {
+            double* value = null;
+
+            try
+            {
+                value = (double*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // The allocation backs a single lane. Seed it with a non-zero value (index + 1) so that
+                // a load producing an all-zero vector fails the assertion instead of passing vacuously.
+                value[0] = 1;
+
+                Vector64<double> vector = Vector64.LoadAligned(value);
+
+                for (int index = 0; index < Vector64<double>.Count; index++)
+                {
+                    Assert.Equal((double)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int16LoadAlignedTest()
+        {
+            short* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (short*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = (short)slot;
+                }
+
+                Vector64<short> loaded = Vector64.LoadAligned(buffer);
+                int laneCount = Vector64<short>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((short)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int32LoadAlignedTest()
+        {
+            int* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (int*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    buffer[slot] = slot;
+                }
+
+                Vector64<int> loaded = Vector64.LoadAligned(buffer);
+                int laneCount = Vector64<int>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal(lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int64LoadAlignedTest()
+        {
+            long* value = null;
+
+            try
+            {
+                value = (long*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // The allocation backs a single lane. Seed it with a non-zero value (index + 1) so that
+                // a load producing an all-zero vector fails the assertion instead of passing vacuously.
+                value[0] = 1;
+
+                Vector64<long> vector = Vector64.LoadAligned(value);
+
+                for (int index = 0; index < Vector64<long>.Count; index++)
+                {
+                    Assert.Equal((long)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NIntLoadAlignedTest()
+        {
+            nint* value = null;
+
+            try
+            {
+                value = (nint*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Lanes are seeded with index + 1 so that no lane is zero. The 64-bit branch has a single
+                // lane, and a zero seed there could not tell a real load from an all-zero vector.
+                if (Environment.Is64BitProcess)
+                {
+                    value[0] = 1;
+                }
+                else
+                {
+                    value[0] = 1;
+                    value[1] = 2;
+                }
+
+                Vector64<nint> vector = Vector64.LoadAligned(value);
+
+                for (int index = 0; index < Vector64<nint>.Count; index++)
+                {
+                    Assert.Equal((nint)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NUIntLoadAlignedTest()
+        {
+            nuint* value = null;
+
+            try
+            {
+                value = (nuint*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Lanes are seeded with index + 1 so that no lane is zero. The 64-bit branch has a single
+                // lane, and a zero seed there could not tell a real load from an all-zero vector.
+                if (Environment.Is64BitProcess)
+                {
+                    value[0] = 1;
+                }
+                else
+                {
+                    value[0] = 1;
+                    value[1] = 2;
+                }
+
+                Vector64<nuint> vector = Vector64.LoadAligned(value);
+
+                for (int index = 0; index < Vector64<nuint>.Count; index++)
+                {
+                    Assert.Equal((nuint)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SByteLoadAlignedTest()
+        {
+            sbyte* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    buffer[slot] = (sbyte)slot;
+                }
+
+                Vector64<sbyte> loaded = Vector64.LoadAligned(buffer);
+                int laneCount = Vector64<sbyte>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SingleLoadAlignedTest()
+        {
+            float* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (float*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    buffer[slot] = (float)slot;
+                }
+
+                Vector64<float> loaded = Vector64.LoadAligned(buffer);
+                int laneCount = Vector64<float>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((float)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt16LoadAlignedTest()
+        {
+            ushort* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (ushort*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = (ushort)slot;
+                }
+
+                Vector64<ushort> loaded = Vector64.LoadAligned(buffer);
+                int laneCount = Vector64<ushort>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((ushort)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt32LoadAlignedTest()
+        {
+            uint* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (uint*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    buffer[slot] = (uint)slot;
+                }
+
+                Vector64<uint> loaded = Vector64.LoadAligned(buffer);
+                int laneCount = Vector64<uint>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((uint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt64LoadAlignedTest()
+        {
+            ulong* value = null;
+
+            try
+            {
+                value = (ulong*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // The allocation backs a single lane. Seed it with a non-zero value (index + 1) so that
+                // a load producing an all-zero vector fails the assertion instead of passing vacuously.
+                value[0] = 1;
+
+                Vector64<ulong> vector = Vector64.LoadAligned(value);
+
+                for (int index = 0; index < Vector64<ulong>.Count; index++)
+                {
+                    Assert.Equal((ulong)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64ByteLoadAlignedNonTemporalTest()
+        {
+            byte* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (byte*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    buffer[slot] = (byte)slot;
+                }
+
+                Vector64<byte> loaded = Vector64.LoadAlignedNonTemporal(buffer);
+                int laneCount = Vector64<byte>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((byte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64DoubleLoadAlignedNonTemporalTest()
+        {
+            double* value = null;
+
+            try
+            {
+                value = (double*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // The allocation backs a single lane. Seed it with a non-zero value (index + 1) so that
+                // a load producing an all-zero vector fails the assertion instead of passing vacuously.
+                value[0] = 1;
+
+                Vector64<double> vector = Vector64.LoadAlignedNonTemporal(value);
+
+                for (int index = 0; index < Vector64<double>.Count; index++)
+                {
+                    Assert.Equal((double)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int16LoadAlignedNonTemporalTest()
+        {
+            short* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (short*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = (short)slot;
+                }
+
+                Vector64<short> loaded = Vector64.LoadAlignedNonTemporal(buffer);
+                int laneCount = Vector64<short>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((short)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int32LoadAlignedNonTemporalTest()
+        {
+            int* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (int*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    buffer[slot] = slot;
+                }
+
+                Vector64<int> loaded = Vector64.LoadAlignedNonTemporal(buffer);
+                int laneCount = Vector64<int>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal(lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int64LoadAlignedNonTemporalTest()
+        {
+            long* value = null;
+
+            try
+            {
+                value = (long*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // The allocation backs a single lane. Seed it with a non-zero value (index + 1) so that
+                // a load producing an all-zero vector fails the assertion instead of passing vacuously.
+                value[0] = 1;
+
+                Vector64<long> vector = Vector64.LoadAlignedNonTemporal(value);
+
+                for (int index = 0; index < Vector64<long>.Count; index++)
+                {
+                    Assert.Equal((long)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NIntLoadAlignedNonTemporalTest()
+        {
+            nint* value = null;
+
+            try
+            {
+                value = (nint*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Lanes are seeded with index + 1 so that no lane is zero. The 64-bit branch has a single
+                // lane, and a zero seed there could not tell a real load from an all-zero vector.
+                if (Environment.Is64BitProcess)
+                {
+                    value[0] = 1;
+                }
+                else
+                {
+                    value[0] = 1;
+                    value[1] = 2;
+                }
+
+                Vector64<nint> vector = Vector64.LoadAlignedNonTemporal(value);
+
+                for (int index = 0; index < Vector64<nint>.Count; index++)
+                {
+                    Assert.Equal((nint)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NUIntLoadAlignedNonTemporalTest()
+        {
+            nuint* value = null;
+
+            try
+            {
+                value = (nuint*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Lanes are seeded with index + 1 so that no lane is zero. The 64-bit branch has a single
+                // lane, and a zero seed there could not tell a real load from an all-zero vector.
+                if (Environment.Is64BitProcess)
+                {
+                    value[0] = 1;
+                }
+                else
+                {
+                    value[0] = 1;
+                    value[1] = 2;
+                }
+
+                Vector64<nuint> vector = Vector64.LoadAlignedNonTemporal(value);
+
+                for (int index = 0; index < Vector64<nuint>.Count; index++)
+                {
+                    Assert.Equal((nuint)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SByteLoadAlignedNonTemporalTest()
+        {
+            sbyte* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    buffer[slot] = (sbyte)slot;
+                }
+
+                Vector64<sbyte> loaded = Vector64.LoadAlignedNonTemporal(buffer);
+                int laneCount = Vector64<sbyte>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SingleLoadAlignedNonTemporalTest()
+        {
+            float* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (float*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    buffer[slot] = (float)slot;
+                }
+
+                Vector64<float> loaded = Vector64.LoadAlignedNonTemporal(buffer);
+                int laneCount = Vector64<float>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((float)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt16LoadAlignedNonTemporalTest()
+        {
+            ushort* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (ushort*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    buffer[slot] = (ushort)slot;
+                }
+
+                Vector64<ushort> loaded = Vector64.LoadAlignedNonTemporal(buffer);
+                int laneCount = Vector64<ushort>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((ushort)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt32LoadAlignedNonTemporalTest()
+        {
+            uint* buffer = null;
+
+            try
+            {
+                // One 8-byte allocation at 8-byte alignment backs the whole vector.
+                buffer = (uint*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // Store each slot's own position so lane i must read back as i.
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    buffer[slot] = (uint)slot;
+                }
+
+                Vector64<uint> loaded = Vector64.LoadAlignedNonTemporal(buffer);
+                int laneCount = Vector64<uint>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((uint)lane, loaded.GetElement(lane));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt64LoadAlignedNonTemporalTest()
+        {
+            ulong* value = null;
+
+            try
+            {
+                value = (ulong*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                // The allocation backs a single lane. Seed it with a non-zero value (index + 1) so that
+                // a load producing an all-zero vector fails the assertion instead of passing vacuously.
+                value[0] = 1;
+
+                Vector64<ulong> vector = Vector64.LoadAlignedNonTemporal(value);
+
+                for (int index = 0; index < Vector64<ulong>.Count; index++)
+                {
+                    Assert.Equal((ulong)(index + 1), vector.GetElement(index));
+                }
+            }
+            finally
+            {
+                // Reached on assertion failure too; the pointer is still null if AlignedAlloc itself threw.
+                NativeMemory.AlignedFree(value);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64ByteLoadUnsafeTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            byte* source = stackalloc byte[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<byte> loaded = Vector64.LoadUnsafe(ref source[0]);
+            int laneCount = Vector64<byte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((byte)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64DoubleLoadUnsafeTest()
+        {
+            // The buffer backs a single lane. Seed it with a non-zero value (index + 1) so that a load
+            // producing an all-zero vector fails the assertion instead of passing vacuously.
+            double* value = stackalloc double[1] {
+                1,
+            };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<double> vector = Vector64.LoadUnsafe(ref value[0]);
+
+            for (int index = 0; index < Vector64<double>.Count; index++)
+            {
+                Assert.Equal((double)(index + 1), vector.GetElement(index));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int16LoadUnsafeTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            short* source = stackalloc short[4] { 0, 1, 2, 3 };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<short> loaded = Vector64.LoadUnsafe(ref source[0]);
+            int laneCount = Vector64<short>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((short)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int32LoadUnsafeTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            int* source = stackalloc int[2] { 0, 1 };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<int> loaded = Vector64.LoadUnsafe(ref source[0]);
+            int laneCount = Vector64<int>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal(lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int64LoadUnsafeTest()
+        {
+            // The buffer backs a single lane. Seed it with a non-zero value (index + 1) so that a load
+            // producing an all-zero vector fails the assertion instead of passing vacuously.
+            long* value = stackalloc long[1] {
+                1,
+            };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<long> vector = Vector64.LoadUnsafe(ref value[0]);
+
+            for (int index = 0; index < Vector64<long>.Count; index++)
+            {
+                Assert.Equal((long)(index + 1), vector.GetElement(index));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NIntLoadUnsafeTest()
+        {
+            // Lanes are seeded with index + 1 so that no lane is zero. The 64-bit branch has a single
+            // lane, and a zero seed there could not tell a real load from an all-zero vector.
+            if (Environment.Is64BitProcess)
+            {
+                nint* value = stackalloc nint[1] {
+                    1,
+                };
+
+                Vector64<nint> vector = Vector64.LoadUnsafe(ref value[0]);
+
+                for (int index = 0; index < Vector64<nint>.Count; index++)
+                {
+                    Assert.Equal((nint)(index + 1), vector.GetElement(index));
+                }
+            }
+            else
+            {
+                nint* value = stackalloc nint[2] {
+                    1,
+                    2,
+                };
+
+                Vector64<nint> vector = Vector64.LoadUnsafe(ref value[0]);
+
+                for (int index = 0; index < Vector64<nint>.Count; index++)
+                {
+                    Assert.Equal((nint)(index + 1), vector.GetElement(index));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NUIntLoadUnsafeTest()
+        {
+            // Lanes are seeded with index + 1 so that no lane is zero. The 64-bit branch has a single
+            // lane, and a zero seed there could not tell a real load from an all-zero vector.
+            if (Environment.Is64BitProcess)
+            {
+                nuint* value = stackalloc nuint[1] {
+                    1,
+                };
+
+                Vector64<nuint> vector = Vector64.LoadUnsafe(ref value[0]);
+
+                for (int index = 0; index < Vector64<nuint>.Count; index++)
+                {
+                    Assert.Equal((nuint)(index + 1), vector.GetElement(index));
+                }
+            }
+            else
+            {
+                nuint* value = stackalloc nuint[2] {
+                    1,
+                    2,
+                };
+
+                Vector64<nuint> vector = Vector64.LoadUnsafe(ref value[0]);
+
+                for (int index = 0; index < Vector64<nuint>.Count; index++)
+                {
+                    Assert.Equal((nuint)(index + 1), vector.GetElement(index));
+                }
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SByteLoadUnsafeTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            sbyte* source = stackalloc sbyte[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<sbyte> loaded = Vector64.LoadUnsafe(ref source[0]);
+            int laneCount = Vector64<sbyte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((sbyte)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SingleLoadUnsafeTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            float* source = stackalloc float[2] { 0, 1 };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<float> loaded = Vector64.LoadUnsafe(ref source[0]);
+            int laneCount = Vector64<float>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((float)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt16LoadUnsafeTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            ushort* source = stackalloc ushort[4] { 0, 1, 2, 3 };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<ushort> loaded = Vector64.LoadUnsafe(ref source[0]);
+            int laneCount = Vector64<ushort>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((ushort)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt32LoadUnsafeTest()
+        {
+            // Every slot stores its own position, so lane i of the loaded vector must equal i.
+            uint* source = stackalloc uint[2] { 0, 1 };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<uint> loaded = Vector64.LoadUnsafe(ref source[0]);
+            int laneCount = Vector64<uint>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((uint)lane, loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt64LoadUnsafeTest()
+        {
+            // The buffer backs a single lane. Seed it with a non-zero value (index + 1) so that a load
+            // producing an all-zero vector fails the assertion instead of passing vacuously.
+            ulong* value = stackalloc ulong[1] {
+                1,
+            };
+
+            // LoadUnsafe is handed a reference to the first element rather than the pointer itself.
+            Vector64<ulong> vector = Vector64.LoadUnsafe(ref value[0]);
+
+            for (int index = 0; index < Vector64<ulong>.Count; index++)
+            {
+                Assert.Equal((ulong)(index + 1), vector.GetElement(index));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64ByteLoadUnsafeIndexTest()
+        {
+            // Nine slots, each holding its own position: one more than a vector's worth, so a load that
+            // starts one element in still stays inside the buffer.
+            byte* source = stackalloc byte[9] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            // Start one element past the reference, so lane i must equal i + 1.
+            Vector64<byte> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<byte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((byte)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64DoubleLoadUnsafeIndexTest()
+        {
+            // Two slots, each holding its own position: one more than a vector's worth, so a load that
+            // starts one element in still stays inside the buffer.
+            double* source = stackalloc double[2] { 0, 1 };
+
+            // Start one element past the reference, so lane i must equal i + 1.
+            Vector64<double> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<double>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((double)(lane + 1), loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of a short buffer holding 0..4, so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64Int16LoadUnsafeIndexTest()
+        {
+            short* source = stackalloc short[4 + 1] { 0, 1, 2, 3, 4 };
+            Vector64<short> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<short>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                short expected = (short)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of an int buffer holding 0..2, so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64Int32LoadUnsafeIndexTest()
+        {
+            int* source = stackalloc int[2 + 1] { 0, 1, 2 };
+            Vector64<int> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<int>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                int expected = (int)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of a long buffer holding 0, 1, so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64Int64LoadUnsafeIndexTest()
+        {
+            long* source = stackalloc long[1 + 1] { 0, 1 };
+            Vector64<long> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<long>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                long expected = (long)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of an nint buffer holding 0, 1[, 2], so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64NIntLoadUnsafeIndexTest()
+        {
+            // The buffer has 1 + 1 elements in a 64-bit process and 2 + 1 elements otherwise.
+            int length = Environment.Is64BitProcess ? (1 + 1) : (2 + 1);
+            nint* source = stackalloc nint[length];
+
+            for (int slot = 0; slot < length; slot++)
+            {
+                source[slot] = slot;
+            }
+
+            Vector64<nint> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<nint>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                nint expected = (nint)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of an nuint buffer holding 0, 1[, 2], so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64NUIntLoadUnsafeIndexTest()
+        {
+            // The buffer has 1 + 1 elements in a 64-bit process and 2 + 1 elements otherwise.
+            int length = Environment.Is64BitProcess ? (1 + 1) : (2 + 1);
+            nuint* source = stackalloc nuint[length];
+
+            for (int slot = 0; slot < length; slot++)
+            {
+                source[slot] = (nuint)slot;
+            }
+
+            Vector64<nuint> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<nuint>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                nuint expected = (nuint)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of an sbyte buffer holding 0..8, so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64SByteLoadUnsafeIndexTest()
+        {
+            sbyte* source = stackalloc sbyte[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+            Vector64<sbyte> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<sbyte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                sbyte expected = (sbyte)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of a float buffer holding 0..2, so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64SingleLoadUnsafeIndexTest()
+        {
+            float* source = stackalloc float[2 + 1] { 0, 1, 2 };
+            Vector64<float> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<float>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                float expected = (float)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of a ushort buffer holding 0..4, so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64UInt16LoadUnsafeIndexTest()
+        {
+            ushort* source = stackalloc ushort[4 + 1] { 0, 1, 2, 3, 4 };
+            Vector64<ushort> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<ushort>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                ushort expected = (ushort)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of a uint buffer holding 0..2, so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64UInt32LoadUnsafeIndexTest()
+        {
+            uint* source = stackalloc uint[2 + 1] { 0, 1, 2 };
+            Vector64<uint> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<uint>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                uint expected = (uint)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Loads from element offset 1 of a ulong buffer holding 0, 1, so lane i is expected to equal i + 1.
+        [Fact]
+        public unsafe void Vector64UInt64LoadUnsafeIndexTest()
+        {
+            ulong* source = stackalloc ulong[1 + 1] { 0, 1 };
+            Vector64<ulong> loaded = Vector64.LoadUnsafe(ref source[0], 1);
+            int laneCount = Vector64<ulong>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                ulong expected = (ulong)(lane + 1);
+                Assert.Equal(expected, loaded.GetElement(lane));
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<byte>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64ByteShiftLeftTest()
+        {
+            Vector64<byte> input = Vector64.Create((byte)0x01);
+            Vector64<byte> shifted = Vector64.ShiftLeft(input, 4);
+            byte expected = (byte)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<byte>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<short>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64Int16ShiftLeftTest()
+        {
+            Vector64<short> input = Vector64.Create((short)0x01);
+            Vector64<short> shifted = Vector64.ShiftLeft(input, 4);
+            short expected = (short)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<short>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<int>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64Int32ShiftLeftTest()
+        {
+            Vector64<int> input = Vector64.Create((int)0x01);
+            Vector64<int> shifted = Vector64.ShiftLeft(input, 4);
+            int expected = (int)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<int>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<long>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64Int64ShiftLeftTest()
+        {
+            Vector64<long> input = Vector64.Create((long)0x01);
+            Vector64<long> shifted = Vector64.ShiftLeft(input, 4);
+            long expected = (long)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<long>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<nint>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64NIntShiftLeftTest()
+        {
+            Vector64<nint> input = Vector64.Create((nint)0x01);
+            Vector64<nint> shifted = Vector64.ShiftLeft(input, 4);
+            nint expected = (nint)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<nint>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<nuint>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64NUIntShiftLeftTest()
+        {
+            Vector64<nuint> input = Vector64.Create((nuint)0x01);
+            Vector64<nuint> shifted = Vector64.ShiftLeft(input, 4);
+            nuint expected = (nuint)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<nuint>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<sbyte>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64SByteShiftLeftTest()
+        {
+            Vector64<sbyte> input = Vector64.Create((sbyte)0x01);
+            Vector64<sbyte> shifted = Vector64.ShiftLeft(input, 4);
+            sbyte expected = (sbyte)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<sbyte>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<ushort>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64UInt16ShiftLeftTest()
+        {
+            Vector64<ushort> input = Vector64.Create((ushort)0x01);
+            Vector64<ushort> shifted = Vector64.ShiftLeft(input, 4);
+            ushort expected = (ushort)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<ushort>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<uint>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64UInt32ShiftLeftTest()
+        {
+            Vector64<uint> input = Vector64.Create((uint)0x01);
+            Vector64<uint> shifted = Vector64.ShiftLeft(input, 4);
+            uint expected = (uint)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<uint>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x01 into a Vector64<ulong>, shifts left by 4, and expects 0x10 in every lane.
+        [Fact]
+        public void Vector64UInt64ShiftLeftTest()
+        {
+            Vector64<ulong> input = Vector64.Create((ulong)0x01);
+            Vector64<ulong> shifted = Vector64.ShiftLeft(input, 4);
+            ulong expected = (ulong)0x10;
+
+            int lane = 0;
+
+            while (lane < Vector64<ulong>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts the bit pattern 0x8000 into a Vector64<short>, arithmetic-shifts right by 4, and expects 0xF800 in every lane.
+        [Fact]
+        public void Vector64Int16ShiftRightArithmeticTest()
+        {
+            Vector64<short> input = Vector64.Create(unchecked((short)0x8000));
+            Vector64<short> shifted = Vector64.ShiftRightArithmetic(input, 4);
+            short expected = unchecked((short)0xF800);
+
+            int lane = 0;
+
+            while (lane < Vector64<short>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts the bit pattern 0x80000000 into a Vector64<int>, arithmetic-shifts right by 4, and expects 0xF8000000 in every lane.
+        [Fact]
+        public void Vector64Int32ShiftRightArithmeticTest()
+        {
+            Vector64<int> input = Vector64.Create(unchecked((int)0x80000000));
+            Vector64<int> shifted = Vector64.ShiftRightArithmetic(input, 4);
+            int expected = unchecked((int)0xF8000000);
+
+            int lane = 0;
+
+            while (lane < Vector64<int>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts the bit pattern 0x8000000000000000 into a Vector64<long>, arithmetic-shifts right by 4, and expects 0xF800000000000000 in every lane.
+        [Fact]
+        public void Vector64Int64ShiftRightArithmeticTest()
+        {
+            Vector64<long> input = Vector64.Create(unchecked((long)0x8000000000000000));
+            Vector64<long> shifted = Vector64.ShiftRightArithmetic(input, 4);
+            long expected = unchecked((long)0xF800000000000000);
+
+            int lane = 0;
+
+            while (lane < Vector64<long>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Arithmetic-shifts a Vector64<nint> right by 4; the input and expected bit patterns are chosen per process bitness.
+        [Fact]
+        public void Vector64NIntShiftRightArithmeticTest()
+        {
+            Vector64<nint> input;
+            nint expected;
+
+            if (Environment.Is64BitProcess)
+            {
+                input = Vector64.Create(unchecked((nint)0x8000000000000000));
+                expected = unchecked((nint)0xF800000000000000);
+            }
+            else
+            {
+                input = Vector64.Create(unchecked((nint)0x80000000));
+                expected = unchecked((nint)0xF8000000);
+            }
+
+            Vector64<nint> shifted = Vector64.ShiftRightArithmetic(input, 4);
+
+            int lane = 0;
+
+            while (lane < Vector64<nint>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts the bit pattern 0x80 into a Vector64<sbyte>, arithmetic-shifts right by 4, and expects 0xF8 in every lane.
+        [Fact]
+        public void Vector64SByteShiftRightArithmeticTest()
+        {
+            Vector64<sbyte> input = Vector64.Create(unchecked((sbyte)0x80));
+            Vector64<sbyte> shifted = Vector64.ShiftRightArithmetic(input, 4);
+            sbyte expected = unchecked((sbyte)0xF8);
+
+            int lane = 0;
+
+            while (lane < Vector64<sbyte>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x80 into a Vector64<byte>, logical-shifts right by 4, and expects 0x08 in every lane.
+        [Fact]
+        public void Vector64ByteShiftRightLogicalTest()
+        {
+            Vector64<byte> input = Vector64.Create((byte)0x80);
+            Vector64<byte> shifted = Vector64.ShiftRightLogical(input, 4);
+            byte expected = (byte)0x08;
+
+            int lane = 0;
+
+            while (lane < Vector64<byte>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts the bit pattern 0x8000 into a Vector64<short>, logical-shifts right by 4, and expects 0x0800 in every lane.
+        [Fact]
+        public void Vector64Int16ShiftRightLogicalTest()
+        {
+            Vector64<short> input = Vector64.Create(unchecked((short)0x8000));
+            Vector64<short> shifted = Vector64.ShiftRightLogical(input, 4);
+            short expected = (short)0x0800;
+
+            int lane = 0;
+
+            while (lane < Vector64<short>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts the bit pattern 0x80000000 into a Vector64<int>, logical-shifts right by 4, and expects 0x08000000 in every lane.
+        [Fact]
+        public void Vector64Int32ShiftRightLogicalTest()
+        {
+            Vector64<int> input = Vector64.Create(unchecked((int)0x80000000));
+            Vector64<int> shifted = Vector64.ShiftRightLogical(input, 4);
+            int expected = (int)0x08000000;
+
+            int lane = 0;
+
+            while (lane < Vector64<int>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts the bit pattern 0x8000000000000000 into a Vector64<long>, logical-shifts right by 4, and expects 0x0800000000000000 in every lane.
+        [Fact]
+        public void Vector64Int64ShiftRightLogicalTest()
+        {
+            Vector64<long> input = Vector64.Create(unchecked((long)0x8000000000000000));
+            Vector64<long> shifted = Vector64.ShiftRightLogical(input, 4);
+            long expected = (long)0x0800000000000000;
+
+            int lane = 0;
+
+            while (lane < Vector64<long>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Logical-shifts a Vector64<nint> right by 4; the input and expected bit patterns are chosen per process bitness.
+        [Fact]
+        public void Vector64NIntShiftRightLogicalTest()
+        {
+            Vector64<nint> input;
+            nint expected;
+
+            if (Environment.Is64BitProcess)
+            {
+                input = Vector64.Create(unchecked((nint)0x8000000000000000));
+                expected = unchecked((nint)0x0800000000000000);
+            }
+            else
+            {
+                input = Vector64.Create(unchecked((nint)0x80000000));
+                expected = unchecked((nint)0x08000000);
+            }
+
+            Vector64<nint> shifted = Vector64.ShiftRightLogical(input, 4);
+
+            int lane = 0;
+
+            while (lane < Vector64<nint>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Logical-shifts a Vector64<nuint> right by 4; the input and expected bit patterns are chosen per process bitness.
+        [Fact]
+        public void Vector64NUIntShiftRightLogicalTest()
+        {
+            Vector64<nuint> input;
+            nuint expected;
+
+            if (Environment.Is64BitProcess)
+            {
+                input = Vector64.Create(unchecked((nuint)0x8000000000000000));
+                expected = unchecked((nuint)0x0800000000000000);
+            }
+            else
+            {
+                input = Vector64.Create(unchecked((nuint)0x80000000));
+                expected = unchecked((nuint)0x08000000);
+            }
+
+            Vector64<nuint> shifted = Vector64.ShiftRightLogical(input, 4);
+
+            int lane = 0;
+
+            while (lane < Vector64<nuint>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts the bit pattern 0x80 into a Vector64<sbyte>, logical-shifts right by 4, and expects 0x08 in every lane.
+        [Fact]
+        public void Vector64SByteShiftRightLogicalTest()
+        {
+            Vector64<sbyte> input = Vector64.Create(unchecked((sbyte)0x80));
+            Vector64<sbyte> shifted = Vector64.ShiftRightLogical(input, 4);
+            sbyte expected = (sbyte)0x08;
+
+            int lane = 0;
+
+            while (lane < Vector64<sbyte>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x8000 into a Vector64<ushort>, logical-shifts right by 4, and expects 0x0800 in every lane.
+        [Fact]
+        public void Vector64UInt16ShiftRightLogicalTest()
+        {
+            Vector64<ushort> input = Vector64.Create(unchecked((ushort)0x8000));
+            Vector64<ushort> shifted = Vector64.ShiftRightLogical(input, 4);
+            ushort expected = (ushort)0x0800;
+
+            int lane = 0;
+
+            while (lane < Vector64<ushort>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x80000000 into a Vector64<uint>, logical-shifts right by 4, and expects 0x08000000 in every lane.
+        [Fact]
+        public void Vector64UInt32ShiftRightLogicalTest()
+        {
+            Vector64<uint> input = Vector64.Create(0x80000000);
+            Vector64<uint> shifted = Vector64.ShiftRightLogical(input, 4);
+            uint expected = (uint)0x08000000;
+
+            int lane = 0;
+
+            while (lane < Vector64<uint>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Broadcasts 0x8000000000000000 into a Vector64<ulong>, logical-shifts right by 4, and expects 0x0800000000000000 in every lane.
+        [Fact]
+        public void Vector64UInt64ShiftRightLogicalTest()
+        {
+            Vector64<ulong> input = Vector64.Create(0x8000000000000000);
+            Vector64<ulong> shifted = Vector64.ShiftRightLogical(input, 4);
+            ulong expected = (ulong)0x0800000000000000;
+
+            int lane = 0;
+
+            while (lane < Vector64<ulong>.Count)
+            {
+                Assert.Equal(expected, shifted.GetElement(lane));
+                lane++;
+            }
+        }
+
+        // Starts from a byte stack buffer holding 0..7, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64ByteStoreTest()
+        {
+            byte* destination = stackalloc byte[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+            Vector64<byte> ones = Vector64.Create((byte)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<byte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((byte)0x1, destination[lane]);
+            }
+        }
+
+        // Starts from a one-element double stack buffer holding 0, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64DoubleStoreTest()
+        {
+            double* destination = stackalloc double[1] { 0 };
+            Vector64<double> ones = Vector64.Create((double)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<double>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((double)0x1, destination[lane]);
+            }
+        }
+
+        // Starts from a short stack buffer holding 0..3, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64Int16StoreTest()
+        {
+            short* destination = stackalloc short[4] { 0, 1, 2, 3 };
+            Vector64<short> ones = Vector64.Create((short)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<short>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((short)0x1, destination[lane]);
+            }
+        }
+
+        // Starts from an int stack buffer holding 0, 1, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64Int32StoreTest()
+        {
+            int* destination = stackalloc int[2] { 0, 1 };
+            Vector64<int> ones = Vector64.Create((int)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<int>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((int)0x1, destination[lane]);
+            }
+        }
+
+        // Starts from a one-element long stack buffer holding 0, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64Int64StoreTest()
+        {
+            long* destination = stackalloc long[1] { 0 };
+            Vector64<long> ones = Vector64.Create((long)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<long>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((long)0x1, destination[lane]);
+            }
+        }
+
+        // Seeds an nint stack buffer with its own indices, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64NIntStoreTest()
+        {
+            // The buffer has one element in a 64-bit process and two elements otherwise.
+            int length = Environment.Is64BitProcess ? 1 : 2;
+            nint* destination = stackalloc nint[length];
+
+            for (int slot = 0; slot < length; slot++)
+            {
+                destination[slot] = slot;
+            }
+
+            Vector64<nint> ones = Vector64.Create((nint)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<nint>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((nint)0x1, destination[lane]);
+            }
+        }
+
+        // Seeds an nuint stack buffer with its own indices, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64NUIntStoreTest()
+        {
+            // The buffer has one element in a 64-bit process and two elements otherwise.
+            int length = Environment.Is64BitProcess ? 1 : 2;
+            nuint* destination = stackalloc nuint[length];
+
+            for (int slot = 0; slot < length; slot++)
+            {
+                destination[slot] = (nuint)slot;
+            }
+
+            Vector64<nuint> ones = Vector64.Create((nuint)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<nuint>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((nuint)0x1, destination[lane]);
+            }
+        }
+
+        // Starts from an sbyte stack buffer holding 0..7, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64SByteStoreTest()
+        {
+            sbyte* destination = stackalloc sbyte[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+            Vector64<sbyte> ones = Vector64.Create((sbyte)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<sbyte>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((sbyte)0x1, destination[lane]);
+            }
+        }
+
+        // Starts from a float stack buffer holding 0, 1, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64SingleStoreTest()
+        {
+            float* destination = stackalloc float[2] { 0, 1 };
+            Vector64<float> ones = Vector64.Create((float)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<float>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((float)0x1, destination[lane]);
+            }
+        }
+
+        // Starts from a ushort stack buffer holding 0..3, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64UInt16StoreTest()
+        {
+            ushort* destination = stackalloc ushort[4] { 0, 1, 2, 3 };
+            Vector64<ushort> ones = Vector64.Create((ushort)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<ushort>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((ushort)0x1, destination[lane]);
+            }
+        }
+
+        // Starts from a uint stack buffer holding 0, 1, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64UInt32StoreTest()
+        {
+            uint* destination = stackalloc uint[2] { 0, 1 };
+            Vector64<uint> ones = Vector64.Create((uint)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<uint>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((uint)0x1, destination[lane]);
+            }
+        }
+
+        // Starts from a one-element ulong stack buffer holding 0, stores an all-0x1 vector over it, and expects each element to read back as 0x1.
+        [Fact]
+        public unsafe void Vector64UInt64StoreTest()
+        {
+            ulong* destination = stackalloc ulong[1] { 0 };
+            Vector64<ulong> ones = Vector64.Create((ulong)0x1);
+
+            ones.Store(destination);
+
+            int laneCount = Vector64<ulong>.Count;
+
+            for (int lane = 0; lane < laneCount; lane++)
+            {
+                Assert.Equal((ulong)0x1, destination[lane]);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds them with 0..7, overwrites them via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64ByteStoreAlignedTest()
+        {
+            byte* destination = null;
+
+            try
+            {
+                destination = (byte*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    destination[slot] = (byte)slot;
+                }
+
+                Vector64<byte> ones = Vector64.Create((byte)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<byte>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((byte)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds the single double with 0, overwrites it via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64DoubleStoreAlignedTest()
+        {
+            double* destination = null;
+
+            try
+            {
+                destination = (double*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+                destination[0] = 0;
+
+                Vector64<double> ones = Vector64.Create((double)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<double>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((double)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds four shorts with 0..3, overwrites them via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64Int16StoreAlignedTest()
+        {
+            short* destination = null;
+
+            try
+            {
+                destination = (short*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    destination[slot] = (short)slot;
+                }
+
+                Vector64<short> ones = Vector64.Create((short)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<short>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((short)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds two ints with 0, 1, overwrites them via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64Int32StoreAlignedTest()
+        {
+            int* destination = null;
+
+            try
+            {
+                destination = (int*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    destination[slot] = slot;
+                }
+
+                Vector64<int> ones = Vector64.Create((int)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<int>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((int)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds the single long with 0, overwrites it via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64Int64StoreAlignedTest()
+        {
+            long* destination = null;
+
+            try
+            {
+                destination = (long*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+                destination[0] = 0;
+
+                Vector64<long> ones = Vector64.Create((long)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<long>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((long)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds the nint element(s), overwrites them via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64NIntStoreAlignedTest()
+        {
+            nint* destination = null;
+
+            try
+            {
+                destination = (nint*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                destination[0] = 0;
+
+                // A second element is seeded only when the process is not 64-bit.
+                if (!Environment.Is64BitProcess)
+                {
+                    destination[1] = 1;
+                }
+
+                Vector64<nint> ones = Vector64.Create((nint)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<nint>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((nint)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds the nuint element(s), overwrites them via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64NUIntStoreAlignedTest()
+        {
+            nuint* destination = null;
+
+            try
+            {
+                destination = (nuint*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                destination[0] = 0;
+
+                // A second element is seeded only when the process is not 64-bit.
+                if (!Environment.Is64BitProcess)
+                {
+                    destination[1] = 1;
+                }
+
+                Vector64<nuint> ones = Vector64.Create((nuint)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<nuint>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((nuint)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds eight sbytes with 0..7, overwrites them via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64SByteStoreAlignedTest()
+        {
+            sbyte* destination = null;
+
+            try
+            {
+                destination = (sbyte*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                for (int slot = 0; slot < 8; slot++)
+                {
+                    destination[slot] = (sbyte)slot;
+                }
+
+                Vector64<sbyte> ones = Vector64.Create((sbyte)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<sbyte>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((sbyte)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds two floats with 0, 1, overwrites them via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64SingleStoreAlignedTest()
+        {
+            float* destination = null;
+
+            try
+            {
+                destination = (float*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                for (int slot = 0; slot < 2; slot++)
+                {
+                    destination[slot] = slot;
+                }
+
+                Vector64<float> ones = Vector64.Create((float)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<float>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((float)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        // Allocates 8 bytes with 8-byte alignment, seeds four ushorts with 0..3, overwrites them via StoreAligned with an all-0x1 vector,
+        // and checks each element; the allocation is released in the finally block.
+        [Fact]
+        public unsafe void Vector64UInt16StoreAlignedTest()
+        {
+            ushort* destination = null;
+
+            try
+            {
+                destination = (ushort*)NativeMemory.AlignedAlloc(byteCount: 8, alignment: 8);
+
+                for (int slot = 0; slot < 4; slot++)
+                {
+                    destination[slot] = (ushort)slot;
+                }
+
+                Vector64<ushort> ones = Vector64.Create((ushort)0x1);
+                ones.StoreAligned(destination);
+
+                int laneCount = Vector64<ushort>.Count;
+
+                for (int lane = 0; lane < laneCount; lane++)
+                {
+                    Assert.Equal((ushort)0x1, destination[lane]);
+                }
+            }
+            finally
+            {
+                NativeMemory.AlignedFree(destination);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt32StoreAlignedTest()
+        {
+            // Checks that StoreAligned writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const uint Expected = 0x1;
+            const int SeedCount = 2;
+
+            uint* buffer = null;
+
+            try
+            {
+                buffer = (uint*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the buffer with 0, 1 before the store.
+                for (int i = 0; i < SeedCount; i++)
+                {
+                    buffer[i] = (uint)i;
+                }
+
+                Vector64<uint> vector = Vector64.Create(Expected);
+                vector.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector64<uint>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt64StoreAlignedTest()
+        {
+            // Checks that StoreAligned writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const ulong Expected = 0x1;
+
+            ulong* buffer = null;
+
+            try
+            {
+                buffer = (ulong*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the single element with 0 before the store.
+                *buffer = 0;
+
+                Vector64<ulong> vector = Vector64.Create(Expected);
+                vector.StoreAligned(buffer);
+
+                for (int lane = 0; lane < Vector64<ulong>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64ByteStoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const byte Expected = 0x1;
+            const int SeedCount = 8;
+
+            byte* buffer = null;
+
+            try
+            {
+                buffer = (byte*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the buffer with 0..7 before the store.
+                for (int i = 0; i < SeedCount; i++)
+                {
+                    buffer[i] = (byte)i;
+                }
+
+                Vector64<byte> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<byte>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64DoubleStoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const double Expected = 0x1;
+
+            double* buffer = null;
+
+            try
+            {
+                buffer = (double*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the single element with 0 before the store.
+                *buffer = 0;
+
+                Vector64<double> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<double>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int16StoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const short Expected = 0x1;
+            const int SeedCount = 4;
+
+            short* buffer = null;
+
+            try
+            {
+                buffer = (short*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the buffer with 0, 1, 2, 3 before the store.
+                for (int i = 0; i < SeedCount; i++)
+                {
+                    buffer[i] = (short)i;
+                }
+
+                Vector64<short> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<short>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int32StoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const int Expected = 0x1;
+            const int SeedCount = 2;
+
+            int* buffer = null;
+
+            try
+            {
+                buffer = (int*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the buffer with 0, 1 before the store.
+                for (int i = 0; i < SeedCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector64<int> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<int>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int64StoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const long Expected = 0x1;
+
+            long* buffer = null;
+
+            try
+            {
+                buffer = (long*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the single element with 0 before the store.
+                *buffer = 0;
+
+                Vector64<long> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<long>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NIntStoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const nint Expected = 0x1;
+
+            nint* buffer = null;
+
+            try
+            {
+                buffer = (nint*)NativeMemory.AlignedAlloc(8, 8);
+
+                buffer[0] = 0;
+
+                if (!Environment.Is64BitProcess)
+                {
+                    // Only a non-64-bit process seeds a second element.
+                    buffer[1] = 1;
+                }
+
+                Vector64<nint> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<nint>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NUIntStoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const nuint Expected = 0x1;
+
+            nuint* buffer = null;
+
+            try
+            {
+                buffer = (nuint*)NativeMemory.AlignedAlloc(8, 8);
+
+                buffer[0] = 0;
+
+                if (!Environment.Is64BitProcess)
+                {
+                    // Only a non-64-bit process seeds a second element.
+                    buffer[1] = 1;
+                }
+
+                Vector64<nuint> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<nuint>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SByteStoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const sbyte Expected = 0x1;
+            const int SeedCount = 8;
+
+            sbyte* buffer = null;
+
+            try
+            {
+                buffer = (sbyte*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the buffer with 0..7 before the store.
+                for (int i = 0; i < SeedCount; i++)
+                {
+                    buffer[i] = (sbyte)i;
+                }
+
+                Vector64<sbyte> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<sbyte>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SingleStoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const float Expected = 0x1;
+            const int SeedCount = 2;
+
+            float* buffer = null;
+
+            try
+            {
+                buffer = (float*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the buffer with 0, 1 before the store.
+                for (int i = 0; i < SeedCount; i++)
+                {
+                    buffer[i] = i;
+                }
+
+                Vector64<float> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<float>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt16StoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const ushort Expected = 0x1;
+            const int SeedCount = 4;
+
+            ushort* buffer = null;
+
+            try
+            {
+                buffer = (ushort*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the buffer with 0, 1, 2, 3 before the store.
+                for (int i = 0; i < SeedCount; i++)
+                {
+                    buffer[i] = (ushort)i;
+                }
+
+                Vector64<ushort> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<ushort>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt32StoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const uint Expected = 0x1;
+            const int SeedCount = 2;
+
+            uint* buffer = null;
+
+            try
+            {
+                buffer = (uint*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the buffer with 0, 1 before the store.
+                for (int i = 0; i < SeedCount; i++)
+                {
+                    buffer[i] = (uint)i;
+                }
+
+                Vector64<uint> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<uint>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt64StoreAlignedNonTemporalTest()
+        {
+            // Checks that StoreAlignedNonTemporal writes 1 to every element of an 8-byte buffer aligned to 8 bytes.
+            const ulong Expected = 0x1;
+
+            ulong* buffer = null;
+
+            try
+            {
+                buffer = (ulong*)NativeMemory.AlignedAlloc(8, 8);
+
+                // Seed the single element with 0 before the store.
+                *buffer = 0;
+
+                Vector64<ulong> vector = Vector64.Create(Expected);
+                vector.StoreAlignedNonTemporal(buffer);
+
+                for (int lane = 0; lane < Vector64<ulong>.Count; lane++)
+                {
+                    Assert.Equal(Expected, buffer[lane]);
+                }
+            }
+            finally
+            {
+                // Pairs with AlignedAlloc; buffer is still null if the allocation threw.
+                NativeMemory.AlignedFree(buffer);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64ByteStoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0..7.
+            const byte Expected = 0x1;
+
+            byte* buffer = stackalloc byte[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector64<byte> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<byte>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64DoubleStoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0.
+            const double Expected = 0x1;
+
+            double* buffer = stackalloc double[1] { 0 };
+
+            Vector64<double> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<double>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int16StoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0..3.
+            const short Expected = 0x1;
+
+            short* buffer = stackalloc short[4] { 0, 1, 2, 3 };
+
+            Vector64<short> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<short>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int32StoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0, 1.
+            const int Expected = 0x1;
+
+            int* buffer = stackalloc int[2] { 0, 1 };
+
+            Vector64<int> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<int>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int64StoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0.
+            const long Expected = 0x1;
+
+            long* buffer = stackalloc long[1] { 0 };
+
+            Vector64<long> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<long>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NIntStoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with ascending values from 0.
+            const nint Expected = 0x1;
+
+            // The buffer holds one element in a 64-bit process and two otherwise.
+            int length = Environment.Is64BitProcess ? 1 : 2;
+            nint* buffer = stackalloc nint[length];
+
+            for (int i = 0; i < length; i++)
+            {
+                buffer[i] = i;
+            }
+
+            Vector64<nint> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<nint>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NUIntStoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with ascending values from 0.
+            const nuint Expected = 0x1;
+
+            // The buffer holds one element in a 64-bit process and two otherwise.
+            int length = Environment.Is64BitProcess ? 1 : 2;
+            nuint* buffer = stackalloc nuint[length];
+
+            for (int i = 0; i < length; i++)
+            {
+                buffer[i] = (nuint)i;
+            }
+
+            Vector64<nuint> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<nuint>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SByteStoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0..7.
+            const sbyte Expected = 0x1;
+
+            sbyte* buffer = stackalloc sbyte[8] { 0, 1, 2, 3, 4, 5, 6, 7 };
+
+            Vector64<sbyte> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<sbyte>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SingleStoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0, 1.
+            const float Expected = 0x1;
+
+            float* buffer = stackalloc float[2] { 0, 1 };
+
+            Vector64<float> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<float>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt16StoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0..3.
+            const ushort Expected = 0x1;
+
+            ushort* buffer = stackalloc ushort[4] { 0, 1, 2, 3 };
+
+            Vector64<ushort> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<ushort>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt32StoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0, 1.
+            const uint Expected = 0x1;
+
+            uint* buffer = stackalloc uint[2] { 0, 1 };
+
+            Vector64<uint> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<uint>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt64StoreUnsafeTest()
+        {
+            // Checks that StoreUnsafe writes 1 to every element of a stack buffer seeded with 0.
+            const ulong Expected = 0x1;
+
+            ulong* buffer = stackalloc ulong[1] { 0 };
+
+            Vector64<ulong> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0]);
+
+            for (int lane = 0; lane < Vector64<ulong>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64ByteStoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes 1 to the
+            // elements starting at position 1 of a stack buffer seeded with 0..8.
+            const byte Expected = 0x1;
+            const int Offset = 1;
+
+            byte* buffer = stackalloc byte[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            Vector64<byte> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0], Offset);
+
+            for (int lane = 0; lane < Vector64<byte>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane + Offset]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64DoubleStoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes the vector
+            // starting at element 1 of the buffer.
+            //
+            // The stored value is deliberately one that the seed data does not contain. The buffer
+            // starts as { 0, 1 }, so storing 1 would leave the only checked slot (index 1) looking
+            // correct even if the store did nothing or ignored the index.
+            const double Expected = 0x3;
+
+            double* value = stackalloc double[1 + 1] {
+                0,
+                1,
+            };
+
+            Vector64.Create(Expected).StoreUnsafe(ref value[0], 1);
+
+            // The element in front of the requested index must be left untouched.
+            Assert.Equal((double)0, value[0]);
+
+            for (int index = 0; index < Vector64<double>.Count; index++)
+            {
+                Assert.Equal(Expected, value[index + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int16StoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes 1 to the
+            // elements starting at position 1 of a stack buffer seeded with 0..4.
+            const short Expected = 0x1;
+            const int Offset = 1;
+
+            short* buffer = stackalloc short[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector64<short> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0], Offset);
+
+            for (int lane = 0; lane < Vector64<short>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane + Offset]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int32StoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes 1 to the
+            // elements starting at position 1 of a stack buffer seeded with 0..2.
+            const int Expected = 0x1;
+            const int Offset = 1;
+
+            int* buffer = stackalloc int[2 + 1] { 0, 1, 2 };
+
+            Vector64<int> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0], Offset);
+
+            for (int lane = 0; lane < Vector64<int>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane + Offset]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64Int64StoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes the vector
+            // starting at element 1 of the buffer.
+            //
+            // The stored value is deliberately one that the seed data does not contain. The buffer
+            // starts as { 0, 1 }, so storing 1 would leave the only checked slot (index 1) looking
+            // correct even if the store did nothing or ignored the index.
+            const long Expected = 0x3;
+
+            long* value = stackalloc long[1 + 1] {
+                0,
+                1,
+            };
+
+            Vector64.Create(Expected).StoreUnsafe(ref value[0], 1);
+
+            // The element in front of the requested index must be left untouched.
+            Assert.Equal((long)0, value[0]);
+
+            for (int index = 0; index < Vector64<long>.Count; index++)
+            {
+                Assert.Equal(Expected, value[index + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NIntStoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes the vector
+            // starting at element 1 of the buffer.
+            //
+            // The stored value is deliberately one that the seed data (0, 1 and, when not 64-bit, 2)
+            // does not contain. In a 64-bit process the only checked slot is index 1, which is
+            // seeded with 1, so storing 1 would pass even if the store did nothing or ignored the index.
+            const nint Expected = 0x3;
+
+            // One vector element in a 64-bit process, two otherwise, plus the leading element
+            // that the index skips over.
+            int length = (Environment.Is64BitProcess ? 1 : 2) + 1;
+            nint* value = stackalloc nint[length];
+
+            for (int index = 0; index < length; index++)
+            {
+                value[index] = index;
+            }
+
+            Vector64.Create(Expected).StoreUnsafe(ref value[0], 1);
+
+            // The element in front of the requested index must be left untouched.
+            Assert.Equal((nint)0, value[0]);
+
+            for (int index = 0; index < Vector64<nint>.Count; index++)
+            {
+                Assert.Equal(Expected, value[index + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64NUIntStoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes the vector
+            // starting at element 1 of the buffer.
+            //
+            // The stored value is deliberately one that the seed data (0, 1 and, when not 64-bit, 2)
+            // does not contain. In a 64-bit process the only checked slot is index 1, which is
+            // seeded with 1, so storing 1 would pass even if the store did nothing or ignored the index.
+            const nuint Expected = 0x3;
+
+            // One vector element in a 64-bit process, two otherwise, plus the leading element
+            // that the index skips over.
+            int length = (Environment.Is64BitProcess ? 1 : 2) + 1;
+            nuint* value = stackalloc nuint[length];
+
+            for (int index = 0; index < length; index++)
+            {
+                value[index] = (nuint)index;
+            }
+
+            Vector64.Create(Expected).StoreUnsafe(ref value[0], 1);
+
+            // The element in front of the requested index must be left untouched.
+            Assert.Equal((nuint)0, value[0]);
+
+            for (int index = 0; index < Vector64<nuint>.Count; index++)
+            {
+                Assert.Equal(Expected, value[index + 1]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SByteStoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes 1 to the
+            // elements starting at position 1 of a stack buffer seeded with 0..8.
+            const sbyte Expected = 0x1;
+            const int Offset = 1;
+
+            sbyte* buffer = stackalloc sbyte[8 + 1] { 0, 1, 2, 3, 4, 5, 6, 7, 8 };
+
+            Vector64<sbyte> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0], Offset);
+
+            for (int lane = 0; lane < Vector64<sbyte>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane + Offset]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64SingleStoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes 1 to the
+            // elements starting at position 1 of a stack buffer seeded with 0..2.
+            const float Expected = 0x1;
+            const int Offset = 1;
+
+            float* buffer = stackalloc float[2 + 1] { 0, 1, 2 };
+
+            Vector64<float> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0], Offset);
+
+            for (int lane = 0; lane < Vector64<float>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane + Offset]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt16StoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes 1 to the
+            // elements starting at position 1 of a stack buffer seeded with 0..4.
+            const ushort Expected = 0x1;
+            const int Offset = 1;
+
+            ushort* buffer = stackalloc ushort[4 + 1] { 0, 1, 2, 3, 4 };
+
+            Vector64<ushort> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0], Offset);
+
+            for (int lane = 0; lane < Vector64<ushort>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane + Offset]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt32StoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes 1 to the
+            // elements starting at position 1 of a stack buffer seeded with 0..2.
+            const uint Expected = 0x1;
+            const int Offset = 1;
+
+            uint* buffer = stackalloc uint[2 + 1] { 0, 1, 2 };
+
+            Vector64<uint> vector = Vector64.Create(Expected);
+            vector.StoreUnsafe(ref buffer[0], Offset);
+
+            for (int lane = 0; lane < Vector64<uint>.Count; lane++)
+            {
+                Assert.Equal(Expected, buffer[lane + Offset]);
+            }
+        }
+
+        [Fact]
+        public unsafe void Vector64UInt64StoreUnsafeIndexTest()
+        {
+            // Checks that the index overload of StoreUnsafe, called with index 1, writes the vector
+            // starting at element 1 of the buffer.
+            //
+            // The stored value is deliberately one that the seed data does not contain. The buffer
+            // starts as { 0, 1 }, so storing 1 would leave the only checked slot (index 1) looking
+            // correct even if the store did nothing or ignored the index.
+            const ulong Expected = 0x3;
+
+            ulong* value = stackalloc ulong[1 + 1] {
+                0,
+                1,
+            };
+
+            Vector64.Create(Expected).StoreUnsafe(ref value[0], 1);
+
+            // The element in front of the requested index must be left untouched.
+            Assert.Equal((ulong)0, value[0]);
+
+            for (int index = 0; index < Vector64<ulong>.Count; index++)
+            {
+                Assert.Equal(Expected, value[index + 1]);
+            }
+        }
+
+        [Fact]
+        public void Vector64ByteSumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 8.
+            Vector64<byte> ones = Vector64.Create((byte)0x01);
+            byte actual = Vector64.Sum(ones);
+
+            Assert.Equal((byte)8, actual);
+        }
+
+        [Fact]
+        public void Vector64DoubleSumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 1.0.
+            Vector64<double> ones = Vector64.Create((double)0x01);
+            double actual = Vector64.Sum(ones);
+
+            Assert.Equal(1.0, actual);
+        }
+
+        [Fact]
+        public void Vector64Int16SumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 4.
+            Vector64<short> ones = Vector64.Create((short)0x01);
+            short actual = Vector64.Sum(ones);
+
+            Assert.Equal((short)4, actual);
+        }
+
+        [Fact]
+        public void Vector64Int32SumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 2.
+            Vector64<int> ones = Vector64.Create((int)0x01);
+            int actual = Vector64.Sum(ones);
+
+            Assert.Equal((int)2, actual);
+        }
+
+        [Fact]
+        public void Vector64Int64SumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 1.
+            Vector64<long> ones = Vector64.Create((long)0x01);
+            long actual = Vector64.Sum(ones);
+
+            Assert.Equal((long)1, actual);
+        }
+
+        [Fact]
+        public void Vector64NIntSumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 1 in a 64-bit process and 2 otherwise.
+            Vector64<nint> ones = Vector64.Create((nint)0x01);
+            nint expected = Environment.Is64BitProcess ? (nint)1 : (nint)2;
+
+            Assert.Equal(expected, Vector64.Sum(ones));
+        }
+
+        [Fact]
+        public void Vector64NUIntSumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 1 in a 64-bit process and 2 otherwise.
+            Vector64<nuint> ones = Vector64.Create((nuint)0x01);
+            nuint expected = Environment.Is64BitProcess ? (nuint)1 : (nuint)2;
+
+            Assert.Equal(expected, Vector64.Sum(ones));
+        }
+
+        [Fact]
+        public void Vector64SByteSumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 8.
+            Vector64<sbyte> ones = Vector64.Create((sbyte)0x01);
+            sbyte actual = Vector64.Sum(ones);
+
+            Assert.Equal((sbyte)8, actual);
+        }
+
+        [Fact]
+        public void Vector64SingleSumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 2.0f.
+            Vector64<float> ones = Vector64.Create((float)0x01);
+            float actual = Vector64.Sum(ones);
+
+            Assert.Equal(2.0f, actual);
+        }
+
+        [Fact]
+        public void Vector64UInt16SumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 4.
+            Vector64<ushort> ones = Vector64.Create((ushort)0x01);
+            ushort actual = Vector64.Sum(ones);
+
+            Assert.Equal((ushort)4, actual);
+        }
+
+        [Fact]
+        public void Vector64UInt32SumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 2.
+            Vector64<uint> ones = Vector64.Create((uint)0x01);
+            uint actual = Vector64.Sum(ones);
+
+            Assert.Equal((uint)2, actual);
+        }
+
+        [Fact]
+        public void Vector64UInt64SumTest()
+        {
+            // Summing a vector whose elements are all 1 is expected to give 1.
+            Vector64<ulong> ones = Vector64.Create((ulong)0x01);
+            ulong actual = Vector64.Sum(ones);
+
+            Assert.Equal((ulong)1, actual);
+        }
+
         [Theory]
         [InlineData(0, 0)]
         [InlineData(1, 1)]
index 41c562f..27cbc4c 100644 (file)
@@ -13601,6 +13601,61 @@ namespace System.Runtime.InteropServices
     {
         public SuppressGCTransitionAttribute() { }
     }
+    public enum UnmanagedType // Every member has an explicit value (2 through 48, with gaps); several are hidden from editors and/or marked obsolete.
+    {
+        Bool = 2,
+        I1 = 3,
+        U1 = 4,
+        I2 = 5,
+        U2 = 6,
+        I4 = 7,
+        U4 = 8,
+        I8 = 9,
+        U8 = 10,
+        R4 = 11,
+        R8 = 12,
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
+        [System.ObsoleteAttribute("Marshalling as Currency may be unavailable in future releases.")]
+        Currency = 15, // values 13 and 14 are not declared here
+        BStr = 19, // values 16 through 18 are not declared here
+        LPStr = 20,
+        LPWStr = 21,
+        LPTStr = 22,
+        ByValTStr = 23,
+        IUnknown = 25, // value 24 is not declared here
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
+        IDispatch = 26,
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
+        Struct = 27,
+        Interface = 28,
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
+        SafeArray = 29,
+        ByValArray = 30,
+        SysInt = 31,
+        SysUInt = 32,
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
+        [System.ObsoleteAttribute("Marshalling as VBByRefString may be unavailable in future releases.")]
+        VBByRefStr = 34, // value 33 is not declared here
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
+        [System.ObsoleteAttribute("Marshalling as AnsiBStr may be unavailable in future releases.")]
+        AnsiBStr = 35,
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
+        [System.ObsoleteAttribute("Marshalling as TBstr may be unavailable in future releases.")]
+        TBStr = 36,
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
+        VariantBool = 37,
+        FunctionPtr = 38,
+        [System.ComponentModel.EditorBrowsableAttribute(System.ComponentModel.EditorBrowsableState.Never)]
+        [System.ObsoleteAttribute("Marshalling arbitrary types may be unavailable in future releases. Specify the type you wish to marshal as.")]
+        AsAny = 40, // value 39 is not declared here
+        LPArray = 42, // value 41 is not declared here
+        LPStruct = 43,
+        CustomMarshaler = 44,
+        Error = 45,
+        IInspectable = 46,
+        HString = 47,
+        LPUTF8Str = 48,
+    }
 }
 namespace System.Runtime.Remoting
 {