Implement AVX2 MaskLoad and MaskStore (#19513)
authorFei Peng <fei.peng@intel.com>
Thu, 23 Aug 2018 00:01:40 +0000 (17:01 -0700)
committerCarol Eidt <carol.eidt@microsoft.com>
Thu, 23 Aug 2018 00:01:40 +0000 (17:01 -0700)
* Implement AVX2 MaskLoad and MaskStore

* Add test cases for AVX2 MaskLoad and MaskStore

* Fix AVX MaskStore tests

* template AVX MaskLoad tests

45 files changed:
src/jit/emitxarch.cpp
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/instrsxarch.h
tests/src/JIT/HardwareIntrinsics/X86/Avx/Avx_r.csproj
tests/src/JIT/HardwareIntrinsics/X86/Avx/Avx_ro.csproj
tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad.Double.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad.Single.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad.cs [deleted file]
tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad_r.csproj [deleted file]
tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad_ro.csproj [deleted file]
tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskStore.Double.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskStore.Single.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx/Program.Avx.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx2/Avx2_r.csproj
tests/src/JIT/HardwareIntrinsics/X86/Avx2/Avx2_ro.csproj
tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.Int32.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.Int64.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.UInt32.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.UInt64.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.Int32.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.Int64.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.UInt32.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.UInt64.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2/Program.Avx2.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/Avx2_r.csproj
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/Avx2_ro.csproj
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.Int32.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.Int64.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.UInt32.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.UInt64.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.Int32.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.Int64.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.UInt32.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.UInt64.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/Program.Avx2_Vector128.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/Avx_r.csproj
tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/Avx_ro.csproj
tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/MaskLoad.Double.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/MaskLoad.Single.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/MaskStore.Double.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/MaskStore.Single.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/Program.Avx_Vector128.cs
tests/src/JIT/HardwareIntrinsics/X86/Shared/GenerateTests.csx
tests/src/JIT/HardwareIntrinsics/X86/Shared/LoadBinOpTest.template [new file with mode: 0644]

index 08860079c4f3881e1f1866acd3b6bf94bafa9a18..c2b5c934bfd2a791e9b7d72162c9c396af0a1f0e 100644 (file)
@@ -360,6 +360,8 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
         case INS_vinserti128:
         case INS_vmaskmovps:
         case INS_vmaskmovpd:
+        case INS_vpmaskmovd:
+        case INS_vpmaskmovq:
         case INS_vpblendd:
         case INS_vperm2i128:
         case INS_vperm2f128:
@@ -586,6 +588,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
         case INS_vfnmsub132sd:
         case INS_vfnmsub213sd:
         case INS_vfnmsub231sd:
+        case INS_vpmaskmovq:
             return true;
         default:
             break;
index ad1be6a061e69adc6e78ab9e8fc9ce99ac8b06a2..474cba5406acbdcedc281be4f4a52edc42840e6c 100644 (file)
@@ -164,7 +164,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else if (category == HW_Category_MemoryLoad)
                 {
-                    if (intrinsicId == NI_AVX_MaskLoad)
+                    if (intrinsicId == NI_AVX_MaskLoad || intrinsicId == NI_AVX2_MaskLoad)
                     {
                         emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op2Reg, op1Reg);
                     }
@@ -253,7 +253,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else if (category == HW_Category_MemoryStore)
                 {
-                    if (intrinsicId == NI_AVX_MaskStore)
+                    if (intrinsicId == NI_AVX_MaskStore || intrinsicId == NI_AVX2_MaskStore)
                     {
                         emit->emitIns_AR_R_R(ins, simdSize, op2Reg, op3Reg, op1Reg, 0);
                     }
index f085ff545fe308c4805581a12ceca404ce747953..04486fec65516cf98ac343b0bc09a7d3ba55e3c5 100644 (file)
@@ -436,6 +436,8 @@ HARDWARE_INTRINSIC(AVX2_HorizontalSubtract,                         "HorizontalS
 HARDWARE_INTRINSIC(AVX2_HorizontalSubtractSaturate,                 "HorizontalSubtractSaturate",                   AVX2,         -1,              32,           2,     {INS_invalid,           INS_invalid,        INS_phsubsw,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2_InsertVector128,                            "InsertVector128",                              AVX2,         -1,              32,           3,     {INS_vinserti128,       INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_vinserti128,    INS_invalid,        INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(AVX2_LoadAlignedVector256NonTemporal,            "LoadAlignedVector256NonTemporal",              AVX2,         -1,              32,           1,     {INS_movntdqa,          INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_movntdqa,       INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoContainment)
+HARDWARE_INTRINSIC(AVX2_MaskLoad,                                   "MaskLoad",                                     AVX2,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpmaskmovd,     INS_vpmaskmovd,     INS_vpmaskmovq,     INS_vpmaskmovq,     INS_invalid,        INS_invalid},           HW_Category_MemoryLoad,             HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize)
+HARDWARE_INTRINSIC(AVX2_MaskStore,                                  "MaskStore",                                    AVX2,         -1,               0,           3,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_vpmaskmovd,     INS_vpmaskmovd,     INS_vpmaskmovq,     INS_vpmaskmovq,     INS_invalid,        INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoContainment|HW_Flag_UnfixedSIMDSize|HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(AVX2_Max,                                        "Max",                                          AVX2,         -1,              32,           2,     {INS_pmaxsb,            INS_pmaxub,         INS_pmaxsw,         INS_pmaxuw,         INS_pmaxsd,         INS_pmaxud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_Min,                                        "Min",                                          AVX2,         -1,              32,           2,     {INS_pminsb,            INS_pminub,         INS_pminsw,         INS_pminuw,         INS_pminsd,         INS_pminud,         INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
 HARDWARE_INTRINSIC(AVX2_MoveMask,                                   "MoveMask",                                     AVX2,         -1,              32,           1,     {INS_pmovmskb,          INS_pmovmskb,       INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
index e38638aa4095b4b74c53022600a610ba372ae133..7fc0f9f182fad90a3f52043b15d474f0be38a3f4 100644 (file)
@@ -500,6 +500,8 @@ INST3(vbroadcastf128,"broadcastf128",0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SS
 INST3(vbroadcasti128,"broadcasti128",0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0x5A))   // Broadcast packed integer values read from memory to entire ymm register
 INST3(vmaskmovps,    "maskmovps"    ,0, IUM_WR, 0, 0, SSE38(0x2E),  BAD_CODE, SSE38(0x2C))   // Conditional SIMD Packed Single-Precision Floating-Point Loads and Stores
 INST3(vmaskmovpd,    "maskmovpd"    ,0, IUM_WR, 0, 0, SSE38(0x2F),  BAD_CODE, SSE38(0x2D))   // Conditional SIMD Packed Double-Precision Floating-Point Loads and Stores
+INST3(vpmaskmovd,    "pmaskmovd"    ,0, IUM_WR, 0, 0, SSE38(0x8E),  BAD_CODE, SSE38(0x8C))   // Conditional SIMD Integer Packed Dword Loads and Stores
+INST3(vpmaskmovq,    "pmaskmovq"    ,0, IUM_WR, 0, 0, SSE38(0x8E),  BAD_CODE, SSE38(0x8C))   // Conditional SIMD Integer Packed Qword Loads and Stores
 
 INST3(FIRST_FMA_INSTRUCTION, "FIRST_FMA_INSTRUCTION",  0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
 //    enum            name             FP updmode rf wf MR            MI        RM
index 64b40616aa4f594246b31cc25882d0245dad2ec7..719ed25228b83ec0f2f4b1b4c2c794ffc13f9e6f 100644 (file)
     <Compile Include="GetLowerHalf.UInt16.cs" />
     <Compile Include="GetLowerHalf.UInt32.cs" />
     <Compile Include="GetLowerHalf.UInt64.cs" />
+    <Compile Include="MaskLoad.Double.cs" />
+    <Compile Include="MaskLoad.Single.cs" />
     <Compile Include="MaskStore.Double.cs" />
     <Compile Include="MaskStore.Single.cs" />
     <Compile Include="Max.Double.cs" />
index 7552265a247b3a704c077a7c60145acfa0749abf..468eed4a276cf90ef2d0f8e4f12150d89b1c1bc7 100644 (file)
     <Compile Include="GetLowerHalf.UInt16.cs" />
     <Compile Include="GetLowerHalf.UInt32.cs" />
     <Compile Include="GetLowerHalf.UInt64.cs" />
+    <Compile Include="MaskLoad.Double.cs" />
+    <Compile Include="MaskLoad.Single.cs" />
     <Compile Include="MaskStore.Double.cs" />
     <Compile Include="MaskStore.Single.cs" />
     <Compile Include="Max.Double.cs" />
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad.Double.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad.Double.cs
new file mode 100644 (file)
index 0000000..a5e9158
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadDouble()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadDouble();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadDouble
+    {
+        private struct TestStruct
+        {
+            public Vector256<Double> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetDouble(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetDouble(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Double>, byte>(ref testStruct._fld2), ref Unsafe.As<Double, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Double>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadDouble testClass)
+            {
+                var result = Avx.MaskLoad((Double*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Double>>() / sizeof(Double);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Double>>() / sizeof(Double);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Double>>() / sizeof(Double);
+
+        private static Double[] _data1 = new Double[Op1ElementCount];
+        private static Double[] _data2 = new Double[Op2ElementCount];
+
+        private static Vector256<Double> _clsVar2;
+
+        private Vector256<Double> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Double, Double, Double> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadDouble()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetDouble(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetDouble(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Double>, byte>(ref _clsVar2), ref Unsafe.As<Double, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Double>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadDouble()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetDouble(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetDouble(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Double>, byte>(ref _fld2), ref Unsafe.As<Double, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Double>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetDouble(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetDouble(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Double, Double, Double>(_data1, _data2, new Double[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx.MaskLoad(
+                (Double*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector256<Double>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx.MaskLoad(
+                (Double*)_dataTable.inArray1Ptr,
+                Avx.LoadVector256((Double*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx.MaskLoad(
+                (Double*)_dataTable.inArray1Ptr,
+                Avx.LoadAlignedVector256((Double*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Double*), typeof(Vector256<Double>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Double*)),
+                                        Unsafe.Read<Vector256<Double>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Double>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Double*), typeof(Vector256<Double>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Double*)),
+                                        Avx.LoadVector256((Double*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Double>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Double*), typeof(Vector256<Double>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Double*)),
+                                        Avx.LoadAlignedVector256((Double*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Double>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx.MaskLoad(
+                (Double*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (Double*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector256<Double>>(_dataTable.inArray2Ptr);
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (Double*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadVector256((Double*)(_dataTable.inArray2Ptr));
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (Double*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadAlignedVector256((Double*)(_dataTable.inArray2Ptr));
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadDouble();
+            var result = Avx.MaskLoad((Double*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx.MaskLoad((Double*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx.MaskLoad((Double*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector256<Double> right, void* result, [CallerMemberName] string method = "")
+        {
+            Double[] inArray1 = new Double[Op1ElementCount];
+            Double[] inArray2 = new Double[Op2ElementCount];
+            Double[] outArray = new Double[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Double>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<Double, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Double>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Double[] inArray1 = new Double[Op1ElementCount];
+            Double[] inArray2 = new Double[Op2ElementCount];
+            Double[] outArray = new Double[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Double>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Double>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Double>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Double[] left, Double[] right, Double[] result, [CallerMemberName] string method = "")
+        {
+            if (BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(right[0]) < 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(right[i]) < 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx)}.{nameof(Avx.MaskLoad)}<Double>(Double*, Vector256<Double>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad.Single.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad.Single.cs
new file mode 100644 (file)
index 0000000..4d96d68
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadSingle()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadSingle();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadSingle
+    {
+        private struct TestStruct
+        {
+            public Vector256<Single> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetSingle(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSingle(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Single>, byte>(ref testStruct._fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Single>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadSingle testClass)
+            {
+                var result = Avx.MaskLoad((Single*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Single>>() / sizeof(Single);
+
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        private static Vector256<Single> _clsVar2;
+
+        private Vector256<Single> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadSingle()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetSingle(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSingle(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Single>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadSingle()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetSingle(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSingle(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetSingle(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSingle(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx.MaskLoad(
+                (Single*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx.MaskLoad(
+                (Single*)_dataTable.inArray1Ptr,
+                Avx.LoadVector256((Single*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx.MaskLoad(
+                (Single*)_dataTable.inArray1Ptr,
+                Avx.LoadAlignedVector256((Single*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Single*), typeof(Vector256<Single>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Single*)),
+                                        Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Single*), typeof(Vector256<Single>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Single*)),
+                                        Avx.LoadVector256((Single*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Single*), typeof(Vector256<Single>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Single*)),
+                                        Avx.LoadAlignedVector256((Single*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx.MaskLoad(
+                (Single*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (Single*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector256<Single>>(_dataTable.inArray2Ptr);
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (Single*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadVector256((Single*)(_dataTable.inArray2Ptr));
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (Single*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadAlignedVector256((Single*)(_dataTable.inArray2Ptr));
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadSingle();
+            var result = Avx.MaskLoad((Single*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx.MaskLoad((Single*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx.MaskLoad((Single*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector256<Single> right, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Single>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
+        {
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[0]) < 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[i]) < 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx)}.{nameof(Avx.MaskLoad)}<Single>(Single*, Vector256<Single>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad.cs
deleted file mode 100644 (file)
index cb1ca41..0000000
+++ /dev/null
@@ -1,140 +0,0 @@
-// Licensed to the .NET Foundation under one or more agreements.
-// The .NET Foundation licenses this file to you under the MIT license.
-// See the LICENSE file in the project root for more information.
-//
-
-using System;
-using System.Runtime.CompilerServices;
-using System.Runtime.InteropServices;
-using System.Runtime.Intrinsics.X86;
-using System.Runtime.Intrinsics;
-
-namespace IntelHardwareIntrinsicTest
-{
-    class Program
-    {
-        const int Pass = 100;
-        const int Fail = 0;
-
-        static unsafe int Main(string[] args)
-        {
-            int testResult = Pass;
-
-            if (Avx.IsSupported)
-            {
-                using (TestTable<float, uint> floatTable = new TestTable<float, uint>(new float[8] { 1, -5, 100, 0, 1, 2, 3, 4 }, new uint[8] { uint.MaxValue, uint.MaxValue, 0, 0, uint.MaxValue, uint.MaxValue, 0, 0 }, new float[8]))
-                {
-                    Vector256<float> vf = Avx.MaskLoad((float*)(floatTable.inArrayPtr), Avx.LoadVector256((float*)(floatTable.maskArrayPtr)));
-                    Unsafe.Write(floatTable.outArrayPtr, vf);
-
-                    if (!floatTable.CheckResult((x, m, y) => m == uint.MaxValue ? BitConverter.SingleToInt32Bits(x) == BitConverter.SingleToInt32Bits(y) : BitConverter.SingleToInt32Bits(y) == 0))
-                    {
-                        Console.WriteLine("AVX MaskLoad failed on float:");
-                        foreach (var item in floatTable.outArray)
-                        {
-                            Console.Write(item + ", ");
-                        }
-                        Console.WriteLine();
-                        testResult = Fail;
-                    }
-                }
-
-                using (TestTable<double, ulong> doubleTable = new TestTable<double, ulong>(new double[4] { 1, -5, 100, 0}, new ulong[4] { 0, ulong.MaxValue, ulong.MaxValue, 0}, new double[4]))
-                {
-                    Vector256<double> vf = Avx.MaskLoad((double*)(doubleTable.inArrayPtr), Avx.LoadVector256((double*)(doubleTable.maskArrayPtr)));
-                    Unsafe.Write(doubleTable.outArrayPtr, vf);
-
-                    if (!doubleTable.CheckResult((x, m, y) =>  m == ulong.MaxValue ? BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y) : BitConverter.DoubleToInt64Bits(y) == 0))
-                    {
-                        Console.WriteLine("AVX MaskLoad failed on double:");
-                        foreach (var item in doubleTable.outArray)
-                        {
-                            Console.Write(item + ", ");
-                        }
-                        Console.WriteLine();
-                        testResult = Fail;
-                    }
-                }
-
-                using (TestTable<float, uint> floatTable = new TestTable<float, uint>(new float[4] { 1, -5, 100, 0 }, new uint[4] { uint.MaxValue, 0, 0, uint.MaxValue }, new float[4]))
-                {
-                    Vector128<float> vf = Avx.MaskLoad((float*)(floatTable.inArrayPtr), Sse.LoadVector128((float*)(floatTable.maskArrayPtr)));
-                    Unsafe.Write(floatTable.outArrayPtr, vf);
-
-                    if (!floatTable.CheckResult((x, m, y) => m == uint.MaxValue ? BitConverter.SingleToInt32Bits(x) == BitConverter.SingleToInt32Bits(y) : BitConverter.SingleToInt32Bits(y) == 0))
-                    {
-                        Console.WriteLine("AVX MaskLoad failed on Vector128 float:");
-                        foreach (var item in floatTable.outArray)
-                        {
-                            Console.Write(item + ", ");
-                        }
-                        Console.WriteLine();
-                        testResult = Fail;
-                    }
-                }
-
-                using (TestTable<double, ulong> doubleTable = new TestTable<double, ulong>(new double[2] { 1, -5}, new ulong[2] { 0, ulong.MaxValue}, new double[2]))
-                {
-                    Vector128<double> vf = Avx.MaskLoad((double*)(doubleTable.inArrayPtr), Sse2.LoadVector128((double*)(doubleTable.maskArrayPtr)));
-                    Unsafe.Write(doubleTable.outArrayPtr, vf);
-
-                    if (!doubleTable.CheckResult((x, m, y) =>  m == ulong.MaxValue ? BitConverter.DoubleToInt64Bits(x) == BitConverter.DoubleToInt64Bits(y) : BitConverter.DoubleToInt64Bits(y) == 0))
-                    {
-                        Console.WriteLine("AVX MaskLoad failed on double:");
-                        foreach (var item in doubleTable.outArray)
-                        {
-                            Console.Write(item + ", ");
-                        }
-                        Console.WriteLine();
-                        testResult = Fail;
-                    }
-                }
-            }
-
-            return testResult;
-        }
-
-        public unsafe struct TestTable<T, U> : IDisposable where T : struct
-        {
-            public T[] inArray;
-            public U[] maskArray;
-            public T[] outArray;
-
-            public void* inArrayPtr => inHandle.AddrOfPinnedObject().ToPointer();
-            public void* maskArrayPtr => maskHandle.AddrOfPinnedObject().ToPointer();
-            public void* outArrayPtr => outHandle.AddrOfPinnedObject().ToPointer();
-
-            GCHandle inHandle;
-            GCHandle maskHandle;
-            GCHandle outHandle;
-            public TestTable(T[] a, U[] mask, T[] b)
-            {
-                this.inArray = a;
-                this.maskArray = mask;
-                this.outArray = b;
-
-                inHandle = GCHandle.Alloc(inArray, GCHandleType.Pinned);
-                maskHandle = GCHandle.Alloc(maskArray, GCHandleType.Pinned);
-                outHandle = GCHandle.Alloc(outArray, GCHandleType.Pinned);
-            }
-            public bool CheckResult(Func<T, U, T, bool> check)
-            {
-                for (int i = 0; i < inArray.Length; i++)
-                {
-                    if (!check(inArray[i], maskArray[i], outArray[i]))
-                    {
-                        return false;
-                    }
-                }
-                return true;
-            }
-
-            public void Dispose()
-            {
-                inHandle.Free();
-                outHandle.Free();
-            }
-        }
-
-    }
-}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad_r.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad_r.csproj
deleted file mode 100644 (file)
index 49c7463..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?> 
-<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">       
-  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />     
-  <PropertyGroup>      
-    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>        
-    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>      
-    <SchemaVersion>2.0</SchemaVersion> 
-    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>  
-    <OutputType>Exe</OutputType>       
-    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids> 
-    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>      
-    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>        
-  </PropertyGroup>     
-  <!-- Default configurations to help VS understand the configurations -->     
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>       
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />   
-  <ItemGroup>  
-    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">   
-      <Visible>False</Visible> 
-    </CodeAnalysisDependentAssemblyPaths>      
-  </ItemGroup> 
-  <PropertyGroup>      
-    <DebugType>Embedded</DebugType>    
-    <Optimize></Optimize>      
-  </PropertyGroup>     
-  <ItemGroup>  
-    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />       
-  </ItemGroup> 
-  <ItemGroup>  
-    <Compile Include="MaskLoad.cs" />  
-  </ItemGroup> 
-  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> 
-  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>   
-</Project>
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad_ro.csproj b/tests/src/JIT/HardwareIntrinsics/X86/Avx/MaskLoad_ro.csproj
deleted file mode 100644 (file)
index 559feb3..0000000
+++ /dev/null
@@ -1,34 +0,0 @@
-<?xml version="1.0" encoding="utf-8"?> 
-<Project ToolsVersion="12.0" DefaultTargets="Build" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">       
-  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.props))\dir.props" />     
-  <PropertyGroup>      
-    <Configuration Condition=" '$(Configuration)' == '' ">Debug</Configuration>        
-    <Platform Condition=" '$(Platform)' == '' ">AnyCPU</Platform>      
-    <SchemaVersion>2.0</SchemaVersion> 
-    <ProjectGuid>{95DFC527-4DC1-495E-97D7-E94EE1F7140D}</ProjectGuid>  
-    <OutputType>Exe</OutputType>       
-    <ProjectTypeGuids>{786C830F-07A1-408B-BD7F-6EE04809D6DB};{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}</ProjectTypeGuids> 
-    <SolutionDir Condition="$(SolutionDir) == '' Or $(SolutionDir) == '*Undefined*'">..\..\</SolutionDir>      
-    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>        
-  </PropertyGroup>     
-  <!-- Default configurations to help VS understand the configurations -->     
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Debug|AnyCPU' "></PropertyGroup>       
-  <PropertyGroup Condition=" '$(Configuration)|$(Platform)' == 'Release|AnyCPU' " />   
-  <ItemGroup>  
-    <CodeAnalysisDependentAssemblyPaths Condition=" '$(VS100COMNTOOLS)' != '' " Include="$(VS100COMNTOOLS)..\IDE\PrivateAssemblies">   
-      <Visible>False</Visible> 
-    </CodeAnalysisDependentAssemblyPaths>      
-  </ItemGroup> 
-  <PropertyGroup>      
-    <DebugType>Embedded</DebugType>    
-    <Optimize>True</Optimize>  
-  </PropertyGroup>     
-  <ItemGroup>  
-    <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />       
-  </ItemGroup> 
-  <ItemGroup>  
-    <Compile Include="MaskLoad.cs" />  
-  </ItemGroup> 
-  <Import Project="$([MSBuild]::GetDirectoryNameOfFileAbove($(MSBuildThisFileDirectory), dir.targets))\dir.targets" /> 
-  <PropertyGroup Condition=" '$(MsBuildProjectDirOverride)' != '' "></PropertyGroup>   
-</Project>
index 705a865fc7757f5f236c019ee980be141b2e7cad..6b505d11cef50dbe76d6c1774a2a24aa0805effd 100644 (file)
@@ -366,7 +366,7 @@ namespace JIT.HardwareIntrinsics.X86
 
         private void ValidateResult(Double[] left, Double[] right, Double[] result, [CallerMemberName] string method = "")
         {
-            if (BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[0]) < 0) ? right[0] : 0))
+            if (BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[0]) < 0) ? right[0] : BitConverter.DoubleToInt64Bits(result[0])))
             {
                 Succeeded = false;
             }
@@ -374,7 +374,7 @@ namespace JIT.HardwareIntrinsics.X86
             {
                 for (var i = 1; i < RetElementCount; i++)
                 {
-                    if (BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[i]) < 0) ? right[i] : 0))
+                    if (BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[i]) < 0) ? right[i] : BitConverter.DoubleToInt64Bits(result[i])))
                     {
                         Succeeded = false;
                         break;
index fde559f71060e2577bd9b1538d485e5063508b28..67de485806b10e0cb2cb81e26a0084608bfb806a 100644 (file)
@@ -366,7 +366,7 @@ namespace JIT.HardwareIntrinsics.X86
 
         private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
         {
-            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[0]) < 0) ? right[0] : 0))
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[0]) < 0) ? right[0] : BitConverter.SingleToInt32Bits(result[0])))
             {
                 Succeeded = false;
             }
@@ -374,7 +374,7 @@ namespace JIT.HardwareIntrinsics.X86
             {
                 for (var i = 1; i < RetElementCount; i++)
                 {
-                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[i]) < 0) ? right[i] : 0))
+                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[i]) < 0) ? right[i] : BitConverter.SingleToInt32Bits(result[i])))
                     {
                         Succeeded = false;
                         break;
index 72f08b5699f4fbf38a8fb8d5a98f300fddf540f9..1e77cdac5218967ba6d17e7338ff12c7279062b2 100644 (file)
@@ -113,6 +113,8 @@ namespace JIT.HardwareIntrinsics.X86
                 ["GetLowerHalf.UInt16"] = GetLowerHalfUInt16,
                 ["GetLowerHalf.UInt32"] = GetLowerHalfUInt32,
                 ["GetLowerHalf.UInt64"] = GetLowerHalfUInt64,
+                ["MaskLoad.Double"] = MaskLoadDouble,
+                ["MaskLoad.Single"] = MaskLoadSingle,
                 ["MaskStore.Double"] = MaskStoreDouble,
                 ["MaskStore.Single"] = MaskStoreSingle,
                 ["Max.Double"] = MaxDouble,
index 14c3683aea904fc55b9321da051fcd46af06459b..70d8773964dc526b1e6ce1369e2473b8731feb2d 100644 (file)
     <Compile Include="InsertVector128.UInt32.1.Load.cs" />
     <Compile Include="InsertVector128.Int64.1.Load.cs" />
     <Compile Include="InsertVector128.UInt64.1.Load.cs" />
+    <Compile Include="MaskLoad.Int32.cs" />
+    <Compile Include="MaskLoad.UInt32.cs" />
+    <Compile Include="MaskLoad.Int64.cs" />
+    <Compile Include="MaskLoad.UInt64.cs" />
+    <Compile Include="MaskStore.Int32.cs" />
+    <Compile Include="MaskStore.UInt32.cs" />
+    <Compile Include="MaskStore.Int64.cs" />
+    <Compile Include="MaskStore.UInt64.cs" />
     <Compile Include="Max.Byte.cs" />
     <Compile Include="Max.Int16.cs" />
     <Compile Include="Max.Int32.cs" />
index dc8cab48313147940770a2438fedbd9c4124c230..305f65d7c39e62b297d19e8e248b9b6f9ec83909 100644 (file)
     <Compile Include="InsertVector128.UInt32.1.Load.cs" />
     <Compile Include="InsertVector128.Int64.1.Load.cs" />
     <Compile Include="InsertVector128.UInt64.1.Load.cs" />
+    <Compile Include="MaskLoad.Int32.cs" />
+    <Compile Include="MaskLoad.UInt32.cs" />
+    <Compile Include="MaskLoad.Int64.cs" />
+    <Compile Include="MaskLoad.UInt64.cs" />
+    <Compile Include="MaskStore.Int32.cs" />
+    <Compile Include="MaskStore.UInt32.cs" />
+    <Compile Include="MaskStore.Int64.cs" />
+    <Compile Include="MaskStore.UInt64.cs" />
     <Compile Include="Max.Byte.cs" />
     <Compile Include="Max.Int16.cs" />
     <Compile Include="Max.Int32.cs" />
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.Int32.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.Int32.cs
new file mode 100644 (file)
index 0000000..7b0c3c2
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadInt32()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadInt32();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadInt32
+    {
+        private struct TestStruct
+        {
+            public Vector256<Int32> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadInt32 testClass)
+            {
+                var result = Avx2.MaskLoad((Int32*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data1 = new Int32[Op1ElementCount];
+        private static Int32[] _data2 = new Int32[Op2ElementCount];
+
+        private static Vector256<Int32> _clsVar2;
+
+        private Vector256<Int32> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Int32, Int32, Int32> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadInt32()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadInt32()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Int32, Int32, Int32>(_data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx2.MaskLoad(
+                (Int32*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector256<Int32>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx2.MaskLoad(
+                (Int32*)_dataTable.inArray1Ptr,
+                Avx.LoadVector256((Int32*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx2.MaskLoad(
+                (Int32*)_dataTable.inArray1Ptr,
+                Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int32*), typeof(Vector256<Int32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int32*)),
+                                        Unsafe.Read<Vector256<Int32>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int32*), typeof(Vector256<Int32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int32*)),
+                                        Avx.LoadVector256((Int32*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int32*), typeof(Vector256<Int32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int32*)),
+                                        Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx2.MaskLoad(
+                (Int32*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (Int32*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray2Ptr);
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (Int32*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadVector256((Int32*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (Int32*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadInt32();
+            var result = Avx2.MaskLoad((Int32*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx2.MaskLoad((Int32*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx2.MaskLoad((Int32*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector256<Int32> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray1 = new Int32[Op1ElementCount];
+            Int32[] inArray2 = new Int32[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray1 = new Int32[Op1ElementCount];
+            Int32[] inArray2 = new Int32[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] left, Int32[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != ((right[0] < 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != ((right[i] < 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskLoad)}<Int32>(Int32*, Vector256<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.Int64.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.Int64.cs
new file mode 100644 (file)
index 0000000..30467b2
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadInt64()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadInt64();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadInt64
+    {
+        private struct TestStruct
+        {
+            public Vector256<Int64> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int64>, byte>(ref testStruct._fld2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadInt64 testClass)
+            {
+                var result = Avx2.MaskLoad((Int64*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Int64>>() / sizeof(Int64);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Int64>>() / sizeof(Int64);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int64>>() / sizeof(Int64);
+
+        private static Int64[] _data1 = new Int64[Op1ElementCount];
+        private static Int64[] _data2 = new Int64[Op2ElementCount];
+
+        private static Vector256<Int64> _clsVar2;
+
+        private Vector256<Int64> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Int64, Int64, Int64> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadInt64()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int64>, byte>(ref _clsVar2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadInt64()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int64>, byte>(ref _fld2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Int64, Int64, Int64>(_data1, _data2, new Int64[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx2.MaskLoad(
+                (Int64*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector256<Int64>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx2.MaskLoad(
+                (Int64*)_dataTable.inArray1Ptr,
+                Avx.LoadVector256((Int64*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx2.MaskLoad(
+                (Int64*)_dataTable.inArray1Ptr,
+                Avx.LoadAlignedVector256((Int64*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int64*), typeof(Vector256<Int64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int64*)),
+                                        Unsafe.Read<Vector256<Int64>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int64*), typeof(Vector256<Int64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int64*)),
+                                        Avx.LoadVector256((Int64*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int64*), typeof(Vector256<Int64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int64*)),
+                                        Avx.LoadAlignedVector256((Int64*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx2.MaskLoad(
+                (Int64*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (Int64*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector256<Int64>>(_dataTable.inArray2Ptr);
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (Int64*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadVector256((Int64*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (Int64*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadAlignedVector256((Int64*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadInt64();
+            var result = Avx2.MaskLoad((Int64*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx2.MaskLoad((Int64*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx2.MaskLoad((Int64*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector256<Int64> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int64[] inArray1 = new Int64[Op1ElementCount];
+            Int64[] inArray2 = new Int64[Op2ElementCount];
+            Int64[] outArray = new Int64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int64, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int64[] inArray1 = new Int64[Op1ElementCount];
+            Int64[] inArray2 = new Int64[Op2ElementCount];
+            Int64[] outArray = new Int64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int64[] left, Int64[] right, Int64[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != ((right[0] < 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != ((right[i] < 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskLoad)}<Int64>(Int64*, Vector256<Int64>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.UInt32.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.UInt32.cs
new file mode 100644 (file)
index 0000000..8792e98
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadUInt32()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadUInt32();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadUInt32
+    {
+        private struct TestStruct
+        {
+            public Vector256<UInt32> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt32>, byte>(ref testStruct._fld2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadUInt32 testClass)
+            {
+                var result = Avx2.MaskLoad((UInt32*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<UInt32>>() / sizeof(UInt32);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<UInt32>>() / sizeof(UInt32);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<UInt32>>() / sizeof(UInt32);
+
+        private static UInt32[] _data1 = new UInt32[Op1ElementCount];
+        private static UInt32[] _data2 = new UInt32[Op2ElementCount];
+
+        private static Vector256<UInt32> _clsVar2;
+
+        private Vector256<UInt32> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<UInt32, UInt32, UInt32> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadUInt32()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt32>, byte>(ref _clsVar2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadUInt32()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt32>, byte>(ref _fld2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<UInt32, UInt32, UInt32>(_data1, _data2, new UInt32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx2.MaskLoad(
+                (UInt32*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector256<UInt32>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx2.MaskLoad(
+                (UInt32*)_dataTable.inArray1Ptr,
+                Avx.LoadVector256((UInt32*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx2.MaskLoad(
+                (UInt32*)_dataTable.inArray1Ptr,
+                Avx.LoadAlignedVector256((UInt32*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt32*), typeof(Vector256<UInt32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt32*)),
+                                        Unsafe.Read<Vector256<UInt32>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<UInt32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt32*), typeof(Vector256<UInt32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt32*)),
+                                        Avx.LoadVector256((UInt32*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<UInt32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt32*), typeof(Vector256<UInt32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt32*)),
+                                        Avx.LoadAlignedVector256((UInt32*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<UInt32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx2.MaskLoad(
+                (UInt32*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (UInt32*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector256<UInt32>>(_dataTable.inArray2Ptr);
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (UInt32*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadVector256((UInt32*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (UInt32*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadAlignedVector256((UInt32*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadUInt32();
+            var result = Avx2.MaskLoad((UInt32*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx2.MaskLoad((UInt32*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx2.MaskLoad((UInt32*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector256<UInt32> right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt32[] inArray1 = new UInt32[Op1ElementCount];
+            UInt32[] inArray2 = new UInt32[Op2ElementCount];
+            UInt32[] outArray = new UInt32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt32[] inArray1 = new UInt32[Op1ElementCount];
+            UInt32[] inArray2 = new UInt32[Op2ElementCount];
+            UInt32[] outArray = new UInt32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(UInt32[] left, UInt32[] right, UInt32[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != (((right[0] & (1U << 31)) != 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != (((right[i] & (1U << 31)) != 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskLoad)}<UInt32>(UInt32*, Vector256<UInt32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.UInt64.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskLoad.UInt64.cs
new file mode 100644 (file)
index 0000000..b32c67b
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadUInt64()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadUInt64();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadUInt64
+    {
+        private struct TestStruct
+        {
+            public Vector256<UInt64> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt64>, byte>(ref testStruct._fld2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadUInt64 testClass)
+            {
+                var result = Avx2.MaskLoad((UInt64*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<UInt64>>() / sizeof(UInt64);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<UInt64>>() / sizeof(UInt64);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<UInt64>>() / sizeof(UInt64);
+
+        private static UInt64[] _data1 = new UInt64[Op1ElementCount];
+        private static UInt64[] _data2 = new UInt64[Op2ElementCount];
+
+        private static Vector256<UInt64> _clsVar2;
+
+        private Vector256<UInt64> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<UInt64, UInt64, UInt64> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadUInt64()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt64>, byte>(ref _clsVar2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadUInt64()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt64>, byte>(ref _fld2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<UInt64, UInt64, UInt64>(_data1, _data2, new UInt64[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx2.MaskLoad(
+                (UInt64*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector256<UInt64>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx2.MaskLoad(
+                (UInt64*)_dataTable.inArray1Ptr,
+                Avx.LoadVector256((UInt64*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx2.MaskLoad(
+                (UInt64*)_dataTable.inArray1Ptr,
+                Avx.LoadAlignedVector256((UInt64*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt64*), typeof(Vector256<UInt64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt64*)),
+                                        Unsafe.Read<Vector256<UInt64>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<UInt64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt64*), typeof(Vector256<UInt64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt64*)),
+                                        Avx.LoadVector256((UInt64*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<UInt64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt64*), typeof(Vector256<UInt64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt64*)),
+                                        Avx.LoadAlignedVector256((UInt64*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<UInt64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx2.MaskLoad(
+                (UInt64*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (UInt64*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector256<UInt64>>(_dataTable.inArray2Ptr);
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (UInt64*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadVector256((UInt64*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (UInt64*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadAlignedVector256((UInt64*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadUInt64();
+            var result = Avx2.MaskLoad((UInt64*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx2.MaskLoad((UInt64*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx2.MaskLoad((UInt64*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector256<UInt64> right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt64[] inArray1 = new UInt64[Op1ElementCount];
+            UInt64[] inArray2 = new UInt64[Op2ElementCount];
+            UInt64[] outArray = new UInt64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt64[] inArray1 = new UInt64[Op1ElementCount];
+            UInt64[] inArray2 = new UInt64[Op2ElementCount];
+            UInt64[] outArray = new UInt64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(UInt64[] left, UInt64[] right, UInt64[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != (((right[0] & (1UL << 63)) != 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != (((right[i] & (1UL << 63)) != 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskLoad)}<UInt64>(UInt64*, Vector256<UInt64>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.Int32.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.Int32.cs
new file mode 100644 (file)
index 0000000..4f4720b
--- /dev/null
@@ -0,0 +1,395 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskStoreInt32()
+        {
+            var test = new StoreBinaryOpTest__MaskStoreInt32();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class StoreBinaryOpTest__MaskStoreInt32
+    {
+        private struct TestStruct
+        {
+            public Vector256<Int32> _fld1;
+            public Vector256<Int32> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld1), ref Unsafe.As<Int32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(StoreBinaryOpTest__MaskStoreInt32 testClass)
+            {
+                Avx2.MaskStore((Int32*)testClass._dataTable.outArrayPtr, _fld1, _fld2);
+                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data1 = new Int32[Op1ElementCount];
+        private static Int32[] _data2 = new Int32[Op2ElementCount];
+
+        private static Vector256<Int32> _clsVar1;
+        private static Vector256<Int32> _clsVar2;
+
+        private Vector256<Int32> _fld1;
+        private Vector256<Int32> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Int32, Int32, Int32> _dataTable;
+
+        static StoreBinaryOpTest__MaskStoreInt32()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar1), ref Unsafe.As<Int32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+        }
+
+        public StoreBinaryOpTest__MaskStoreInt32()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld1), ref Unsafe.As<Int32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Int32, Int32, Int32>(_data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            Avx2.MaskStore(
+                (Int32*)_dataTable.outArrayPtr,
+                Unsafe.Read<Vector256<Int32>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector256<Int32>>(_dataTable.inArray2Ptr)
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            Avx2.MaskStore(
+                (Int32*)_dataTable.outArrayPtr,
+                Avx.LoadVector256((Int32*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector256((Int32*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            Avx2.MaskStore(
+                (Int32*)_dataTable.outArrayPtr,
+                Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int32*), typeof(Vector256<Int32>), typeof(Vector256<Int32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int32*)),
+                            Unsafe.Read<Vector256<Int32>>(_dataTable.inArray1Ptr),
+                            Unsafe.Read<Vector256<Int32>>(_dataTable.inArray2Ptr)
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int32*), typeof(Vector256<Int32>), typeof(Vector256<Int32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int32*)),
+                            Avx.LoadVector256((Int32*)(_dataTable.inArray1Ptr)),
+                            Avx.LoadVector256((Int32*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int32*), typeof(Vector256<Int32>), typeof(Vector256<Int32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int32*)),
+                            Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray1Ptr)),
+                            Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            Avx2.MaskStore(
+                (Int32*)_dataTable.outArrayPtr,
+                _clsVar1,
+                _clsVar2
+            );
+
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray2Ptr);
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = Avx.LoadVector256((Int32*)(_dataTable.inArray1Ptr));
+            var right = Avx.LoadVector256((Int32*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray1Ptr));
+            var right = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new StoreBinaryOpTest__MaskStoreInt32();
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, _fld1, _fld2);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector256<Int32> left, Vector256<Int32> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray1 = new Int32[Op1ElementCount];
+            Int32[] inArray2 = new Int32[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray1 = new Int32[Op1ElementCount];
+            Int32[] inArray2 = new Int32[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] left, Int32[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != ((left[0] < 0) ? right[0] : result[0]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != ((left[i] < 0) ? right[i] : result[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskStore)}<Int32>(Vector256<Int32>, Vector256<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.Int64.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.Int64.cs
new file mode 100644 (file)
index 0000000..30f2fae
--- /dev/null
@@ -0,0 +1,395 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskStoreInt64()
+        {
+            var test = new StoreBinaryOpTest__MaskStoreInt64();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class StoreBinaryOpTest__MaskStoreInt64
+    {
+        private struct TestStruct
+        {
+            public Vector256<Int64> _fld1;
+            public Vector256<Int64> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int64>, byte>(ref testStruct._fld1), ref Unsafe.As<Int64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int64>, byte>(ref testStruct._fld2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(StoreBinaryOpTest__MaskStoreInt64 testClass)
+            {
+                Avx2.MaskStore((Int64*)testClass._dataTable.outArrayPtr, _fld1, _fld2);
+                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Int64>>() / sizeof(Int64);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Int64>>() / sizeof(Int64);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int64>>() / sizeof(Int64);
+
+        private static Int64[] _data1 = new Int64[Op1ElementCount];
+        private static Int64[] _data2 = new Int64[Op2ElementCount];
+
+        private static Vector256<Int64> _clsVar1;
+        private static Vector256<Int64> _clsVar2;
+
+        private Vector256<Int64> _fld1;
+        private Vector256<Int64> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Int64, Int64, Int64> _dataTable;
+
+        static StoreBinaryOpTest__MaskStoreInt64()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int64>, byte>(ref _clsVar1), ref Unsafe.As<Int64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int64>, byte>(ref _clsVar2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+        }
+
+        public StoreBinaryOpTest__MaskStoreInt64()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int64>, byte>(ref _fld1), ref Unsafe.As<Int64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int64>, byte>(ref _fld2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Int64, Int64, Int64>(_data1, _data2, new Int64[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            Avx2.MaskStore(
+                (Int64*)_dataTable.outArrayPtr,
+                Unsafe.Read<Vector256<Int64>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector256<Int64>>(_dataTable.inArray2Ptr)
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            Avx2.MaskStore(
+                (Int64*)_dataTable.outArrayPtr,
+                Avx.LoadVector256((Int64*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector256((Int64*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            Avx2.MaskStore(
+                (Int64*)_dataTable.outArrayPtr,
+                Avx.LoadAlignedVector256((Int64*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector256((Int64*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int64*), typeof(Vector256<Int64>), typeof(Vector256<Int64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int64*)),
+                            Unsafe.Read<Vector256<Int64>>(_dataTable.inArray1Ptr),
+                            Unsafe.Read<Vector256<Int64>>(_dataTable.inArray2Ptr)
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int64*), typeof(Vector256<Int64>), typeof(Vector256<Int64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int64*)),
+                            Avx.LoadVector256((Int64*)(_dataTable.inArray1Ptr)),
+                            Avx.LoadVector256((Int64*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int64*), typeof(Vector256<Int64>), typeof(Vector256<Int64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int64*)),
+                            Avx.LoadAlignedVector256((Int64*)(_dataTable.inArray1Ptr)),
+                            Avx.LoadAlignedVector256((Int64*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            Avx2.MaskStore(
+                (Int64*)_dataTable.outArrayPtr,
+                _clsVar1,
+                _clsVar2
+            );
+
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = Unsafe.Read<Vector256<Int64>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector256<Int64>>(_dataTable.inArray2Ptr);
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = Avx.LoadVector256((Int64*)(_dataTable.inArray1Ptr));
+            var right = Avx.LoadVector256((Int64*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = Avx.LoadAlignedVector256((Int64*)(_dataTable.inArray1Ptr));
+            var right = Avx.LoadAlignedVector256((Int64*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new StoreBinaryOpTest__MaskStoreInt64();
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, _fld1, _fld2);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector256<Int64> left, Vector256<Int64> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int64[] inArray1 = new Int64[Op1ElementCount];
+            Int64[] inArray2 = new Int64[Op2ElementCount];
+            Int64[] outArray = new Int64[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int64, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int64, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int64[] inArray1 = new Int64[Op1ElementCount];
+            Int64[] inArray2 = new Int64[Op2ElementCount];
+            Int64[] outArray = new Int64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int64[] left, Int64[] right, Int64[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != ((left[0] < 0) ? right[0] : result[0]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != ((left[i] < 0) ? right[i] : result[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskStore)}<Int64>(Vector256<Int64>, Vector256<Int64>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.UInt32.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.UInt32.cs
new file mode 100644 (file)
index 0000000..28a5480
--- /dev/null
@@ -0,0 +1,395 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskStoreUInt32()
+        {
+            var test = new StoreBinaryOpTest__MaskStoreUInt32();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class StoreBinaryOpTest__MaskStoreUInt32
+    {
+        private struct TestStruct
+        {
+            public Vector256<UInt32> _fld1;
+            public Vector256<UInt32> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt32>, byte>(ref testStruct._fld1), ref Unsafe.As<UInt32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt32>, byte>(ref testStruct._fld2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(StoreBinaryOpTest__MaskStoreUInt32 testClass)
+            {
+                Avx2.MaskStore((UInt32*)testClass._dataTable.outArrayPtr, _fld1, _fld2);
+                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<UInt32>>() / sizeof(UInt32);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<UInt32>>() / sizeof(UInt32);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<UInt32>>() / sizeof(UInt32);
+
+        private static UInt32[] _data1 = new UInt32[Op1ElementCount];
+        private static UInt32[] _data2 = new UInt32[Op2ElementCount];
+
+        private static Vector256<UInt32> _clsVar1;
+        private static Vector256<UInt32> _clsVar2;
+
+        private Vector256<UInt32> _fld1;
+        private Vector256<UInt32> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<UInt32, UInt32, UInt32> _dataTable;
+
+        static StoreBinaryOpTest__MaskStoreUInt32()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt32>, byte>(ref _clsVar1), ref Unsafe.As<UInt32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt32>, byte>(ref _clsVar2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+        }
+
+        public StoreBinaryOpTest__MaskStoreUInt32()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt32>, byte>(ref _fld1), ref Unsafe.As<UInt32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt32>, byte>(ref _fld2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<UInt32, UInt32, UInt32>(_data1, _data2, new UInt32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            Avx2.MaskStore(
+                (UInt32*)_dataTable.outArrayPtr,
+                Unsafe.Read<Vector256<UInt32>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector256<UInt32>>(_dataTable.inArray2Ptr)
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            Avx2.MaskStore(
+                (UInt32*)_dataTable.outArrayPtr,
+                Avx.LoadVector256((UInt32*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector256((UInt32*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            Avx2.MaskStore(
+                (UInt32*)_dataTable.outArrayPtr,
+                Avx.LoadAlignedVector256((UInt32*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector256((UInt32*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt32*), typeof(Vector256<UInt32>), typeof(Vector256<UInt32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt32*)),
+                            Unsafe.Read<Vector256<UInt32>>(_dataTable.inArray1Ptr),
+                            Unsafe.Read<Vector256<UInt32>>(_dataTable.inArray2Ptr)
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt32*), typeof(Vector256<UInt32>), typeof(Vector256<UInt32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt32*)),
+                            Avx.LoadVector256((UInt32*)(_dataTable.inArray1Ptr)),
+                            Avx.LoadVector256((UInt32*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt32*), typeof(Vector256<UInt32>), typeof(Vector256<UInt32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt32*)),
+                            Avx.LoadAlignedVector256((UInt32*)(_dataTable.inArray1Ptr)),
+                            Avx.LoadAlignedVector256((UInt32*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            Avx2.MaskStore(
+                (UInt32*)_dataTable.outArrayPtr,
+                _clsVar1,
+                _clsVar2
+            );
+
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = Unsafe.Read<Vector256<UInt32>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector256<UInt32>>(_dataTable.inArray2Ptr);
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = Avx.LoadVector256((UInt32*)(_dataTable.inArray1Ptr));
+            var right = Avx.LoadVector256((UInt32*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = Avx.LoadAlignedVector256((UInt32*)(_dataTable.inArray1Ptr));
+            var right = Avx.LoadAlignedVector256((UInt32*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new StoreBinaryOpTest__MaskStoreUInt32();
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, _fld1, _fld2);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector256<UInt32> left, Vector256<UInt32> right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt32[] inArray1 = new UInt32[Op1ElementCount];
+            UInt32[] inArray2 = new UInt32[Op2ElementCount];
+            UInt32[] outArray = new UInt32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt32[] inArray1 = new UInt32[Op1ElementCount];
+            UInt32[] inArray2 = new UInt32[Op2ElementCount];
+            UInt32[] outArray = new UInt32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<UInt32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(UInt32[] left, UInt32[] right, UInt32[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != (((left[0] & (1U << 31)) != 0)  ? right[0] : result[0]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != (((left[i] & (1U << 31)) != 0) ? right[i] : result[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskStore)}<UInt32>(Vector256<UInt32>, Vector256<UInt32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.UInt64.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2/MaskStore.UInt64.cs
new file mode 100644 (file)
index 0000000..1beecb6
--- /dev/null
@@ -0,0 +1,395 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskStoreUInt64()
+        {
+            var test = new StoreBinaryOpTest__MaskStoreUInt64();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class StoreBinaryOpTest__MaskStoreUInt64
+    {
+        private struct TestStruct
+        {
+            public Vector256<UInt64> _fld1;
+            public Vector256<UInt64> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt64>, byte>(ref testStruct._fld1), ref Unsafe.As<UInt64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt64>, byte>(ref testStruct._fld2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(StoreBinaryOpTest__MaskStoreUInt64 testClass)
+            {
+                Avx2.MaskStore((UInt64*)testClass._dataTable.outArrayPtr, _fld1, _fld2);
+                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<UInt64>>() / sizeof(UInt64);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<UInt64>>() / sizeof(UInt64);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<UInt64>>() / sizeof(UInt64);
+
+        private static UInt64[] _data1 = new UInt64[Op1ElementCount];
+        private static UInt64[] _data2 = new UInt64[Op2ElementCount];
+
+        private static Vector256<UInt64> _clsVar1;
+        private static Vector256<UInt64> _clsVar2;
+
+        private Vector256<UInt64> _fld1;
+        private Vector256<UInt64> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<UInt64, UInt64, UInt64> _dataTable;
+
+        static StoreBinaryOpTest__MaskStoreUInt64()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt64>, byte>(ref _clsVar1), ref Unsafe.As<UInt64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt64>, byte>(ref _clsVar2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+        }
+
+        public StoreBinaryOpTest__MaskStoreUInt64()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt64>, byte>(ref _fld1), ref Unsafe.As<UInt64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<UInt64>, byte>(ref _fld2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<UInt64, UInt64, UInt64>(_data1, _data2, new UInt64[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            Avx2.MaskStore(
+                (UInt64*)_dataTable.outArrayPtr,
+                Unsafe.Read<Vector256<UInt64>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector256<UInt64>>(_dataTable.inArray2Ptr)
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            Avx2.MaskStore(
+                (UInt64*)_dataTable.outArrayPtr,
+                Avx.LoadVector256((UInt64*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector256((UInt64*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            Avx2.MaskStore(
+                (UInt64*)_dataTable.outArrayPtr,
+                Avx.LoadAlignedVector256((UInt64*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector256((UInt64*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt64*), typeof(Vector256<UInt64>), typeof(Vector256<UInt64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt64*)),
+                            Unsafe.Read<Vector256<UInt64>>(_dataTable.inArray1Ptr),
+                            Unsafe.Read<Vector256<UInt64>>(_dataTable.inArray2Ptr)
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt64*), typeof(Vector256<UInt64>), typeof(Vector256<UInt64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt64*)),
+                            Avx.LoadVector256((UInt64*)(_dataTable.inArray1Ptr)),
+                            Avx.LoadVector256((UInt64*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt64*), typeof(Vector256<UInt64>), typeof(Vector256<UInt64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt64*)),
+                            Avx.LoadAlignedVector256((UInt64*)(_dataTable.inArray1Ptr)),
+                            Avx.LoadAlignedVector256((UInt64*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            Avx2.MaskStore(
+                (UInt64*)_dataTable.outArrayPtr,
+                _clsVar1,
+                _clsVar2
+            );
+
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = Unsafe.Read<Vector256<UInt64>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector256<UInt64>>(_dataTable.inArray2Ptr);
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = Avx.LoadVector256((UInt64*)(_dataTable.inArray1Ptr));
+            var right = Avx.LoadVector256((UInt64*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = Avx.LoadAlignedVector256((UInt64*)(_dataTable.inArray1Ptr));
+            var right = Avx.LoadAlignedVector256((UInt64*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new StoreBinaryOpTest__MaskStoreUInt64();
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, _fld1, _fld2);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector256<UInt64> left, Vector256<UInt64> right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt64[] inArray1 = new UInt64[Op1ElementCount];
+            UInt64[] inArray2 = new UInt64[Op2ElementCount];
+            UInt64[] outArray = new UInt64[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt64[] inArray1 = new UInt64[Op1ElementCount];
+            UInt64[] inArray2 = new UInt64[Op2ElementCount];
+            UInt64[] outArray = new UInt64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<UInt64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(UInt64[] left, UInt64[] right, UInt64[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != (((left[0] & (1UL << 63)) != 0)  ? right[0] : result[0]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != (((left[i] & (1UL << 63)) != 0) ? right[i] : result[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskStore)}<UInt64>(Vector256<UInt64>, Vector256<UInt64>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
index 3dd3790cf30aaa8e9b54c1c35d5d06bff48daa22..d8a8a3043e27ec4a0648665f6a85d3e17553e2d6 100644 (file)
@@ -127,6 +127,14 @@ namespace JIT.HardwareIntrinsics.X86
                 ["InsertVector128.UInt32.1.Load"] = InsertVector128UInt321Load,
                 ["InsertVector128.Int64.1.Load"] = InsertVector128Int641Load,
                 ["InsertVector128.UInt64.1.Load"] = InsertVector128UInt641Load,
+                ["MaskLoad.Int32"] = MaskLoadInt32,
+                ["MaskLoad.UInt32"] = MaskLoadUInt32,
+                ["MaskLoad.Int64"] = MaskLoadInt64,
+                ["MaskLoad.UInt64"] = MaskLoadUInt64,
+                ["MaskStore.Int32"] = MaskStoreInt32,
+                ["MaskStore.UInt32"] = MaskStoreUInt32,
+                ["MaskStore.Int64"] = MaskStoreInt64,
+                ["MaskStore.UInt64"] = MaskStoreUInt64,
                 ["Max.Int16"] = MaxInt16,
                 ["Max.Byte"] = MaxByte,
                 ["Max.Int32"] = MaxInt32,
index 99106cda7f2d9e2fedb2dd724b53566bc54a330d..68aabac11a43a8b257941b761ede1188bff8a9ca 100644 (file)
     <Compile Include="Blend.UInt32.2.cs" />
     <Compile Include="Blend.UInt32.4.cs" />
     <Compile Include="Blend.UInt32.85.cs" />
+    <Compile Include="MaskLoad.Int32.cs" />
+    <Compile Include="MaskLoad.UInt32.cs" />
+    <Compile Include="MaskLoad.Int64.cs" />
+    <Compile Include="MaskLoad.UInt64.cs" />
+    <Compile Include="MaskStore.Int32.cs" />
+    <Compile Include="MaskStore.UInt32.cs" />
+    <Compile Include="MaskStore.Int64.cs" />
+    <Compile Include="MaskStore.UInt64.cs" />
     <Compile Include="Program.Avx2_Vector128.cs" />
     <Compile Include="..\Shared\Program.cs" />
     <Compile Include="..\Shared\SimpleBinOpTest_DataTable.cs" />
index ae8924c4341a68199ff77809ebe3e5786234c8e8..fc317461f844e0399c079baa124044843354c395 100644 (file)
     <Compile Include="Blend.UInt32.2.cs" />
     <Compile Include="Blend.UInt32.4.cs" />
     <Compile Include="Blend.UInt32.85.cs" />
+    <Compile Include="MaskLoad.Int32.cs" />
+    <Compile Include="MaskLoad.UInt32.cs" />
+    <Compile Include="MaskLoad.Int64.cs" />
+    <Compile Include="MaskLoad.UInt64.cs" />
+    <Compile Include="MaskStore.Int32.cs" />
+    <Compile Include="MaskStore.UInt32.cs" />
+    <Compile Include="MaskStore.Int64.cs" />
+    <Compile Include="MaskStore.UInt64.cs" />
     <Compile Include="Program.Avx2_Vector128.cs" />
     <Compile Include="..\Shared\Program.cs" />
     <Compile Include="..\Shared\SimpleBinOpTest_DataTable.cs" />
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.Int32.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.Int32.cs
new file mode 100644 (file)
index 0000000..ad07ffe
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadInt32()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadInt32();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadInt32
+    {
+        private struct TestStruct
+        {
+            public Vector128<Int32> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadInt32 testClass)
+            {
+                var result = Avx2.MaskLoad((Int32*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data1 = new Int32[Op1ElementCount];
+        private static Int32[] _data2 = new Int32[Op2ElementCount];
+
+        private static Vector128<Int32> _clsVar2;
+
+        private Vector128<Int32> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Int32, Int32, Int32> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadInt32()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadInt32()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Int32, Int32, Int32>(_data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx2.MaskLoad(
+                (Int32*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx2.MaskLoad(
+                (Int32*)_dataTable.inArray1Ptr,
+                Sse2.LoadVector128((Int32*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx2.MaskLoad(
+                (Int32*)_dataTable.inArray1Ptr,
+                Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int32*), typeof(Vector128<Int32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int32*)),
+                                        Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int32*), typeof(Vector128<Int32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int32*)),
+                                        Sse2.LoadVector128((Int32*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int32*), typeof(Vector128<Int32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int32*)),
+                                        Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx2.MaskLoad(
+                (Int32*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (Int32*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr);
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (Int32*)_dataTable.inArray1Ptr;
+            var right = Sse2.LoadVector128((Int32*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (Int32*)_dataTable.inArray1Ptr;
+            var right = Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadInt32();
+            var result = Avx2.MaskLoad((Int32*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx2.MaskLoad((Int32*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx2.MaskLoad((Int32*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector128<Int32> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray1 = new Int32[Op1ElementCount];
+            Int32[] inArray2 = new Int32[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray1 = new Int32[Op1ElementCount];
+            Int32[] inArray2 = new Int32[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] left, Int32[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != ((right[0] < 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != ((right[i] < 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskLoad)}<Int32>(Int32*, Vector128<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.Int64.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.Int64.cs
new file mode 100644 (file)
index 0000000..f81bbf7
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadInt64()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadInt64();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadInt64
+    {
+        private struct TestStruct
+        {
+            public Vector128<Int64> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref testStruct._fld2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadInt64 testClass)
+            {
+                var result = Avx2.MaskLoad((Int64*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Int64>>() / sizeof(Int64);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Int64>>() / sizeof(Int64);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int64>>() / sizeof(Int64);
+
+        private static Int64[] _data1 = new Int64[Op1ElementCount];
+        private static Int64[] _data2 = new Int64[Op2ElementCount];
+
+        private static Vector128<Int64> _clsVar2;
+
+        private Vector128<Int64> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Int64, Int64, Int64> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadInt64()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _clsVar2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadInt64()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _fld2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Int64, Int64, Int64>(_data1, _data2, new Int64[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx2.MaskLoad(
+                (Int64*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector128<Int64>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx2.MaskLoad(
+                (Int64*)_dataTable.inArray1Ptr,
+                Sse2.LoadVector128((Int64*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx2.MaskLoad(
+                (Int64*)_dataTable.inArray1Ptr,
+                Sse2.LoadAlignedVector128((Int64*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int64*), typeof(Vector128<Int64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int64*)),
+                                        Unsafe.Read<Vector128<Int64>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int64*), typeof(Vector128<Int64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int64*)),
+                                        Sse2.LoadVector128((Int64*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(Int64*), typeof(Vector128<Int64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Int64*)),
+                                        Sse2.LoadAlignedVector128((Int64*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx2.MaskLoad(
+                (Int64*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (Int64*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector128<Int64>>(_dataTable.inArray2Ptr);
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (Int64*)_dataTable.inArray1Ptr;
+            var right = Sse2.LoadVector128((Int64*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (Int64*)_dataTable.inArray1Ptr;
+            var right = Sse2.LoadAlignedVector128((Int64*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadInt64();
+            var result = Avx2.MaskLoad((Int64*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx2.MaskLoad((Int64*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx2.MaskLoad((Int64*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector128<Int64> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int64[] inArray1 = new Int64[Op1ElementCount];
+            Int64[] inArray2 = new Int64[Op2ElementCount];
+            Int64[] outArray = new Int64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int64, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int64[] inArray1 = new Int64[Op1ElementCount];
+            Int64[] inArray2 = new Int64[Op2ElementCount];
+            Int64[] outArray = new Int64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int64[] left, Int64[] right, Int64[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != ((right[0] < 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != ((right[i] < 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskLoad)}<Int64>(Int64*, Vector128<Int64>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.UInt32.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.UInt32.cs
new file mode 100644 (file)
index 0000000..a518d52
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadUInt32()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadUInt32();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadUInt32
+    {
+        private struct TestStruct
+        {
+            public Vector128<UInt32> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref testStruct._fld2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadUInt32 testClass)
+            {
+                var result = Avx2.MaskLoad((UInt32*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<UInt32>>() / sizeof(UInt32);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<UInt32>>() / sizeof(UInt32);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<UInt32>>() / sizeof(UInt32);
+
+        private static UInt32[] _data1 = new UInt32[Op1ElementCount];
+        private static UInt32[] _data2 = new UInt32[Op2ElementCount];
+
+        private static Vector128<UInt32> _clsVar2;
+
+        private Vector128<UInt32> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<UInt32, UInt32, UInt32> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadUInt32()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _clsVar2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadUInt32()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _fld2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<UInt32, UInt32, UInt32>(_data1, _data2, new UInt32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx2.MaskLoad(
+                (UInt32*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx2.MaskLoad(
+                (UInt32*)_dataTable.inArray1Ptr,
+                Sse2.LoadVector128((UInt32*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx2.MaskLoad(
+                (UInt32*)_dataTable.inArray1Ptr,
+                Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt32*), typeof(Vector128<UInt32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt32*)),
+                                        Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<UInt32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt32*), typeof(Vector128<UInt32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt32*)),
+                                        Sse2.LoadVector128((UInt32*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<UInt32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt32*), typeof(Vector128<UInt32>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt32*)),
+                                        Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<UInt32>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx2.MaskLoad(
+                (UInt32*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (UInt32*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray2Ptr);
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (UInt32*)_dataTable.inArray1Ptr;
+            var right = Sse2.LoadVector128((UInt32*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (UInt32*)_dataTable.inArray1Ptr;
+            var right = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadUInt32();
+            var result = Avx2.MaskLoad((UInt32*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx2.MaskLoad((UInt32*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx2.MaskLoad((UInt32*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector128<UInt32> right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt32[] inArray1 = new UInt32[Op1ElementCount];
+            UInt32[] inArray2 = new UInt32[Op2ElementCount];
+            UInt32[] outArray = new UInt32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt32[] inArray1 = new UInt32[Op1ElementCount];
+            UInt32[] inArray2 = new UInt32[Op2ElementCount];
+            UInt32[] outArray = new UInt32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(UInt32[] left, UInt32[] right, UInt32[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != (((right[0] & (1U << 31)) != 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != (((right[i] & (1U << 31)) != 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskLoad)}<UInt32>(UInt32*, Vector128<UInt32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.UInt64.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskLoad.UInt64.cs
new file mode 100644 (file)
index 0000000..ca4e1de
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadUInt64()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadUInt64();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadUInt64
+    {
+        private struct TestStruct
+        {
+            public Vector128<UInt64> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref testStruct._fld2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadUInt64 testClass)
+            {
+                var result = Avx2.MaskLoad((UInt64*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<UInt64>>() / sizeof(UInt64);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<UInt64>>() / sizeof(UInt64);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<UInt64>>() / sizeof(UInt64);
+
+        private static UInt64[] _data1 = new UInt64[Op1ElementCount];
+        private static UInt64[] _data2 = new UInt64[Op2ElementCount];
+
+        private static Vector128<UInt64> _clsVar2;
+
+        private Vector128<UInt64> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<UInt64, UInt64, UInt64> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadUInt64()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _clsVar2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadUInt64()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _fld2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<UInt64, UInt64, UInt64>(_data1, _data2, new UInt64[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx2.MaskLoad(
+                (UInt64*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector128<UInt64>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx2.MaskLoad(
+                (UInt64*)_dataTable.inArray1Ptr,
+                Sse2.LoadVector128((UInt64*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx2.MaskLoad(
+                (UInt64*)_dataTable.inArray1Ptr,
+                Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt64*), typeof(Vector128<UInt64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt64*)),
+                                        Unsafe.Read<Vector128<UInt64>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<UInt64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt64*), typeof(Vector128<UInt64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt64*)),
+                                        Sse2.LoadVector128((UInt64*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<UInt64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx2).GetMethod(nameof(Avx2.MaskLoad), new Type[] { typeof(UInt64*), typeof(Vector128<UInt64>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(UInt64*)),
+                                        Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<UInt64>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx2.MaskLoad(
+                (UInt64*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (UInt64*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector128<UInt64>>(_dataTable.inArray2Ptr);
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (UInt64*)_dataTable.inArray1Ptr;
+            var right = Sse2.LoadVector128((UInt64*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (UInt64*)_dataTable.inArray1Ptr;
+            var right = Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArray2Ptr));
+            var result = Avx2.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadUInt64();
+            var result = Avx2.MaskLoad((UInt64*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx2.MaskLoad((UInt64*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx2.MaskLoad((UInt64*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector128<UInt64> right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt64[] inArray1 = new UInt64[Op1ElementCount];
+            UInt64[] inArray2 = new UInt64[Op2ElementCount];
+            UInt64[] outArray = new UInt64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt64[] inArray1 = new UInt64[Op1ElementCount];
+            UInt64[] inArray2 = new UInt64[Op2ElementCount];
+            UInt64[] outArray = new UInt64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(UInt64[] left, UInt64[] right, UInt64[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != (((right[0] & (1UL << 63)) != 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != (((right[i] & (1UL << 63)) != 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskLoad)}<UInt64>(UInt64*, Vector128<UInt64>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.Int32.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.Int32.cs
new file mode 100644 (file)
index 0000000..33d80c1
--- /dev/null
@@ -0,0 +1,395 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskStoreInt32()
+        {
+            var test = new StoreBinaryOpTest__MaskStoreInt32();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class StoreBinaryOpTest__MaskStoreInt32
+    {
+        private struct TestStruct
+        {
+            public Vector128<Int32> _fld1;
+            public Vector128<Int32> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld1), ref Unsafe.As<Int32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(StoreBinaryOpTest__MaskStoreInt32 testClass)
+            {
+                Avx2.MaskStore((Int32*)testClass._dataTable.outArrayPtr, _fld1, _fld2);
+                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data1 = new Int32[Op1ElementCount];
+        private static Int32[] _data2 = new Int32[Op2ElementCount];
+
+        private static Vector128<Int32> _clsVar1;
+        private static Vector128<Int32> _clsVar2;
+
+        private Vector128<Int32> _fld1;
+        private Vector128<Int32> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Int32, Int32, Int32> _dataTable;
+
+        static StoreBinaryOpTest__MaskStoreInt32()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar1), ref Unsafe.As<Int32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+        }
+
+        public StoreBinaryOpTest__MaskStoreInt32()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld1), ref Unsafe.As<Int32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld2), ref Unsafe.As<Int32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt32(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Int32, Int32, Int32>(_data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            Avx2.MaskStore(
+                (Int32*)_dataTable.outArrayPtr,
+                Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr)
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            Avx2.MaskStore(
+                (Int32*)_dataTable.outArrayPtr,
+                Sse2.LoadVector128((Int32*)(_dataTable.inArray1Ptr)),
+                Sse2.LoadVector128((Int32*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            Avx2.MaskStore(
+                (Int32*)_dataTable.outArrayPtr,
+                Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray1Ptr)),
+                Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int32*), typeof(Vector128<Int32>), typeof(Vector128<Int32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int32*)),
+                            Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr),
+                            Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr)
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int32*), typeof(Vector128<Int32>), typeof(Vector128<Int32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int32*)),
+                            Sse2.LoadVector128((Int32*)(_dataTable.inArray1Ptr)),
+                            Sse2.LoadVector128((Int32*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int32*), typeof(Vector128<Int32>), typeof(Vector128<Int32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int32*)),
+                            Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray1Ptr)),
+                            Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            Avx2.MaskStore(
+                (Int32*)_dataTable.outArrayPtr,
+                _clsVar1,
+                _clsVar2
+            );
+
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray2Ptr);
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = Sse2.LoadVector128((Int32*)(_dataTable.inArray1Ptr));
+            var right = Sse2.LoadVector128((Int32*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray1Ptr));
+            var right = Sse2.LoadAlignedVector128((Int32*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new StoreBinaryOpTest__MaskStoreInt32();
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, _fld1, _fld2);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            Avx2.MaskStore((Int32*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Int32> left, Vector128<Int32> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray1 = new Int32[Op1ElementCount];
+            Int32[] inArray2 = new Int32[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray1 = new Int32[Op1ElementCount];
+            Int32[] inArray2 = new Int32[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] left, Int32[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != ((left[0] < 0) ? right[0] : result[0]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != ((left[i] < 0) ? right[i] : result[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskStore)}<Int32>(Vector128<Int32>, Vector128<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.Int64.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.Int64.cs
new file mode 100644 (file)
index 0000000..d731f8b
--- /dev/null
@@ -0,0 +1,395 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskStoreInt64()
+        {
+            var test = new StoreBinaryOpTest__MaskStoreInt64();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class StoreBinaryOpTest__MaskStoreInt64
+    {
+        private struct TestStruct
+        {
+            public Vector128<Int64> _fld1;
+            public Vector128<Int64> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref testStruct._fld1), ref Unsafe.As<Int64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref testStruct._fld2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(StoreBinaryOpTest__MaskStoreInt64 testClass)
+            {
+                Avx2.MaskStore((Int64*)testClass._dataTable.outArrayPtr, _fld1, _fld2);
+                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Int64>>() / sizeof(Int64);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Int64>>() / sizeof(Int64);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int64>>() / sizeof(Int64);
+
+        private static Int64[] _data1 = new Int64[Op1ElementCount];
+        private static Int64[] _data2 = new Int64[Op2ElementCount];
+
+        private static Vector128<Int64> _clsVar1;
+        private static Vector128<Int64> _clsVar2;
+
+        private Vector128<Int64> _fld1;
+        private Vector128<Int64> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Int64, Int64, Int64> _dataTable;
+
+        static StoreBinaryOpTest__MaskStoreInt64()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _clsVar1), ref Unsafe.As<Int64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _clsVar2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+        }
+
+        public StoreBinaryOpTest__MaskStoreInt64()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _fld1), ref Unsafe.As<Int64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _fld2), ref Unsafe.As<Int64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt64(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Int64, Int64, Int64>(_data1, _data2, new Int64[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            Avx2.MaskStore(
+                (Int64*)_dataTable.outArrayPtr,
+                Unsafe.Read<Vector128<Int64>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Int64>>(_dataTable.inArray2Ptr)
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            Avx2.MaskStore(
+                (Int64*)_dataTable.outArrayPtr,
+                Sse2.LoadVector128((Int64*)(_dataTable.inArray1Ptr)),
+                Sse2.LoadVector128((Int64*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            Avx2.MaskStore(
+                (Int64*)_dataTable.outArrayPtr,
+                Sse2.LoadAlignedVector128((Int64*)(_dataTable.inArray1Ptr)),
+                Sse2.LoadAlignedVector128((Int64*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int64*), typeof(Vector128<Int64>), typeof(Vector128<Int64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int64*)),
+                            Unsafe.Read<Vector128<Int64>>(_dataTable.inArray1Ptr),
+                            Unsafe.Read<Vector128<Int64>>(_dataTable.inArray2Ptr)
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int64*), typeof(Vector128<Int64>), typeof(Vector128<Int64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int64*)),
+                            Sse2.LoadVector128((Int64*)(_dataTable.inArray1Ptr)),
+                            Sse2.LoadVector128((Int64*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(Int64*), typeof(Vector128<Int64>), typeof(Vector128<Int64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(Int64*)),
+                            Sse2.LoadAlignedVector128((Int64*)(_dataTable.inArray1Ptr)),
+                            Sse2.LoadAlignedVector128((Int64*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            Avx2.MaskStore(
+                (Int64*)_dataTable.outArrayPtr,
+                _clsVar1,
+                _clsVar2
+            );
+
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = Unsafe.Read<Vector128<Int64>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Int64>>(_dataTable.inArray2Ptr);
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = Sse2.LoadVector128((Int64*)(_dataTable.inArray1Ptr));
+            var right = Sse2.LoadVector128((Int64*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = Sse2.LoadAlignedVector128((Int64*)(_dataTable.inArray1Ptr));
+            var right = Sse2.LoadAlignedVector128((Int64*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new StoreBinaryOpTest__MaskStoreInt64();
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, _fld1, _fld2);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            Avx2.MaskStore((Int64*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Int64> left, Vector128<Int64> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int64[] inArray1 = new Int64[Op1ElementCount];
+            Int64[] inArray2 = new Int64[Op2ElementCount];
+            Int64[] outArray = new Int64[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int64, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int64, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int64[] inArray1 = new Int64[Op1ElementCount];
+            Int64[] inArray2 = new Int64[Op2ElementCount];
+            Int64[] outArray = new Int64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int64[] left, Int64[] right, Int64[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != ((left[0] < 0) ? right[0] : result[0]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != ((left[i] < 0) ? right[i] : result[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskStore)}<Int64>(Vector128<Int64>, Vector128<Int64>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.UInt32.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.UInt32.cs
new file mode 100644 (file)
index 0000000..6d30d3c
--- /dev/null
@@ -0,0 +1,395 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskStoreUInt32()
+        {
+            var test = new StoreBinaryOpTest__MaskStoreUInt32();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class StoreBinaryOpTest__MaskStoreUInt32
+    {
+        private struct TestStruct
+        {
+            public Vector128<UInt32> _fld1;
+            public Vector128<UInt32> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref testStruct._fld1), ref Unsafe.As<UInt32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref testStruct._fld2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(StoreBinaryOpTest__MaskStoreUInt32 testClass)
+            {
+                Avx2.MaskStore((UInt32*)testClass._dataTable.outArrayPtr, _fld1, _fld2);
+                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<UInt32>>() / sizeof(UInt32);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<UInt32>>() / sizeof(UInt32);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<UInt32>>() / sizeof(UInt32);
+
+        private static UInt32[] _data1 = new UInt32[Op1ElementCount];
+        private static UInt32[] _data2 = new UInt32[Op2ElementCount];
+
+        private static Vector128<UInt32> _clsVar1;
+        private static Vector128<UInt32> _clsVar2;
+
+        private Vector128<UInt32> _fld1;
+        private Vector128<UInt32> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<UInt32, UInt32, UInt32> _dataTable;
+
+        static StoreBinaryOpTest__MaskStoreUInt32()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _clsVar1), ref Unsafe.As<UInt32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _clsVar2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+        }
+
+        public StoreBinaryOpTest__MaskStoreUInt32()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _fld1), ref Unsafe.As<UInt32, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _fld2), ref Unsafe.As<UInt32, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt32(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt32(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<UInt32, UInt32, UInt32>(_data1, _data2, new UInt32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            Avx2.MaskStore(
+                (UInt32*)_dataTable.outArrayPtr,
+                Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray2Ptr)
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            Avx2.MaskStore(
+                (UInt32*)_dataTable.outArrayPtr,
+                Sse2.LoadVector128((UInt32*)(_dataTable.inArray1Ptr)),
+                Sse2.LoadVector128((UInt32*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            Avx2.MaskStore(
+                (UInt32*)_dataTable.outArrayPtr,
+                Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray1Ptr)),
+                Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt32*), typeof(Vector128<UInt32>), typeof(Vector128<UInt32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt32*)),
+                            Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray1Ptr),
+                            Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray2Ptr)
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt32*), typeof(Vector128<UInt32>), typeof(Vector128<UInt32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt32*)),
+                            Sse2.LoadVector128((UInt32*)(_dataTable.inArray1Ptr)),
+                            Sse2.LoadVector128((UInt32*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt32*), typeof(Vector128<UInt32>), typeof(Vector128<UInt32>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt32*)),
+                            Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray1Ptr)),
+                            Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            Avx2.MaskStore(
+                (UInt32*)_dataTable.outArrayPtr,
+                _clsVar1,
+                _clsVar2
+            );
+
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<UInt32>>(_dataTable.inArray2Ptr);
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = Sse2.LoadVector128((UInt32*)(_dataTable.inArray1Ptr));
+            var right = Sse2.LoadVector128((UInt32*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray1Ptr));
+            var right = Sse2.LoadAlignedVector128((UInt32*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new StoreBinaryOpTest__MaskStoreUInt32();
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, _fld1, _fld2);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            Avx2.MaskStore((UInt32*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<UInt32> left, Vector128<UInt32> right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt32[] inArray1 = new UInt32[Op1ElementCount];
+            UInt32[] inArray2 = new UInt32[Op2ElementCount];
+            UInt32[] outArray = new UInt32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt32[] inArray1 = new UInt32[Op1ElementCount];
+            UInt32[] inArray2 = new UInt32[Op2ElementCount];
+            UInt32[] outArray = new UInt32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(UInt32[] left, UInt32[] right, UInt32[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != (((left[0] & (1U << 31)) != 0) ? right[0] : result[0]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != (((left[i] & (1U << 31)) != 0) ? right[i] : result[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskStore)}<UInt32>(Vector128<UInt32>, Vector128<UInt32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.UInt64.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx2_Vector128/MaskStore.UInt64.cs
new file mode 100644 (file)
index 0000000..c193270
--- /dev/null
@@ -0,0 +1,395 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Reflection;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskStoreUInt64()
+        {
+            var test = new StoreBinaryOpTest__MaskStoreUInt64();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse2.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class StoreBinaryOpTest__MaskStoreUInt64
+    {
+        private struct TestStruct
+        {
+            public Vector128<UInt64> _fld1;
+            public Vector128<UInt64> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref testStruct._fld1), ref Unsafe.As<UInt64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref testStruct._fld2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(StoreBinaryOpTest__MaskStoreUInt64 testClass)
+            {
+                Avx2.MaskStore((UInt64*)testClass._dataTable.outArrayPtr, _fld1, _fld2);
+                testClass.ValidateResult(_fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<UInt64>>() / sizeof(UInt64);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<UInt64>>() / sizeof(UInt64);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<UInt64>>() / sizeof(UInt64);
+
+        private static UInt64[] _data1 = new UInt64[Op1ElementCount];
+        private static UInt64[] _data2 = new UInt64[Op2ElementCount];
+
+        private static Vector128<UInt64> _clsVar1;
+        private static Vector128<UInt64> _clsVar2;
+
+        private Vector128<UInt64> _fld1;
+        private Vector128<UInt64> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<UInt64, UInt64, UInt64> _dataTable;
+
+        static StoreBinaryOpTest__MaskStoreUInt64()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _clsVar1), ref Unsafe.As<UInt64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _clsVar2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+        }
+
+        public StoreBinaryOpTest__MaskStoreUInt64()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _fld1), ref Unsafe.As<UInt64, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _fld2), ref Unsafe.As<UInt64, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetUInt64(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetUInt64(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<UInt64, UInt64, UInt64>(_data1, _data2, new UInt64[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx2.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            Avx2.MaskStore(
+                (UInt64*)_dataTable.outArrayPtr,
+                Unsafe.Read<Vector128<UInt64>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<UInt64>>(_dataTable.inArray2Ptr)
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            Avx2.MaskStore(
+                (UInt64*)_dataTable.outArrayPtr,
+                Sse2.LoadVector128((UInt64*)(_dataTable.inArray1Ptr)),
+                Sse2.LoadVector128((UInt64*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            Avx2.MaskStore(
+                (UInt64*)_dataTable.outArrayPtr,
+                Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArray1Ptr)),
+                Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArray2Ptr))
+            );
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt64*), typeof(Vector128<UInt64>), typeof(Vector128<UInt64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt64*)),
+                            Unsafe.Read<Vector128<UInt64>>(_dataTable.inArray1Ptr),
+                            Unsafe.Read<Vector128<UInt64>>(_dataTable.inArray2Ptr)
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt64*), typeof(Vector128<UInt64>), typeof(Vector128<UInt64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt64*)),
+                            Sse2.LoadVector128((UInt64*)(_dataTable.inArray1Ptr)),
+                            Sse2.LoadVector128((UInt64*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            typeof(Avx2).GetMethod(nameof(Avx2.MaskStore), new Type[] { typeof(UInt64*), typeof(Vector128<UInt64>), typeof(Vector128<UInt64>) })
+                         .Invoke(null, new object[] {
+                            Pointer.Box(_dataTable.outArrayPtr, typeof(UInt64*)),
+                            Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArray1Ptr)),
+                            Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArray2Ptr))
+                         });
+
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            Avx2.MaskStore(
+                (UInt64*)_dataTable.outArrayPtr,
+                _clsVar1,
+                _clsVar2
+            );
+
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = Unsafe.Read<Vector128<UInt64>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<UInt64>>(_dataTable.inArray2Ptr);
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = Sse2.LoadVector128((UInt64*)(_dataTable.inArray1Ptr));
+            var right = Sse2.LoadVector128((UInt64*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArray1Ptr));
+            var right = Sse2.LoadAlignedVector128((UInt64*)(_dataTable.inArray2Ptr));
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, left, right);
+
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new StoreBinaryOpTest__MaskStoreUInt64();
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, _fld1, _fld2);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            Avx2.MaskStore((UInt64*)_dataTable.outArrayPtr, test._fld1, test._fld2);
+
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<UInt64> left, Vector128<UInt64> right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt64[] inArray1 = new UInt64[Op1ElementCount];
+            UInt64[] inArray2 = new UInt64[Op2ElementCount];
+            UInt64[] outArray = new UInt64[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            UInt64[] inArray1 = new UInt64[Op1ElementCount];
+            UInt64[] inArray2 = new UInt64[Op2ElementCount];
+            UInt64[] outArray = new UInt64[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<UInt64, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(UInt64[] left, UInt64[] right, UInt64[] result, [CallerMemberName] string method = "")
+        {
+            if (result[0] != (((left[0] & (1UL << 63))  != 0) ? right[0] : result[0]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (result[i] != (((left[i] & (1UL << 63)) != 0) ? right[i] : result[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx2)}.{nameof(Avx2.MaskStore)}<UInt64>(Vector128<UInt64>, Vector128<UInt64>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
index 3253b8b271939790cf19c3b3ce6d72295849da7d..fe6d657b1b270ab20960877b8795497e02968df6 100644 (file)
@@ -20,6 +20,14 @@ namespace JIT.HardwareIntrinsics.X86
                 ["Blend.UInt32.2"] = BlendUInt322,
                 ["Blend.UInt32.4"] = BlendUInt324,
                 ["Blend.UInt32.85"] = BlendUInt3285,
+                ["MaskLoad.Int32"] = MaskLoadInt32,
+                ["MaskLoad.UInt32"] = MaskLoadUInt32,
+                ["MaskLoad.Int64"] = MaskLoadInt64,
+                ["MaskLoad.UInt64"] = MaskLoadUInt64,
+                ["MaskStore.Int32"] = MaskStoreInt32,
+                ["MaskStore.UInt32"] = MaskStoreUInt32,
+                ["MaskStore.Int64"] = MaskStoreInt64,
+                ["MaskStore.UInt64"] = MaskStoreUInt64,
             };
         }
     }
index 28573c30c782bde2428b1defca8f4e1e576c66d6..0c01a676c2c86eee14231ed550386c432d2d6fdf 100644 (file)
@@ -27,6 +27,8 @@
     <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="MaskLoad.Double.cs" />
+    <Compile Include="MaskLoad.Single.cs" />
     <Compile Include="MaskStore.Double.cs" />
     <Compile Include="MaskStore.Single.cs" />
     <Compile Include="Program.Avx_Vector128.cs" />
index 74d9978a5b7216c97a42755090ac1da4451294e5..ab44c158a83f4bce43e718ac6a6113ec0f522d59 100644 (file)
@@ -27,6 +27,8 @@
     <Service Include="{82A7F48D-3B50-4B1E-B82E-3ADA8210C358}" />
   </ItemGroup>
   <ItemGroup>
+    <Compile Include="MaskLoad.Double.cs" />
+    <Compile Include="MaskLoad.Single.cs" />
     <Compile Include="MaskStore.Double.cs" />
     <Compile Include="MaskStore.Single.cs" />
     <Compile Include="Program.Avx_Vector128.cs" />
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/MaskLoad.Double.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/MaskLoad.Double.cs
new file mode 100644 (file)
index 0000000..1a8806c
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadDouble()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadDouble();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadDouble
+    {
+        private struct TestStruct
+        {
+            public Vector128<Double> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetDouble(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetDouble(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Double>, byte>(ref testStruct._fld2), ref Unsafe.As<Double, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Double>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadDouble testClass)
+            {
+                var result = Avx.MaskLoad((Double*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Double>>() / sizeof(Double);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Double>>() / sizeof(Double);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Double>>() / sizeof(Double);
+
+        private static Double[] _data1 = new Double[Op1ElementCount];
+        private static Double[] _data2 = new Double[Op2ElementCount];
+
+        private static Vector128<Double> _clsVar2;
+
+        private Vector128<Double> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Double, Double, Double> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadDouble()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetDouble(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetDouble(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Double>, byte>(ref _clsVar2), ref Unsafe.As<Double, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Double>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadDouble()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetDouble(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetDouble(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Double>, byte>(ref _fld2), ref Unsafe.As<Double, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Double>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetDouble(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetDouble(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Double, Double, Double>(_data1, _data2, new Double[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx.MaskLoad(
+                (Double*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector128<Double>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx.MaskLoad(
+                (Double*)_dataTable.inArray1Ptr,
+                Avx.LoadVector128((Double*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx.MaskLoad(
+                (Double*)_dataTable.inArray1Ptr,
+                Avx.LoadAlignedVector128((Double*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Double*), typeof(Vector128<Double>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Double*)),
+                                        Unsafe.Read<Vector128<Double>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Double>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Double*), typeof(Vector128<Double>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Double*)),
+                                        Avx.LoadVector128((Double*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Double>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Double*), typeof(Vector128<Double>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Double*)),
+                                        Avx.LoadAlignedVector128((Double*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Double>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx.MaskLoad(
+                (Double*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (Double*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector128<Double>>(_dataTable.inArray2Ptr);
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (Double*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadVector128((Double*)(_dataTable.inArray2Ptr));
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (Double*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadAlignedVector128((Double*)(_dataTable.inArray2Ptr));
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadDouble();
+            var result = Avx.MaskLoad((Double*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx.MaskLoad((Double*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx.MaskLoad((Double*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector128<Double> right, void* result, [CallerMemberName] string method = "")
+        {
+            Double[] inArray1 = new Double[Op1ElementCount];
+            Double[] inArray2 = new Double[Op2ElementCount];
+            Double[] outArray = new Double[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Double>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<Double, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Double>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Double[] inArray1 = new Double[Op1ElementCount];
+            Double[] inArray2 = new Double[Op2ElementCount];
+            Double[] outArray = new Double[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Double>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Double>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Double, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Double>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Double[] left, Double[] right, Double[] result, [CallerMemberName] string method = "")
+        {
+            if (BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(right[0]) < 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(right[i]) < 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx)}.{nameof(Avx.MaskLoad)}<Double>(Double*, Vector128<Double>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/MaskLoad.Single.cs b/tests/src/JIT/HardwareIntrinsics/X86/Avx_Vector128/MaskLoad.Single.cs
new file mode 100644 (file)
index 0000000..e924907
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MaskLoadSingle()
+        {
+            var test = new SimpleBinaryOpTest__MaskLoadSingle();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__MaskLoadSingle
+    {
+        private struct TestStruct
+        {
+            public Vector128<Single> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetSingle(); }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSingle(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref testStruct._fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__MaskLoadSingle testClass)
+            {
+                var result = Avx.MaskLoad((Single*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        private static Vector128<Single> _clsVar2;
+
+        private Vector128<Single> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        static SimpleBinaryOpTest__MaskLoadSingle()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetSingle(); }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSingle(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleBinaryOpTest__MaskLoadSingle()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetSingle(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSingle(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetSingle(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSingle(); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Avx.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = Avx.MaskLoad(
+                (Single*)_dataTable.inArray1Ptr,
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = Avx.MaskLoad(
+                (Single*)_dataTable.inArray1Ptr,
+                Avx.LoadVector128((Single*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = Avx.MaskLoad(
+                (Single*)_dataTable.inArray1Ptr,
+                Avx.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Single*), typeof(Vector128<Single>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Single*)),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Single*), typeof(Vector128<Single>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Single*)),
+                                        Avx.LoadVector128((Single*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(Avx).GetMethod(nameof(Avx.MaskLoad), new Type[] { typeof(Single*), typeof(Vector128<Single>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof(Single*)),
+                                        Avx.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = Avx.MaskLoad(
+                (Single*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = (Single*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = (Single*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = (Single*)_dataTable.inArray1Ptr;
+            var right = Avx.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Avx.MaskLoad(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__MaskLoadSingle();
+            var result = Avx.MaskLoad((Single*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = Avx.MaskLoad((Single*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = Avx.MaskLoad((Single*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, Vector128<Single> right, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
+        {
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[0]) < 0) ? left[0] : 0))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[i]) < 0) ? left[i] : 0))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(Avx)}.{nameof(Avx.MaskLoad)}<Single>(Single*, Vector128<Single>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}
index 629d26ef016172213fe5aeec89d8d028b13f02fb..64a8757f3e6c9d447f45b61ab72f914c25c834db 100644 (file)
@@ -366,7 +366,7 @@ namespace JIT.HardwareIntrinsics.X86
 
         private void ValidateResult(Double[] left, Double[] right, Double[] result, [CallerMemberName] string method = "")
         {
-            if (BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[0]) < 0) ? right[0] : 0))
+            if (BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[0]) < 0) ? right[0] : BitConverter.DoubleToInt64Bits(result[0])))
             {
                 Succeeded = false;
             }
@@ -374,7 +374,7 @@ namespace JIT.HardwareIntrinsics.X86
             {
                 for (var i = 1; i < RetElementCount; i++)
                 {
-                    if (BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[i]) < 0) ? right[i] : 0))
+                    if (BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[i]) < 0) ? right[i] : BitConverter.DoubleToInt64Bits(result[i])))
                     {
                         Succeeded = false;
                         break;
index 77c5ba369d6807ddefa3f2919c2d86d599afd410..9ac35daabf3c557393b59e26f6505c96c35408a3 100644 (file)
@@ -366,7 +366,7 @@ namespace JIT.HardwareIntrinsics.X86
 
         private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
         {
-            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[0]) < 0) ? right[0] : 0))
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[0]) < 0) ? right[0] : BitConverter.SingleToInt32Bits(result[0])))
             {
                 Succeeded = false;
             }
@@ -374,7 +374,7 @@ namespace JIT.HardwareIntrinsics.X86
             {
                 for (var i = 1; i < RetElementCount; i++)
                 {
-                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[i]) < 0) ? right[i] : 0))
+                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[i]) < 0) ? right[i] : BitConverter.SingleToInt32Bits(result[i])))
                     {
                         Succeeded = false;
                         break;
index 6455947621606cd4fbcb05333ed44d7b70482b5f..cdeba98559a66ae291abfdb5b0ed15afefa9f894 100644 (file)
@@ -12,6 +12,8 @@ namespace JIT.HardwareIntrinsics.X86
         static Program()
         {
             TestList = new Dictionary<string, Action>() {
+                ["MaskLoad.Double"] = MaskLoadDouble,
+                ["MaskLoad.Single"] = MaskLoadSingle,
                 ["MaskStore.Double"] = MaskStoreDouble,
                 ["MaskStore.Single"] = MaskStoreSingle,
             };
index 61f5fe3680fce0ca725fca88ca1dafd88d67c379..fadec1e7cbf3be3fc40bc79cef778fc816b2fd3a 100644 (file)
@@ -510,8 +510,10 @@ private static readonly (string templateFileName, Dictionary<string, string> tem
     ("GenericUnOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "GetLowerHalf",            ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt16",                                                                                                                                                                    ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt16()",                                                                                                                     ["ValidateFirstResult"] = "firstOp[0] != result[0]",                                                                                                                                                                                                                            ["ValidateRemainingResults"] = "result[i] != ((i < (Op1ElementCount / 2)) ? firstOp[i] : 0)"}),
     ("GenericUnOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "GetLowerHalf",            ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32",                                                                                                                                                                    ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",                                                                                                                     ["ValidateFirstResult"] = "firstOp[0] != result[0]",                                                                                                                                                                                                                            ["ValidateRemainingResults"] = "result[i] != ((i < (Op1ElementCount / 2)) ? firstOp[i] : 0)"}),
     ("GenericUnOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "GetLowerHalf",            ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64",                                                                                                                                                                    ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()",                                                                                                                     ["ValidateFirstResult"] = "firstOp[0] != result[0]",                                                                                                                                                                                                                            ["ValidateRemainingResults"] = "result[i] != ((i < (Op1ElementCount / 2)) ? firstOp[i] : 0)"}),
-    ("StoreBinOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore",               ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double",                                                                                                       ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                             ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[0]) < 0) ? right[0] : 0)",                                                                                                          ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[i]) < 0) ? right[i] : 0)"}),
-    ("StoreBinOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore",               ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single",                                                                                                       ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                             ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[0]) < 0) ? right[0] : 0)",                                                                                                          ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[i]) < 0) ? right[i] : 0)"}),
+    ("LoadBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskLoad",                ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double",                                                                                                       ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                             ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(right[0]) < 0) ? left[0] : 0)",                                                                                                          ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(right[i]) < 0) ? left[i] : 0)"}),
+    ("LoadBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskLoad",                ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single",                                                                                                       ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                             ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[0]) < 0) ? left[0] : 0)",                                                                                                          ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[i]) < 0) ? left[i] : 0)"}),
+    ("StoreBinOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore",               ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double",                                                                                                       ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                             ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[0]) < 0) ? right[0] : BitConverter.DoubleToInt64Bits(result[0]))",                                                                  ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[i]) < 0) ? right[i] : BitConverter.DoubleToInt64Bits(result[i]))"}),
+    ("StoreBinOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore",               ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single",                                                                                                       ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                             ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[0]) < 0) ? right[0] : BitConverter.SingleToInt32Bits(result[0]))",                                                                  ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[i]) < 0) ? right[i] : BitConverter.SingleToInt32Bits(result[i]))"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Max",                     ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double",                                                                                                       ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                             ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(Math.Max(left[0], right[0])) != BitConverter.DoubleToInt64Bits(result[0])",                                                                                                                                           ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(Math.Max(left[i], right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Max",                     ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Single",                                                                                                       ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()",                                                             ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(MathF.Max(left[0], right[0])) != BitConverter.SingleToInt32Bits(result[0])",                                                                                                                                          ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(MathF.Max(left[i], right[i])) != BitConverter.SingleToInt32Bits(result[i])"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "Min",                     ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Double",                                                                                                       ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()",                                                             ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(Math.Min(left[0], right[0])) != BitConverter.DoubleToInt64Bits(result[0])",                                                                                                                                           ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(Math.Min(left[i], right[i])) != BitConverter.DoubleToInt64Bits(result[i])"}),
@@ -570,8 +572,10 @@ private static readonly (string templateFileName, Dictionary<string, string> tem
 
 private static readonly (string templateFileName, Dictionary<string, string> templateData)[] Avx_Vector128Inputs = new []
 {
-    ("StoreBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[0]) < 0) ? right[0] : 0)", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[i]) < 0) ? right[i] : 0)"}),
-    ("StoreBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[0]) < 0) ? right[0] : 0)", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[i]) < 0) ? right[i] : 0)"}),
+    ("LoadBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskLoad",                ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(right[0]) < 0) ? left[0] : 0)",                                         ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(right[i]) < 0) ? left[i] : 0)"}),
+    ("LoadBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskLoad",                ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[0]) < 0) ? left[0] : 0)",                                         ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(right[i]) < 0) ? left[i] : 0)"}),
+    ("StoreBinOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore",               ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetDouble()", ["NextValueOp2"] = "TestLibrary.Generator.GetDouble()", ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[0]) < 0) ? right[0] : BitConverter.DoubleToInt64Bits(result[0]))", ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits((BitConverter.DoubleToInt64Bits(left[i]) < 0) ? right[i] : BitConverter.DoubleToInt64Bits(result[i]))"}),
+    ("StoreBinOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Avx", ["LoadIsa"] = "Avx", ["Method"] = "MaskStore",               ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetSingle()", ["NextValueOp2"] = "TestLibrary.Generator.GetSingle()", ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[0]) < 0) ? right[0] : BitConverter.SingleToInt32Bits(result[0]))", ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((BitConverter.SingleToInt32Bits(left[i]) < 0) ? right[i] : BitConverter.SingleToInt32Bits(result[i]))"}),
 };
 
 private static readonly (string templateFileName, Dictionary<string, string> templateData)[] Avx2Inputs = new []
@@ -691,6 +695,14 @@ private static readonly (string templateFileName, Dictionary<string, string> tem
     ("InsertLoadTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "InsertVector128",             ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                                                            ["Imm"] = "1",    ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                          ["ValidateFirstResult"] = "result[0] != left[0]",                                                                    ["ValidateRemainingResults"] = "(i > 3 ? result[i] != right[i - 4] : result[i] != left[i])"}),
     ("InsertLoadTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "InsertVector128",             ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                                                             ["Imm"] = "1",    ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                           ["ValidateFirstResult"] = "result[0] != left[0]",                                                                    ["ValidateRemainingResults"] = "(i > 1 ? result[i] != right[i - 2] : result[i] != left[i])"}),
     ("InsertLoadTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "InsertVector128",             ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                                                            ["Imm"] = "1",    ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                          ["ValidateFirstResult"] = "result[0] != left[0]",                                                                    ["ValidateRemainingResults"] = "(i > 1 ? result[i] != right[i - 2] : result[i] != left[i])"}),
+    ("LoadBinOpTest.template",        new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "MaskLoad",                    ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32",                                                                               ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                           ["ValidateFirstResult"] = "result[0] != ((right[0] < 0) ? left[0] : 0)",                                             ["ValidateRemainingResults"] = "result[i] != ((right[i] < 0) ? left[i] : 0)"}),
+    ("LoadBinOpTest.template",        new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "MaskLoad",                    ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32",                                                                              ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                          ["ValidateFirstResult"] = "result[0] != (((right[0] & (1U << 31)) != 0) ? left[0] : 0)",                             ["ValidateRemainingResults"] = "result[i] != (((right[i] & (1U << 31)) != 0) ? left[i] : 0)"}),
+    ("LoadBinOpTest.template",        new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "MaskLoad",                    ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64",                                                                               ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                           ["ValidateFirstResult"] = "result[0] != ((right[0] < 0) ? left[0] : 0)",                                             ["ValidateRemainingResults"] = "result[i] != ((right[i] < 0) ? left[i] : 0)"}),
+    ("LoadBinOpTest.template",        new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "MaskLoad",                    ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64",                                                                              ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                          ["ValidateFirstResult"] = "result[0] != (((right[0] & (1UL << 63)) != 0) ? left[0] : 0)",                            ["ValidateRemainingResults"] = "result[i] != (((right[i] & (1UL << 63)) != 0) ? left[i] : 0)"}),
+    ("StoreBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "MaskStore",                   ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32",                                                                               ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                           ["ValidateFirstResult"] = "result[0] != ((left[0] < 0) ? right[0] : result[0])",                                     ["ValidateRemainingResults"] = "result[i] != ((left[i] < 0) ? right[i] : result[i])"}),
+    ("StoreBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "MaskStore",                   ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt32",                                                                              ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()",                                                         ["ValidateFirstResult"] = "result[0] != (((left[0] & (1U << 31)) != 0)  ? right[0] : result[0])",                    ["ValidateRemainingResults"] = "result[i] != (((left[i] & (1U << 31)) != 0) ? right[i] : result[i])"}),
+    ("StoreBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "MaskStore",                   ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int64",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int64",                                                                               ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",                                                            ["ValidateFirstResult"] = "result[0] != ((left[0] < 0) ? right[0] : result[0])",                                     ["ValidateRemainingResults"] = "result[i] != ((left[i] < 0) ? right[i] : result[i])"}),
+    ("StoreBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "MaskStore",                   ["RetVectorType"] = "Vector256", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "UInt64",                                                                              ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()",                                                          ["ValidateFirstResult"] = "result[0] != (((left[0] & (1UL << 63)) != 0)  ? right[0] : result[0])",                   ["ValidateRemainingResults"] = "result[i] != (((left[i] & (1UL << 63)) != 0) ? right[i] : result[i])"}),
     ("SimpleBinOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "Max",                         ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int16",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int16",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int16",                                                                               ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt16()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt16()",                                                           ["ValidateFirstResult"] = "result[0] != Math.Max(left[0], right[0])",                                                ["ValidateRemainingResults"] = "result[i] != Math.Max(left[i], right[i])"}),
     ("SimpleBinOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "Max",                         ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Byte",   ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Byte",   ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Byte",                                                                                ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetByte()",   ["NextValueOp2"] = "TestLibrary.Generator.GetByte()",                                                            ["ValidateFirstResult"] = "result[0] != Math.Max(left[0], right[0])",                                                ["ValidateRemainingResults"] = "result[i] != Math.Max(left[i], right[i])"}),
     ("SimpleBinOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Avx",  ["Method"] = "Max",                         ["RetVectorType"] = "Vector256", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector256", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector256", ["Op2BaseType"] = "Int32",                                                                               ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",                                                           ["ValidateFirstResult"] = "result[0] != Math.Max(left[0], right[0])",                                                ["ValidateRemainingResults"] = "result[i] != Math.Max(left[i], right[i])"}),
@@ -779,14 +791,23 @@ private static readonly (string templateFileName, Dictionary<string, string> tem
 
 private static readonly (string templateFileName, Dictionary<string, string> templateData)[] Avx2_Vector128Inputs = new []
 {
-    ("ImmBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",  ["Imm"] = "1",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != (((1 & (1 << 0)) == 0) ? left[0] : right[0])",  ["ValidateRemainingResults"] = "result[i] != (((1 & (1 << i)) == 0) ? left[i] : right[i])"}),
-    ("ImmBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",  ["Imm"] = "2",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != (((2 & (1 << 0)) == 0) ? left[0] : right[0])",  ["ValidateRemainingResults"] = "result[i] != (((2 & (1 << i)) == 0) ? left[i] : right[i])"}),
-    ("ImmBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",  ["Imm"] = "4",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != (((4 & (1 << 0)) == 0) ? left[0] : right[0])",  ["ValidateRemainingResults"] = "result[i] != (((4 & (1 << i)) == 0) ? left[i] : right[i])"}),
-    ("ImmBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",  ["Imm"] = "85", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != (((85 & (1 << 0)) == 0) ? left[0] : right[0])", ["ValidateRemainingResults"] = "result[i] != (((85 & (1 << i)) == 0) ? left[i] : right[i])"}),
-    ("ImmBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Imm"] = "1",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((1 & (1 << 0)) == 0) ? left[0] : right[0])",  ["ValidateRemainingResults"] = "result[i] != (((1 & (1 << i)) == 0) ? left[i] : right[i])"}),
-    ("ImmBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Imm"] = "2",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((2 & (1 << 0)) == 0) ? left[0] : right[0])",  ["ValidateRemainingResults"] = "result[i] != (((2 & (1 << i)) == 0) ? left[i] : right[i])"}),
-    ("ImmBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Imm"] = "4",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((4 & (1 << 0)) == 0) ? left[0] : right[0])",  ["ValidateRemainingResults"] = "result[i] != (((4 & (1 << i)) == 0) ? left[i] : right[i])"}),
-    ("ImmBinOpTest.template", new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend", ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Imm"] = "85", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((85 & (1 << 0)) == 0) ? left[0] : right[0])", ["ValidateRemainingResults"] = "result[i] != (((85 & (1 << i)) == 0) ? left[i] : right[i])"}),
+    ("ImmBinOpTest.template",         new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend",                       ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",  ["Imm"] = "1",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != (((1 & (1 << 0)) == 0) ? left[0] : right[0])",                                ["ValidateRemainingResults"] = "result[i] != (((1 & (1 << i)) == 0) ? left[i] : right[i])"}),
+    ("ImmBinOpTest.template",         new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend",                       ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",  ["Imm"] = "2",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != (((2 & (1 << 0)) == 0) ? left[0] : right[0])",                                ["ValidateRemainingResults"] = "result[i] != (((2 & (1 << i)) == 0) ? left[i] : right[i])"}),
+    ("ImmBinOpTest.template",         new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend",                       ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",  ["Imm"] = "4",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != (((4 & (1 << 0)) == 0) ? left[0] : right[0])",                                ["ValidateRemainingResults"] = "result[i] != (((4 & (1 << i)) == 0) ? left[i] : right[i])"}),
+    ("ImmBinOpTest.template",         new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend",                       ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",  ["Imm"] = "85", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != (((85 & (1 << 0)) == 0) ? left[0] : right[0])",                               ["ValidateRemainingResults"] = "result[i] != (((85 & (1 << i)) == 0) ? left[i] : right[i])"}),
+    ("ImmBinOpTest.template",         new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend",                       ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Imm"] = "1",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((1 & (1 << 0)) == 0) ? left[0] : right[0])",                                ["ValidateRemainingResults"] = "result[i] != (((1 & (1 << i)) == 0) ? left[i] : right[i])"}),
+    ("ImmBinOpTest.template",         new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend",                       ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Imm"] = "2",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((2 & (1 << 0)) == 0) ? left[0] : right[0])",                                ["ValidateRemainingResults"] = "result[i] != (((2 & (1 << i)) == 0) ? left[i] : right[i])"}),
+    ("ImmBinOpTest.template",         new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend",                       ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Imm"] = "4",  ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((4 & (1 << 0)) == 0) ? left[0] : right[0])",                                ["ValidateRemainingResults"] = "result[i] != (((4 & (1 << i)) == 0) ? left[i] : right[i])"}),
+    ("ImmBinOpTest.template",         new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "Blend",                       ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32", ["Imm"] = "85", ["LargestVectorSize"] = "32", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((85 & (1 << 0)) == 0) ? left[0] : right[0])",                               ["ValidateRemainingResults"] = "result[i] != (((85 & (1 << i)) == 0) ? left[i] : right[i])"}),
+    ("LoadBinOpTest.template",        new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "MaskLoad",                    ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != ((right[0] < 0) ? left[0] : 0)",                                              ["ValidateRemainingResults"] = "result[i] != ((right[i] < 0) ? left[i] : 0)"}),
+    ("LoadBinOpTest.template",        new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "MaskLoad",                    ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                 ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((right[0] & (1U << 31)) != 0) ? left[0] : 0)",                              ["ValidateRemainingResults"] = "result[i] != (((right[i] & (1U << 31)) != 0) ? left[i] : 0)"}),
+    ("LoadBinOpTest.template",        new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "MaskLoad",                    ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",  ["ValidateFirstResult"] = "result[0] != ((right[0] < 0) ? left[0] : 0)",                                              ["ValidateRemainingResults"] = "result[i] != ((right[i] < 0) ? left[i] : 0)"}),
+    ("LoadBinOpTest.template",        new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "MaskLoad",                    ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                 ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "result[0] != (((right[0] & (1UL << 63)) != 0) ? left[0] : 0)",                             ["ValidateRemainingResults"] = "result[i] != (((right[i] & (1UL << 63)) != 0) ? left[i] : 0)"}),
+    ("StoreBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "MaskStore",                   ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt32()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt32()",  ["ValidateFirstResult"] = "result[0] != ((left[0] < 0) ? right[0] : result[0])",                                      ["ValidateRemainingResults"] = "result[i] != ((left[i] < 0) ? right[i] : result[i])"}),
+    ("StoreBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "MaskStore",                   ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt32", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt32",                 ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt32()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt32()", ["ValidateFirstResult"] = "result[0] != (((left[0] & (1U << 31)) != 0) ? right[0] : result[0])",                      ["ValidateRemainingResults"] = "result[i] != (((left[i] & (1U << 31)) != 0) ? right[i] : result[i])"}),
+    ("StoreBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "MaskStore",                   ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64",  ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "Int64",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int64",                  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetInt64()",  ["NextValueOp2"] = "TestLibrary.Generator.GetInt64()",  ["ValidateFirstResult"] = "result[0] != ((left[0] < 0) ? right[0] : result[0])",                                      ["ValidateRemainingResults"] = "result[i] != ((left[i] < 0) ? right[i] : result[i])"}),
+    ("StoreBinOpTest.template",       new Dictionary<string, string> { ["Isa"] = "Avx2", ["LoadIsa"] = "Sse2", ["Method"] = "MaskStore",                   ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64", ["Op1VectorType"] = "Vector128", ["Op1BaseType"] = "UInt64", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt64",                 ["LargestVectorSize"] = "16", ["NextValueOp1"] = "TestLibrary.Generator.GetUInt64()", ["NextValueOp2"] = "TestLibrary.Generator.GetUInt64()", ["ValidateFirstResult"] = "result[0] != (((left[0] & (1UL << 63))  != 0) ? right[0] : result[0])",                    ["ValidateRemainingResults"] = "result[i] != (((left[i] & (1UL << 63)) != 0) ? right[i] : result[i])"}),
+
 };
 
 private static readonly (string templateFileName, Dictionary<string, string> templateData)[] Fma_Vector128Inputs = new []
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Shared/LoadBinOpTest.template b/tests/src/JIT/HardwareIntrinsics/X86/Shared/LoadBinOpTest.template
new file mode 100644 (file)
index 0000000..48ec635
--- /dev/null
@@ -0,0 +1,400 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Reflection;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void {Method}{RetBaseType}()
+        {
+            var test = new SimpleBinaryOpTest__{Method}{RetBaseType}();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if ({LoadIsa}.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if ({LoadIsa}.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if ({LoadIsa}.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__{Method}{RetBaseType}
+    {
+        private struct TestStruct
+        {
+            public {Op2VectorType}<{Op2BaseType}> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; }
+
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref testStruct._fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleBinaryOpTest__{Method}{RetBaseType} testClass)
+            {
+                var result = {Isa}.{Method}(({RetBaseType}*)testClass._dataTable.inArray1Ptr, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(testClass._dataTable.inArray1Ptr, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = {LargestVectorSize};
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>() / sizeof({Op1BaseType});
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>() / sizeof({Op2BaseType});
+        private static readonly int RetElementCount = Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>() / sizeof({RetBaseType});
+
+        private static {Op1BaseType}[] _data1 = new {Op1BaseType}[Op1ElementCount];
+        private static {Op2BaseType}[] _data2 = new {Op2BaseType}[Op2ElementCount];
+
+        private static {Op2VectorType}<{Op2BaseType}> _clsVar2;
+
+        private {Op2VectorType}<{Op2BaseType}> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}> _dataTable;
+
+        static SimpleBinaryOpTest__{Method}{RetBaseType}()
+        {
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; }
+
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _clsVar2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+        }
+
+        public SimpleBinaryOpTest__{Method}{RetBaseType}()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2VectorType}<{Op2BaseType}>, byte>(ref _fld2), ref Unsafe.As<{Op2BaseType}, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = {NextValueOp1}; }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = {NextValueOp2}; }
+            _dataTable = new SimpleBinaryOpTest__DataTable<{RetBaseType}, {Op1BaseType}, {Op2BaseType}>(_data1, _data2, new {RetBaseType}[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => {Isa}.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = {Isa}.{Method}(
+                ({RetBaseType}*)_dataTable.inArray1Ptr,
+                Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = {Isa}.{Method}(
+                ({RetBaseType}*)_dataTable.inArray1Ptr,
+                {LoadIsa}.Load{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = {Isa}.{Method}(
+                ({RetBaseType}*)_dataTable.inArray1Ptr,
+                {LoadIsa}.LoadAligned{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({RetBaseType}*), typeof({Op2VectorType}<{Op2BaseType}>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof({RetBaseType}*)),
+                                        Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({RetBaseType}*), typeof({Op2VectorType}<{Op2BaseType}>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof({RetBaseType}*)),
+                                        {LoadIsa}.Load{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof({Isa}).GetMethod(nameof({Isa}.{Method}), new Type[] { typeof({RetBaseType}*), typeof({Op2VectorType}<{Op2BaseType}>) })
+                                     .Invoke(null, new object[] {
+                                        Pointer.Box(_dataTable.inArray1Ptr, typeof({RetBaseType}*)),
+                                        {LoadIsa}.LoadAligned{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, ({RetVectorType}<{RetBaseType}>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = {Isa}.{Method}(
+                ({RetBaseType}*)_dataTable.inArray1Ptr,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var left = ({RetBaseType}*)_dataTable.inArray1Ptr;
+            var right = Unsafe.Read<{Op2VectorType}<{Op2BaseType}>>(_dataTable.inArray2Ptr);
+            var result = {Isa}.{Method}(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var left = ({RetBaseType}*)_dataTable.inArray1Ptr;
+            var right = {LoadIsa}.Load{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr));
+            var result = {Isa}.{Method}(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var left = ({RetBaseType}*)_dataTable.inArray1Ptr;
+            var right = {LoadIsa}.LoadAligned{Op2VectorType}(({Op2BaseType}*)(_dataTable.inArray2Ptr));
+            var result = {Isa}.{Method}(left, right);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleBinaryOpTest__{Method}{RetBaseType}();
+            var result = {Isa}.{Method}(({RetBaseType}*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = {Isa}.{Method}(({RetBaseType}*)_dataTable.inArray1Ptr, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = {Isa}.{Method}(({RetBaseType}*)_dataTable.inArray1Ptr, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(void* left, {Op2VectorType}<{Op2BaseType}> right, void* result, [CallerMemberName] string method = "")
+        {
+            {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount];
+            {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount];
+            {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            Unsafe.WriteUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            {Op1BaseType}[] inArray1 = new {Op1BaseType}[Op1ElementCount];
+            {Op2BaseType}[] inArray2 = new {Op2BaseType}[Op2ElementCount];
+            {RetBaseType}[] outArray = new {RetBaseType}[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op1BaseType}, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<{Op1VectorType}<{Op1BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{Op2BaseType}, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<{Op2VectorType}<{Op2BaseType}>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<{RetBaseType}, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<{RetVectorType}<{RetBaseType}>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult({Op1BaseType}[] left, {Op2BaseType}[] right, {RetBaseType}[] result, [CallerMemberName] string method = "")
+        {
+            if ({ValidateFirstResult})
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if ({ValidateRemainingResults})
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof({Isa})}.{nameof({Isa}.{Method})}<{RetBaseType}>({Op1BaseType}*, {Op2VectorType}<{Op2BaseType}>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"    left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"   right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+            }
+        }
+    }
+}