Fixing up the Sse41.Insert float HWIntrinsics
author: Tanner Gooding <tagoo@outlook.com>
Sat, 30 Jun 2018 21:58:14 +0000 (14:58 -0700)
committer: Tanner Gooding <tagoo@outlook.com>
Mon, 2 Jul 2018 19:24:37 +0000 (12:24 -0700)
27 files changed:
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/lowerxarch.cpp
tests/src/JIT/HardwareIntrinsics/X86/Avx2/InsertVector128.Byte.1.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx2/InsertVector128.Int16.1.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx2/InsertVector128.Int32.1.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx2/InsertVector128.Int64.1.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx2/InsertVector128.SByte.1.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx2/InsertVector128.UInt16.1.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx2/InsertVector128.UInt32.1.cs
tests/src/JIT/HardwareIntrinsics/X86/Avx2/InsertVector128.UInt64.1.cs
tests/src/JIT/HardwareIntrinsics/X86/Shared/GenerateTests.csx
tests/src/JIT/HardwareIntrinsics/X86/Shared/InsertVector128Test.template
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.0.cs
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.1.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.128.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.129.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.16.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.192.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.2.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.32.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.4.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.48.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.64.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.8.cs [new file with mode: 0644]
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Program.Sse41.cs
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Sse41_r.csproj
tests/src/JIT/HardwareIntrinsics/X86/Sse41/Sse41_ro.csproj

index 0107273c0cd1dc356e44cce02c529e1f180bfc64..db493a9407f1ea26caa94aed6e9d4286ae27ec97 100644 (file)
@@ -239,27 +239,10 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                     {
                         ssize_t ival = op3->AsIntCon()->IconValue();
                         assert((ival >= 0) && (ival <= 255));
-
-                        if ((intrinsicId == NI_SSE41_Insert) && (baseType == TYP_FLOAT))
-                        {
-                            // Bits 6 and 7 impact the index that is selected from op2
-                            // when op2 is already in register. However, our API exposes
-                            // op2 as a scalar and so bits 6 and 7 must be set to 0.
-                            ival &= 0x3F;
-                        }
-
                         emitSwCase((int8_t)ival);
                     }
                     else
                     {
-                        if ((intrinsicId == NI_SSE41_Insert) && (baseType == TYP_FLOAT))
-                        {
-                            // Bits 6 and 7 impact the index that is selected from op2
-                            // when op2 is already in register. However, our API exposes
-                            // op2 as a scalar and so bits 6 and 7 must be set to 0.
-                            emit->emitIns_R_I(INS_and, EA_1BYTE, op3Reg, 0x3F);
-                        }
-
                         // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                         // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                         // can also occur if the consumer calls it directly and just doesn't pass a constant value.
index c912a2d7287ea28e9eb013ace160947403fbc4d6..4787e45d743c14484babc4d7c5c701ffa015ac18 100644 (file)
@@ -2401,8 +2401,69 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
                     break;
                 }
 
-                case NI_SSE2_Insert:
                 case NI_SSE41_Insert:
+                {
+                    if (containingNode->gtSIMDBaseType == TYP_FLOAT)
+                    {
+                        assert(supportsSIMDScalarLoads == false);
+
+                        GenTree* op1 = containingNode->gtGetOp1();
+                        GenTree* op2 = containingNode->gtGetOp2();
+                        GenTree* op3 = nullptr;
+
+                        assert(op1->OperIsList());
+                        assert(containingNode->gtGetOp2() == nullptr);
+
+                        GenTreeArgList* argList = op1->AsArgList();
+
+                        op1     = argList->Current();
+                        argList = argList->Rest();
+
+                        op2     = argList->Current();
+                        argList = argList->Rest();
+
+                        assert(node == op2);
+
+                        op3 = argList->Current();
+
+                        // The upper two bits of the immediate value are ignored if
+                        // op2 comes from memory. In order to support using the upper
+                        // bits, we need to disable containment support if op3 is not
+                        // constant or if the constant is greater than 0x3F (which means
+                        // at least one of the upper two bits is set).
+
+                        if (op3->IsCnsIntOrI())
+                        {
+                            ssize_t ival = op3->AsIntCon()->IconValue();
+                            assert((ival >= 0) && (ival <= 255));
+
+                            if (ival <= 0x3F)
+                            {
+                                supportsAlignedSIMDLoads   = !comp->canUseVexEncoding();
+                                supportsUnalignedSIMDLoads = !supportsAlignedSIMDLoads;
+                                supportsGeneralLoads       = supportsUnalignedSIMDLoads;
+
+                                break;
+                            }
+                        }
+
+                        assert(supportsAlignedSIMDLoads == false);
+                        assert(supportsUnalignedSIMDLoads == false);
+                        assert(supportsGeneralLoads == false);
+                    }
+                    else
+                    {
+                        assert(supportsAlignedSIMDLoads == false);
+                        assert(supportsUnalignedSIMDLoads == false);
+
+                        supportsSIMDScalarLoads = true;
+                        supportsGeneralLoads    = supportsSIMDScalarLoads;
+                    }
+
+                    break;
+                }
+
+                case NI_SSE2_Insert:
                 case NI_AVX_CompareScalar:
                 {
                     assert(supportsAlignedSIMDLoads == false);
index 71ff744478c844a3e2a162ba9c486bb62cf2ccef..4fe983215fa1340f0890be4ded140c477a81d69d 100644 (file)
@@ -14,6 +14,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
 using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
index 226d8471dab4389e006d04477d930e90f351bff7..bbb1906849586594e50863f70bc092e31da0274d 100644 (file)
@@ -14,6 +14,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
 using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
index ddf47a583861de3231f74e8b821f370fe6f6ba8f..b50d7a8e22425b66658777f6dacab52ceda5fdf6 100644 (file)
@@ -14,6 +14,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
 using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
index e8cdf0f40a8759e0de340bf2fbcc95b7fdb745c3..d5629d532243230dbbfbcc4cdfd848b5d8317da2 100644 (file)
@@ -14,6 +14,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
 using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
index 94c92ee0d6580cd7897207cba0f0c5c2c5de00a8..449acbf05de33d46af3cc83d917deb6881ca0aa8 100644 (file)
@@ -14,6 +14,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
 using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
index 57f1d2da54942f0cf33212683cc9770cf165f2eb..1c2c0b3247e2f9446947dcaffaf83481cca7c438 100644 (file)
@@ -14,6 +14,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
 using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
index 244d788ca891b4163171a1dff165f34e8b1833d3..93778dd8935c5700c6172f44fb9ac5836289e120 100644 (file)
@@ -14,6 +14,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
 using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
index 76ac558da50b5462efed195db0238d97338e5b0e..38cb48a34c20422a05faf81d55fa46144831ea3c 100644 (file)
@@ -14,6 +14,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
 using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
index 13c621f0f1233246a48415f6fe452de298626a4b..81bdf1f64e1b360b1ba1378298dbee32d23dec6c 100644 (file)
@@ -297,18 +297,30 @@ private static readonly (string templateFileName, Dictionary<string, string> tem
     ("SimpleUnOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Floor",                         ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                                                                   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(MathF.Floor(firstOp[0]))",                                                                                                                                               ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(MathF.Floor(firstOp[i]))"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "FloorScalar",                   ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double",                                                                                                      ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(double)(random.NextDouble())",                        ["NextValueOp2"] = "(double)(random.NextDouble())",                                                                                    ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(Math.Floor(right[0]))",                                                                                                                                                  ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(left[i])"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "FloorScalar",                   ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                                      ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(MathF.Floor(right[0]))",                                                                                                                                                 ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "0",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[0])",                                                                                                                                                              ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte",    ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["Data"] = "(byte)2",  ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(byte)(random.Next(0, byte.MaxValue))",                                                                                                                                                       ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["Data"] = "(sbyte)2", ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(sbyte)(random.Next(0, sbyte.MaxValue))",                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["Data"] = "(int)2",   ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(int)(random.Next(0, int.MaxValue))",                                                                                                                                                         ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["Data"] = "(uint)2",  ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(uint)(random.Next(0, int.MaxValue))",                                                                                                                                                        ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["Data"] = "(long)2",  ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(long)(random.Next(0, int.MaxValue))",                                                                                                                                                        ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["Data"] = "(ulong)2", ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(ulong)(random.Next(0, int.MaxValue))",                                                                                                                                                       ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(0.0f)",                                                                                                                                                                  ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "2",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[0])",                                                                                                                                                              ["ValidateRemainingResults"] = "i == 1 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "4",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[0])",                                                                                                                                                              ["ValidateRemainingResults"] = "i == 2 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "8",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[0])",                                                                                                                                                              ["ValidateRemainingResults"] = "i == 3 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "16",  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(left[0])",                                                                                                                                                               ["ValidateRemainingResults"] = "i == 1 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(right[0]) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "32",  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(left[0])",                                                                                                                                                               ["ValidateRemainingResults"] = "i == 2 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(right[0]) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "48",  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(left[0])",                                                                                                                                                               ["ValidateRemainingResults"] = "i == 3 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(right[0]) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "64",  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[1])",                                                                                                                                                              ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "128", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[2])",                                                                                                                                                              ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte",    ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["Data"] = "(byte)2",  ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(byte)(random.Next(0, byte.MaxValue))",                                                                                                                                                       ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["Data"] = "(sbyte)2", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(sbyte)(random.Next(0, sbyte.MaxValue))",                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["Data"] = "(int)2",   ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(int)(random.Next(0, int.MaxValue))",                                                                                                                                                         ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["Data"] = "(uint)2",  ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(uint)(random.Next(0, int.MaxValue))",                                                                                                                                                        ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["Data"] = "(long)2",  ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(long)(random.Next(0, int.MaxValue))",                                                                                                                                                        ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
     ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["Data"] = "(ulong)2", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(ulong)(random.Next(0, int.MaxValue))",                                                                                                                                                       ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(0.0f)",                                                                                                                                                                  ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
+    ("InsertVector128Test.template", new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                     ["Imm"] = "192", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[3])",                                                                                                                                                              ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Max",                           ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                                                                       ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(int)(random.Next(int.MinValue, int.MaxValue))",       ["NextValueOp2"] = "(int)(random.Next(int.MinValue, int.MaxValue))",                                                                   ["ValidateFirstResult"] = "result[0] != Math.Max(left[0], right[0])",                                                                                                                                                                                                           ["ValidateRemainingResults"] = "result[i] != Math.Max(left[i], right[i])"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Max",                           ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                                                                       ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(sbyte)(random.Next(sbyte.MinValue, sbyte.MaxValue))", ["NextValueOp2"] = "(sbyte)(random.Next(sbyte.MinValue, sbyte.MaxValue))",                                                             ["ValidateFirstResult"] = "result[0] != Math.Max(left[0], right[0])",                                                                                                                                                                                                           ["ValidateRemainingResults"] = "result[i] != Math.Max(left[i], right[i])"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Max",                           ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                                                                      ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(ushort)(random.Next(0, ushort.MaxValue))",            ["NextValueOp2"] = "(ushort)(random.Next(0, ushort.MaxValue))",                                                                        ["ValidateFirstResult"] = "result[0] != Math.Max(left[0], right[0])",                                                                                                                                                                                                           ["ValidateRemainingResults"] = "result[i] != Math.Max(left[i], right[i])"}),
index e95bb2b8d77ec928c818737180d25970755bfb2a..401980f9a2f6037a52fbe04f5451b244a1a09b02 100644 (file)
@@ -14,6 +14,7 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
 using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
index 5ee20531daf891d6a00847add75be3113bf393b5..19a9902c35d9f0a3aaa98ca2dd87e927a96d8506 100644 (file)
@@ -14,6 +14,8 @@ using System.Runtime.CompilerServices;
 using System.Runtime.InteropServices;
 using System.Runtime.Intrinsics;
 using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
 
 namespace JIT.HardwareIntrinsics.X86
 {
@@ -21,10 +23,8 @@ namespace JIT.HardwareIntrinsics.X86
     {
         private static void InsertSingle0()
         {
-            var test = new SimpleUnaryOpTest__InsertSingle0();
-            
-            try
-            {
+            var test = new SimpleBinaryOpTest__InsertSingle0();
+
             if (test.IsSupported)
             {
                 // Validates basic functionality works, using Unsafe.Read
@@ -77,11 +77,6 @@ namespace JIT.HardwareIntrinsics.X86
                 // Validates we throw on unsupported hardware
                 test.RunUnsupportedScenario();
             }
-            }
-            catch (PlatformNotSupportedException)
-            {
-                test.Succeeded = true;
-            }
 
             if (!test.Succeeded)
             {
@@ -90,40 +85,49 @@ namespace JIT.HardwareIntrinsics.X86
         }
     }
 
-    public sealed unsafe class SimpleUnaryOpTest__InsertSingle0
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle0
     {
         private static readonly int LargestVectorSize = 16;
 
         private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
         private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
 
-        private static Single[] _data = new Single[Op1ElementCount];
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
 
-        private static Vector128<Single> _clsVar;
+        private static Vector128<Single> _clsVar1;
+        private static Vector128<Single> _clsVar2;
 
-        private Vector128<Single> _fld;
+        private Vector128<Single> _fld1;
+        private Vector128<Single> _fld2;
 
-        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
 
-        static SimpleUnaryOpTest__InsertSingle0()
+        static SimpleBinaryOpTest__InsertSingle0()
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
-            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
         }
 
-        public SimpleUnaryOpTest__InsertSingle0()
+        public SimpleBinaryOpTest__InsertSingle0()
         {
             Succeeded = true;
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
-            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
-            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
         }
 
         public bool IsSupported => Sse41.IsSupported;
@@ -133,132 +137,135 @@ namespace JIT.HardwareIntrinsics.X86
         public void RunBasicScenario_UnsafeRead()
         {
             var result = Sse41.Insert(
-                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
-                (float)2,
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
                 0
             );
 
             Unsafe.Write(_dataTable.outArrayPtr, result);
-            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
         }
 
         public void RunBasicScenario_Load()
         {
             var result = Sse41.Insert(
-                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
-                (float)2,
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadVector128((Single*)(_dataTable.inArray2Ptr)),
                 0
             );
 
             Unsafe.Write(_dataTable.outArrayPtr, result);
-            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
         }
 
         public void RunBasicScenario_LoadAligned()
         {
             var result = Sse41.Insert(
-                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
-                (float)2,
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
                 0
             );
 
             Unsafe.Write(_dataTable.outArrayPtr, result);
-            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
         }
 
         public void RunReflectionScenario_UnsafeRead()
         {
-            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
                                      .Invoke(null, new object[] {
-                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
-                                        (float)2,
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
                                         (byte)0
                                      });
 
             Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
-            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
         }
 
         public void RunReflectionScenario_Load()
         {
-            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
                                      .Invoke(null, new object[] {
-                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
-                                        (float)2,
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)),
                                         (byte)0
                                      });
 
             Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
-            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
         }
 
         public void RunReflectionScenario_LoadAligned()
         {
-            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
                                      .Invoke(null, new object[] {
-                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
-                                        (float)2,
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
                                         (byte)0
                                      });
 
             Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
-            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
         }
 
         public void RunClsVarScenario()
         {
             var result = Sse41.Insert(
-                _clsVar,
-                (float)2,
+                _clsVar1,
+                _clsVar2,
                 0
             );
 
             Unsafe.Write(_dataTable.outArrayPtr, result);
-            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
         }
 
         public void RunLclVarScenario_UnsafeRead()
         {
-            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
-            var result = Sse41.Insert(firstOp, (float)2, 0);
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 0);
 
             Unsafe.Write(_dataTable.outArrayPtr, result);
-            ValidateResult(firstOp, _dataTable.outArrayPtr);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
         }
 
         public void RunLclVarScenario_Load()
         {
-            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
-            var result = Sse41.Insert(firstOp, (float)2, 0);
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 0);
 
             Unsafe.Write(_dataTable.outArrayPtr, result);
-            ValidateResult(firstOp, _dataTable.outArrayPtr);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
         }
 
         public void RunLclVarScenario_LoadAligned()
         {
-            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
-            var result = Sse41.Insert(firstOp, (float)2, 0);
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 0);
 
             Unsafe.Write(_dataTable.outArrayPtr, result);
-            ValidateResult(firstOp, _dataTable.outArrayPtr);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
         }
 
         public void RunLclFldScenario()
         {
-            var test = new SimpleUnaryOpTest__InsertSingle0();
-            var result = Sse41.Insert(test._fld, (float)2, 0);
+            var test = new SimpleBinaryOpTest__InsertSingle0();
+            var result = Sse41.Insert(test._fld1, test._fld2, 0);
 
             Unsafe.Write(_dataTable.outArrayPtr, result);
-            ValidateResult(test._fld, _dataTable.outArrayPtr);
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
         }
 
         public void RunFldScenario()
         {
-            var result = Sse41.Insert(_fld, (float)2, 0);
+            var result = Sse41.Insert(_fld1, _fld2, 0);
 
             Unsafe.Write(_dataTable.outArrayPtr, result);
-            ValidateResult(_fld, _dataTable.outArrayPtr);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
         }
 
         public void RunUnsupportedScenario()
@@ -275,45 +282,56 @@ namespace JIT.HardwareIntrinsics.X86
             }
         }
 
-        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "")
         {
-            Single[] inArray = new Single[Op1ElementCount];
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
             Single[] outArray = new Single[RetElementCount];
 
-            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
 
-            ValidateResult(inArray, outArray, method);
+            ValidateResult(inArray1, inArray2, outArray, method);
         }
 
-        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
         {
-            Single[] inArray = new Single[Op1ElementCount];
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
             Single[] outArray = new Single[RetElementCount];
 
-            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
 
-            ValidateResult(inArray, outArray, method);
+            ValidateResult(inArray1, inArray2, outArray, method);
         }
 
-        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
         {
-
-            for (var i = 0; i < RetElementCount; i++)
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[0]))
+            {
+                Succeeded = false;
+            }
+            else
             {
-                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                for (var i = 1; i < RetElementCount; i++)
                 {
-                    Succeeded = false;
-                    break;
+                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
                 }
             }
 
             if (!Succeeded)
             {
-                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
-                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
-                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.0): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
                 Console.WriteLine();
             }
         }
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.1.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.1.cs
new file mode 100644 (file)
index 0000000..ba6b5a0
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle1()
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle1();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle1
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        private static Vector128<Single> _clsVar1;
+        private static Vector128<Single> _clsVar2;
+
+        private Vector128<Single> _fld1;
+        private Vector128<Single> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        static SimpleBinaryOpTest__InsertSingle1()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        // Instance constructor: marks the test as passing, fills the instance operands with random
+        // floats, then refills the scratch arrays and passes them to the data table.
+        public SimpleBinaryOpTest__InsertSingle1()
+        {
+            Succeeded = true;
+
+            Random rng = new Random();
+            uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
+
+            for (int index = 0; index < Op1ElementCount; index++)
+            {
+                _data1[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), vectorByteCount);
+
+            for (int index = 0; index < Op2ElementCount; index++)
+            {
+                _data2[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), vectorByteCount);
+
+            // Refill the scratch arrays before they are passed to the data table
+            for (int index = 0; index < Op1ElementCount; index++)
+            {
+                _data1[index] = (float)(rng.NextDouble());
+            }
+            for (int index = 0; index < Op2ElementCount; index++)
+            {
+                _data2[index] = (float)(rng.NextDouble());
+            }
+
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        // Forwards Sse41.IsSupported.
+        public bool IsSupported => Sse41.IsSupported;
+
+        // Overall outcome of the test: set to true by the constructor, set to false when a
+        // validation fails; RunUnsupportedScenario also resets and sets it.
+        public bool Succeeded { get; set; }
+
+        // Basic scenario: both operands are read from the data table with Unsafe.Read and passed
+        // directly as call arguments; the result is written to the output buffer and validated.
+        public void RunBasicScenario_UnsafeRead()
+        {
+            Vector128<Single> inserted = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Basic scenario: both operands are produced by Sse.LoadVector128 and passed directly as
+        // call arguments; the result is written to the output buffer and validated.
+        public void RunBasicScenario_Load()
+        {
+            Vector128<Single> inserted = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Basic scenario: both operands are produced by Sse.LoadAlignedVector128 and passed directly
+        // as call arguments; the result is written to the output buffer and validated.
+        public void RunBasicScenario_LoadAligned()
+        {
+            Vector128<Single> inserted = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Reflection scenario: looks up Sse41.Insert(Vector128<Single>, Vector128<Single>, byte) by
+        // signature and invokes it with operands obtained through Unsafe.Read.
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var insertMethod = typeof(Sse41).GetMethod(
+                nameof(Sse41.Insert),
+                new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) });
+
+            object boxedResult = insertMethod.Invoke(null, new object[] {
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                (byte)1
+            });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(boxedResult));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Reflection scenario: looks up Sse41.Insert(Vector128<Single>, Vector128<Single>, byte) by
+        // signature and invokes it with operands obtained through Sse.LoadVector128.
+        public void RunReflectionScenario_Load()
+        {
+            var insertMethod = typeof(Sse41).GetMethod(
+                nameof(Sse41.Insert),
+                new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) });
+
+            object boxedResult = insertMethod.Invoke(null, new object[] {
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                (byte)1
+            });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(boxedResult));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Reflection scenario: looks up Sse41.Insert(Vector128<Single>, Vector128<Single>, byte) by
+        // signature and invokes it with operands obtained through Sse.LoadAlignedVector128.
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var insertMethod = typeof(Sse41).GetMethod(
+                nameof(Sse41.Insert),
+                new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) });
+
+            object boxedResult = insertMethod.Invoke(null, new object[] {
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                (byte)1
+            });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(boxedResult));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Static-member scenario: passes the two static operands directly to Sse41.Insert and
+        // validates the stored result against those same statics.
+        public void RunClsVarScenario()
+        {
+            Vector128<Single> inserted = Sse41.Insert(_clsVar1, _clsVar2, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        // Local-variable scenario: operands are first read into locals with Unsafe.Read, then
+        // the locals are passed to Sse41.Insert and reused for validation.
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            Vector128<Single> firstOperand = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            Vector128<Single> secondOperand = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            Vector128<Single> inserted = Sse41.Insert(firstOperand, secondOperand, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
+        }
+
+        // Local-variable scenario: operands are first loaded into locals with Sse.LoadVector128,
+        // then the locals are passed to Sse41.Insert and reused for validation.
+        public void RunLclVarScenario_Load()
+        {
+            Vector128<Single> firstOperand = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            Vector128<Single> secondOperand = Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            Vector128<Single> inserted = Sse41.Insert(firstOperand, secondOperand, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
+        }
+
+        // Local-variable scenario: operands are first loaded into locals with
+        // Sse.LoadAlignedVector128, then the locals are passed to Sse41.Insert and reused for validation.
+        public void RunLclVarScenario_LoadAligned()
+        {
+            Vector128<Single> firstOperand = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            Vector128<Single> secondOperand = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            Vector128<Single> inserted = Sse41.Insert(firstOperand, secondOperand, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
+        }
+
+        // Field-of-a-local scenario: builds a second test instance and passes its instance fields
+        // to Sse41.Insert; the result goes to this instance's output buffer.
+        public void RunLclFldScenario()
+        {
+            SimpleBinaryOpTest__InsertSingle1 other = new SimpleBinaryOpTest__InsertSingle1();
+            Vector128<Single> inserted = Sse41.Insert(other._fld1, other._fld2, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(other._fld1, other._fld2, _dataTable.outArrayPtr);
+        }
+
+        // Instance-field scenario: passes this instance's fields directly to Sse41.Insert.
+        public void RunFldScenario()
+        {
+            Vector128<Single> inserted = Sse41.Insert(_fld1, _fld2, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        // Unsupported-hardware scenario: the test only succeeds if running the basic scenario
+        // throws PlatformNotSupportedException. Any other exception propagates to the caller.
+        public void RunUnsupportedScenario()
+        {
+            // Assume failure until the expected exception is observed
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+                return;
+            }
+            catch (PlatformNotSupportedException)
+            {
+                // Expected: fall through and record success
+            }
+
+            Succeeded = true;
+        }
+
+        // Copies both vector operands and the raw result buffer into managed arrays, then
+        // delegates to the array-based overload.
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "")
+        {
+            uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
+
+            Single[] leftElements = new Single[Op1ElementCount];
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref leftElements[0]), left);
+
+            Single[] rightElements = new Single[Op2ElementCount];
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref rightElements[0]), right);
+
+            Single[] resultElements = new Single[RetElementCount];
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref resultElements[0]), ref Unsafe.AsRef<byte>(result), vectorByteCount);
+
+            // Pass the scenario name along explicitly so it is not replaced by this overload's name
+            ValidateResult(leftElements, rightElements, resultElements, method);
+        }
+
+        // Copies one vector's worth of bytes from each raw buffer (both inputs and the result)
+        // into managed arrays, then delegates to the array-based overload.
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
+
+            Single[] leftElements = new Single[Op1ElementCount];
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref leftElements[0]), ref Unsafe.AsRef<byte>(left), vectorByteCount);
+
+            Single[] rightElements = new Single[Op2ElementCount];
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref rightElements[0]), ref Unsafe.AsRef<byte>(right), vectorByteCount);
+
+            Single[] resultElements = new Single[RetElementCount];
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref resultElements[0]), ref Unsafe.AsRef<byte>(result), vectorByteCount);
+
+            // Pass the scenario name along explicitly so it is not replaced by this overload's name
+            ValidateResult(leftElements, rightElements, resultElements, method);
+        }
+
+        // Checks the result of Sse41.Insert(left, right, 1) bit-for-bit:
+        //   * element 0 must be +0.0f (consistent with the INSERTPS immediate 0b0000_0001, whose
+        //     low four bits are a zero mask selecting element 0);
+        //   * elements 1..RetElementCount-1 must be unchanged from `left`.
+        // On a mismatch the sticky Succeeded flag is cleared and a report is printed for `method`
+        // (the calling scenario's name).
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
+        {
+            // Outcome of THIS validation only. Reporting must not be gated on the sticky Succeeded
+            // property, otherwise every scenario that runs after the first failure would also be
+            // reported as failed even when its own result is correct.
+            bool succeeded = true;
+
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(0.0f))
+            {
+                succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i]))
+                    {
+                        succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>, 1): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.128.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.128.cs
new file mode 100644 (file)
index 0000000..c736e7b
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        // Driver for the Sse41.Insert (Single, imm8 = 128) test. Runs every scenario when the test
+        // reports IsSupported, otherwise runs the unsupported-hardware scenario, and throws if any
+        // scenario left the test marked as failed.
+        private static void InsertSingle128()
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle128();
+
+            if (!test.IsSupported)
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            else
+            {
+                // Direct calls: operands via Unsafe.Read, then via Load / LoadAligned
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    test.RunBasicScenario_Load();
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Calls through reflection: operands via Unsafe.Read, then via Load / LoadAligned
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    test.RunReflectionScenario_Load();
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Operands held in static members
+                test.RunClsVarScenario();
+
+                // Operands held in locals: via Unsafe.Read, then via Load / LoadAligned
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    test.RunLclVarScenario_Load();
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Operands held in the fields of a local instance
+                test.RunLclFldScenario();
+
+                // Operands held in this instance's fields
+                test.RunFldScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    // Test fixture for Sse41.Insert(Vector128<Single>, Vector128<Single>, 128). Each Run* method
+    // feeds the intrinsic its operands in a different way (direct call arguments, reflection,
+    // statics, locals, fields) and validates the stored result.
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle128
+    {
+        // Passed to the data table constructor; 16 matches the byte size of a Vector128
+        // (NOTE(review): exact meaning is defined by SimpleBinaryOpTest__DataTable - confirm there).
+        private static readonly int LargestVectorSize = 16;
+
+        // Number of Single elements in each operand and in the result vector.
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        // Scratch input buffers, refilled with random values by both constructors.
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        // Static operands, consumed by RunClsVarScenario.
+        private static Vector128<Single> _clsVar1;
+        private static Vector128<Single> _clsVar2;
+
+        // Instance operands, consumed by RunFldScenario and RunLclFldScenario.
+        private Vector128<Single> _fld1;
+        private Vector128<Single> _fld2;
+
+        // Supplies the input/output pointers used by the scenarios.
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        // Type initializer: fills the scratch arrays with random floats and copies one vector's
+        // worth of each into the static operands.
+        static SimpleBinaryOpTest__InsertSingle128()
+        {
+            Random rng = new Random();
+            uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
+
+            for (int index = 0; index < Op1ElementCount; index++)
+            {
+                _data1[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), vectorByteCount);
+
+            for (int index = 0; index < Op2ElementCount; index++)
+            {
+                _data2[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), vectorByteCount);
+        }
+
+        // Instance constructor: marks the test as passing, fills the instance operands with random
+        // floats, then refills the scratch arrays and passes them to the data table.
+        public SimpleBinaryOpTest__InsertSingle128()
+        {
+            Succeeded = true;
+
+            Random rng = new Random();
+            uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
+
+            for (int index = 0; index < Op1ElementCount; index++)
+            {
+                _data1[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), vectorByteCount);
+
+            for (int index = 0; index < Op2ElementCount; index++)
+            {
+                _data2[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), vectorByteCount);
+
+            // Refill the scratch arrays before they are passed to the data table
+            for (int index = 0; index < Op1ElementCount; index++)
+            {
+                _data1[index] = (float)(rng.NextDouble());
+            }
+            for (int index = 0; index < Op2ElementCount; index++)
+            {
+                _data2[index] = (float)(rng.NextDouble());
+            }
+
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        // Forwards Sse41.IsSupported.
+        public bool IsSupported => Sse41.IsSupported;
+
+        // Overall outcome of the test: true after construction, false once any validation fails.
+        public bool Succeeded { get; set; }
+
+        // Basic scenario: operands read with Unsafe.Read and passed directly as call arguments.
+        public void RunBasicScenario_UnsafeRead()
+        {
+            Vector128<Single> inserted = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Basic scenario: operands produced by Sse.LoadVector128 and passed directly as call arguments.
+        public void RunBasicScenario_Load()
+        {
+            Vector128<Single> inserted = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Basic scenario: operands produced by Sse.LoadAlignedVector128 and passed directly as call arguments.
+        public void RunBasicScenario_LoadAligned()
+        {
+            Vector128<Single> inserted = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Reflection scenario: invokes the (Vector128<Single>, Vector128<Single>, byte) overload
+        // with operands obtained through Unsafe.Read.
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var insertMethod = typeof(Sse41).GetMethod(
+                nameof(Sse41.Insert),
+                new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) });
+
+            object boxedResult = insertMethod.Invoke(null, new object[] {
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                (byte)128
+            });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(boxedResult));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Reflection scenario: invokes the (Vector128<Single>, Vector128<Single>, byte) overload
+        // with operands obtained through Sse.LoadVector128.
+        public void RunReflectionScenario_Load()
+        {
+            var insertMethod = typeof(Sse41).GetMethod(
+                nameof(Sse41.Insert),
+                new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) });
+
+            object boxedResult = insertMethod.Invoke(null, new object[] {
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                (byte)128
+            });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(boxedResult));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Reflection scenario: invokes the (Vector128<Single>, Vector128<Single>, byte) overload
+        // with operands obtained through Sse.LoadAlignedVector128.
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var insertMethod = typeof(Sse41).GetMethod(
+                nameof(Sse41.Insert),
+                new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) });
+
+            object boxedResult = insertMethod.Invoke(null, new object[] {
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                (byte)128
+            });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(boxedResult));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Static-member scenario: passes the two static operands directly to Sse41.Insert.
+        public void RunClsVarScenario()
+        {
+            Vector128<Single> inserted = Sse41.Insert(_clsVar1, _clsVar2, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        // Local-variable scenario: operands are read into locals with Unsafe.Read first.
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            Vector128<Single> firstOperand = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            Vector128<Single> secondOperand = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            Vector128<Single> inserted = Sse41.Insert(firstOperand, secondOperand, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
+        }
+
+        // Local-variable scenario: operands are loaded into locals with Sse.LoadVector128 first.
+        public void RunLclVarScenario_Load()
+        {
+            Vector128<Single> firstOperand = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            Vector128<Single> secondOperand = Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            Vector128<Single> inserted = Sse41.Insert(firstOperand, secondOperand, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
+        }
+
+        // Local-variable scenario: operands are loaded into locals with Sse.LoadAlignedVector128 first.
+        public void RunLclVarScenario_LoadAligned()
+        {
+            Vector128<Single> firstOperand = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            Vector128<Single> secondOperand = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            Vector128<Single> inserted = Sse41.Insert(firstOperand, secondOperand, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(firstOperand, secondOperand, _dataTable.outArrayPtr);
+        }
+
+        // Field-of-a-local scenario: builds a second test instance and passes its instance fields;
+        // the result goes to this instance's output buffer.
+        public void RunLclFldScenario()
+        {
+            SimpleBinaryOpTest__InsertSingle128 other = new SimpleBinaryOpTest__InsertSingle128();
+            Vector128<Single> inserted = Sse41.Insert(other._fld1, other._fld2, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(other._fld1, other._fld2, _dataTable.outArrayPtr);
+        }
+
+        // Instance-field scenario: passes this instance's fields directly to Sse41.Insert.
+        public void RunFldScenario()
+        {
+            Vector128<Single> inserted = Sse41.Insert(_fld1, _fld2, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        // Unsupported-hardware scenario: succeeds only if running the basic scenario throws
+        // PlatformNotSupportedException. Any other exception propagates to the caller.
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        // Copies both vector operands and the raw result buffer into managed arrays, then
+        // delegates to the array-based overload.
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "")
+        {
+            uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
+
+            Single[] leftElements = new Single[Op1ElementCount];
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref leftElements[0]), left);
+
+            Single[] rightElements = new Single[Op2ElementCount];
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref rightElements[0]), right);
+
+            Single[] resultElements = new Single[RetElementCount];
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref resultElements[0]), ref Unsafe.AsRef<byte>(result), vectorByteCount);
+
+            // Pass the scenario name along explicitly so it is not replaced by this overload's name
+            ValidateResult(leftElements, rightElements, resultElements, method);
+        }
+
+        // Copies one vector's worth of bytes from each raw buffer into managed arrays, then
+        // delegates to the array-based overload.
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
+
+            Single[] leftElements = new Single[Op1ElementCount];
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref leftElements[0]), ref Unsafe.AsRef<byte>(left), vectorByteCount);
+
+            Single[] rightElements = new Single[Op2ElementCount];
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref rightElements[0]), ref Unsafe.AsRef<byte>(right), vectorByteCount);
+
+            Single[] resultElements = new Single[RetElementCount];
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref resultElements[0]), ref Unsafe.AsRef<byte>(result), vectorByteCount);
+
+            // Pass the scenario name along explicitly so it is not replaced by this overload's name
+            ValidateResult(leftElements, rightElements, resultElements, method);
+        }
+
+        // Checks the result of Sse41.Insert(left, right, 128) bit-for-bit:
+        //   * element 0 must equal right[2] (consistent with the INSERTPS immediate 0b1000_0000:
+        //     source element 2, destination element 0, empty zero mask);
+        //   * elements 1..RetElementCount-1 must be unchanged from `left`.
+        // On a mismatch the sticky Succeeded flag is cleared and a report is printed for `method`
+        // (the calling scenario's name).
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
+        {
+            // Outcome of THIS validation only. Reporting must not be gated on the sticky Succeeded
+            // property, otherwise every scenario that runs after the first failure would also be
+            // reported as failed even when its own result is correct.
+            bool succeeded = true;
+
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[2]))
+            {
+                succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i]))
+                    {
+                        succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>, 128): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.129.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.129.cs
new file mode 100644 (file)
index 0000000..07e25f3
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        // Driver for the Sse41.Insert (Single, imm8 = 129) test. Runs every scenario when the test
+        // reports IsSupported, otherwise runs the unsupported-hardware scenario, and throws if any
+        // scenario left the test marked as failed.
+        private static void InsertSingle129()
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle129();
+
+            if (!test.IsSupported)
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            else
+            {
+                // Direct calls: operands via Unsafe.Read, then via Load / LoadAligned
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    test.RunBasicScenario_Load();
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Calls through reflection: operands via Unsafe.Read, then via Load / LoadAligned
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    test.RunReflectionScenario_Load();
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Operands held in static members
+                test.RunClsVarScenario();
+
+                // Operands held in locals: via Unsafe.Read, then via Load / LoadAligned
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    test.RunLclVarScenario_Load();
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Operands held in the fields of a local instance
+                test.RunLclFldScenario();
+
+                // Operands held in this instance's fields
+                test.RunFldScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle129
+    {
+        // Passed to the data table constructor; 16 matches the byte size of a Vector128
+        // (NOTE(review): exact meaning is defined by SimpleBinaryOpTest__DataTable - confirm there).
+        private static readonly int LargestVectorSize = 16;
+
+        // Number of Single elements in each operand and in the result vector.
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        // Scratch input buffers, refilled with random values by both constructors.
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        // Static operands, filled by the type initializer.
+        private static Vector128<Single> _clsVar1;
+        private static Vector128<Single> _clsVar2;
+
+        // Instance operands, filled by the instance constructor.
+        private Vector128<Single> _fld1;
+        private Vector128<Single> _fld2;
+
+        // Supplies the input/output pointers (inArray1Ptr, inArray2Ptr, outArrayPtr) used by the scenarios.
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        // Type initializer: fills the shared scratch arrays with random floats and copies
+        // one vector's worth of each into the static operands.
+        static SimpleBinaryOpTest__InsertSingle129()
+        {
+            Random rng = new Random();
+            uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
+
+            for (int index = 0; index < Op1ElementCount; index++)
+            {
+                _data1[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), vectorByteCount);
+
+            for (int index = 0; index < Op2ElementCount; index++)
+            {
+                _data2[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), vectorByteCount);
+        }
+
+        // Instance constructor: marks the test as passing, fills the instance operands with random
+        // floats, then refills the scratch arrays and passes them to the data table.
+        public SimpleBinaryOpTest__InsertSingle129()
+        {
+            Succeeded = true;
+
+            Random rng = new Random();
+            uint vectorByteCount = (uint)Unsafe.SizeOf<Vector128<Single>>();
+
+            for (int index = 0; index < Op1ElementCount; index++)
+            {
+                _data1[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), vectorByteCount);
+
+            for (int index = 0; index < Op2ElementCount; index++)
+            {
+                _data2[index] = (float)(rng.NextDouble());
+            }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), vectorByteCount);
+
+            // Refill the scratch arrays before they are passed to the data table
+            for (int index = 0; index < Op1ElementCount; index++)
+            {
+                _data1[index] = (float)(rng.NextDouble());
+            }
+            for (int index = 0; index < Op2ElementCount; index++)
+            {
+                _data2[index] = (float)(rng.NextDouble());
+            }
+
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        // Forwards Sse41.IsSupported.
+        public bool IsSupported => Sse41.IsSupported;
+
+        // Overall outcome of the test; the constructor initializes it to true.
+        public bool Succeeded { get; set; }
+
+        // Basic scenario: both operands are read from the data table with Unsafe.Read and passed
+        // directly as call arguments; the result is written to the output buffer and validated.
+        public void RunBasicScenario_UnsafeRead()
+        {
+            Vector128<Single> inserted = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                129);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Basic scenario: both operands are produced by Sse.LoadVector128 and passed directly as
+        // call arguments; the result is written to the output buffer and validated.
+        public void RunBasicScenario_Load()
+        {
+            Vector128<Single> inserted = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                129);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Basic scenario: both operands are produced by Sse.LoadAlignedVector128 and passed directly
+        // as call arguments; the result is written to the output buffer and validated.
+        public void RunBasicScenario_LoadAligned()
+        {
+            Vector128<Single> inserted = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                129);
+
+            Unsafe.Write(_dataTable.outArrayPtr, inserted);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead() // same operands as RunBasicScenario_UnsafeRead, but Insert is looked up and invoked through reflection
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] { // null target: the method is invoked without an instance; arguments are boxed into the object[]
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                                        (byte)129 // cast so the boxed value is a byte, matching the third parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; the cast unboxes it before the store
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); // pointer overload: re-reads the same input buffers
+        }
+
+        public void RunReflectionScenario_Load() // reflection-invoked Insert with operands produced by LoadVector128 calls
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] { // null target: the method is invoked without an instance; arguments are boxed into the object[]
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call; presumably bound by a `using static` import outside this chunk - verify
+                                        (byte)129 // cast so the boxed value is a byte, matching the third parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; the cast unboxes it before the store
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); // pointer overload: re-reads the same input buffers
+        }
+
+        public void RunReflectionScenario_LoadAligned() // reflection-invoked Insert with operands produced by LoadAlignedVector128 calls
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] { // null target: the method is invoked without an instance; arguments are boxed into the object[]
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)), // NOTE(review): assumes the data table buffers are suitably aligned - confirm in SimpleBinaryOpTest__DataTable
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call; presumably bound by a `using static` import outside this chunk - verify
+                                        (byte)129 // cast so the boxed value is a byte, matching the third parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; the cast unboxes it before the store
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); // pointer overload: re-reads the same input buffers
+        }
+
+        public void RunClsVarScenario() // Insert with the _clsVar1/_clsVar2 fields passed directly as operands
+        {
+            var result = Sse41.Insert(
+                _clsVar1,
+                _clsVar2,
+                129 // byte control constant (129 == 0b1000_0001)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result where the validator reads it back
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr); // vector overload: inputs are the fields themselves, not the data table buffers
+        }
+
+        public void RunLclVarScenario_UnsafeRead() // Insert with operands first stored in locals (read via Unsafe.Read) instead of passed inline
+        {
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 129); // byte control constant (129 == 0b1000_0001)
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result where the validator reads it back
+            ValidateResult(left, right, _dataTable.outArrayPtr); // vector overload: validates against the locals that were actually passed
+        }
+
+        public void RunLclVarScenario_Load() // Insert with operands first stored in locals, loaded via LoadVector128
+        {
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr)); // unqualified call; presumably bound by a `using static` import outside this chunk - verify
+            var result = Sse41.Insert(left, right, 129); // byte control constant (129 == 0b1000_0001)
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result where the validator reads it back
+            ValidateResult(left, right, _dataTable.outArrayPtr); // vector overload: validates against the locals that were actually passed
+        }
+
+        public void RunLclVarScenario_LoadAligned() // Insert with operands first stored in locals, loaded via LoadAlignedVector128
+        {
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)); // NOTE(review): assumes the data table buffers are suitably aligned - confirm in SimpleBinaryOpTest__DataTable
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)); // unqualified call; presumably bound by a `using static` import outside this chunk - verify
+            var result = Sse41.Insert(left, right, 129); // byte control constant (129 == 0b1000_0001)
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result where the validator reads it back
+            ValidateResult(left, right, _dataTable.outArrayPtr); // vector overload: validates against the locals that were actually passed
+        }
+
+        public void RunLclFldScenario() // Insert with operands taken from the fields of a freshly constructed local test object
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle129();
+            var result = Sse41.Insert(test._fld1, test._fld2, 129); // byte control constant (129 == 0b1000_0001)
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // the result goes to this instance's data table, not the local object's
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); // outcome is recorded on this instance's Succeeded flag
+        }
+
+        public void RunFldScenario() // Insert with this instance's _fld1/_fld2 fields passed directly as operands
+        {
+            var result = Sse41.Insert(_fld1, _fld2, 129); // byte control constant (129 == 0b1000_0001)
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result where the validator reads it back
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr); // vector overload: inputs are the fields themselves
+        }
+
+        public void RunUnsupportedScenario() // passes only if calling the intrinsic throws PlatformNotSupportedException
+        {
+            Succeeded = false; // assume failure; only the catch below can turn this into a pass
+
+            try
+            {
+                RunBasicScenario_UnsafeRead(); // expected to throw before it completes
+            }
+            catch (PlatformNotSupportedException) // any other exception type is not caught here and propagates to the caller
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "") // vector-input overload: unpacks everything into Single[] and defers to the array overload
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left); // writes the whole vector over the array's storage, starting at element 0
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copies one Vector128<Single> worth of bytes from the result buffer
+
+            ValidateResult(inArray1, inArray2, outArray, method); // method is passed explicitly so the report names the calling scenario, not this overload
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "") // pointer-input overload: copies all three buffers into Single[] and defers to the array overload
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>()); // each copy moves one Vector128<Single> worth of bytes
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method); // method is passed explicitly so the report names the calling scenario, not this overload
+        }
+
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
+        {
+            // Checks one scenario's output against the result this file expects for control byte 129:
+            // element 0 must be +0.0f and elements 1..RetElementCount-1 must equal `left`, bit for bit.
+            // `right` is only reported, never compared. NOTE(review): this looks like INSERTPS with the
+            // low zero-mask bit set (129 == 0b1000_0001) - confirm against the instruction reference.
+            //
+            // The verdict is tracked in a local because nothing in the validation path sets Succeeded
+            // back to true: testing the property instead would make every passing scenario that runs
+            // after a failure print a bogus "failed" report. Succeeded is only ever cleared here.
+            bool succeeded = BitConverter.SingleToInt32Bits(result[0]) == BitConverter.SingleToInt32Bits(0.0f);
+
+            for (var i = 1; succeeded && (i < RetElementCount); i++)
+            {
+                succeeded = BitConverter.SingleToInt32Bits(result[i]) == BitConverter.SingleToInt32Bits(left[i]);
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.129): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.16.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.16.cs
new file mode 100644 (file)
index 0000000..07d80ab
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle16() // entry point for this test: runs every scenario, then throws if any of them failed
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle16();
+
+            if (test.IsSupported) // i.e. Sse41.IsSupported
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // the Load/LoadAligned scenarios also call Sse.LoadVector128 / Sse.LoadAlignedVector128
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // same guard as above, for the reflection variants of the load scenarios
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // same guard as above, for the local-variable variants of the load scenarios
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded) // scenarios record a failure in Succeeded rather than throwing, so it is surfaced here
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle16 // scenario set for Sse41.Insert on Vector128<Single> with control byte 16
+    {
+        private static readonly int LargestVectorSize = 16; // passed to the data table constructor; NOTE(review): presumably the buffer size/alignment in bytes - confirm in SimpleBinaryOpTest__DataTable
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single); // number of Single elements in one Vector128<Single>
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data1 = new Single[Op1ElementCount]; // static scratch buffers: refilled by the type initializer and by every instance constructor
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        private static Vector128<Single> _clsVar1; // static operands used by RunClsVarScenario
+        private static Vector128<Single> _clsVar2;
+
+        private Vector128<Single> _fld1; // instance operands used by RunFldScenario and RunLclFldScenario
+        private Vector128<Single> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable; // supplies the inArray1Ptr/inArray2Ptr/outArrayPtr buffers used by the scenarios
+
+        static SimpleBinaryOpTest__InsertSingle16() // type initializer: fills the static operand vectors with random data
+        {
+            var random = new Random(); // no seed given, so the test data differs from run to run
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy _data1 into _clsVar1
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy _data2 into _clsVar2
+        }
+
+        public SimpleBinaryOpTest__InsertSingle16() // fills the instance operand fields, then builds the data table from a second batch of random data
+        {
+            Succeeded = true; // starts as a pass; scenarios can only clear it (RunUnsupportedScenario excepted)
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy _data1 into _fld1
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy _data2 into _fld2
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); } // fresh values, so the data table inputs differ from _fld1/_fld2
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported; // forwards Sse41.IsSupported, the class whose Insert method every scenario here calls
+
+        public bool Succeeded { get; set; } // pass/fail flag read by the driver after all scenarios have run
+
+        public void RunBasicScenario_UnsafeRead() // Insert with both vector operands produced inline by Unsafe.Read on the data table buffers
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                16 // byte control constant (16 == 0b0001_0000)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result where the validator reads it back
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load() // Insert with both vector operands produced inline by LoadVector128 calls
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call: resolved through the file's `using static` imports
+                16
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned() // Insert with both vector operands produced inline by LoadAlignedVector128 calls
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)), // NOTE(review): assumes the data table buffers are suitably aligned - confirm in SimpleBinaryOpTest__DataTable
+                LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                16
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead() // same operands as RunBasicScenario_UnsafeRead, but Insert is looked up and invoked through reflection
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] { // null target: the method is invoked without an instance; arguments are boxed into the object[]
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                                        (byte)16 // cast so the boxed value is a byte, matching the third parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; the cast unboxes it before the store
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load() // reflection-invoked Insert with operands produced by LoadVector128 calls
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)16
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned() // reflection-invoked Insert with operands produced by LoadAlignedVector128 calls
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)16
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario() // Insert with the static _clsVar1/_clsVar2 fields passed directly as operands
+        {
+            var result = Sse41.Insert(
+                _clsVar1,
+                _clsVar2,
+                16
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr); // vector overload: inputs are the fields themselves, not the data table buffers
+        }
+
+        public void RunLclVarScenario_UnsafeRead() // Insert with operands first stored in locals (read via Unsafe.Read) instead of passed inline
+        {
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // vector overload: validates against the locals that were actually passed
+        }
+
+        public void RunLclVarScenario_Load() // Insert with operands first stored in locals, loaded via LoadVector128
+        {
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned() // Insert with operands first stored in locals, loaded via LoadAlignedVector128
+        {
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario() // Insert with operands taken from the fields of a freshly constructed local test object
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle16(); // note: the constructor also refills the static _data1/_data2 scratch buffers
+            var result = Sse41.Insert(test._fld1, test._fld2, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // the result goes to this instance's data table, not the local object's
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); // outcome is recorded on this instance's Succeeded flag
+        }
+
+        public void RunFldScenario() // Insert with this instance's _fld1/_fld2 fields passed directly as operands
+        {
+            var result = Sse41.Insert(_fld1, _fld2, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario() // passes only if calling the intrinsic throws PlatformNotSupportedException
+        {
+            Succeeded = false; // assume failure; only the catch below can turn this into a pass
+
+            try
+            {
+                RunBasicScenario_UnsafeRead(); // expected to throw before it completes
+            }
+            catch (PlatformNotSupportedException) // any other exception type is not caught here and propagates to the caller
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "") // vector-input overload: unpacks everything into Single[] and defers to the array overload
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left); // writes the whole vector over the array's storage, starting at element 0
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method); // method is passed explicitly so the report names the calling scenario, not this overload
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "") // pointer-input overload: copies all three buffers into Single[] and defers to the array overload
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>()); // each copy moves one Vector128<Single> worth of bytes
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method); // method is passed explicitly so the report names the calling scenario, not this overload
+        }
+
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
+        {
+            // Checks one scenario's output against the result this file expects for control byte 16:
+            // element 1 must equal right[0] and every other element must equal the matching element of
+            // left, compared bit for bit. NOTE(review): consistent with INSERTPS imm8 0b0001_0000
+            // (source element 0 -> destination element 1, empty zero mask) - confirm against the SDM.
+            //
+            // The verdict lives in a local: validation never sets Succeeded back to true, so testing the
+            // property here would make every passing scenario after a failure print a "failed" report.
+            bool succeeded = BitConverter.SingleToInt32Bits(result[0]) == BitConverter.SingleToInt32Bits(left[0]);
+
+            for (var i = 1; succeeded && (i < RetElementCount); i++)
+            {
+                int expectedBits = BitConverter.SingleToInt32Bits((i == 1) ? right[0] : left[i]);
+                succeeded = BitConverter.SingleToInt32Bits(result[i]) == expectedBits;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.16): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.192.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.192.cs
new file mode 100644 (file)
index 0000000..c5f7eb9
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle192() // entry point for this test: runs every scenario, then throws if any of them failed
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle192();
+
+            if (test.IsSupported) // i.e. Sse41.IsSupported
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // the Load/LoadAligned scenarios also call Sse.LoadVector128 / Sse.LoadAlignedVector128
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // same guard as above, for the reflection variants of the load scenarios
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // same guard as above, for the local-variable variants of the load scenarios
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded) // the driver's only failure signal is the test object's Succeeded flag
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle192
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        private static Vector128<Single> _clsVar1;
+        private static Vector128<Single> _clsVar2;
+
+        private Vector128<Single> _fld1;
+        private Vector128<Single> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        static SimpleBinaryOpTest__InsertSingle192() // type initializer: fills the static operand vectors _clsVar1/_clsVar2 with random data
+        {
+            var random = new Random(); // no seed given, so the test data differs from run to run
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy _data1 into _clsVar1
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy _data2 into _clsVar2
+        }
+
+        public SimpleBinaryOpTest__InsertSingle192() // fills the instance operand fields, then builds the data table from a second batch of random data
+        {
+            Succeeded = true; // a new test object starts out as passing
+
+            var random = new Random(); // no seed given, so the test data differs from run to run
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); } // _data1/_data2 are static scratch buffers shared by all instances
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy _data1 into _fld1
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy _data2 into _fld2
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); } // fresh values, so the data table inputs differ from _fld1/_fld2
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize); // NOTE(review): last argument is presumably the buffer size/alignment in bytes - confirm in SimpleBinaryOpTest__DataTable
+        }
+
+        public bool IsSupported => Sse41.IsSupported; // forwards Sse41.IsSupported, the class whose Insert method every scenario here calls
+
+        public bool Succeeded { get; set; } // pass/fail flag: set to true by the constructor and read by the driver after the scenarios have run
+
+        public void RunBasicScenario_UnsafeRead() // Insert with both vector operands produced inline by Unsafe.Read on the data table buffers
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                192 // byte control constant (192 == 0b1100_0000)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the data table's output buffer
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); // validated against the same input buffers that were read above
+        }
+
+        public void RunBasicScenario_Load() // Insert with both vector operands produced inline by LoadVector128 calls
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call: resolved through the file's `using static` imports
+                192 // byte control constant (192 == 0b1100_0000)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the data table's output buffer
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); // validated against the same input buffers that were loaded above
+        }
+
+        public void RunBasicScenario_LoadAligned() // Insert with both vector operands produced inline by LoadAlignedVector128 calls
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)), // NOTE(review): assumes the data table buffers are suitably aligned - confirm in SimpleBinaryOpTest__DataTable
+                LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call: resolved through the file's `using static` imports
+                192 // byte control constant (192 == 0b1100_0000)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the data table's output buffer
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); // validated against the same input buffers that were loaded above
+        }
+
+        public void RunReflectionScenario_UnsafeRead() // same operands as RunBasicScenario_UnsafeRead, but Insert is looked up and invoked through reflection
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] { // null target: the method is invoked without an instance; arguments are boxed into the object[]
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                                        (byte)192 // cast so the boxed value is a byte, matching the third parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; the cast unboxes it before the store
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); // validated against the same input buffers that were read above
+        }
+
+        public void RunReflectionScenario_Load() // reflection-invoked Insert with operands produced by LoadVector128 calls
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] { // null target: the method is invoked without an instance; arguments are boxed into the object[]
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call: resolved through the file's `using static` imports
+                                        (byte)192 // cast so the boxed value is a byte, matching the third parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; the cast unboxes it before the store
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr); // validated against the same input buffers that were loaded above
+        }
+
+        public void RunReflectionScenario_LoadAligned() // Reflection scenario: invokes Insert through MethodInfo with operands produced by LoadAlignedVector128
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // selects the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)192 // cast so the boxed argument matches the byte parameter
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; unbox before storing
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario() // Static-member scenario: passes _clsVar1 and _clsVar2 directly as operands
+        {
+            var result = Sse41.Insert(
+                _clsVar1,
+                _clsVar2,
+                192 // immediate control byte (192 == 0xC0)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr); // uses the Vector128 overload because the inputs are not in the data table
+        }
+
+        public void RunLclVarScenario_UnsafeRead() // Local-variable scenario: operands are first read into locals with Unsafe.Read
+        {
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validates against the locals actually passed to Insert
+        }
+
+        public void RunLclVarScenario_Load() // Local-variable scenario: operands are first loaded into locals with LoadVector128
+        {
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validates against the locals actually passed to Insert
+        }
+
+        public void RunLclVarScenario_LoadAligned() // Local-variable scenario: operands are first loaded into locals with LoadAlignedVector128
+        {
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validates against the locals actually passed to Insert
+        }
+
+        public void RunLclFldScenario() // Field-of-a-local scenario: operands are the instance fields of a freshly constructed test object
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle192();
+            var result = Sse41.Insert(test._fld1, test._fld2, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // the result goes to this instance's data table, not the local object's
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario() // Instance-field scenario: operands are this object's _fld1 and _fld2
+        {
+            var result = Sse41.Insert(_fld1, _fld2, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario() // Unsupported-hardware scenario: passes only if the intrinsic call throws PlatformNotSupportedException
+        {
+            Succeeded = false; // assume failure until the expected exception is observed
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "") // Vector overload: copies the operands and the result buffer into arrays, then defers to the array overload
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left); // reinterpret the array start as bytes and store the whole vector
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copies one Vector128<Single> worth of bytes from the result pointer
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` so the log names the original scenario
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "") // Pointer overload: copies all three buffers into arrays, then defers to the array overload
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>()); // each copy moves one Vector128<Single> worth of bytes
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` so the log names the original scenario
+        }
+
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "") // Array overload: checks the expected lanes for immediate 192 and logs the operands on failure
+        {
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[3])) // lane 0 must hold right[3]; compared as raw bits rather than with float ==
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++) // the remaining lanes must equal the corresponding lanes of left
+                {
+                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded) // Succeeded is never set back to true here, so a failure from an earlier scenario also triggers this dump
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.192): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.2.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.2.cs
new file mode 100644 (file)
index 0000000..da4ff5f
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle2() // Driver for the Sse41.Insert(Single) test with immediate 2: runs each scenario, then throws if any reported failure
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle2();
+
+            if (test.IsSupported) // forwards Sse41.IsSupported
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // the Load/LoadAligned scenarios call Sse.LoadVector128 / Sse.LoadAlignedVector128
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded) // cleared by any scenario whose validation failed
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle2
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        private static Vector128<Single> _clsVar1;
+        private static Vector128<Single> _clsVar2;
+
+        private Vector128<Single> _fld1;
+        private Vector128<Single> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        static SimpleBinaryOpTest__InsertSingle2() // Type initializer: fills the static operands _clsVar1 and _clsVar2 with random values
+        {
+            var random = new Random(); // no explicit seed is supplied
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy the scratch array into the static vector
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleBinaryOpTest__InsertSingle2() // Instance constructor: randomizes _fld1/_fld2, then builds the data table from a second set of random inputs
+        {
+            Succeeded = true; // scenarios only ever clear this flag on failure
+
+            var random = new Random(); // no explicit seed is supplied
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); } // note: _data1/_data2 are static scratch arrays shared by all instances
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); } // regenerate so the data-table inputs differ from the field values
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize); // data table type is defined elsewhere; presumably it copies the inputs into its own buffers - TODO confirm
+        }
+
+        public bool IsSupported => Sse41.IsSupported; // the driver uses this to choose between the normal scenarios and RunUnsupportedScenario
+
+        public bool Succeeded { get; set; } // overall pass/fail flag: set true by the constructor, cleared by failing validations
+
+        public void RunBasicScenario_UnsafeRead() // Basic scenario: both operands are read from the data-table inputs with Unsafe.Read
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                2 // immediate control byte
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result where the pointer-based ValidateResult overload reads it
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load() // Basic scenario: both operands come from LoadVector128 on the data-table inputs
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call, resolved through the file's `using static` imports of Sse and Sse2
+                2 // immediate control byte
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result where the pointer-based ValidateResult overload reads it
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned() // Basic scenario: both operands come from LoadAlignedVector128 on the data-table inputs
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call, resolved through the file's `using static` imports of Sse and Sse2
+                2 // immediate control byte
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result where the pointer-based ValidateResult overload reads it
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead() // Reflection scenario: invokes Insert through MethodInfo with operands read by Unsafe.Read
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // selects the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                                        (byte)2 // cast so the boxed argument matches the byte parameter
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; unbox before storing
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load() // Reflection scenario: invokes Insert through MethodInfo with operands produced by LoadVector128
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // selects the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)2 // cast so the boxed argument matches the byte parameter
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; unbox before storing
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned() // Reflection scenario: invokes Insert through MethodInfo with operands produced by LoadAlignedVector128
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // selects the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)2 // cast so the boxed argument matches the byte parameter
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; unbox before storing
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario() // Static-member scenario: passes _clsVar1 and _clsVar2 directly as operands
+        {
+            var result = Sse41.Insert(
+                _clsVar1,
+                _clsVar2,
+                2 // immediate control byte
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr); // uses the Vector128 overload because the inputs are not in the data table
+        }
+
+        public void RunLclVarScenario_UnsafeRead() // Local-variable scenario: operands are first read into locals with Unsafe.Read
+        {
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validates against the locals actually passed to Insert
+        }
+
+        public void RunLclVarScenario_Load() // Local-variable scenario: operands are first loaded into locals with LoadVector128
+        {
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validates against the locals actually passed to Insert
+        }
+
+        public void RunLclVarScenario_LoadAligned() // Local-variable scenario: operands are first loaded into locals with LoadAlignedVector128
+        {
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validates against the locals actually passed to Insert
+        }
+
+        public void RunLclFldScenario() // Field-of-a-local scenario: operands are the instance fields of a freshly constructed test object
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle2();
+            var result = Sse41.Insert(test._fld1, test._fld2, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // the result goes to this instance's data table, not the local object's
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario() // Instance-field scenario: operands are this object's _fld1 and _fld2
+        {
+            var result = Sse41.Insert(_fld1, _fld2, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario() // Unsupported-hardware scenario: passes only if the intrinsic call throws PlatformNotSupportedException
+        {
+            Succeeded = false; // assume failure until the expected exception is observed
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "") // Vector overload: copies the operands and the result buffer into arrays, then defers to the array overload
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left); // reinterpret the array start as bytes and store the whole vector
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copies one Vector128<Single> worth of bytes from the result pointer
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` so the log names the original scenario
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "") // Pointer overload: copies all three buffers into arrays, then defers to the array overload
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>()); // each copy moves one Vector128<Single> worth of bytes
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` so the log names the original scenario
+        }
+
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "") // Array overload: checks the expected lanes for immediate 2 and logs the operands on failure
+        {
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[0])) // lane 0 must hold right[0]; compared as raw bits rather than with float ==
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (i == 1 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])) // lane 1 must have the bit pattern of 0.0f; the later lanes must equal left
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded) // Succeeded is never set back to true here, so a failure from an earlier scenario also triggers this dump
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.2): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.32.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.32.cs
new file mode 100644 (file)
index 0000000..3aaf3ea
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle32() // Driver for the Sse41.Insert(Single) test with immediate 32: runs each scenario, then throws if any reported failure
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle32();
+
+            if (test.IsSupported) // forwards Sse41.IsSupported
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // the Load/LoadAligned scenarios call Sse.LoadVector128 / Sse.LoadAlignedVector128
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded) // true only if no scenario reported a failure
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle32
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        private static Vector128<Single> _clsVar1;
+        private static Vector128<Single> _clsVar2;
+
+        private Vector128<Single> _fld1;
+        private Vector128<Single> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        static SimpleBinaryOpTest__InsertSingle32() // Type initializer: fills the static operands _clsVar1 and _clsVar2 with random values
+        {
+            var random = new Random(); // no explicit seed is supplied
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // byte-copy the scratch array into the static vector
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleBinaryOpTest__InsertSingle32() // Instance constructor: randomizes _fld1/_fld2, then builds the data table from a second set of random inputs
+        {
+            Succeeded = true; // start from success; nothing in the constructor clears it
+
+            var random = new Random(); // no explicit seed is supplied
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); } // note: _data1/_data2 are static scratch arrays shared by all instances
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); } // regenerate so the data-table inputs differ from the field values
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize); // data table type is defined elsewhere; presumably it copies the inputs into its own buffers - TODO confirm
+        }
+
+        public bool IsSupported => Sse41.IsSupported; // the driver uses this to choose between the normal scenarios and RunUnsupportedScenario
+
+        public bool Succeeded { get; set; } // overall pass/fail flag: set true by the constructor and checked by the driver after the scenarios run
+
+        public void RunBasicScenario_UnsafeRead() // Basic scenario: both operands are read from the data-table inputs with Unsafe.Read
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                32 // immediate control byte (32 == 0x20)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the data table's output buffer for validation
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load() // Basic scenario: both operands come from LoadVector128 on the data-table inputs
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call, resolved through the file's `using static` imports of Sse and Sse2
+                32 // immediate control byte (32 == 0x20)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the data table's output buffer for validation
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned() // Basic scenario: both operands come from LoadAlignedVector128 on the data-table inputs
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call, resolved through the file's `using static` imports of Sse and Sse2
+                32 // immediate control byte (32 == 0x20)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the data table's output buffer for validation
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead() // Reflection scenario: invokes Insert through MethodInfo with operands read by Unsafe.Read
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // selects the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                                        (byte)32 // cast so the boxed argument matches the byte parameter
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; unbox before storing
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load() // Reflection scenario: invokes Insert through MethodInfo with operands produced by LoadVector128
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // selects the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)32 // cast so the boxed argument matches the byte parameter
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; unbox before storing
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned() // Reflection scenario: invokes Insert through MethodInfo with operands produced by LoadAlignedVector128
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // selects the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)32 // cast so the boxed argument matches the byte parameter
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // Invoke returns object; unbox before storing
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario() // Static-member scenario: passes _clsVar1 and _clsVar2 directly as operands
+        {
+            var result = Sse41.Insert(
+                _clsVar1,
+                _clsVar2,
+                32 // immediate control byte (32 == 0x20)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr); // validated against the static operands, not the data-table inputs
+        }
+
+        public void RunLclVarScenario_UnsafeRead() // Local-variable scenario: operands are first read into locals with Unsafe.Read
+        {
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 32);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validates against the locals actually passed to Insert
+        }
+
+        public void RunLclVarScenario_Load() // Passes operands held in locals (loaded with LoadVector128) to Sse41.Insert and validates the result.
+        {
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)); // first operand, loaded from the first input buffer
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr)); // second operand; unqualified call resolved through the file's `using static` imports
+            var result = Sse41.Insert(left, right, 32); // 32 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the output buffer
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validate against the locals that were actually passed
+        }
+
+        public void RunLclVarScenario_LoadAligned() // Passes operands held in locals (loaded with LoadAlignedVector128) to Sse41.Insert and validates the result.
+        {
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)); // NOTE(review): assumes the data table hands out suitably aligned pointers - confirm in SimpleBinaryOpTest__DataTable
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)); // second operand; unqualified call resolved through the file's `using static` imports
+            var result = Sse41.Insert(left, right, 32); // 32 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the output buffer
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validate against the locals that were actually passed
+        }
+
+        public void RunLclFldScenario() // Passes the fields of a freshly constructed local test object to Sse41.Insert and validates the result.
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle32(); // separate instance; only its _fld1/_fld2 are used below
+            var result = Sse41.Insert(test._fld1, test._fld2, 32); // 32 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // the result goes to this instance's output buffer, not the local object's
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); // a failure is recorded on this instance (ValidateResult is called on `this`)
+        }
+
+        public void RunFldScenario() // Passes this instance's _fld1/_fld2 fields to Sse41.Insert and validates the result.
+        {
+            var result = Sse41.Insert(_fld1, _fld2, 32); // 32 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the output buffer
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr); // validate against the same two fields
+        }
+
+        public void RunUnsupportedScenario() // Succeeds only if running the basic scenario throws PlatformNotSupportedException.
+        {
+            Succeeded = false; // start pessimistic: completing without the expected exception counts as a failure
+
+            try
+            {
+                RunBasicScenario_UnsafeRead(); // expected to throw; if it returns normally Succeeded stays false
+            }
+            catch (PlatformNotSupportedException) // any other exception type is not caught here and propagates to the caller
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "") // Spills the vector operands and the raw result into arrays, then defers to the array-based overload; `method` defaults to the caller's name.
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left); // write the whole vector over the array storage, starting at element 0
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy one vector's worth of bytes out of the result buffer
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` explicitly so the original caller's name is reported
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "") // Copies the raw operand and result buffers into arrays, then defers to the array-based overload; `method` defaults to the caller's name.
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>()); // each copy moves exactly one Vector128<Single> worth of bytes
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` explicitly so the original caller's name is reported
+        }
+
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "") // Bitwise element-by-element check of the result for immediate 32; clears Succeeded and logs the operands on mismatch.
+        {
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(left[0])) // element 0 must be carried over from left; bit patterns are compared rather than float values
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++) // remaining elements; stops at the first mismatch
+                {
+                    if (i == 2 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(right[0]) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])) // element 2 must hold right[0], every other element left[i]; NOTE(review): consistent with INSERTPS imm8 0x20 (source index 0, destination index 2, no zero mask) - confirm against the instruction reference
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded) // Succeeded is never set back to true in this method, so a failure recorded by an earlier scenario also triggers this report
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.32): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.4.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.4.cs
new file mode 100644 (file)
index 0000000..c961127
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle4() // Runs every scenario of the Sse41.Insert (immediate 4) test and throws if any of them recorded a failure.
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle4();
+
+            if (test.IsSupported) // IsSupported forwards Sse41.IsSupported
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // the Load/LoadAligned scenarios call Sse load helpers, so they are gated on Sse itself
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // same gating as above
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // same gating as above
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded) // scenarios only record failures; this is where a failure becomes an exception
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle4 // Test fixture exercising Sse41.Insert on Vector128<Single> with the immediate 4.
+    {
+        private static readonly int LargestVectorSize = 16; // forwarded to the data table constructor; NOTE(review): presumably the Vector128 size in bytes - confirm in SimpleBinaryOpTest__DataTable
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single); // number of Single elements in the first operand
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single); // number of Single elements in the second operand
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single); // number of Single elements in the result
+
+        private static Single[] _data1 = new Single[Op1ElementCount]; // shared scratch buffer; refilled by the static constructor and by every instance constructor
+        private static Single[] _data2 = new Single[Op2ElementCount]; // shared scratch buffer; refilled by the static constructor and by every instance constructor
+
+        private static Vector128<Single> _clsVar1; // static operands used by RunClsVarScenario
+        private static Vector128<Single> _clsVar2;
+
+        private Vector128<Single> _fld1; // instance operands used by RunFldScenario and RunLclFldScenario
+        private Vector128<Single> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable; // supplies the inArray1Ptr/inArray2Ptr/outArrayPtr buffers used by the scenarios
+
+        static SimpleBinaryOpTest__InsertSingle4() // Fills the static operands with random data.
+        {
+            var random = new Random(); // no explicit seed is supplied
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy the scratch buffer into _clsVar1
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy the scratch buffer into _clsVar2
+        }
+
+        public SimpleBinaryOpTest__InsertSingle4() // Fills the instance operands and builds the data table, each from freshly generated random data.
+        {
+            Succeeded = true; // scenarios only ever clear this flag, except RunUnsupportedScenario which sets it explicitly
+
+            var random = new Random(); // no explicit seed is supplied
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy the scratch buffer into _fld1
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy the scratch buffer into _fld2
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); } // regenerate so the data table gets values different from the fields
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported; // the driver uses this to choose between the regular scenarios and RunUnsupportedScenario
+
+        public bool Succeeded { get; set; } // overall verdict; read by the driver after all scenarios have run
+
+        public void RunBasicScenario_UnsafeRead() // Direct call with operands read via Unsafe.Read from the data table buffers.
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                4 // immediate control value under test
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the output buffer
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load() // Direct call with operands produced by LoadVector128.
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call, resolved through the `using static` imports at the top of the file
+                4 // immediate control value under test
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned() // Direct call with operands produced by LoadAlignedVector128.
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)), // NOTE(review): assumes the data table hands out suitably aligned pointers - confirm in SimpleBinaryOpTest__DataTable
+                LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                4 // immediate control value under test
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead() // Reflection call with operands read via Unsafe.Read.
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // look up the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] { // null target: no instance is supplied for the call
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                                        (byte)4 // cast so the boxed argument is a byte, matching the parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // unbox the returned object before storing it
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load() // Reflection call with operands produced by LoadVector128.
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // look up the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] { // null target: no instance is supplied for the call
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)4 // cast so the boxed argument is a byte, matching the parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // unbox the returned object before storing it
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned() // Reflection call with operands produced by LoadAlignedVector128.
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // look up the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] { // null target: no instance is supplied for the call
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)4 // cast so the boxed argument is a byte, matching the parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // unbox the returned object before storing it
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario() // Direct call on the static operands _clsVar1/_clsVar2.
+        {
+            var result = Sse41.Insert(
+                _clsVar1,
+                _clsVar2,
+                4 // immediate control value under test
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr); // validate against the same static operands
+        }
+
+        public void RunLclVarScenario_UnsafeRead() // Direct call on locals populated via Unsafe.Read.
+        {
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 4); // 4 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validate against the locals that were actually passed
+        }
+
+        public void RunLclVarScenario_Load() // Direct call on locals populated via LoadVector128.
+        {
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 4); // 4 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validate against the locals that were actually passed
+        }
+
+        public void RunLclVarScenario_LoadAligned() // Direct call on locals populated via LoadAlignedVector128.
+        {
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 4); // 4 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validate against the locals that were actually passed
+        }
+
+        public void RunLclFldScenario() // Direct call on the fields of a freshly constructed local test object.
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle4(); // the constructor also overwrites the shared static _data1/_data2 scratch buffers
+            var result = Sse41.Insert(test._fld1, test._fld2, 4); // 4 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // the result goes to this instance's output buffer, not the local object's
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); // a failure is recorded on this instance (ValidateResult is called on `this`)
+        }
+
+        public void RunFldScenario() // Direct call on this instance's _fld1/_fld2.
+        {
+            var result = Sse41.Insert(_fld1, _fld2, 4); // 4 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario() // Succeeds only if running the basic scenario throws PlatformNotSupportedException.
+        {
+            Succeeded = false; // start pessimistic: completing without the expected exception counts as a failure
+
+            try
+            {
+                RunBasicScenario_UnsafeRead(); // expected to throw; if it returns normally Succeeded stays false
+            }
+            catch (PlatformNotSupportedException) // any other exception type is not caught here and propagates to the caller
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "") // Spills the vector operands and the raw result into arrays, then defers to the array-based overload.
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left); // write the whole vector over the array storage, starting at element 0
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy one vector's worth of bytes out of the result buffer
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` explicitly so the original caller's name is reported
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "") // Copies the raw operand and result buffers into arrays, then defers to the array-based overload.
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>()); // each copy moves exactly one Vector128<Single> worth of bytes
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` explicitly so the original caller's name is reported
+        }
+
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "") // Bitwise element-by-element check of the result for immediate 4; clears Succeeded and logs the operands on mismatch.
+        {
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[0])) // element 0 must hold right[0]; bit patterns are compared rather than float values
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++) // remaining elements; stops at the first mismatch
+                {
+                    if (i == 2 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])) // element 2 must be exactly +0.0f, every other element left[i]; NOTE(review): consistent with INSERTPS imm8 0x04 (source index 0, destination index 0, zero mask bit 2) - confirm against the instruction reference
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded) // Succeeded is never set back to true in this method, so a failure recorded by an earlier scenario also triggers this report
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.4): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.48.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.48.cs
new file mode 100644 (file)
index 0000000..9651430
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle48() // Runs every scenario of the Sse41.Insert (immediate 48) test and throws if any of them recorded a failure.
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle48();
+
+            if (test.IsSupported) // IsSupported forwards Sse41.IsSupported
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // the Load/LoadAligned scenarios call Sse load helpers, so they are gated on Sse itself
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // same gating as above
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported) // same gating as above
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded) // scenarios only record failures; this is where a failure becomes an exception
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle48 // Test fixture exercising Sse41.Insert on Vector128<Single> with the immediate 48.
+    {
+        private static readonly int LargestVectorSize = 16; // forwarded to the data table constructor; NOTE(review): presumably the Vector128 size in bytes - confirm in SimpleBinaryOpTest__DataTable
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single); // number of Single elements in the first operand
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single); // number of Single elements in the second operand
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single); // number of Single elements in the result
+
+        private static Single[] _data1 = new Single[Op1ElementCount]; // shared scratch buffer; refilled by the static constructor and by every instance constructor
+        private static Single[] _data2 = new Single[Op2ElementCount]; // shared scratch buffer; refilled by the static constructor and by every instance constructor
+
+        private static Vector128<Single> _clsVar1; // static operands used by RunClsVarScenario
+        private static Vector128<Single> _clsVar2;
+
+        private Vector128<Single> _fld1; // instance operands used by RunFldScenario and RunLclFldScenario
+        private Vector128<Single> _fld2;
+
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable; // supplies the inArray1Ptr/inArray2Ptr/outArrayPtr buffers used by the scenarios
+
+        static SimpleBinaryOpTest__InsertSingle48() // Fills the static operands with random data.
+        {
+            var random = new Random(); // no explicit seed is supplied
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy the scratch buffer into _clsVar1
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy the scratch buffer into _clsVar2
+        }
+
+        public SimpleBinaryOpTest__InsertSingle48() // Fills the instance operands and builds the data table, each from freshly generated random data.
+        {
+            Succeeded = true; // scenarios only ever clear this flag, except RunUnsupportedScenario which sets it explicitly
+
+            var random = new Random(); // no explicit seed is supplied
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy the scratch buffer into _fld1
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy the scratch buffer into _fld2
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); } // regenerate so the data table gets values different from the fields
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported; // the driver uses this to choose between the regular scenarios and RunUnsupportedScenario
+
+        public bool Succeeded { get; set; } // overall verdict; read by the driver after all scenarios have run
+
+        public void RunBasicScenario_UnsafeRead() // Direct call with operands read via Unsafe.Read from the data table buffers.
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                48 // immediate control value under test
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // store the result in the output buffer
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load() // Direct call with operands produced by LoadVector128.
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadVector128((Single*)(_dataTable.inArray2Ptr)), // unqualified call, resolved through the `using static` imports at the top of the file
+                48 // immediate control value under test
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned() // Direct call with operands produced by LoadAlignedVector128.
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)), // NOTE(review): assumes the data table hands out suitably aligned pointers - confirm in SimpleBinaryOpTest__DataTable
+                LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                48 // immediate control value under test
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead() // Reflection call with operands read via Unsafe.Read.
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // look up the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] { // null target: no instance is supplied for the call
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                                        (byte)48 // cast so the boxed argument is a byte, matching the parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // unbox the returned object before storing it
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load() // Reflection call with operands produced by LoadVector128.
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // look up the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] { // null target: no instance is supplied for the call
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)48 // cast so the boxed argument is a byte, matching the parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // unbox the returned object before storing it
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned() // Reflection call with operands produced by LoadAlignedVector128.
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) }) // look up the (Vector128<Single>, Vector128<Single>, byte) overload
+                                     .Invoke(null, new object[] { // null target: no instance is supplied for the call
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)48 // cast so the boxed argument is a byte, matching the parameter type requested above
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result)); // unbox the returned object before storing it
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario() // Direct call on the static operands _clsVar1/_clsVar2.
+        {
+            var result = Sse41.Insert(
+                _clsVar1,
+                _clsVar2,
+                48 // immediate control value under test
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr); // validate against the same static operands
+        }
+
+        public void RunLclVarScenario_UnsafeRead() // Direct call on locals populated via Unsafe.Read.
+        {
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 48); // 48 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validate against the locals that were actually passed
+        }
+
+        public void RunLclVarScenario_Load() // Direct call on locals populated via LoadVector128.
+        {
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 48); // 48 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validate against the locals that were actually passed
+        }
+
+        public void RunLclVarScenario_LoadAligned() // Direct call on locals populated via LoadAlignedVector128.
+        {
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 48); // 48 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr); // validate against the locals that were actually passed
+        }
+
+        public void RunLclFldScenario() // Direct call on the fields of a freshly constructed local test object.
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle48(); // the constructor also overwrites the shared static _data1/_data2 scratch buffers
+            var result = Sse41.Insert(test._fld1, test._fld2, 48); // 48 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result); // the result goes to this instance's output buffer, not the local object's
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr); // a failure is recorded on this instance (ValidateResult is called on `this`)
+        }
+
+        public void RunFldScenario() // Direct call on this instance's _fld1/_fld2.
+        {
+            var result = Sse41.Insert(_fld1, _fld2, 48); // 48 is the immediate control value under test
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario() // Succeeds only if running the basic scenario throws PlatformNotSupportedException.
+        {
+            Succeeded = false; // start pessimistic: completing without the expected exception counts as a failure
+
+            try
+            {
+                RunBasicScenario_UnsafeRead(); // expected to throw; if it returns normally Succeeded stays false
+            }
+            catch (PlatformNotSupportedException) // any other exception type is not caught here and propagates to the caller
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "") // Spills the vector operands and the raw result into arrays, then defers to the array-based overload.
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left); // write the whole vector over the array storage, starting at element 0
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>()); // copy one vector's worth of bytes out of the result buffer
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` explicitly so the original caller's name is reported
+        }
+
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "") // Copies the raw operand and result buffers into arrays, then defers to the array-based overload.
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>()); // each copy moves exactly one Vector128<Single> worth of bytes
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method); // forward `method` explicitly so the original caller's name is reported
+        }
+
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "") // Bitwise element-by-element check of the result for immediate 48; clears Succeeded and logs the operands on mismatch.
+        {
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(left[0])) // element 0 must be carried over from left; bit patterns are compared rather than float values
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++) // remaining elements; stops at the first mismatch
+                {
+                    if (i == 3 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(right[0]) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])) // element 3 must hold right[0], every other element left[i]; NOTE(review): consistent with INSERTPS imm8 0x30 (source index 0, destination index 3, no zero mask) - confirm against the instruction reference
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            if (!Succeeded) // Succeeded is never set back to true in this method, so a failure recorded by an earlier scenario also triggers this report
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.48): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.64.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.64.cs
new file mode 100644 (file)
index 0000000..d47c8b0
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        // Driver for the Sse41.Insert(Vector128<Single>, Vector128<Single>, 64) test. Runs every
+        // scenario of SimpleBinaryOpTest__InsertSingle64 when the fixture reports support, runs only
+        // the unsupported-hardware scenario otherwise, and throws if the fixture recorded a failure.
+        private static void InsertSingle64()
+        {
+            var fixture = new SimpleBinaryOpTest__InsertSingle64();
+
+            if (!fixture.IsSupported)
+            {
+                // Validates we throw on unsupported hardware
+                fixture.RunUnsupportedScenario();
+            }
+            else
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                fixture.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    fixture.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    fixture.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                fixture.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    fixture.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    fixture.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                fixture.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                fixture.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    fixture.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    fixture.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                fixture.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                fixture.RunFldScenario();
+            }
+
+            // Nothing more to do when every scenario passed.
+            if (fixture.Succeeded)
+            {
+                return;
+            }
+
+            throw new Exception("One or more scenarios did not complete as expected.");
+        }
+    }
+
+    // Test fixture for Sse41.Insert(Vector128<Single>, Vector128<Single>, 64). Each Run*Scenario
+    // method calls the intrinsic with operands obtained a different way (pointer read, load,
+    // reflection, static field, local, instance field), stores the result in the data table's
+    // output buffer and validates it. Failures are recorded in Succeeded and reported on the console.
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle64
+    {
+        // Handed to the data table constructor below.
+        // NOTE(review): presumably the byte size of the largest vector involved (16 matches
+        // Vector128) - confirm against SimpleBinaryOpTest__DataTable.
+        private static readonly int LargestVectorSize = 16;
+
+        // Number of Single elements in each operand vector and in the result vector.
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        // Scratch arrays used to stage random input values. They are static, so both the static
+        // and the instance constructors overwrite the same storage.
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        // Static operands used by RunClsVarScenario.
+        private static Vector128<Single> _clsVar1;
+        private static Vector128<Single> _clsVar2;
+
+        // Instance operands used by RunFldScenario and RunLclFldScenario.
+        private Vector128<Single> _fld1;
+        private Vector128<Single> _fld2;
+
+        // Supplies the inArray1Ptr / inArray2Ptr / outArrayPtr buffers used by the scenarios.
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        // Fills the static operands (_clsVar1, _clsVar2) with random values.
+        static SimpleBinaryOpTest__InsertSingle64()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        // Marks the fixture as succeeded, fills the instance operands (_fld1, _fld2) with random
+        // values, then generates a fresh set of random values for the data table.
+        public SimpleBinaryOpTest__InsertSingle64()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            // Re-randomize the staging arrays so the data table gets values distinct from the fields.
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        // True when the Sse41 intrinsics report support.
+        public bool IsSupported => Sse41.IsSupported;
+
+        // Cleared by validation when a result does not match; never set back to true by validation.
+        public bool Succeeded { get; set; }
+
+        // Calls Insert with both operands read straight from the data table via Unsafe.Read.
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                64
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with both operands produced by LoadVector128 as direct arguments.
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                64
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with both operands produced by LoadAlignedVector128 as direct arguments.
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                64
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Invokes Insert through reflection, with operands read via Unsafe.Read and the control
+        // byte passed as a boxed byte.
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                                        (byte)64
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Invokes Insert through reflection, with operands produced by LoadVector128.
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)64
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Invokes Insert through reflection, with operands produced by LoadAlignedVector128.
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)64
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with the static fields passed directly as operands.
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar1,
+                _clsVar2,
+                64
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with operands first stored in locals, read via Unsafe.Read.
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with operands first stored in locals, loaded via LoadVector128.
+        public void RunLclVarScenario_Load()
+        {
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with operands first stored in locals, loaded via LoadAlignedVector128.
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with the instance fields of a freshly constructed local fixture. The result
+        // is still written to this instance's output buffer.
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle64();
+            var result = Sse41.Insert(test._fld1, test._fld2, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with this instance's fields as operands.
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld1, _fld2, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        // Succeeds only if the basic scenario throws PlatformNotSupportedException; any other
+        // outcome leaves Succeeded false (other exception types propagate to the caller).
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        // Copies the vector operands and the raw result buffer into managed arrays, then defers to
+        // the array-based overload. 'method' carries the originating scenario name through.
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        // Copies one Vector128<Single> worth of bytes from each raw buffer into managed arrays,
+        // then defers to the array-based overload.
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        // Checks the result for control byte 64: lane 0 must hold right[1] and lanes 1..N-1 must be
+        // unchanged from left. This matches the INSERTPS immediate 0x40 (source lane 1, destination
+        // lane 0, empty zero mask). Values are compared by raw bit pattern rather than by float
+        // equality. A mismatch clears Succeeded.
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
+        {
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[1]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    if (BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            // Succeeded is sticky: once any scenario has failed, every later validation also
+            // prints its operands and result here.
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.64): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.8.cs b/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.8.cs
new file mode 100644 (file)
index 0000000..ae2f080
--- /dev/null
@@ -0,0 +1,339 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using static System.Runtime.Intrinsics.X86.Sse;
+using static System.Runtime.Intrinsics.X86.Sse2;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        // Driver for the Sse41.Insert(Vector128<Single>, Vector128<Single>, 8) test. Runs every
+        // scenario of SimpleBinaryOpTest__InsertSingle8 when the fixture reports support, runs only
+        // the unsupported-hardware scenario otherwise, and throws if the fixture recorded a failure.
+        private static void InsertSingle8()
+        {
+            var fixture = new SimpleBinaryOpTest__InsertSingle8();
+
+            if (!fixture.IsSupported)
+            {
+                // Validates we throw on unsupported hardware
+                fixture.RunUnsupportedScenario();
+            }
+            else
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                fixture.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    fixture.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    fixture.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                fixture.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    fixture.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    fixture.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                fixture.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                fixture.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    fixture.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    fixture.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                fixture.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                fixture.RunFldScenario();
+            }
+
+            // Nothing more to do when every scenario passed.
+            if (fixture.Succeeded)
+            {
+                return;
+            }
+
+            throw new Exception("One or more scenarios did not complete as expected.");
+        }
+    }
+
+    // Test fixture for Sse41.Insert(Vector128<Single>, Vector128<Single>, 8). Each Run*Scenario
+    // method calls the intrinsic with operands obtained a different way (pointer read, load,
+    // reflection, static field, local, instance field), stores the result in the data table's
+    // output buffer and validates it. Failures are recorded in Succeeded and reported on the console.
+    public sealed unsafe class SimpleBinaryOpTest__InsertSingle8
+    {
+        // Handed to the data table constructor below.
+        // NOTE(review): presumably the byte size of the largest vector involved (16 matches
+        // Vector128) - confirm against SimpleBinaryOpTest__DataTable.
+        private static readonly int LargestVectorSize = 16;
+
+        // Number of Single elements in each operand vector and in the result vector.
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        // Scratch arrays used to stage random input values. They are static, so both the static
+        // and the instance constructors overwrite the same storage.
+        private static Single[] _data1 = new Single[Op1ElementCount];
+        private static Single[] _data2 = new Single[Op2ElementCount];
+
+        // Static operands used by RunClsVarScenario.
+        private static Vector128<Single> _clsVar1;
+        private static Vector128<Single> _clsVar2;
+
+        // Instance operands used by RunFldScenario and RunLclFldScenario.
+        private Vector128<Single> _fld1;
+        private Vector128<Single> _fld2;
+
+        // Supplies the inArray1Ptr / inArray2Ptr / outArrayPtr buffers used by the scenarios.
+        private SimpleBinaryOpTest__DataTable<Single, Single, Single> _dataTable;
+
+        // Fills the static operands (_clsVar1, _clsVar2) with random values.
+        static SimpleBinaryOpTest__InsertSingle8()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        // Marks the fixture as succeeded, fills the instance operands (_fld1, _fld2) with random
+        // values, then generates a fresh set of random values for the data table.
+        public SimpleBinaryOpTest__InsertSingle8()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld1), ref Unsafe.As<Single, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld2), ref Unsafe.As<Single, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            // Re-randomize the staging arrays so the data table gets values distinct from the fields.
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = (float)(random.NextDouble()); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleBinaryOpTest__DataTable<Single, Single, Single>(_data1, _data2, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        // True when the Sse41 intrinsics report support.
+        public bool IsSupported => Sse41.IsSupported;
+
+        // Cleared by validation when a result does not match; never set back to true by validation.
+        public bool Succeeded { get; set; }
+
+        // Calls Insert with both operands read straight from the data table via Unsafe.Read.
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                8
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with both operands produced by LoadVector128 as direct arguments.
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                8
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with both operands produced by LoadAlignedVector128 as direct arguments.
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                8
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Invokes Insert through reflection, with operands read via Unsafe.Read and the control
+        // byte passed as a boxed byte.
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr),
+                                        (byte)8
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Invokes Insert through reflection, with operands produced by LoadVector128.
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)8
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Invokes Insert through reflection, with operands produced by LoadAlignedVector128.
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Vector128<Single>), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr)),
+                                        LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr)),
+                                        (byte)8
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with the static fields passed directly as operands.
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar1,
+                _clsVar2,
+                8
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with operands first stored in locals, read via Unsafe.Read.
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var left = Unsafe.Read<Vector128<Single>>(_dataTable.inArray1Ptr);
+            var right = Unsafe.Read<Vector128<Single>>(_dataTable.inArray2Ptr);
+            var result = Sse41.Insert(left, right, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with operands first stored in locals, loaded via LoadVector128.
+        public void RunLclVarScenario_Load()
+        {
+            var left = Sse.LoadVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with operands first stored in locals, loaded via LoadAlignedVector128.
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var left = Sse.LoadAlignedVector128((Single*)(_dataTable.inArray1Ptr));
+            var right = LoadAlignedVector128((Single*)(_dataTable.inArray2Ptr));
+            var result = Sse41.Insert(left, right, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(left, right, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with the instance fields of a freshly constructed local fixture. The result
+        // is still written to this instance's output buffer.
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleBinaryOpTest__InsertSingle8();
+            var result = Sse41.Insert(test._fld1, test._fld2, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        // Calls Insert with this instance's fields as operands.
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld1, _fld2, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        // Succeeds only if the basic scenario throws PlatformNotSupportedException; any other
+        // outcome leaves Succeeded false (other exception types propagate to the caller).
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        // Copies the vector operands and the raw result buffer into managed arrays, then defers to
+        // the array-based overload. 'method' carries the originating scenario name through.
+        private void ValidateResult(Vector128<Single> left, Vector128<Single> right, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        // Copies one Vector128<Single> worth of bytes from each raw buffer into managed arrays,
+        // then defers to the array-based overload.
+        private void ValidateResult(void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray1 = new Single[Op1ElementCount];
+            Single[] inArray2 = new Single[Op2ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray1, inArray2, outArray, method);
+        }
+
+        // Checks the result for control byte 8: lane 0 must hold right[0], lane 3 must be +0.0f,
+        // and lanes 1 and 2 must be unchanged from left. This matches the INSERTPS immediate 0x08
+        // (source lane 0, destination lane 0, zero mask selecting lane 3). Values are compared by
+        // raw bit pattern, so a negative zero in lane 3 would be reported as a failure.
+        // A mismatch clears Succeeded.
+        private void ValidateResult(Single[] left, Single[] right, Single[] result, [CallerMemberName] string method = "")
+        {
+            if (BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(right[0]))
+            {
+                Succeeded = false;
+            }
+            else
+            {
+                for (var i = 1; i < RetElementCount; i++)
+                {
+                    // Lane 3 is expected to be zeroed; every other lane is compared against left.
+                    if (i == 3 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i]))
+                    {
+                        Succeeded = false;
+                        break;
+                    }
+                }
+            }
+
+            // Succeeded is sticky: once any scenario has failed, every later validation also
+            // prints its operands and result here.
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single>, Vector128<Single>.8): {method} failed:");
+                Console.WriteLine($"    left: ({string.Join(", ", left)})");
+                Console.WriteLine($"   right: ({string.Join(", ", right)})");
+                Console.WriteLine($"  result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
index 92fd2acd8c7df7ff0551424825928dd8ab03fccc..542e9364bf587d6717276533e50919ebd55da8fd 100644 (file)
@@ -38,18 +38,30 @@ namespace JIT.HardwareIntrinsics.X86
                 ["Floor.Single"] = FloorSingle,
                 ["FloorScalar.Double"] = FloorScalarDouble,
                 ["FloorScalar.Single"] = FloorScalarSingle,
+                ["Insert.Single.0"] = InsertSingle0,
                 ["Insert.Byte.1"] = InsertByte1,
                 ["Insert.SByte.1"] = InsertSByte1,
                 ["Insert.Int32.1"] = InsertInt321,
                 ["Insert.UInt32.1"] = InsertUInt321,
                 ["Insert.Int64.1"] = InsertInt641,
                 ["Insert.UInt64.1"] = InsertUInt641,
+                ["Insert.Single.1"] = InsertSingle1,
+                ["Insert.Single.2"] = InsertSingle2,
+                ["Insert.Single.4"] = InsertSingle4,
+                ["Insert.Single.8"] = InsertSingle8,
+                ["Insert.Single.16"] = InsertSingle16,
+                ["Insert.Single.32"] = InsertSingle32,
+                ["Insert.Single.48"] = InsertSingle48,
+                ["Insert.Single.64"] = InsertSingle64,
+                ["Insert.Single.128"] = InsertSingle128,
                 ["Insert.Byte.129"] = InsertByte129,
                 ["Insert.SByte.129"] = InsertSByte129,
                 ["Insert.Int32.129"] = InsertInt32129,
                 ["Insert.UInt32.129"] = InsertUInt32129,
                 ["Insert.Int64.129"] = InsertInt64129,
                 ["Insert.UInt64.129"] = InsertUInt64129,
+                ["Insert.Single.129"] = InsertSingle129,
+                ["Insert.Single.192"] = InsertSingle192,
                 ["Max.Int32"] = MaxInt32,
                 ["Max.SByte"] = MaxSByte,
                 ["Max.UInt16"] = MaxUInt16,
index b6d3523a2176efb41cc03b16aedcefa48a5abcf7..69dde3a3ad79eff96c7d9cec81cef21cc5acb1bb 100644 (file)
     <Compile Include="Extract.Int64.129.cs" />
     <Compile Include="Extract.UInt64.129.cs" />
     <Compile Include="Extract.Single.129.cs" />
+    <Compile Include="Insert.Single.0.cs" />
     <Compile Include="Insert.Byte.1.cs" />
     <Compile Include="Insert.SByte.1.cs" />
     <Compile Include="Insert.Int32.1.cs" />
     <Compile Include="Insert.UInt32.1.cs" />
     <Compile Include="Insert.Int64.1.cs" />
     <Compile Include="Insert.UInt64.1.cs" />
+    <Compile Include="Insert.Single.1.cs" />   
+    <Compile Include="Insert.Single.2.cs" />   
+    <Compile Include="Insert.Single.4.cs" />   
+    <Compile Include="Insert.Single.8.cs" />   
+    <Compile Include="Insert.Single.16.cs" />  
+    <Compile Include="Insert.Single.32.cs" />  
+    <Compile Include="Insert.Single.48.cs" />  
+    <Compile Include="Insert.Single.64.cs" />  
+    <Compile Include="Insert.Single.128.cs" />
     <Compile Include="Insert.Byte.129.cs" />
     <Compile Include="Insert.SByte.129.cs" />
     <Compile Include="Insert.Int32.129.cs" />
     <Compile Include="Insert.UInt32.129.cs" />
     <Compile Include="Insert.Int64.129.cs" />
     <Compile Include="Insert.UInt64.129.cs" />
+    <Compile Include="Insert.Single.129.cs" /> 
+    <Compile Include="Insert.Single.192.cs" />
     <Compile Include="Program.Sse41.cs" />
     <Compile Include="..\Shared\BooleanUnOpTest_DataTable.cs" />
     <Compile Include="..\Shared\BooleanBinOpTest_DataTable.cs" />
index 424f767169d2479b757aa75f160f61fe854ff135..a2957963c4a6ff1bec59b17876710c0525a66f41 100644 (file)
     <Compile Include="Extract.Int64.129.cs" />
     <Compile Include="Extract.UInt64.129.cs" />
     <Compile Include="Extract.Single.129.cs" />
+    <Compile Include="Insert.Single.0.cs" />
     <Compile Include="Insert.Byte.1.cs" />
     <Compile Include="Insert.SByte.1.cs" />
     <Compile Include="Insert.Int32.1.cs" />
     <Compile Include="Insert.UInt32.1.cs" />
     <Compile Include="Insert.Int64.1.cs" />
     <Compile Include="Insert.UInt64.1.cs" />
+    <Compile Include="Insert.Single.1.cs" />   
+    <Compile Include="Insert.Single.2.cs" />   
+    <Compile Include="Insert.Single.4.cs" />   
+    <Compile Include="Insert.Single.8.cs" />   
+    <Compile Include="Insert.Single.16.cs" />  
+    <Compile Include="Insert.Single.32.cs" />  
+    <Compile Include="Insert.Single.48.cs" />  
+    <Compile Include="Insert.Single.64.cs" />  
+    <Compile Include="Insert.Single.128.cs" />
     <Compile Include="Insert.Byte.129.cs" />
     <Compile Include="Insert.SByte.129.cs" />
     <Compile Include="Insert.Int32.129.cs" />
     <Compile Include="Insert.UInt32.129.cs" />
     <Compile Include="Insert.Int64.129.cs" />
     <Compile Include="Insert.UInt64.129.cs" />
+    <Compile Include="Insert.Single.129.cs" /> 
+    <Compile Include="Insert.Single.192.cs" />
     <Compile Include="Program.Sse41.cs" />
     <Compile Include="..\Shared\BooleanUnOpTest_DataTable.cs" />
     <Compile Include="..\Shared\BooleanBinOpTest_DataTable.cs" />