Adding containment support for more x86 hardware intrinsics (dotnet/coreclr#18297)
authorTanner Gooding <tagoo@outlook.com>
Thu, 7 Jun 2018 01:24:04 +0000 (18:24 -0700)
committerGitHub <noreply@github.com>
Thu, 7 Jun 2018 01:24:04 +0000 (18:24 -0700)
* Adding containment support to one-operand scalar HWIntrinsics (x86)

* Adding containment support to two-operand imm HWIntrinsics (x86)

* Adding containment support to three-operand imm HWIntrinsics (x86)

* Updating hwintrinsiccodegenxarch to properly mask Sse41.Insert for TYP_FLOAT

* Updating the Sse41.Insert tests for TYP_FLOAT

* Adding containment support for Sse2.CompareLessThan and BlendVariable (Sse41/Avx/Avx2)

* Fixing `genHWIntrinsic_R_RM_I` to call `emitIns_SIMD_R_R_I`, rather than `emitIns_R_R_I`

* Updating emitOutputSV to not modify the code for IF_RWR_RRD_SRD_CNS

* Cleaning up some of the emitxarch code.

* Moving roundps and roundpd into the IsDstSrcImm check

Commit migrated from https://github.com/dotnet/coreclr/commit/f7993ddfc19ccce311df69b32f675bd076935e7f

34 files changed:
src/coreclr/src/jit/codegenlinear.h
src/coreclr/src/jit/emitfmtsxarch.h
src/coreclr/src/jit/emitxarch.cpp
src/coreclr/src/jit/emitxarch.h
src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/src/jit/lowerxarch.cpp
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Shared/GenerateTests.csx
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Byte.1.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Byte.129.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Int32.1.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Int32.129.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Int64.1.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Int64.129.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.SByte.1.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.SByte.129.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.0.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.1.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.128.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.129.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.16.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.192.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.2.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.32.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.4.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.48.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.64.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.8.cs [new file with mode: 0644]
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.UInt32.1.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.UInt32.129.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.UInt64.1.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.UInt64.129.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Program.Sse41.cs
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Sse41_r.csproj
src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Sse41_ro.csproj

index 195368631005da3496145e839d4da2941ea4dd0e..5a8df9c071f0266c15b385afb8c141e083dd4fc2 100644 (file)
@@ -116,9 +116,10 @@ void genPutArgStkSIMD12(GenTree* treeNode);
 void genHWIntrinsic(GenTreeHWIntrinsic* node);
 #if defined(_TARGET_XARCH_)
 void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
-void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins);
+void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, int8_t ival);
 void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins);
-void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins);
+void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, int8_t ival);
+void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins);
 void genHWIntrinsic_R_R_R_RM(
     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTree* op3);
 void genSSEIntrinsic(GenTreeHWIntrinsic* node);
index a2814f86ce3b0b103fe03b4833f258a1646e388c..84bc0f8aec6086767351f1cd81792a5ed2ba86ea 100644 (file)
@@ -129,6 +129,7 @@ IF_DEF(RRW_MRD_CNS, IS_GM_RD|IS_R1_RW,          DSP_CNS)  // r/w    reg , read [
 IF_DEF(RWR_RRD_MRD, IS_GM_RD|IS_R1_WR|IS_R2_RD, DSP)      // write  reg , read reg2 , read [mem]
 IF_DEF(RWR_MRD_CNS, IS_GM_RD|IS_R1_WR,          DSP_CNS)  // write  reg , read [mem], const
 IF_DEF(RWR_RRD_MRD_CNS, IS_GM_RD|IS_R1_WR|IS_R2_RD, DSP_CNS) // write  reg , read reg2 , read [mem], const
+IF_DEF(RWR_RRD_MRD_RRD, IS_GM_RD|IS_R1_WR|IS_R2_RD|IS_R3_RD, DSP_CNS) // write  reg , read reg2 , read [mem], read reg3
 IF_DEF(RWR_MRD_OFF, IS_GM_RD|IS_R1_WR,          DSP)      // write  reg , offset mem
 
 IF_DEF(MRD_RRD,     IS_GM_RD|IS_R1_RD,          DSP)      // read  [mem], read  reg
@@ -159,6 +160,7 @@ IF_DEF(RRW_SRD_CNS, IS_SF_RD|IS_R1_RW,          CNS )     // r/w    reg , read [
 IF_DEF(RWR_RRD_SRD, IS_SF_RD|IS_R1_WR|IS_R2_RD, NONE)     // write  reg , read  reg2, read [stk]
 IF_DEF(RWR_SRD_CNS, IS_SF_RD|IS_R1_WR,          CNS )     // write  reg , read [stk], const
 IF_DEF(RWR_RRD_SRD_CNS, IS_SF_RD|IS_R1_WR|IS_R2_RD, CNS ) // write  reg , read  reg2, read [stk], const
+IF_DEF(RWR_RRD_SRD_RRD, IS_SF_RD|IS_R1_WR|IS_R2_RD|IS_R3_RD, CNS ) // write  reg , read  reg2, read [stk], read reg3
 
 IF_DEF(SRD_RRD,     IS_SF_RD|IS_R1_RD,          NONE)     // read  [stk], read  reg
 IF_DEF(SWR_RRD,     IS_SF_WR|IS_R1_RD,          NONE)     // write [stk], read  reg
@@ -187,6 +189,7 @@ IF_DEF(RRW_ARD_CNS, IS_AM_RD|IS_R1_RW,          AMD_CNS)  // r/w    reg , read [
 IF_DEF(RWR_RRD_ARD, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD )     // write  reg , read  reg2, read [adr]
 IF_DEF(RWR_ARD_CNS, IS_AM_RD|IS_R1_WR,          AMD_CNS)  // write  reg , read [adr], const
 IF_DEF(RWR_RRD_ARD_CNS, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD_CNS) // write  reg , read  reg2, read [adr], const
+IF_DEF(RWR_RRD_ARD_RRD, IS_AM_RD|IS_R1_WR|IS_R2_RD|IS_R3_RD, AMD_CNS) // write  reg , read  reg2, read [adr], read reg3
 
 IF_DEF(ARD_RRD,     IS_AM_RD|IS_R1_RD,          AMD )     // read  [adr], read  reg
 IF_DEF(AWR_RRD,     IS_AM_WR|IS_R1_RD,          AMD )     // write [adr], read  reg
index 4d2aeb818312964bdd88b253b5808eb2c80aa2e0..b3011fc9682ee2932b58884e88131e62336b192f 100644 (file)
@@ -391,6 +391,8 @@ static bool IsDstSrcImmAvxInstruction(instruction ins)
         case INS_pshufd:
         case INS_pshufhw:
         case INS_pshuflw:
+        case INS_roundpd:
+        case INS_roundps:
             return true;
         default:
             return false;
@@ -2022,11 +2024,20 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(code_t code, int var, int dsp)
     }
 }
 
-inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, int var, int dsp, int val)
+inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp)
+{
+    instruction    ins      = id->idIns();
+    emitAttr       attrSize = id->idOpSize();
+    UNATIVE_OFFSET prefix   = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
+    return prefix + emitInsSizeSV(code, var, dsp);
+}
+
+inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val)
 {
     instruction    ins       = id->idIns();
-    UNATIVE_OFFSET valSize   = EA_SIZE_IN_BYTES(id->idOpSize());
-    UNATIVE_OFFSET prefix    = 0;
+    emitAttr       attrSize  = id->idOpSize();
+    UNATIVE_OFFSET valSize   = EA_SIZE_IN_BYTES(attrSize);
+    UNATIVE_OFFSET prefix    = emitGetVexPrefixAdjustedSize(ins, attrSize, code);
     bool           valInByte = ((signed char)val == val) && (ins != INS_mov) && (ins != INS_test);
 
 #ifdef _TARGET_AMD64_
@@ -2055,10 +2066,10 @@ inline UNATIVE_OFFSET emitter::emitInsSizeSV(instrDesc* id, int var, int dsp, in
     // This referes to 66h size prefix override
     if (id->idOpSize() == EA_2BYTE)
     {
-        prefix = 1;
+        prefix += 1;
     }
 
-    return prefix + valSize + emitInsSizeSV(insCodeMI(ins), var, dsp);
+    return prefix + valSize + emitInsSizeSV(code, var, dsp);
 }
 
 /*****************************************************************************/
@@ -2347,6 +2358,8 @@ inline UNATIVE_OFFSET emitter::emitInsSizeCV(instrDesc* id, code_t code)
     // so we should only hit this path for statics that are RIP-relative
     UNATIVE_OFFSET size = sizeof(INT32);
 
+    size += emitGetVexPrefixAdjustedSize(ins, id->idOpSize(), code);
+
     // Most 16-bit operand instructions will need a prefix.
     // This refers to 66h size prefix override.
 
@@ -2702,6 +2715,8 @@ emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
             return IF_RWR_RRD_MRD;
         case IF_RWR_RRD_ARD_CNS:
             return IF_RWR_RRD_MRD_CNS;
+        case IF_RWR_RRD_ARD_RRD:
+            return IF_RWR_RRD_MRD_RRD;
 
         case IF_ARD_RRD:
             return IF_MRD_RRD;
@@ -4061,8 +4076,7 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT
 
     emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins);
 
-    // Plus one for the 1-byte immediate (ival)
-    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
 
     if (Is4ByteSSE4Instruction(ins))
     {
@@ -4090,8 +4104,7 @@ void emitter::emitIns_R_AR_I(instruction ins, emitAttr attr, regNumber reg1, reg
     id->idAddr()->iiaAddrMode.amBaseReg = base;
     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
 
-    // Plus one for the 1-byte immediate (ival)
-    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
 
     if (Is4ByteSSE4Instruction(ins))
     {
@@ -4117,8 +4130,14 @@ void emitter::emitIns_R_C_I(
     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
     assert(IsSSEOrAVXInstruction(ins));
 
-    instrDesc*     id = emitNewInstrCnsDsp(attr, ival, offs);
-    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
+
+    id->idIns(ins);
+    id->idInsFmt(IF_RRW_MRD_CNS);
+    id->idReg1(reg1);
+    id->idAddr()->iiaFieldHnd = fldHnd;
+
+    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
 
     if (Is4ByteSSE4Instruction(ins))
     {
@@ -4126,12 +4145,8 @@ void emitter::emitIns_R_C_I(
         sz += 2;
     }
 
-    id->idIns(ins);
-    id->idInsFmt(IF_RRW_MRD_CNS);
-    id->idReg1(reg1);
-    id->idAddr()->iiaFieldHnd = fldHnd;
-
     id->idCodeSize(sz);
+
     dispIns(id);
     emitCurIGsize += sz;
 }
@@ -4141,15 +4156,7 @@ void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int
     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
     assert(IsSSEOrAVXInstruction(ins));
 
-    instrDesc*     id = emitNewInstrCns(attr, ival);
-    UNATIVE_OFFSET sz =
-        emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
-
-    if (Is4ByteSSE4Instruction(ins))
-    {
-        // The 4-Byte SSE4 instructions require two additional bytes
-        sz += 2;
-    }
+    instrDesc* id = emitNewInstrCns(attr, ival);
 
     id->idIns(ins);
     id->idInsFmt(IF_RRW_SRD_CNS);
@@ -4160,6 +4167,14 @@ void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int
     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
 #endif
 
+    UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
+
+    if (Is4ByteSSE4Instruction(ins))
+    {
+        // The 4-Byte SSE4 instructions require two additional bytes
+        sz += 2;
+    }
+
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -4180,7 +4195,7 @@ void emitter::emitIns_R_R_A(instruction ins, emitAttr attr, regNumber reg1, regN
 
     emitHandleMemOp(indir, id, IF_RWR_RRD_ARD, ins);
 
-    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -4202,7 +4217,7 @@ void emitter::emitIns_R_R_AR(instruction ins, emitAttr attr, regNumber reg1, reg
     id->idAddr()->iiaAddrMode.amBaseReg = base;
     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
 
-    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins));
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -4222,7 +4237,7 @@ void emitter::emitIns_R_R_C(
     }
 
     instrDesc*     id = emitNewInstrDsp(attr, offs);
-    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
+    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
 
     id->idIns(ins);
     id->idInsFmt(IF_RWR_RRD_MRD);
@@ -4267,9 +4282,7 @@ void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regN
     assert(IsSSEOrAVXInstruction(ins));
     assert(IsThreeOperandAVXInstruction(ins));
 
-    instrDesc*     id = emitNewInstr(attr);
-    UNATIVE_OFFSET sz =
-        emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
+    instrDesc* id = emitNewInstr(attr);
 
     id->idIns(ins);
     id->idInsFmt(IF_RWR_RRD_SRD);
@@ -4281,7 +4294,9 @@ void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regN
     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
 #endif
 
+    UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
     id->idCodeSize(sz);
+
     dispIns(id);
     emitCurIGsize += sz;
 }
@@ -4301,7 +4316,7 @@ void emitter::emitIns_R_R_A_I(
 
     emitHandleMemOp(indir, id, fmt, ins);
 
-    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -4324,8 +4339,7 @@ void emitter::emitIns_R_R_AR_I(
     id->idAddr()->iiaAddrMode.amBaseReg = base;
     id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
 
-    // Plus one for the 1-byte immediate (ival)
-    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -4344,8 +4358,7 @@ void emitter::emitIns_R_R_C_I(
         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
     }
 
-    instrDesc*     id = emitNewInstrCnsDsp(attr, ival, offs);
-    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    instrDesc* id = emitNewInstrCnsDsp(attr, ival, offs);
 
     id->idIns(ins);
     id->idInsFmt(IF_RWR_RRD_MRD_CNS);
@@ -4353,7 +4366,9 @@ void emitter::emitIns_R_R_C_I(
     id->idReg2(reg2);
     id->idAddr()->iiaFieldHnd = fldHnd;
 
+    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
     id->idCodeSize(sz);
+
     dispIns(id);
     emitCurIGsize += sz;
 }
@@ -4398,9 +4413,7 @@ void emitter::emitIns_R_R_S_I(
     assert(IsSSEOrAVXInstruction(ins));
     assert(IsThreeOperandAVXInstruction(ins));
 
-    instrDesc*     id = emitNewInstrCns(attr, ival);
-    UNATIVE_OFFSET sz =
-        emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    instrDesc* id = emitNewInstrCns(attr, ival);
 
     id->idIns(ins);
     id->idInsFmt(IF_RWR_RRD_SRD_CNS);
@@ -4412,7 +4425,192 @@ void emitter::emitIns_R_R_S_I(
     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
 #endif
 
+    UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
     id->idCodeSize(sz);
+
+    dispIns(id);
+    emitCurIGsize += sz;
+}
+
+//------------------------------------------------------------------------
+// encodeXmmRegAsIval: Encodes a XMM register into imm[7:4] for use by a SIMD instruction
+//
+// Arguments
+//    opReg -- The register being encoded
+//
+// Returns:
+//    opReg encoded in imm[7:4]
+static int encodeXmmRegAsIval(regNumber opReg)
+{
+    assert(opReg >= XMMBASE);
+    // AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
+    // which encodes the fourth register into imm8[7:4]
+    return (opReg - XMMBASE) << 4;
+}
+
+//------------------------------------------------------------------------
+// emitIns_R_R_A_R: emits the code for an instruction that takes a register operand, a GenTreeIndir address,
+//                  another register operand, and that returns a value in register
+//
+// Arguments:
+//    ins       -- The instruction being emitted
+//    attr      -- The emit attribute
+//    targetReg -- The target register
+//    op1Reg    -- The register of the first operand
+//    op3Reg    -- The register of the third operand
+//    indir     -- The GenTreeIndir used for the memory address
+//
+// Remarks:
+//    op2 is built from indir
+//
+void emitter::emitIns_R_R_A_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
+{
+    assert(isAvxBlendv(ins));
+    assert(UseVEXEncoding());
+
+    int        ival = encodeXmmRegAsIval(op3Reg);
+    ssize_t    offs = indir->Offset();
+    instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival);
+
+    id->idIns(ins);
+    id->idReg1(targetReg);
+    id->idReg2(op1Reg);
+
+    emitHandleMemOp(indir, id, IF_RWR_RRD_ARD_RRD, ins);
+
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
+    id->idCodeSize(sz);
+
+    dispIns(id);
+    emitCurIGsize += sz;
+}
+
+//------------------------------------------------------------------------
+// emitIns_R_R_AR_R: emits the code for an instruction that takes a register operand, a base memory
+//                   register, another register operand, and that returns a value in register
+//
+// Arguments:
+//    ins       -- The instruction being emitted
+//    attr      -- The emit attribute
+//    targetReg -- The target register
+//    op1Reg    -- The register of the first operands
+//    op3Reg    -- The register of the third operand
+//    base      -- The base register used for the memory address
+//    offs      -- The offset added to the memory address from base
+//
+// Remarks:
+//    op2 is built from base + offs
+//
+void emitter::emitIns_R_R_AR_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base, int offs)
+{
+    assert(isAvxBlendv(ins));
+    assert(UseVEXEncoding());
+
+    int        ival = encodeXmmRegAsIval(op3Reg);
+    instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival);
+
+    id->idIns(ins);
+    id->idReg1(targetReg);
+    id->idReg2(op1Reg);
+
+    id->idInsFmt(IF_RWR_RRD_ARD_RRD);
+    id->idAddr()->iiaAddrMode.amBaseReg = base;
+    id->idAddr()->iiaAddrMode.amIndxReg = REG_NA;
+
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins), ival);
+    id->idCodeSize(sz);
+
+    dispIns(id);
+    emitCurIGsize += sz;
+}
+
+//------------------------------------------------------------------------
+// emitIns_R_R_C_R: emits the code for an instruction that takes a register operand, a field handle +
+//                  offset,  another register operand, and that returns a value in register
+//
+// Arguments:
+//    ins       -- The instruction being emitted
+//    attr      -- The emit attribute
+//    targetReg -- The target register
+//    op1Reg    -- The register of the first operand
+//    op3Reg    -- The register of the third operand
+//    fldHnd    -- The CORINFO_FIELD_HANDLE used for the memory address
+//    offs      -- The offset added to the memory address from fldHnd
+//
+// Remarks:
+//    op2 is built from fldHnd + offs
+//
+void emitter::emitIns_R_R_C_R(instruction          ins,
+                              emitAttr             attr,
+                              regNumber            targetReg,
+                              regNumber            op1Reg,
+                              regNumber            op3Reg,
+                              CORINFO_FIELD_HANDLE fldHnd,
+                              int                  offs)
+{
+    assert(isAvxBlendv(ins));
+    assert(UseVEXEncoding());
+
+    // Static always need relocs
+    if (!jitStaticFldIsGlobAddr(fldHnd))
+    {
+        attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+    }
+
+    int        ival = encodeXmmRegAsIval(op3Reg);
+    instrDesc* id   = emitNewInstrCnsDsp(attr, ival, offs);
+
+    id->idIns(ins);
+    id->idReg1(targetReg);
+    id->idReg2(op1Reg);
+
+    id->idInsFmt(IF_RWR_RRD_MRD_RRD);
+    id->idAddr()->iiaFieldHnd = fldHnd;
+
+    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins), ival);
+    id->idCodeSize(sz);
+
+    dispIns(id);
+    emitCurIGsize += sz;
+}
+
+//------------------------------------------------------------------------
+// emitIns_R_R_R_S: emits the code for a instruction that takes a register operand, a variable index +
+//                  offset, another register operand, and that returns a value in register
+//
+// Arguments:
+//    ins       -- The instruction being emitted
+//    attr      -- The emit attribute
+//    targetReg -- The target register
+//    op1Reg    -- The register of the first operand
+//    op3Reg    -- The register of the third operand
+//    varx      -- The variable index used for the memory address
+//    offs      -- The offset added to the memory address from varx
+//
+// Remarks:
+//    op2 is built from varx + offs
+//
+void emitter::emitIns_R_R_S_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
+{
+    assert(isAvxBlendv(ins));
+    assert(UseVEXEncoding());
+
+    int        ival = encodeXmmRegAsIval(op3Reg);
+    instrDesc* id   = emitNewInstrCns(attr, ival);
+
+    id->idIns(ins);
+    id->idReg1(targetReg);
+    id->idReg2(op1Reg);
+
+    id->idInsFmt(IF_RWR_RRD_SRD_RRD);
+    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+
+    UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeRM(ins), varx, offs, ival);
+    id->idCodeSize(sz);
+
     dispIns(id);
     emitCurIGsize += sz;
 }
@@ -4427,11 +4625,9 @@ void emitter::emitIns_R_R_R_R(
     // TODO-XArch-CQ: We should create function which can calculate all kinds of AVX instructions size in future
     UNATIVE_OFFSET sz = 6;
 
-    // AVX/AVX2 supports 4-reg format for vblendvps/vblendvpd/vpblendvb,
-    // which encodes the fourth register into imm8[7:4]
-    int ival = (reg3 - XMMBASE) << 4; // convert reg3 to ival
+    int        ival = encodeXmmRegAsIval(reg3);
+    instrDesc* id   = emitNewInstrCns(attr, ival);
 
-    instrDesc* id = emitNewInstrCns(attr, ival);
     id->idIns(ins);
     id->idInsFmt(IF_RWR_RRD_RRD_RRD);
     id->idReg1(targetReg);
@@ -4682,7 +4878,7 @@ void emitter::emitIns_J_S(instruction ins, emitAttr attr, BasicBlock* dst, int v
     id->idjNext      = emitCurIGjmpList;
     emitCurIGjmpList = id;
 
-    UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(insCodeMI(ins), varx, offs);
+    UNATIVE_OFFSET sz = sizeof(INT32) + emitInsSizeSV(id, insCodeMI(ins), varx, offs);
     id->dstLclVar.initLclVarAddr(varx, offs);
 #ifdef DEBUG
     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
@@ -5005,8 +5201,7 @@ void emitter::emitIns_AR_R_I(instruction ins, emitAttr attr, regNumber base, int
 
     assert(emitGetInsAmdAny(id) == disp); // make sure "disp" is stored properly
 
-    // Plus one for the 1-byte immediate (ival)
-    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeMR(ins)) + 1;
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeMR(ins), ival);
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -5878,9 +6073,9 @@ void emitter::emitIns_SIMD_R_R_R_R(
         }
         if (op1Reg != targetReg)
         {
-            // Ensure we aren't overwriting op2 or op3
+            // Ensure we aren't overwriting op2 or oop3 (which should be REG_XMM0)
             assert(op2Reg != targetReg);
-            assert(op3Reg != targetReg);
+            assert(targetReg != REG_XMM0);
 
             emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
         }
@@ -5917,6 +6112,301 @@ void emitter::emitIns_SIMD_R_R_R_S(
 
     emitIns_R_R_S(ins, attr, targetReg, op2Reg, varx, offs);
 }
+
+//------------------------------------------------------------------------
+// emitIns_SIMD_R_R_A_R: emits the code for a SIMD instruction that takes a register operand, a GenTreeIndir address,
+//                       another register operand, and that returns a value in register
+//
+// Arguments:
+//    ins       -- The instruction being emitted
+//    attr      -- The emit attribute
+//    targetReg -- The target register
+//    op1Reg    -- The register of the first operand
+//    op3Reg    -- The register of the third operand
+//    indir     -- The GenTreeIndir used for the memory address
+//
+void emitter::emitIns_SIMD_R_R_A_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir)
+{
+    if (UseVEXEncoding())
+    {
+        assert(isAvxBlendv(ins) || isSse41Blendv(ins));
+
+        // convert SSE encoding of SSE4.1 instructions to VEX encoding
+        switch (ins)
+        {
+            case INS_blendvps:
+            {
+                ins = INS_vblendvps;
+                break;
+            }
+
+            case INS_blendvpd:
+            {
+                ins = INS_vblendvpd;
+                break;
+            }
+
+            case INS_pblendvb:
+            {
+                ins = INS_vpblendvb;
+                break;
+            }
+
+            default:
+            {
+                break;
+            }
+        }
+
+        emitIns_R_R_A_R(ins, attr, targetReg, op1Reg, op3Reg, indir);
+    }
+    else
+    {
+        assert(isSse41Blendv(ins));
+
+        // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
+        if (op3Reg != REG_XMM0)
+        {
+            // Ensure we aren't overwriting op1
+            assert(op1Reg != REG_XMM0);
+
+            emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
+        }
+        if (op1Reg != targetReg)
+        {
+            // Ensure we aren't overwriting op3 (which should be REG_XMM0)
+            assert(targetReg != REG_XMM0);
+
+            emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
+        }
+
+        emitIns_R_A(ins, attr, targetReg, indir);
+    }
+}
+
+//------------------------------------------------------------------------
+// emitIns_SIMD_R_R_AR_R: emits the code for a SIMD instruction that takes a register operand, a base memory
+//                        register, another register operand, and that returns a value in register
+//
+// Arguments:
+//    ins       -- The instruction being emitted
+//    attr      -- The emit attribute
+//    targetReg -- The target register
+//    op1Reg    -- The register of the first operands
+//    op3Reg    -- The register of the third operand
+//    base      -- The base register used for the memory address
+//
+void emitter::emitIns_SIMD_R_R_AR_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base)
+{
+    if (UseVEXEncoding())
+    {
+        assert(isAvxBlendv(ins) || isSse41Blendv(ins));
+
+        // convert SSE encoding of SSE4.1 instructions to VEX encoding
+        switch (ins)
+        {
+            case INS_blendvps:
+            {
+                ins = INS_vblendvps;
+                break;
+            }
+
+            case INS_blendvpd:
+            {
+                ins = INS_vblendvpd;
+                break;
+            }
+
+            case INS_pblendvb:
+            {
+                ins = INS_vpblendvb;
+                break;
+            }
+
+            default:
+            {
+                break;
+            }
+        }
+
+        emitIns_R_R_AR_R(ins, attr, targetReg, op1Reg, op3Reg, base, 0);
+    }
+    else
+    {
+        assert(isSse41Blendv(ins));
+
+        // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
+        if (op3Reg != REG_XMM0)
+        {
+            // Ensure we aren't overwriting op1
+            assert(op1Reg != REG_XMM0);
+
+            emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
+        }
+        if (op1Reg != targetReg)
+        {
+            // Ensure we aren't overwriting op3 (which should be REG_XMM0)
+            assert(targetReg != REG_XMM0);
+
+            emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
+        }
+
+        emitIns_R_AR(ins, attr, targetReg, base, 0);
+    }
+}
+
+//------------------------------------------------------------------------
+// emitIns_SIMD_R_R_C_R: emits the code for a SIMD instruction that takes a register operand, a field handle +
+//                       offset,  another register operand, and that returns a value in register
+//
+// Arguments:
+//    ins       -- The instruction being emitted
+//    attr      -- The emit attribute
+//    targetReg -- The target register
+//    op1Reg    -- The register of the first operand
+//    op3Reg    -- The register of the third operand
+//    fldHnd    -- The CORINFO_FIELD_HANDLE used for the memory address
+//    offs      -- The offset added to the memory address from fldHnd
+//
+void emitter::emitIns_SIMD_R_R_C_R(instruction          ins,
+                                   emitAttr             attr,
+                                   regNumber            targetReg,
+                                   regNumber            op1Reg,
+                                   regNumber            op3Reg,
+                                   CORINFO_FIELD_HANDLE fldHnd,
+                                   int                  offs)
+{
+    if (UseVEXEncoding())
+    {
+        assert(isAvxBlendv(ins) || isSse41Blendv(ins));
+
+        // convert SSE encoding of SSE4.1 instructions to VEX encoding
+        switch (ins)
+        {
+            case INS_blendvps:
+            {
+                ins = INS_vblendvps;
+                break;
+            }
+
+            case INS_blendvpd:
+            {
+                ins = INS_vblendvpd;
+                break;
+            }
+
+            case INS_pblendvb:
+            {
+                ins = INS_vpblendvb;
+                break;
+            }
+
+            default:
+            {
+                break;
+            }
+        }
+
+        emitIns_R_R_C_R(ins, attr, targetReg, op1Reg, op3Reg, fldHnd, offs);
+    }
+    else
+    {
+        assert(isSse41Blendv(ins));
+
+        // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
+        if (op3Reg != REG_XMM0)
+        {
+            // Ensure we aren't overwriting op1
+            assert(op1Reg != REG_XMM0);
+
+            emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
+        }
+        if (op1Reg != targetReg)
+        {
+            // Ensure we aren't overwriting op3 (which should be REG_XMM0)
+            assert(targetReg != REG_XMM0);
+
+            emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
+        }
+
+        emitIns_R_C(ins, attr, targetReg, fldHnd, offs);
+    }
+}
+
+//------------------------------------------------------------------------
+// emitIns_SIMD_R_R_S_R: emits the code for a SIMD instruction that takes a register operand, a variable index +
+//                       offset, another register operand, and that returns a value in register
+//
+// Arguments:
+//    ins       -- The instruction being emitted
+//    attr      -- The emit attribute
+//    targetReg -- The target register
+//    op1Reg    -- The register of the first operand
+//    op3Reg    -- The register of the third operand
+//    varx      -- The variable index used for the memory address
+//    offs      -- The offset added to the memory address from varx
+//
+void emitter::emitIns_SIMD_R_R_S_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs)
+{
+    if (UseVEXEncoding())
+    {
+        assert(isAvxBlendv(ins) || isSse41Blendv(ins));
+
+        // convert SSE encoding of SSE4.1 instructions to VEX encoding
+        switch (ins)
+        {
+            case INS_blendvps:
+            {
+                ins = INS_vblendvps;
+                break;
+            }
+
+            case INS_blendvpd:
+            {
+                ins = INS_vblendvpd;
+                break;
+            }
+
+            case INS_pblendvb:
+            {
+                ins = INS_vpblendvb;
+                break;
+            }
+
+            default:
+            {
+                break;
+            }
+        }
+
+        emitIns_R_R_S_R(ins, attr, targetReg, op1Reg, op3Reg, varx, offs);
+    }
+    else
+    {
+        assert(isSse41Blendv(ins));
+
+        // SSE4.1 blendv* hardcode the mask vector (op3) in XMM0
+        if (op3Reg != REG_XMM0)
+        {
+            // Ensure we aren't overwriting op1
+            assert(op1Reg != REG_XMM0);
+
+            emitIns_R_R(INS_movaps, attr, REG_XMM0, op3Reg);
+        }
+        if (op1Reg != targetReg)
+        {
+            // Ensure we aren't overwriting op3 (which should be REG_XMM0)
+            assert(targetReg != REG_XMM0);
+
+            emitIns_R_R(INS_movaps, attr, targetReg, op1Reg);
+        }
+
+        emitIns_R_S(ins, attr, targetReg, varx, offs);
+    }
+}
 #endif // FEATURE_HW_INTRINSICS
 
 /*****************************************************************************
@@ -5927,7 +6417,7 @@ void emitter::emitIns_SIMD_R_R_R_S(
 void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
 {
     instrDesc*     id  = emitNewInstr(attr);
-    UNATIVE_OFFSET sz  = emitInsSizeSV(insCodeMR(ins), varx, offs);
+    UNATIVE_OFFSET sz  = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
     insFormat      fmt = emitInsModeFormat(ins, IF_SRD);
 
     // 16-bit operand instructions will need a prefix
@@ -5962,7 +6452,7 @@ void emitter::emitIns_S(instruction ins, emitAttr attr, int varx, int offs)
 void emitter::emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs)
 {
     instrDesc*     id  = emitNewInstr(attr);
-    UNATIVE_OFFSET sz  = emitInsSizeSV(insCodeMR(ins), varx, offs);
+    UNATIVE_OFFSET sz  = emitInsSizeSV(id, insCodeMR(ins), varx, offs);
     insFormat      fmt = emitInsModeFormat(ins, IF_SRD_RRD);
 
 #ifdef _TARGET_X86_
@@ -6004,7 +6494,7 @@ void emitter::emitIns_R_S(instruction ins, emitAttr attr, regNumber ireg, int va
     noway_assert(emitVerifyEncodable(ins, size, ireg));
 
     instrDesc*     id  = emitNewInstr(attr);
-    UNATIVE_OFFSET sz  = emitInsSizeSV(insCodeRM(ins), varx, offs);
+    UNATIVE_OFFSET sz  = emitInsSizeSV(id, insCodeRM(ins), varx, offs);
     insFormat      fmt = emitInsModeFormat(ins, IF_RRD_SRD);
 
     // Most 16-bit operand instructions need a prefix
@@ -6066,7 +6556,7 @@ void emitter::emitIns_S_I(instruction ins, emitAttr attr, int varx, int offs, in
     instrDesc* id = emitNewInstrCns(attr, val);
     id->idIns(ins);
     id->idInsFmt(fmt);
-    UNATIVE_OFFSET sz = emitInsSizeSV(id, varx, offs, val);
+    UNATIVE_OFFSET sz = emitInsSizeSV(id, insCodeMI(ins), varx, offs, val);
 
     // VEX prefix
     sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeMI(ins));
@@ -6599,7 +7089,7 @@ void emitter::emitIns_Call(EmitCallType          callType,
                 // disp is really a lclVarNum
                 noway_assert((unsigned)disp == (size_t)disp);
                 id->idAddr()->iiaLclVar.initLclVarAddr((unsigned)disp, 0);
-                sz = emitInsSizeSV(insCodeMR(INS_call), (unsigned)disp, 0);
+                sz = emitInsSizeSV(id, insCodeMR(INS_call), (unsigned)disp, 0);
 
                 break;
 
@@ -7959,6 +8449,18 @@ void emitter::emitDispIns(
             break;
         }
 
+        case IF_RWR_RRD_ARD_RRD:
+        {
+            printf("%s, ", emitRegName(id->idReg1(), attr));
+            printf("%s, ", emitRegName(id->idReg2(), attr));
+            emitDispAddrMode(id);
+
+            emitGetInsAmdCns(id, &cnsVal);
+            val = (cnsVal.cnsVal >> 4) + XMMBASE;
+            printf(", %s", emitRegName((regNumber)val, attr));
+            break;
+        }
+
         case IF_ARD_RRD:
         case IF_AWR_RRD:
         case IF_ARW_RRD:
@@ -8140,6 +8642,19 @@ void emitter::emitDispIns(
             break;
         }
 
+        case IF_RWR_RRD_SRD_RRD:
+        {
+            printf("%s, ", emitRegName(id->idReg1(), attr));
+            printf("%s, ", emitRegName(id->idReg2(), attr));
+            emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+                             id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+
+            emitGetInsCns(id, &cnsVal);
+            val = (cnsVal.cnsVal >> 4) + XMMBASE;
+            printf(", %s", emitRegName((regNumber)val, attr));
+            break;
+        }
+
         case IF_RRD_RRD:
         case IF_RWR_RRD:
         case IF_RRW_RRD:
@@ -8345,6 +8860,20 @@ void emitter::emitDispIns(
             break;
         }
 
+        case IF_RWR_RRD_MRD_RRD:
+        {
+            printf("%s, ", emitRegName(id->idReg1(), attr));
+            printf("%s, ", emitRegName(id->idReg2(), attr));
+
+            offs = emitGetInsDsp(id);
+            emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+
+            emitGetInsDcmCns(id, &cnsVal);
+            val = (cnsVal.cnsVal >> 4) + XMMBASE;
+            printf(", %s", emitRegName((regNumber)val, attr));
+            break;
+        }
+
         case IF_RWR_MRD_OFF:
 
             printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
@@ -9492,7 +10021,8 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
         // Does the constant fit in a byte?
         if ((signed char)cval == cval && addc->cnsReloc == false && ins != INS_mov && ins != INS_test)
         {
-            if (id->idInsFmt() != IF_SRW_SHF)
+            if ((id->idInsFmt() != IF_SRW_SHF) && (id->idInsFmt() != IF_RRW_SRD_CNS) &&
+                (id->idInsFmt() != IF_RWR_RRD_SRD_CNS))
             {
                 code |= 2;
             }
@@ -12298,6 +12828,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             break;
 
         case IF_RWR_RRD_ARD_CNS:
+        case IF_RWR_RRD_ARD_RRD:
         {
             emitGetInsAmdCns(id, &cnsVal);
             code = insCodeRM(ins);
@@ -12471,6 +13002,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
         }
 
         case IF_RWR_RRD_SRD_CNS:
+        case IF_RWR_RRD_SRD_RRD:
         {
             // This should only be called on AVX instructions
             assert(IsAVXInstruction(ins));
@@ -12492,6 +13024,8 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
                 regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
                 dst     = emitOutputSV(dst, id, code | regcode, &cnsVal);
             }
+
+            sz = emitSizeOfInsDsc(id);
             break;
         }
 
@@ -12628,6 +13162,7 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
         }
 
         case IF_RWR_RRD_MRD_CNS:
+        case IF_RWR_RRD_MRD_RRD:
         {
             // This should only be called on AVX instructions
             assert(IsAVXInstruction(ins));
index 0429b0f8879308b2b485661d00eef87d80440121..e7a93a1bc688665f3f1d3c45ae5606a11286b5ad 100644 (file)
@@ -42,7 +42,8 @@ struct CnsVal
 UNATIVE_OFFSET emitInsSize(code_t code);
 UNATIVE_OFFSET emitInsSizeRM(instruction ins);
 UNATIVE_OFFSET emitInsSizeSV(code_t code, int var, int dsp);
-UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, int var, int dsp, int val);
+UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp);
+UNATIVE_OFFSET emitInsSizeSV(instrDesc* id, code_t code, int var, int dsp, int val);
 UNATIVE_OFFSET emitInsSizeRR(instruction ins, regNumber reg1, regNumber reg2, emitAttr attr);
 UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code);
 UNATIVE_OFFSET emitInsSizeAM(instrDesc* id, code_t code, int val);
@@ -352,6 +353,23 @@ void emitIns_R_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber r
 
 void emitIns_R_R_S_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival);
 
+void emitIns_R_R_A_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, GenTreeIndir* indir);
+
+void emitIns_R_R_AR_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, regNumber base, int offs);
+
+void emitIns_R_R_C_R(instruction          ins,
+                     emitAttr             attr,
+                     regNumber            targetReg,
+                     regNumber            op1Reg,
+                     regNumber            op3Reg,
+                     CORINFO_FIELD_HANDLE fldHnd,
+                     int                  offs);
+
+void emitIns_R_R_S_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op3Reg, int varx, int offs);
+
 void emitIns_R_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, regNumber reg4);
 
 void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
@@ -449,6 +467,20 @@ void emitIns_SIMD_R_R_R_R(
     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg);
 void emitIns_SIMD_R_R_R_S(
     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs);
+
+void emitIns_SIMD_R_R_A_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir);
+void emitIns_SIMD_R_R_AR_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber base);
+void emitIns_SIMD_R_R_C_R(instruction          ins,
+                          emitAttr             attr,
+                          regNumber            targetReg,
+                          regNumber            op1Reg,
+                          regNumber            op2Reg,
+                          CORINFO_FIELD_HANDLE fldHnd,
+                          int                  offs);
+void emitIns_SIMD_R_R_S_R(
+    instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs);
 #endif // FEATURE_HW_INTRINSICS
 
 enum EmitCallType
index 4aaa197cf464244c0699aede11849301542d53c3..1468f035d60afe7e8323977258dc8db06ab921d2 100644 (file)
@@ -96,7 +96,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 else if ((ival != -1) && varTypeIsFloating(baseType))
                 {
                     assert((ival >= 0) && (ival <= 127));
-                    genHWIntrinsic_R_RM_I(node, ins);
+                    genHWIntrinsic_R_RM_I(node, ins, (int8_t)ival);
                 }
                 else
                 {
@@ -133,7 +133,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 else if ((ival != -1) && varTypeIsFloating(baseType))
                 {
                     assert((ival >= 0) && (ival <= 127));
-                    genHWIntrinsic_R_R_RM_I(node, ins);
+                    genHWIntrinsic_R_R_RM_I(node, ins, ival);
                 }
                 else if (category == HW_Category_MemoryLoad)
                 {
@@ -156,7 +156,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                         simdSize = emitTypeSize(TYP_INT);
                     }
 
-                    auto emitSwCase = [&](int8_t i) { emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, i); };
+                    auto emitSwCase = [&](int8_t i) { genHWIntrinsic_R_RM_I(node, ins, i); };
 
                     if (op2->IsCnsIntOrI())
                     {
@@ -206,18 +206,33 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 {
                     assert(ival == -1);
 
-                    auto emitSwCase = [&](int8_t i) {
-                        emit->emitIns_SIMD_R_R_R_I(ins, simdSize, targetReg, op1Reg, op2Reg, i);
-                    };
+                    auto emitSwCase = [&](int8_t i) { genHWIntrinsic_R_R_RM_I(node, ins, i); };
 
                     if (op3->IsCnsIntOrI())
                     {
                         ssize_t ival = op3->AsIntCon()->IconValue();
                         assert((ival >= 0) && (ival <= 255));
+
+                        if ((intrinsicId == NI_SSE41_Insert) && (baseType == TYP_FLOAT))
+                        {
+                            // Bits 6 and 7 impact the index that is selected from op2
+                            // when op2 is already in register. However, our API exposes
+                            // op2 as a scalar and so bits 6 and 7 must be set to 0.
+                            ival &= 0x3F;
+                        }
+
                         emitSwCase((int8_t)ival);
                     }
                     else
                     {
+                        if ((intrinsicId == NI_SSE41_Insert) && (baseType == TYP_FLOAT))
+                        {
+                            // Bits 6 and 7 impact the index that is selected from op2
+                            // when op2 is already in register. However, our API exposes
+                            // op2 as a scalar and so bits 6 and 7 must be set to 0.
+                            emit->emitIns_R_I(INS_and, EA_1BYTE, op3Reg, 0x3F);
+                        }
+
                         // We emit a fallback case for the scenario when the imm-op is not a constant. This should
                         // normally happen when the intrinsic is called indirectly, such as via Reflection. However, it
                         // can also occur if the consumer calls it directly and just doesn't pass a constant value.
@@ -240,7 +255,22 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else
                 {
-                    emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg, op3Reg);
+                    switch (intrinsicId)
+                    {
+                        case NI_SSE41_BlendVariable:
+                        case NI_AVX_BlendVariable:
+                        case NI_AVX2_BlendVariable:
+                        {
+                            genHWIntrinsic_R_R_RM_R(node, ins);
+                            break;
+                        }
+
+                        default:
+                        {
+                            unreached();
+                            break;
+                        };
+                    }
                 }
                 break;
             }
@@ -431,8 +461,9 @@ void CodeGen::genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emi
 // Arguments:
 //    node - The hardware intrinsic node
 //    ins  - The instruction being generated
+//    ival - The immediate value
 //
-void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
+void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, int8_t ival)
 {
     var_types targetType = node->TypeGet();
     regNumber targetReg  = node->gtRegNum;
@@ -440,15 +471,11 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
     emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
     emitter*  emit       = getEmitter();
 
-    int ival = HWIntrinsicInfo::lookupIval(node->gtHWIntrinsicId);
-    assert((ival >= 0) && (ival <= 127));
-
     // TODO-XArch-CQ: Commutative operations can have op1 be contained
     // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
 
     assert(targetReg != REG_NA);
-    assert(node->gtGetOp2() == nullptr); // The second operand is implicit and comes from lookupIval
-    assert(!node->OperIsCommutative());  // One operand intrinsics cannot be commutative
+    assert(!node->OperIsCommutative()); // One operand intrinsics cannot be commutative
 
     if (op1->isContained() || op1->isUsedFromSpillTemp())
     {
@@ -550,7 +577,7 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
     else
     {
         regNumber op1Reg = op1->gtRegNum;
-        emit->emitIns_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
+        emit->emitIns_SIMD_R_R_I(ins, simdSize, targetReg, op1Reg, ival);
     }
 }
 
@@ -705,8 +732,9 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
 // Arguments:
 //    node - The hardware intrinsic node
 //    ins  - The instruction being generated
+//    ival - The immediate value
 //
-void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
+void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, int8_t ival)
 {
     var_types targetType = node->TypeGet();
     regNumber targetReg  = node->gtRegNum;
@@ -715,12 +743,25 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
     emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
     emitter*  emit       = getEmitter();
 
-    int ival = HWIntrinsicInfo::lookupIval(node->gtHWIntrinsicId);
-    assert((ival >= 0) && (ival <= 127));
-
     // TODO-XArch-CQ: Commutative operations can have op1 be contained
     // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
 
+    if (op1->OperIsList())
+    {
+        assert(op2 == nullptr);
+
+        GenTreeArgList* argList = op1->AsArgList();
+
+        op1     = argList->Current();
+        argList = argList->Rest();
+
+        op2     = argList->Current();
+        argList = argList->Rest();
+
+        assert(argList->Current() != nullptr);
+        assert(argList->Rest() == nullptr);
+    }
+
     regNumber op1Reg = op1->gtRegNum;
 
     assert(targetReg != REG_NA);
@@ -846,6 +887,143 @@ void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
     }
 }
 
+//------------------------------------------------------------------------
+// genHWIntrinsic_R_R_RM_R: Generates the code for a hardware intrinsic node that takes a register operand, a
+//                          register/memory operand, another register operand, and that returns a value in register
+//
+// Arguments:
+//    node - The hardware intrinsic node
+//    ins  - The instruction being generated
+//
+void CodeGen::genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins)
+{
+    var_types targetType = node->TypeGet();
+    regNumber targetReg  = node->gtRegNum;
+    GenTree*  op1        = node->gtGetOp1();
+    GenTree*  op2        = node->gtGetOp2();
+    GenTree*  op3        = nullptr;
+    emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
+    emitter*  emit       = getEmitter();
+
+    assert(op1->OperIsList());
+    assert(op2 == nullptr);
+
+    GenTreeArgList* argList = op1->AsArgList();
+
+    op1     = argList->Current();
+    argList = argList->Rest();
+
+    op2     = argList->Current();
+    argList = argList->Rest();
+
+    op3 = argList->Current();
+    assert(argList->Rest() == nullptr);
+
+    regNumber op1Reg = op1->gtRegNum;
+    regNumber op3Reg = op3->gtRegNum;
+
+    assert(targetReg != REG_NA);
+    assert(op1Reg != REG_NA);
+    assert(op3Reg != REG_NA);
+
+    if (op2->isContained() || op2->isUsedFromSpillTemp())
+    {
+        TempDsc* tmpDsc = nullptr;
+        unsigned varNum = BAD_VAR_NUM;
+        unsigned offset = (unsigned)-1;
+
+        if (op2->isUsedFromSpillTemp())
+        {
+            assert(op2->IsRegOptional());
+
+            // TODO-XArch-Cleanup: The getSpillTempDsc...tempRlsTemp code is a fairly common
+            //                     pattern. It could probably be extracted to its own method.
+            tmpDsc = getSpillTempDsc(op2);
+            varNum = tmpDsc->tdTempNum();
+            offset = 0;
+
+            compiler->tmpRlsTemp(tmpDsc);
+        }
+        else if (op2->OperIsHWIntrinsic())
+        {
+            emit->emitIns_SIMD_R_R_AR_R(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum, op3Reg);
+            return;
+        }
+        else if (op2->isIndir())
+        {
+            GenTreeIndir* memIndir = op2->AsIndir();
+            GenTree*      memBase  = memIndir->gtOp1;
+
+            switch (memBase->OperGet())
+            {
+                case GT_LCL_VAR_ADDR:
+                {
+                    varNum = memBase->AsLclVarCommon()->GetLclNum();
+                    offset = 0;
+
+                    // Ensure that all the GenTreeIndir values are set to their defaults.
+                    assert(!memIndir->HasIndex());
+                    assert(memIndir->Scale() == 1);
+                    assert(memIndir->Offset() == 0);
+
+                    break;
+                }
+
+                case GT_CLS_VAR_ADDR:
+                {
+                    emit->emitIns_SIMD_R_R_C_R(ins, simdSize, targetReg, op1Reg, op3Reg, memBase->gtClsVar.gtClsVarHnd,
+                                               0);
+                    return;
+                }
+
+                default:
+                {
+                    emit->emitIns_SIMD_R_R_A_R(ins, simdSize, targetReg, op1Reg, op3Reg, memIndir);
+                    return;
+                }
+            }
+        }
+        else
+        {
+            switch (op2->OperGet())
+            {
+                case GT_LCL_FLD:
+                {
+                    GenTreeLclFld* lclField = op2->AsLclFld();
+
+                    varNum = lclField->GetLclNum();
+                    offset = lclField->gtLclFld.gtLclOffs;
+                    break;
+                }
+
+                case GT_LCL_VAR:
+                {
+                    assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate());
+                    varNum = op2->AsLclVar()->GetLclNum();
+                    offset = 0;
+                    break;
+                }
+
+                default:
+                    unreached();
+                    break;
+            }
+        }
+
+        // Ensure we got a good varNum and offset.
+        // We also need to check for `tmpDsc != nullptr` since spill temp numbers
+        // are negative and start with -1, which also happens to be BAD_VAR_NUM.
+        assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
+        assert(offset != (unsigned)-1);
+
+        emit->emitIns_SIMD_R_R_S_R(ins, simdSize, targetReg, op1Reg, op3Reg, varNum, offset);
+    }
+    else
+    {
+        emit->emitIns_SIMD_R_R_R_R(ins, simdSize, targetReg, op1Reg, op2->gtRegNum, op3Reg);
+    }
+}
+
 //------------------------------------------------------------------------
 // genHWIntrinsic_R_R_R_RM: Generates the code for a hardware intrinsic node that takes two register operands,
 //                          a register/memory operand, and that returns a value in register
@@ -2056,18 +2234,10 @@ void CodeGen::genFMAIntrinsic(GenTreeHWIntrinsic* node)
 //
 void CodeGen::genLZCNTIntrinsic(GenTreeHWIntrinsic* node)
 {
-    NamedIntrinsic intrinsicId = node->gtHWIntrinsicId;
-    GenTree*       op1         = node->gtGetOp1();
-    regNumber      targetReg   = node->gtRegNum;
-    assert(targetReg != REG_NA);
-    var_types targetType = node->TypeGet();
-    regNumber op1Reg     = op1->gtRegNum;
-    genConsumeOperands(node);
-
-    assert(intrinsicId == NI_LZCNT_LeadingZeroCount);
-
-    inst_RV_RV(INS_lzcnt, targetReg, op1Reg, targetType, emitTypeSize(targetType));
+    assert(node->gtHWIntrinsicId == NI_LZCNT_LeadingZeroCount);
 
+    genConsumeOperands(node);
+    genHWIntrinsic_R_RM(node, INS_lzcnt, emitTypeSize(node->TypeGet()));
     genProduceReg(node);
 }
 
@@ -2090,18 +2260,10 @@ void CodeGen::genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node)
 //
 void CodeGen::genPOPCNTIntrinsic(GenTreeHWIntrinsic* node)
 {
-    NamedIntrinsic intrinsicId = node->gtHWIntrinsicId;
-    GenTree*       op1         = node->gtGetOp1();
-    regNumber      targetReg   = node->gtRegNum;
-    assert(targetReg != REG_NA);
-    var_types targetType = node->TypeGet();
-    regNumber op1Reg     = op1->gtRegNum;
-    genConsumeOperands(node);
-
-    assert(intrinsicId == NI_POPCNT_PopCount);
-
-    inst_RV_RV(INS_popcnt, targetReg, op1Reg, targetType, emitTypeSize(targetType));
+    assert(node->gtHWIntrinsicId == NI_POPCNT_PopCount);
 
+    genConsumeOperands(node);
+    genHWIntrinsic_R_RM(node, INS_popcnt, emitTypeSize(node->TypeGet()));
     genProduceReg(node);
 }
 
index 04b637fe26573ada5c2dc4c582fb505b2f7f5f6a..fff2ec387de441e52bb8f22412eebd821438bdf1 100644 (file)
@@ -2418,6 +2418,7 @@ bool Lowering::IsContainableHWIntrinsicOp(GenTreeHWIntrinsic* containingNode, Ge
                     break;
                 }
             }
+            break;
         }
 
         case HW_Category_SIMDScalar:
@@ -2533,6 +2534,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
         {
             case HW_Category_SimpleSIMD:
             case HW_Category_SIMDScalar:
+            case HW_Category_Scalar:
             {
                 switch (intrinsicId)
                 {
@@ -2593,9 +2595,7 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
 
             default:
             {
-                // TODO-XArch-CQ: Assert that this is unreached after we have ensured the relevant node types are
-                // handled.
-                //                https://github.com/dotnet/coreclr/issues/16497
+                unreached();
                 break;
             }
         }
@@ -2637,6 +2637,112 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                     break;
                 }
 
+                case HW_Category_IMM:
+                {
+                    // We don't currently have any IMM intrinsics which are also commutative
+                    assert(!isCommutative);
+                    bool supportsRegOptional = false;
+
+                    switch (intrinsicId)
+                    {
+                        case NI_SSE2_ShiftLeftLogical:
+                        case NI_SSE2_ShiftRightArithmetic:
+                        case NI_SSE2_ShiftRightLogical:
+                        case NI_AVX2_ShiftLeftLogical:
+                        case NI_AVX2_ShiftRightArithmetic:
+                        case NI_AVX2_ShiftRightLogical:
+                        {
+                            // These intrinsics can have op2 be imm or reg/mem
+
+                            if (!HWIntrinsicInfo::isImmOp(intrinsicId, op2))
+                            {
+                                if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
+                                {
+                                    MakeSrcContained(node, op2);
+                                }
+                                else if (supportsRegOptional)
+                                {
+                                    op2->SetRegOptional();
+                                }
+                            }
+                            break;
+                        }
+
+                        case NI_SSE2_Shuffle:
+                        case NI_SSE2_ShuffleHigh:
+                        case NI_SSE2_ShuffleLow:
+                        {
+                            // These intrinsics have op2 as an imm and op1 as a reg/mem
+
+                            if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional))
+                            {
+                                MakeSrcContained(node, op1);
+                            }
+                            else if (supportsRegOptional)
+                            {
+                                op1->SetRegOptional();
+                            }
+                            break;
+                        }
+
+                        case NI_AVX_Permute:
+                        {
+                            // These intrinsics can have op2 be imm or reg/mem
+                            // They also can have op1 be reg/mem and op2 be imm
+
+                            if (HWIntrinsicInfo::isImmOp(intrinsicId, op2))
+                            {
+                                if (IsContainableHWIntrinsicOp(node, op1, &supportsRegOptional))
+                                {
+                                    MakeSrcContained(node, op1);
+                                }
+                                else if (supportsRegOptional)
+                                {
+                                    op1->SetRegOptional();
+                                }
+                            }
+                            else if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
+                            {
+                                MakeSrcContained(node, op2);
+                            }
+                            else if (supportsRegOptional)
+                            {
+                                op2->SetRegOptional();
+                            }
+                            break;
+                        }
+
+                        default:
+                        {
+                            break;
+                        }
+                    }
+
+                    break;
+                }
+
+                case HW_Category_Special:
+                {
+                    if (intrinsicId == NI_SSE2_CompareLessThan)
+                    {
+                        bool supportsRegOptional = false;
+
+                        if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
+                        {
+                            MakeSrcContained(node, op2);
+                        }
+                        else if (supportsRegOptional)
+                        {
+                            op2->SetRegOptional();
+                        }
+                    }
+                    else
+                    {
+                        unreached();
+                    }
+                    break;
+                }
+
                 default:
                 {
                     // TODO-XArch-CQ: Assert that this is unreached after we have ensured the relevant node types are
@@ -2707,13 +2813,82 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                             op3->SetRegOptional();
                         }
                     }
+                    else
+                    {
+                        bool supportsRegOptional = false;
+
+                        switch (intrinsicId)
+                        {
+                            case NI_SSE41_BlendVariable:
+                            case NI_AVX_BlendVariable:
+                            case NI_AVX2_BlendVariable:
+                            {
+                                if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
+                                {
+                                    MakeSrcContained(node, op2);
+                                }
+                                else if (supportsRegOptional)
+                                {
+                                    op2->SetRegOptional();
+                                }
+                                break;
+                            }
+
+                            default:
+                            {
+                                unreached();
+                                break;
+                            }
+                        }
+                    }
+                }
+
+                case HW_Category_IMM:
+                {
+                    bool supportsRegOptional = false;
+
+                    switch (intrinsicId)
+                    {
+                        case NI_SSE_Shuffle:
+                        case NI_SSE2_Insert:
+                        case NI_SSE2_Shuffle:
+                        case NI_SSSE3_AlignRight:
+                        case NI_SSE41_Blend:
+                        case NI_SSE41_DotProduct:
+                        case NI_SSE41_Insert:
+                        case NI_SSE41_MultipleSumAbsoluteDifferences:
+                        case NI_AVX_Blend:
+                        case NI_AVX_Compare:
+                        case NI_AVX_CompareScalar:
+                        case NI_AVX_DotProduct:
+                        case NI_AVX_Insert:
+                        case NI_AVX_Permute2x128:
+                        case NI_AVX_Shuffle:
+                        case NI_AVX2_Permute2x128:
+                        {
+                            if (IsContainableHWIntrinsicOp(node, op2, &supportsRegOptional))
+                            {
+                                MakeSrcContained(node, op2);
+                            }
+                            else if (supportsRegOptional)
+                            {
+                                op2->SetRegOptional();
+                            }
+                            break;
+                        }
+
+                        default:
+                        {
+                            break;
+                        }
+                    }
+
+                    break;
                 }
 
                 default:
                 {
-                    // TODO-XArch-CQ: Assert that this is unreached after we have ensured the relevant node types are
-                    // handled.
-                    //                https://github.com/dotnet/coreclr/issues/16497
+                    unreached();
                     break;
                 }
             }
index d4f62c552c6b9b7ed3db8be3bf737def64ad3767..e8f44cde188995616abf415f93c3d970cbc1d2ee 100644 (file)
@@ -301,20 +301,30 @@ private static readonly (string templateFileName, Dictionary<string, string> tem
     ("SimpleUnOpTest.template",      new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Floor",                         ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                                                                   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(MathF.Floor(firstOp[0]))",                                                                                                                                               ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(MathF.Floor(firstOp[i]))"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "FloorScalar",                   ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Double",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Double", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Double",                                                                                                      ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(double)(random.NextDouble())",                        ["NextValueOp2"] = "(double)(random.NextDouble())",                                                                                    ["ValidateFirstResult"] = "BitConverter.DoubleToInt64Bits(result[0]) != BitConverter.DoubleToInt64Bits(Math.Floor(right[0]))",                                                                                                                                                  ["ValidateRemainingResults"] = "BitConverter.DoubleToInt64Bits(result[i]) != BitConverter.DoubleToInt64Bits(left[i])"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "FloorScalar",                   ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Single",                                                                                                      ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                         ["NextValueOp2"] = "(float)(random.NextDouble())",                                                                                     ["ValidateFirstResult"] = "BitConverter.SingleToInt32Bits(result[0]) != BitConverter.SingleToInt32Bits(MathF.Floor(right[0]))",                                                                                                                                                 ["ValidateRemainingResults"] = "BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(left[i])"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte",    ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["Data"] = "(byte)2",  ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(byte)0",                                                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["Data"] = "(sbyte)2", ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(sbyte)0",                                                                                                                                                                                    ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["Data"] = "(int)2",   ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(int)0",                                                                                                                                                                                      ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["Data"] = "(uint)2",  ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(uint)0",                                                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["Data"] = "(long)2",  ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(long)0",                                                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["Data"] = "(ulong)2", ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(ulong)0",                                                                                                                                                                                    ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "0",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)0",                                                                                                                                                                                    ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((float)2) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((float)0))"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte",    ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["Data"] = "(byte)2",  ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(byte)0",                                                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["Data"] = "(sbyte)2", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(sbyte)0",                                                                                                                                                                                    ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["Data"] = "(int)2",   ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(int)0",                                                                                                                                                                                      ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["Data"] = "(uint)2",  ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(uint)0",                                                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["Data"] = "(long)2",  ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(long)0",                                                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["Data"] = "(ulong)2", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(ulong)0",                                                                                                                                                                                    ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != 0)"}),
-    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "217", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 2 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((float)0))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "0",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte",    ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["Data"] = "(byte)2",  ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(byte)(random.Next(0, byte.MaxValue))",                                                                                                                                                       ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["Data"] = "(sbyte)2", ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(sbyte)(random.Next(0, sbyte.MaxValue))",                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["Data"] = "(int)2",   ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(int)(random.Next(0, int.MaxValue))",                                                                                                                                                         ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["Data"] = "(uint)2",  ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(uint)(random.Next(0, int.MaxValue))",                                                                                                                                                        ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["Data"] = "(long)2",  ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(long)(random.Next(0, int.MaxValue))",                                                                                                                                                        ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["Data"] = "(ulong)2", ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(ulong)(random.Next(0, int.MaxValue))",                                                                                                                                                       ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "1",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "2",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : i == 1 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "4",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : i == 2 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "8",   ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : i == 3 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "16",  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 1 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "32",  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 2 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "48",  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 3 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "64",  ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "128", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Byte",    ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Byte",                                                                                                                             ["Data"] = "(byte)2",  ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(byte)(random.Next(0, byte.MaxValue))",                                                                                                                                                       ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte",                                                                                                                            ["Data"] = "(sbyte)2", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(sbyte)(random.Next(0, sbyte.MaxValue))",                                                                                                                                                     ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32",                                                                                                                            ["Data"] = "(int)2",   ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(int)(random.Next(0, int.MaxValue))",                                                                                                                                                         ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt32",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt32",                                                                                                                           ["Data"] = "(uint)2",  ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(uint)(random.Next(0, int.MaxValue))",                                                                                                                                                        ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int64",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int64",                                                                                                                            ["Data"] = "(long)2",  ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(long)(random.Next(0, int.MaxValue))",                                                                                                                                                        ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt64",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt64",                                                                                                                           ["Data"] = "(ulong)2", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(ulong)(random.Next(0, int.MaxValue))",                                                                                                                                                       ["ValidateFirstResult"] = "(i == 1 ? result[i] != 2 : result[i] != firstOp[i])"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "129", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
+    ("InsertScalarTest.template",    new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse",  ["Method"] = "Insert",                        ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Single",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Single",                                                                                                                           ["Data"] = "(float)2", ["Imm"] = "192", ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(float)(random.NextDouble())",                                                                                                                                                                ["ValidateFirstResult"] = "(i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i]))"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Max",                           ["RetVectorType"] = "Vector128", ["RetBaseType"] = "Int32",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "Int32",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "Int32",                                                                                                       ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(int)(random.Next(int.MinValue, int.MaxValue))",       ["NextValueOp2"] = "(int)(random.Next(int.MinValue, int.MaxValue))",                                                                   ["ValidateFirstResult"] = "result[0] != Math.Max(left[0], right[0])",                                                                                                                                                                                                           ["ValidateRemainingResults"] = "result[i] != Math.Max(left[i], right[i])"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Max",                           ["RetVectorType"] = "Vector128", ["RetBaseType"] = "SByte",   ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "SByte",  ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "SByte",                                                                                                       ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(sbyte)(random.Next(sbyte.MinValue, sbyte.MaxValue))", ["NextValueOp2"] = "(sbyte)(random.Next(sbyte.MinValue, sbyte.MaxValue))",                                                             ["ValidateFirstResult"] = "result[0] != Math.Max(left[0], right[0])",                                                                                                                                                                                                           ["ValidateRemainingResults"] = "result[i] != Math.Max(left[i], right[i])"}),
     ("SimpleBinOpTest.template",     new Dictionary<string, string> { ["Isa"] = "Sse41", ["LoadIsa"] = "Sse2", ["Method"] = "Max",                           ["RetVectorType"] = "Vector128", ["RetBaseType"] = "UInt16",  ["Op1VectorType"] ="Vector128", ["Op1BaseType"] = "UInt16", ["Op2VectorType"] = "Vector128", ["Op2BaseType"] = "UInt16",                                                                                                      ["LargestVectorSize"] = "16", ["NextValueOp1"] = "(ushort)(random.Next(0, ushort.MaxValue))",            ["NextValueOp2"] = "(ushort)(random.Next(0, ushort.MaxValue))",                                                                        ["ValidateFirstResult"] = "result[0] != Math.Max(left[0], right[0])",                                                                                                                                                                                                           ["ValidateRemainingResults"] = "result[i] != Math.Max(left[i], right[i])"}),
index d5e06087cd5c3061a91197deaf9e678ed494468e..de94118fcb9e8f5b9cc6a99fa4f3ba50eb79351d 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)(random.Next(0, byte.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _clsVar), ref Unsafe.As<Byte, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)(random.Next(0, byte.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _fld), ref Unsafe.As<Byte, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)(random.Next(0, byte.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<Byte, Byte>(_data, new Byte[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index 80cbdb80d3886d670e4f324d90e74a62adc2ead0..6c4f2d59089b3bf705c9eb97ec2d1da37ac6535b 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)(random.Next(0, byte.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _clsVar), ref Unsafe.As<Byte, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)(random.Next(0, byte.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _fld), ref Unsafe.As<Byte, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (byte)(random.Next(0, byte.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<Byte, Byte>(_data, new Byte[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index 6cb04a42fdd6699dcb6b302d19d8cc5785c78eba..a89626622c8cbb9c15adb03a7d8224aedad969b4 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar), ref Unsafe.As<Int32, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld), ref Unsafe.As<Int32, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)(random.Next(0, int.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<Int32, Int32>(_data, new Int32[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index 000fab8ff6495cf7af5fb45558e67efec6b74248..c257663651c0770b5bbbedf8c7c5c0a48dd7436d 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar), ref Unsafe.As<Int32, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld), ref Unsafe.As<Int32, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (int)(random.Next(0, int.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<Int32, Int32>(_data, new Int32[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index bf125e848f471153a177fd546e6a1d42e039469e..c673ce63220cd9ce37499def88b469c5a4699cc1 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _clsVar), ref Unsafe.As<Int64, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _fld), ref Unsafe.As<Int64, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)(random.Next(0, int.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<Int64, Int64>(_data, new Int64[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index 15a1495116b6df7e66d4c8e29e1f7e332ae8b9e7..6cd83adc408bc8d8861ebd1dbe4460476db0ea6e 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _clsVar), ref Unsafe.As<Int64, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int64>, byte>(ref _fld), ref Unsafe.As<Int64, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Int64>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (long)(random.Next(0, int.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<Int64, Int64>(_data, new Int64[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index f58c0f12c060905379d95472529b7801aee5c744..f0a4dcf47c67c4c132210aa84d6dfb77ced8f06e 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)(random.Next(0, sbyte.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _clsVar), ref Unsafe.As<SByte, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)(random.Next(0, sbyte.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _fld), ref Unsafe.As<SByte, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)(random.Next(0, sbyte.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<SByte, SByte>(_data, new SByte[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index 168a59a00207326690e63114fd5b8cfe05050310..7bad860ba41ba0723fcd5c09029c3eab1979358e 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)(random.Next(0, sbyte.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _clsVar), ref Unsafe.As<SByte, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)(random.Next(0, sbyte.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _fld), ref Unsafe.As<SByte, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (sbyte)(random.Next(0, sbyte.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<SByte, SByte>(_data, new SByte[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index 13d060d7b0ae38908e80ea760c04ab52d1aee75e..5ee20531daf891d6a00847add75be3113bf393b5 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
             _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((float)2) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits((float)0)))
+                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
                 {
                     Succeeded = false;
                     break;
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.1.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.1.cs
new file mode 100644 (file)
index 0000000..537fe66
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle1()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle1();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle1
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle1()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle1()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                1
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                1
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                1
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)1
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)1
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)1
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                1
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle1();
+            var result = Sse41.Insert(test._fld, (float)2, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 1);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.128.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.128.cs
new file mode 100644 (file)
index 0000000..0ca4931
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle128()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle128();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle128
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle128()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle128()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                128
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                128
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                128
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)128
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)128
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)128
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                128
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle128();
+            var result = Sse41.Insert(test._fld, (float)2, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 128);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.129.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.129.cs
new file mode 100644 (file)
index 0000000..8b30e9f
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle129()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle129();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle129
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle129()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle129()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                129
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                129
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                129
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)129
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)129
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)129
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                129
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 129);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 129);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 129);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle129();
+            var result = Sse41.Insert(test._fld, (float)2, 129);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 129);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.16.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.16.cs
new file mode 100644 (file)
index 0000000..a491c10
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle16()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle16();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle16
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle16()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle16()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                16
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                16
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                16
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)16
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)16
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)16
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                16
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle16();
+            var result = Sse41.Insert(test._fld, (float)2, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 16);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 1 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.192.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.192.cs
new file mode 100644 (file)
index 0000000..f8908e3
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle192()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle192();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle192
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle192()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle192()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                192
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                192
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                192
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)192
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)192
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)192
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                192
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle192();
+            var result = Sse41.Insert(test._fld, (float)2, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 192);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.2.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.2.cs
new file mode 100644 (file)
index 0000000..44ead60
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle2()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle2();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle2
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle2()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle2()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)2
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)2
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)2
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle2();
+            var result = Sse41.Insert(test._fld, (float)2, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : i == 1 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.32.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.32.cs
new file mode 100644 (file)
index 0000000..6d647af
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle32()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle32();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle32
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle32()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle32()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                32
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                32
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                32
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)32
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)32
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)32
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                32
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 32);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 32);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 32);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle32();
+            var result = Sse41.Insert(test._fld, (float)2, 32);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 32);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 2 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.4.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.4.cs
new file mode 100644 (file)
index 0000000..c47c9d2
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle4()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle4();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle4
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle4()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle4()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                4
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                4
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                4
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)4
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)4
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)4
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                4
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 4);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 4);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 4);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle4();
+            var result = Sse41.Insert(test._fld, (float)2, 4);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 4);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : i == 2 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.48.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.48.cs
new file mode 100644 (file)
index 0000000..4a8eef9
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle48()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle48();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle48
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle48()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle48()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                48
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                48
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                48
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)48
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)48
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)48
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                48
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 48);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 48);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 48);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle48();
+            var result = Sse41.Insert(test._fld, (float)2, 48);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 48);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 3 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.64.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.64.cs
new file mode 100644 (file)
index 0000000..feddaad
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle64()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle64();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle64
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle64()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle64()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                64
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                64
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                64
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)64
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)64
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)64
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                64
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle64();
+            var result = Sse41.Insert(test._fld, (float)2, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 64);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
diff --git a/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.8.cs b/src/coreclr/tests/src/JIT/HardwareIntrinsics/X86/Sse41/Insert.Single.8.cs
new file mode 100644 (file)
index 0000000..b81444c
--- /dev/null
@@ -0,0 +1,321 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+// See the LICENSE file in the project root for more information.
+
+/******************************************************************************
+ * This file is auto-generated from a template file by the GenerateTests.csx  *
+ * script in tests\src\JIT\HardwareIntrinsics\X86\Shared. In order to make    *
+ * changes, please update the corresponding template and run according to the *
+ * directions listed in the file.                                             *
+ ******************************************************************************/
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void InsertSingle8()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle8();
+            
+            try
+            {
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Sse.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local works
+                test.RunLclFldScenario();
+
+                // Validates passing an instance member works
+                test.RunFldScenario();
+            }
+            else
+            {
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+            }
+            catch (PlatformNotSupportedException)
+            {
+                test.Succeeded = true;
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleUnaryOpTest__InsertSingle8
+    {
+        private static readonly int LargestVectorSize = 16;
+
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Single>>() / sizeof(Single);
+
+        private static Single[] _data = new Single[Op1ElementCount];
+
+        private static Vector128<Single> _clsVar;
+
+        private Vector128<Single> _fld;
+
+        private SimpleUnaryOpTest__DataTable<Single, Single> _dataTable;
+
+        static SimpleUnaryOpTest__InsertSingle8()
+        {
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _clsVar), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+        }
+
+        public SimpleUnaryOpTest__InsertSingle8()
+        {
+            Succeeded = true;
+
+            var random = new Random();
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Single>, byte>(ref _fld), ref Unsafe.As<Single, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (float)(random.NextDouble()); }
+            _dataTable = new SimpleUnaryOpTest__DataTable<Single, Single>(_data, new Single[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => Sse41.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            var result = Sse41.Insert(
+                Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                (float)2,
+                8
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                8
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            var result = Sse41.Insert(
+                Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                (float)2,
+                8
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr),
+                                        (float)2,
+                                        (byte)8
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)8
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            var result = typeof(Sse41).GetMethod(nameof(Sse41.Insert), new Type[] { typeof(Vector128<Single>), typeof(Single), typeof(byte) })
+                                     .Invoke(null, new object[] {
+                                        Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr)),
+                                        (float)2,
+                                        (byte)8
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Single>)(result));
+            ValidateResult(_dataTable.inArrayPtr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            var result = Sse41.Insert(
+                _clsVar,
+                (float)2,
+                8
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            var firstOp = Unsafe.Read<Vector128<Single>>(_dataTable.inArrayPtr);
+            var result = Sse41.Insert(firstOp, (float)2, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            var firstOp = Sse.LoadVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            var firstOp = Sse.LoadAlignedVector128((Single*)(_dataTable.inArrayPtr));
+            var result = Sse41.Insert(firstOp, (float)2, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(firstOp, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclFldScenario()
+        {
+            var test = new SimpleUnaryOpTest__InsertSingle8();
+            var result = Sse41.Insert(test._fld, (float)2, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunFldScenario()
+        {
+            var result = Sse41.Insert(_fld, (float)2, 8);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld, _dataTable.outArrayPtr);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            Succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                Succeeded = true;
+            }
+        }
+
+        private void ValidateResult(Vector128<Single> firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), firstOp);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(void* firstOp, void* result, [CallerMemberName] string method = "")
+        {
+            Single[] inArray = new Single[Op1ElementCount];
+            Single[] outArray = new Single[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref inArray[0]), ref Unsafe.AsRef<byte>(firstOp), (uint)Unsafe.SizeOf<Vector128<Single>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Single, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Single>>());
+
+            ValidateResult(inArray, outArray, method);
+        }
+
+        private void ValidateResult(Single[] firstOp, Single[] result, [CallerMemberName] string method = "")
+        {
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if ((i == 0 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(2.0f) : i == 3 ? BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(0.0f) : BitConverter.SingleToInt32Bits(result[i]) != BitConverter.SingleToInt32Bits(firstOp[i])))
+                {
+                    Succeeded = false;
+                    break;
+                }
+            }
+
+            if (!Succeeded)
+            {
+                Console.WriteLine($"{nameof(Sse41)}.{nameof(Sse41.Insert)}<Single>(Vector128<Single><9>): {method} failed:");
+                Console.WriteLine($"  firstOp: ({string.Join(", ", firstOp)})");
+                Console.WriteLine($"   result: ({string.Join(", ", result)})");
+                Console.WriteLine();
+            }
+        }
+    }
+}
index b5814ef97616ab12d7f42ea648cd0ce92bff2e55..ab82f5f1bf08cd99802e2d7036d3d27b7cb36cc6 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _clsVar), ref Unsafe.As<UInt32, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _fld), ref Unsafe.As<UInt32, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)(random.Next(0, int.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<UInt32, UInt32>(_data, new UInt32[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index 4109ba3e56f94bd4864e6d54b9df0e33d681c4a8..27177f67f28ed08c0e59cd7fa4f96e70377e2737 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _clsVar), ref Unsafe.As<UInt32, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt32>, byte>(ref _fld), ref Unsafe.As<UInt32, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<UInt32>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (uint)(random.Next(0, int.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<UInt32, UInt32>(_data, new UInt32[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index c923ee1ad727f5e4666cebf00ed381fdd6d08807..d4d9f998192381d8b591ce3007edbb44410f9e16 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _clsVar), ref Unsafe.As<UInt64, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _fld), ref Unsafe.As<UInt64, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)(random.Next(0, int.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<UInt64, UInt64>(_data, new UInt64[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index 22bd2b58a9e42e95cf0e49696e446ca0771c82eb..62ce511405c80c0f68f50bcf3b5cce4c333a2000 100644 (file)
@@ -109,7 +109,7 @@ namespace JIT.HardwareIntrinsics.X86
         {
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _clsVar), ref Unsafe.As<UInt64, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
         }
 
@@ -119,10 +119,10 @@ namespace JIT.HardwareIntrinsics.X86
 
             var random = new Random();
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)(random.Next(0, int.MaxValue)); }
             Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<UInt64>, byte>(ref _fld), ref Unsafe.As<UInt64, byte>(ref _data[0]), (uint)Unsafe.SizeOf<Vector128<UInt64>>());
 
-            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)0; }
+            for (var i = 0; i < Op1ElementCount; i++) { _data[i] = (ulong)(random.Next(0, int.MaxValue)); }
             _dataTable = new SimpleUnaryOpTest__DataTable<UInt64, UInt64>(_data, new UInt64[RetElementCount], LargestVectorSize);
         }
 
@@ -302,7 +302,7 @@ namespace JIT.HardwareIntrinsics.X86
 
             for (var i = 0; i < RetElementCount; i++)
             {
-                if ((i == 1 ? result[i] != 2 : result[i] != 0))
+                if ((i == 1 ? result[i] != 2 : result[i] != firstOp[i]))
                 {
                     Succeeded = false;
                     break;
index 7302624c09223e5007d98d9d1ccf02739d122d7e..195e1865b2b425fee472a93c04e7494bc9d6b8fa 100644 (file)
@@ -40,20 +40,30 @@ namespace JIT.HardwareIntrinsics.X86
                 ["Floor.Single"] = FloorSingle,
                 ["FloorScalar.Double"] = FloorScalarDouble,
                 ["FloorScalar.Single"] = FloorScalarSingle,
+                ["Insert.Single.0"] = InsertSingle0,
                 ["Insert.Byte.1"] = InsertByte1,
                 ["Insert.SByte.1"] = InsertSByte1,
                 ["Insert.Int32.1"] = InsertInt321,
                 ["Insert.UInt32.1"] = InsertUInt321,
                 ["Insert.Int64.1"] = InsertInt641,
                 ["Insert.UInt64.1"] = InsertUInt641,
-                ["Insert.Single.0"] = InsertSingle0,
+                ["Insert.Single.1"] = InsertSingle1,
+                ["Insert.Single.2"] = InsertSingle2,
+                ["Insert.Single.4"] = InsertSingle4,
+                ["Insert.Single.8"] = InsertSingle8,
+                ["Insert.Single.16"] = InsertSingle16,
+                ["Insert.Single.32"] = InsertSingle32,
+                ["Insert.Single.48"] = InsertSingle48,
+                ["Insert.Single.64"] = InsertSingle64,
+                ["Insert.Single.128"] = InsertSingle128,
                 ["Insert.Byte.129"] = InsertByte129,
                 ["Insert.SByte.129"] = InsertSByte129,
                 ["Insert.Int32.129"] = InsertInt32129,
                 ["Insert.UInt32.129"] = InsertUInt32129,
                 ["Insert.Int64.129"] = InsertInt64129,
                 ["Insert.UInt64.129"] = InsertUInt64129,
-                ["Insert.Single.217"] = InsertSingle217,
+                ["Insert.Single.129"] = InsertSingle129,
+                ["Insert.Single.192"] = InsertSingle192,
                 ["Max.Int32"] = MaxInt32,
                 ["Max.SByte"] = MaxSByte,
                 ["Max.UInt16"] = MaxUInt16,
index 12a6b0b10d22822f3a0238039b2cf8125da69ba9..e98e0c2acba7e067b2dc10120b22ea93ea8d0209 100644 (file)
     <Compile Include="Extract.Int64.129.cs" />
     <Compile Include="Extract.UInt64.129.cs" />
     <Compile Include="Extract.Single.129.cs" />
+    <Compile Include="Insert.Single.0.cs" />
     <Compile Include="Insert.Byte.1.cs" />
     <Compile Include="Insert.SByte.1.cs" />
     <Compile Include="Insert.Int32.1.cs" />
     <Compile Include="Insert.UInt32.1.cs" />
     <Compile Include="Insert.Int64.1.cs" />
     <Compile Include="Insert.UInt64.1.cs" />
-    <Compile Include="Insert.Single.0.cs" />
+    <Compile Include="Insert.Single.1.cs" />
+    <Compile Include="Insert.Single.2.cs" />
+    <Compile Include="Insert.Single.4.cs" />
+    <Compile Include="Insert.Single.8.cs" />
+    <Compile Include="Insert.Single.16.cs" />
+    <Compile Include="Insert.Single.32.cs" />
+    <Compile Include="Insert.Single.48.cs" />
+    <Compile Include="Insert.Single.64.cs" />
+    <Compile Include="Insert.Single.128.cs" />
     <Compile Include="Insert.Byte.129.cs" />
     <Compile Include="Insert.SByte.129.cs" />
     <Compile Include="Insert.Int32.129.cs" />
     <Compile Include="Insert.UInt32.129.cs" />
     <Compile Include="Insert.Int64.129.cs" />
     <Compile Include="Insert.UInt64.129.cs" />
-    <Compile Include="Insert.Single.217.cs" />
+    <Compile Include="Insert.Single.129.cs" />
+    <Compile Include="Insert.Single.192.cs" />
     <Compile Include="Program.Sse41.cs" />
     <Compile Include="..\Shared\BooleanUnOpTest_DataTable.cs" />
     <Compile Include="..\Shared\BooleanBinOpTest_DataTable.cs" />
index de6d56ed7e41538466ebf64eba2787cf4c964d4c..171f917b8625972c849a1c890562ab02e40872bc 100644 (file)
     <Compile Include="Extract.Int64.129.cs" />
     <Compile Include="Extract.UInt64.129.cs" />
     <Compile Include="Extract.Single.129.cs" />
+    <Compile Include="Insert.Single.0.cs" />
     <Compile Include="Insert.Byte.1.cs" />
     <Compile Include="Insert.SByte.1.cs" />
     <Compile Include="Insert.Int32.1.cs" />
     <Compile Include="Insert.UInt32.1.cs" />
     <Compile Include="Insert.Int64.1.cs" />
     <Compile Include="Insert.UInt64.1.cs" />
-    <Compile Include="Insert.Single.0.cs" />
+    <Compile Include="Insert.Single.1.cs" />
+    <Compile Include="Insert.Single.2.cs" />
+    <Compile Include="Insert.Single.4.cs" />
+    <Compile Include="Insert.Single.8.cs" />
+    <Compile Include="Insert.Single.16.cs" />
+    <Compile Include="Insert.Single.32.cs" />
+    <Compile Include="Insert.Single.48.cs" />
+    <Compile Include="Insert.Single.64.cs" />
+    <Compile Include="Insert.Single.128.cs" />
     <Compile Include="Insert.Byte.129.cs" />
     <Compile Include="Insert.SByte.129.cs" />
     <Compile Include="Insert.Int32.129.cs" />
     <Compile Include="Insert.UInt32.129.cs" />
     <Compile Include="Insert.Int64.129.cs" />
     <Compile Include="Insert.UInt64.129.cs" />
-    <Compile Include="Insert.Single.217.cs" />
+    <Compile Include="Insert.Single.129.cs" />
+    <Compile Include="Insert.Single.192.cs" />
     <Compile Include="Program.Sse41.cs" />
     <Compile Include="..\Shared\BooleanUnOpTest_DataTable.cs" />
     <Compile Include="..\Shared\BooleanBinOpTest_DataTable.cs" />