Add support for AvxVnni instructions under Experimental. (#51998)
authorweilinwa <weilin.wang@intel.com>
Wed, 2 Jun 2021 02:18:48 +0000 (19:18 -0700)
committerGitHub <noreply@github.com>
Wed, 2 Jun 2021 02:18:48 +0000 (19:18 -0700)
* Add support for AvxVnni instructions under Experimental.

* Add support for AvxVnni instructions

* Add preveiw feature attribute

* Handle operands in lsra

* Undo changes for Experimental

* Update JITEEVersionIdentifier and fix remaining issues

* Resolve Mono CI failure

* Disable tests

* Disable Vector128 tests

* Modify disable tests

Co-authored-by: Tanner Gooding <tagoo@outlook.com>
39 files changed:
src/coreclr/inc/corinfoinstructionset.h
src/coreclr/inc/jiteeversionguid.h
src/coreclr/inc/readytoruninstructionset.h
src/coreclr/jit/compiler.cpp
src/coreclr/jit/emitxarch.cpp
src/coreclr/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/jit/hwintrinsiclistxarch.h
src/coreclr/jit/hwintrinsicxarch.cpp
src/coreclr/jit/instrsxarch.h
src/coreclr/jit/jitconfigvalues.h
src/coreclr/jit/lowerxarch.cpp
src/coreclr/jit/lsraxarch.cpp
src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSet.cs
src/coreclr/tools/Common/Internal/Runtime/ReadyToRunInstructionSetHelper.cs
src/coreclr/tools/Common/JitInterface/CorInfoInstructionSet.cs
src/coreclr/tools/Common/JitInterface/ThunkGenerator/InstructionSetDesc.txt
src/coreclr/vm/codeman.cpp
src/libraries/System.Private.CoreLib/src/ILLink/ILLink.Substitutions.NoX86Intrinsics.xml
src/libraries/System.Private.CoreLib/src/System.Private.CoreLib.Shared.projitems
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs [new file with mode: 0644]
src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs [new file with mode: 0644]
src/libraries/System.Runtime.Intrinsics/ref/System.Runtime.Intrinsics.cs
src/mono/mono/mini/simd-intrinsics.c
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs [new file with mode: 0644]
src/tests/JIT/HardwareIntrinsics/X86/Shared/Program.cs
src/tests/issues.targets

index 5b7ec3f..39003cb 100644 (file)
@@ -56,22 +56,24 @@ enum CORINFO_InstructionSet
     InstructionSet_POPCNT=16,
     InstructionSet_Vector128=17,
     InstructionSet_Vector256=18,
-    InstructionSet_X86Base_X64=19,
-    InstructionSet_SSE_X64=20,
-    InstructionSet_SSE2_X64=21,
-    InstructionSet_SSE3_X64=22,
-    InstructionSet_SSSE3_X64=23,
-    InstructionSet_SSE41_X64=24,
-    InstructionSet_SSE42_X64=25,
-    InstructionSet_AVX_X64=26,
-    InstructionSet_AVX2_X64=27,
-    InstructionSet_AES_X64=28,
-    InstructionSet_BMI1_X64=29,
-    InstructionSet_BMI2_X64=30,
-    InstructionSet_FMA_X64=31,
-    InstructionSet_LZCNT_X64=32,
-    InstructionSet_PCLMULQDQ_X64=33,
-    InstructionSet_POPCNT_X64=34,
+    InstructionSet_AVXVNNI=19,
+    InstructionSet_X86Base_X64=20,
+    InstructionSet_SSE_X64=21,
+    InstructionSet_SSE2_X64=22,
+    InstructionSet_SSE3_X64=23,
+    InstructionSet_SSSE3_X64=24,
+    InstructionSet_SSE41_X64=25,
+    InstructionSet_SSE42_X64=26,
+    InstructionSet_AVX_X64=27,
+    InstructionSet_AVX2_X64=28,
+    InstructionSet_AES_X64=29,
+    InstructionSet_BMI1_X64=30,
+    InstructionSet_BMI2_X64=31,
+    InstructionSet_FMA_X64=32,
+    InstructionSet_LZCNT_X64=33,
+    InstructionSet_PCLMULQDQ_X64=34,
+    InstructionSet_POPCNT_X64=35,
+    InstructionSet_AVXVNNI_X64=36,
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
     InstructionSet_X86Base=1,
@@ -92,22 +94,24 @@ enum CORINFO_InstructionSet
     InstructionSet_POPCNT=16,
     InstructionSet_Vector128=17,
     InstructionSet_Vector256=18,
-    InstructionSet_X86Base_X64=19,
-    InstructionSet_SSE_X64=20,
-    InstructionSet_SSE2_X64=21,
-    InstructionSet_SSE3_X64=22,
-    InstructionSet_SSSE3_X64=23,
-    InstructionSet_SSE41_X64=24,
-    InstructionSet_SSE42_X64=25,
-    InstructionSet_AVX_X64=26,
-    InstructionSet_AVX2_X64=27,
-    InstructionSet_AES_X64=28,
-    InstructionSet_BMI1_X64=29,
-    InstructionSet_BMI2_X64=30,
-    InstructionSet_FMA_X64=31,
-    InstructionSet_LZCNT_X64=32,
-    InstructionSet_PCLMULQDQ_X64=33,
-    InstructionSet_POPCNT_X64=34,
+    InstructionSet_AVXVNNI=19,
+    InstructionSet_X86Base_X64=20,
+    InstructionSet_SSE_X64=21,
+    InstructionSet_SSE2_X64=22,
+    InstructionSet_SSE3_X64=23,
+    InstructionSet_SSSE3_X64=24,
+    InstructionSet_SSE41_X64=25,
+    InstructionSet_SSE42_X64=26,
+    InstructionSet_AVX_X64=27,
+    InstructionSet_AVX2_X64=28,
+    InstructionSet_AES_X64=29,
+    InstructionSet_BMI1_X64=30,
+    InstructionSet_BMI2_X64=31,
+    InstructionSet_FMA_X64=32,
+    InstructionSet_LZCNT_X64=33,
+    InstructionSet_PCLMULQDQ_X64=34,
+    InstructionSet_POPCNT_X64=35,
+    InstructionSet_AVXVNNI_X64=36,
 #endif // TARGET_X86
 
 };
@@ -205,6 +209,8 @@ public:
             AddInstructionSet(InstructionSet_PCLMULQDQ_X64);
         if (HasInstructionSet(InstructionSet_POPCNT))
             AddInstructionSet(InstructionSet_POPCNT_X64);
+        if (HasInstructionSet(InstructionSet_AVXVNNI))
+            AddInstructionSet(InstructionSet_AVXVNNI_X64);
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
 #endif // TARGET_X86
@@ -342,6 +348,10 @@ inline CORINFO_InstructionSetFlags EnsureInstructionSetFlagsAreValid(CORINFO_Ins
             resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
         if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT))
             resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64);
+        if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64))
+            resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
+        if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI))
+            resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64);
         if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
             resultflags.RemoveInstructionSet(InstructionSet_SSE);
         if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
@@ -530,6 +540,10 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "Vector128";
         case InstructionSet_Vector256 :
             return "Vector256";
+        case InstructionSet_AVXVNNI :
+            return "AVXVNNI";
+        case InstructionSet_AVXVNNI_X64 :
+            return "AVXVNNI_X64";
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
         case InstructionSet_X86Base :
@@ -568,6 +582,8 @@ inline const char *InstructionSetToString(CORINFO_InstructionSet instructionSet)
             return "Vector128";
         case InstructionSet_Vector256 :
             return "Vector256";
+        case InstructionSet_AVXVNNI :
+            return "AVXVNNI";
 #endif // TARGET_X86
 
         default:
@@ -615,6 +631,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
         case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
         case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
         case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
+        case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
 #endif // TARGET_AMD64
 #ifdef TARGET_X86
         case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
@@ -633,6 +650,7 @@ inline CORINFO_InstructionSet InstructionSetFromR2RInstructionSet(ReadyToRunInst
         case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
         case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
         case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
+        case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
 #endif // TARGET_X86
 
         default:
index 548fcf5..25746b4 100644 (file)
@@ -43,12 +43,12 @@ typedef const GUID *LPCGUID;
 #define GUID_DEFINED
 #endif // !GUID_DEFINED
 
-constexpr GUID JITEEVersionIdentifier = { /* 81a5e384-8ca5-4947-8b2e-1d76556728fd */
-    0x81a5e384,
-    0x8ca5,
-    0x4947,
-    {0x8b, 0x2e, 0x1d, 0x76, 0x55, 0x67, 0x28, 0xfd}
-};
+constexpr GUID JITEEVersionIdentifier = { /* 1052f490-cad7-4610-99bb-6f2bd91a1d19 */
+    0x1052f490,
+    0xcad7,
+    0x4610,
+    {0x99, 0xbb, 0x6f, 0x2b, 0xd9, 0x1a, 0x1d, 0x19}
+  };
 
 //////////////////////////////////////////////////////////////////////////////////////////////////////////
 //
index 9a4d0ba..1b66c6e 100644 (file)
@@ -33,6 +33,7 @@ enum ReadyToRunInstructionSet
     READYTORUN_INSTRUCTION_X86Base=22,
     READYTORUN_INSTRUCTION_Dp=23,
     READYTORUN_INSTRUCTION_Rdm=24,
+    READYTORUN_INSTRUCTION_AvxVnni=25,
 
 };
 
index 48bc487..5f086e1 100644 (file)
@@ -2419,6 +2419,11 @@ void Compiler::compSetProcessor()
         instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX2);
     }
 
+    if (!JitConfig.EnableAVXVNNI())
+    {
+        instructionSetFlags.RemoveInstructionSet(InstructionSet_AVXVNNI);
+    }
+
     if (!JitConfig.EnableLZCNT())
     {
         instructionSetFlags.RemoveInstructionSet(InstructionSet_LZCNT);
index c22d52b..da7febe 100644 (file)
@@ -44,6 +44,11 @@ bool IsFMAInstruction(instruction ins)
     return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
 }
 
+bool IsAVXVNNIInstruction(instruction ins)
+{
+    return (ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION);
+}
+
 bool IsBMIInstruction(instruction ins)
 {
     return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
@@ -6314,7 +6319,7 @@ void emitter::emitIns_SIMD_R_R_S_I(
 void emitter::emitIns_SIMD_R_R_R_A(
     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir)
 {
-    assert(IsFMAInstruction(ins));
+    assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
     assert(UseVEXEncoding());
 
     // Ensure we aren't overwriting op2
@@ -6395,7 +6400,7 @@ void emitter::emitIns_SIMD_R_R_R_C(instruction          ins,
 void emitter::emitIns_SIMD_R_R_R_R(
     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
 {
-    if (IsFMAInstruction(ins))
+    if (IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins))
     {
         assert(UseVEXEncoding());
 
@@ -6463,7 +6468,7 @@ void emitter::emitIns_SIMD_R_R_R_R(
 void emitter::emitIns_SIMD_R_R_R_S(
     instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs)
 {
-    assert(IsFMAInstruction(ins));
+    assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
     assert(UseVEXEncoding());
 
     // Ensure we aren't overwriting op2
@@ -15633,6 +15638,10 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
         case INS_vfnmsub132ss:
         case INS_vfnmsub213ss:
         case INS_vfnmsub231ss:
+        case INS_vpdpbusd:  // will be populated when the HW becomes publicly available
+        case INS_vpdpwssd:  // will be populated when the HW becomes publicly available
+        case INS_vpdpbusds: // will be populated when the HW becomes publicly available
+        case INS_vpdpwssds: // will be populated when the HW becomes publicly available
             // uops.info
             result.insThroughput = PERFSCORE_THROUGHPUT_2X;
             result.insLatency += PERFSCORE_LATENCY_4C;
index be6e577..8448dbd 100644 (file)
@@ -324,7 +324,16 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                             genHWIntrinsic_R_R_RM_R(node, ins);
                             break;
                         }
+                        case NI_AVXVNNI_MultiplyWideningAndAdd:
+                        case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
+                        {
+                            assert(targetReg != REG_NA);
+                            assert(op1Reg != REG_NA);
+                            assert(op2Reg != REG_NA);
 
+                            genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3);
+                            break;
+                        }
                         default:
                         {
                             unreached();
index 82d9a43..eb9bac1 100644 (file)
@@ -567,7 +567,13 @@ HARDWARE_INTRINSIC(AVX2,            SubtractSaturate,
 HARDWARE_INTRINSIC(AVX2,            UnpackHigh,                                 32,              2,     {INS_punpckhbw,         INS_punpckhbw,          INS_punpckhwd,          INS_punpckhwd,          INS_punpckhdq,          INS_punpckhdq,          INS_punpckhqdq,         INS_punpckhqdq,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            UnpackLow,                                  32,              2,     {INS_punpcklbw,         INS_punpcklbw,          INS_punpcklwd,          INS_punpcklwd,          INS_punpckldq,          INS_punpckldq,          INS_punpcklqdq,         INS_punpcklqdq,         INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            Xor,                                        32,              2,     {INS_pxor,              INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_pxor,               INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_Commutative)
-
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
+//                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+//  AVXVNNI Intrinsics
+HARDWARE_INTRINSIC(AVXVNNI,         MultiplyWideningAndAdd,                     -1,              3,     {INS_invalid,           INS_vpdpbusd,            INS_vpdpwssd,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVXVNNI,         MultiplyWideningAndAddSaturate,             -1,              3,     {INS_invalid,           INS_vpdpbusds,           INS_vpdpwssds,         INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg)
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 ISA              Function name                               SIMD size       NumArg                                                                                                         Instructions                                                                                                                             Category                            Flags
 //                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
index 0155e41..6e81227 100644 (file)
@@ -36,6 +36,8 @@ static CORINFO_InstructionSet X64VersionOfIsa(CORINFO_InstructionSet isa)
             return InstructionSet_AVX_X64;
         case InstructionSet_AVX2:
             return InstructionSet_AVX2_X64;
+        case InstructionSet_AVXVNNI:
+            return InstructionSet_AVXVNNI_X64;
         case InstructionSet_AES:
             return InstructionSet_AES_X64;
         case InstructionSet_BMI1:
@@ -80,6 +82,10 @@ static CORINFO_InstructionSet lookupInstructionSet(const char* className)
         {
             return InstructionSet_AVX2;
         }
+        if (strcmp(className, "AvxVnni") == 0)
+        {
+            return InstructionSet_AVXVNNI;
+        }
     }
     else if (className[0] == 'S')
     {
@@ -348,6 +354,8 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(CORINFO_InstructionSet isa)
         case InstructionSet_AVX_X64:
         case InstructionSet_AVX2:
         case InstructionSet_AVX2_X64:
+        case InstructionSet_AVXVNNI:
+        case InstructionSet_AVXVNNI_X64:
         case InstructionSet_BMI1:
         case InstructionSet_BMI1_X64:
         case InstructionSet_BMI2:
index 25e5de7..750f1b2 100644 (file)
@@ -583,6 +583,13 @@ INST3(vfnmsub213ss,     "fnmsub213ss",      IUM_WR, BAD_CODE,     BAD_CODE,
 INST3(vfnmsub231ss,     "fnmsub231ss",      IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xBF),                             INS_Flags_IsDstDstSrcAVXInstruction)    //
 INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
 
+INST3(FIRST_AVXVNNI_INSTRUCTION, "FIRST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(vpdpbusd,          "pdpbusd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x50),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Multiply and Add Unsigned and Signed Bytes
+INST3(vpdpwssd,          "pdpwssd",         IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x52),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Multiply and Add Signed Word Integers
+INST3(vpdpbusds,         "pdpbusds",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x51),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Multiply and Add Unsigned and Signed Bytes with Saturation
+INST3(vpdpwssds,         "pdpwssds",        IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0x53),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Multiply and Add Signed Word Integers with Saturation
+INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+
 // BMI1
 INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
 INST3(andn,             "andn",             IUM_WR, BAD_CODE,     BAD_CODE,     SSE38(0xF2),                             INS_Flags_IsDstDstSrcAVXInstruction)    // Logical AND NOT
index 8037e33..5d1cd06 100644 (file)
@@ -279,6 +279,7 @@ CONFIG_INTEGER(EnableSSE41, W("EnableSSE41"), 1)             // Enable SSE41
 CONFIG_INTEGER(EnableSSE42, W("EnableSSE42"), 1)             // Enable SSE42
 CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1)                 // Enable AVX
 CONFIG_INTEGER(EnableAVX2, W("EnableAVX2"), 1)               // Enable AVX2
+CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), 1)         // Enable AVXVNNI
 CONFIG_INTEGER(EnableFMA, W("EnableFMA"), 1)                 // Enable FMA
 CONFIG_INTEGER(EnableAES, W("EnableAES"), 1)                 // Enable AES
 CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1)               // Enable BMI1
index 4acfd81..4d42530 100644 (file)
@@ -6347,7 +6347,19 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
                                 }
                                 break;
                             }
-
+                            case NI_AVXVNNI_MultiplyWideningAndAdd:
+                            case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
+                            {
+                                if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional))
+                                {
+                                    MakeSrcContained(node, op3);
+                                }
+                                else if (supportsRegOptional)
+                                {
+                                    op3->SetRegOptional();
+                                }
+                                break;
+                            }
                             case NI_BMI2_MultiplyNoFlags:
                             case NI_BMI2_X64_MultiplyNoFlags:
                             {
index 5c76005..1cd8112 100644 (file)
@@ -2373,6 +2373,20 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
                 break;
             }
 
+            case NI_AVXVNNI_MultiplyWideningAndAdd:
+            case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
+            {
+                assert(numArgs == 3);
+
+                tgtPrefUse = BuildUse(op1);
+                srcCount += 1;
+                srcCount += BuildDelayFreeUses(op2, op1);
+                srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1);
+
+                buildUses = false;
+                break;
+            }
+
             case NI_AVX2_GatherVector128:
             case NI_AVX2_GatherVector256:
             {
index 5a1093e..ea4aa13 100644 (file)
@@ -36,6 +36,7 @@ namespace Internal.ReadyToRunConstants
         X86Base=22,
         Dp=23,
         Rdm=24,
+        AvxVnni=25,
 
     }
 }
index 7b816cd..ffc302a 100644 (file)
@@ -86,6 +86,8 @@ namespace Internal.ReadyToRunConstants
                             case InstructionSet.X64_POPCNT_X64: return ReadyToRunInstructionSet.Popcnt;
                             case InstructionSet.X64_Vector128: return null;
                             case InstructionSet.X64_Vector256: return null;
+                            case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni;
+                            case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni;
 
                             default: throw new Exception("Unknown instruction set");
                         }
@@ -129,6 +131,8 @@ namespace Internal.ReadyToRunConstants
                             case InstructionSet.X86_POPCNT_X64: return null;
                             case InstructionSet.X86_Vector128: return null;
                             case InstructionSet.X86_Vector256: return null;
+                            case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni;
+                            case InstructionSet.X86_AVXVNNI_X64: return null;
 
                             default: throw new Exception("Unknown instruction set");
                         }
index 8a8111d..f1ffa0a 100644 (file)
@@ -55,22 +55,24 @@ namespace Internal.JitInterface
         X64_POPCNT=16,
         X64_Vector128=17,
         X64_Vector256=18,
-        X64_X86Base_X64=19,
-        X64_SSE_X64=20,
-        X64_SSE2_X64=21,
-        X64_SSE3_X64=22,
-        X64_SSSE3_X64=23,
-        X64_SSE41_X64=24,
-        X64_SSE42_X64=25,
-        X64_AVX_X64=26,
-        X64_AVX2_X64=27,
-        X64_AES_X64=28,
-        X64_BMI1_X64=29,
-        X64_BMI2_X64=30,
-        X64_FMA_X64=31,
-        X64_LZCNT_X64=32,
-        X64_PCLMULQDQ_X64=33,
-        X64_POPCNT_X64=34,
+        X64_AVXVNNI=19,
+        X64_X86Base_X64=20,
+        X64_SSE_X64=21,
+        X64_SSE2_X64=22,
+        X64_SSE3_X64=23,
+        X64_SSSE3_X64=24,
+        X64_SSE41_X64=25,
+        X64_SSE42_X64=26,
+        X64_AVX_X64=27,
+        X64_AVX2_X64=28,
+        X64_AES_X64=29,
+        X64_BMI1_X64=30,
+        X64_BMI2_X64=31,
+        X64_FMA_X64=32,
+        X64_LZCNT_X64=33,
+        X64_PCLMULQDQ_X64=34,
+        X64_POPCNT_X64=35,
+        X64_AVXVNNI_X64=36,
         X86_X86Base=1,
         X86_SSE=2,
         X86_SSE2=3,
@@ -89,22 +91,24 @@ namespace Internal.JitInterface
         X86_POPCNT=16,
         X86_Vector128=17,
         X86_Vector256=18,
-        X86_X86Base_X64=19,
-        X86_SSE_X64=20,
-        X86_SSE2_X64=21,
-        X86_SSE3_X64=22,
-        X86_SSSE3_X64=23,
-        X86_SSE41_X64=24,
-        X86_SSE42_X64=25,
-        X86_AVX_X64=26,
-        X86_AVX2_X64=27,
-        X86_AES_X64=28,
-        X86_BMI1_X64=29,
-        X86_BMI2_X64=30,
-        X86_FMA_X64=31,
-        X86_LZCNT_X64=32,
-        X86_PCLMULQDQ_X64=33,
-        X86_POPCNT_X64=34,
+        X86_AVXVNNI=19,
+        X86_X86Base_X64=20,
+        X86_SSE_X64=21,
+        X86_SSE2_X64=22,
+        X86_SSE3_X64=23,
+        X86_SSSE3_X64=24,
+        X86_SSE41_X64=25,
+        X86_SSE42_X64=26,
+        X86_AVX_X64=27,
+        X86_AVX2_X64=28,
+        X86_AES_X64=29,
+        X86_BMI1_X64=30,
+        X86_BMI2_X64=31,
+        X86_FMA_X64=32,
+        X86_LZCNT_X64=33,
+        X86_PCLMULQDQ_X64=34,
+        X86_POPCNT_X64=35,
+        X86_AVXVNNI_X64=36,
 
     }
 
@@ -298,6 +302,10 @@ namespace Internal.JitInterface
                         resultflags.AddInstructionSet(InstructionSet.X64_POPCNT_X64);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
                         resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
@@ -449,6 +457,8 @@ namespace Internal.JitInterface
                         resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64))
                         resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
+                    if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64))
+                        resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
                         resultflags.AddInstructionSet(InstructionSet.X64_SSE);
                     if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
@@ -574,6 +584,7 @@ namespace Internal.JitInterface
                     yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X64_POPCNT, true);
                     yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false);
                     yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false);
+                    yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true);
                     break;
 
                 case TargetArchitecture.X86:
@@ -595,6 +606,7 @@ namespace Internal.JitInterface
                     yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X86_POPCNT, true);
                     yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false);
                     yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false);
+                    yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true);
                     break;
 
             }
@@ -657,6 +669,8 @@ namespace Internal.JitInterface
                         AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
                     if (HasInstructionSet(InstructionSet.X64_POPCNT))
                         AddInstructionSet(InstructionSet.X64_POPCNT_X64);
+                    if (HasInstructionSet(InstructionSet.X64_AVXVNNI))
+                        AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
                     break;
 
                 case TargetArchitecture.X86:
@@ -698,6 +712,7 @@ namespace Internal.JitInterface
                     AddInstructionSet(InstructionSet.X64_LZCNT_X64);
                     AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
                     AddInstructionSet(InstructionSet.X64_POPCNT_X64);
+                    AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
                     break;
 
                 case TargetArchitecture.X86:
@@ -717,6 +732,7 @@ namespace Internal.JitInterface
                     AddInstructionSet(InstructionSet.X86_LZCNT_X64);
                     AddInstructionSet(InstructionSet.X86_PCLMULQDQ_X64);
                     AddInstructionSet(InstructionSet.X86_POPCNT_X64);
+                    AddInstructionSet(InstructionSet.X86_AVXVNNI_X64);
                     break;
 
             }
index 6e64e7e..8bfdc9a 100644 (file)
@@ -40,6 +40,7 @@ instructionset     ,X86   ,Pclmulqdq ,        ,14 ,PCLMULQDQ,pclmul
 instructionset     ,X86   ,Popcnt    ,        ,15 ,POPCNT   ,popcnt
 instructionset     ,X86   ,          ,        ,   ,Vector128,
 instructionset     ,X86   ,          ,        ,   ,Vector256,
+instructionset     ,X86   ,AvxVnni   ,        ,25 ,AVXVNNI  ,avxvnni
 
 instructionset64bit,X86   ,X86Base
 instructionset64bit,X86   ,SSE
@@ -57,6 +58,7 @@ instructionset64bit,X86   ,FMA
 instructionset64bit,X86   ,LZCNT
 instructionset64bit,X86   ,PCLMULQDQ
 instructionset64bit,X86   ,POPCNT
+instructionset64bit,X86   ,AVXVNNI
 
 implication        ,X86   ,SSE       ,X86Base
 implication        ,X86   ,SSE2      ,SSE
index 1898a79..592ff2e 100644 (file)
@@ -1308,6 +1308,9 @@ void EEJitManager::SetCpuInfo()
     //   CORJIT_FLAG_USE_AVX2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
     //      CORJIT_FLAG_USE_AVX
     //      AVX2      - EBX bit 5
+    //   CORJIT_FLAG_USE_AVXVNNI if the following feature bit is set (input EAX of 0x07 and input ECX of 1):
+    //      CORJIT_FLAG_USE_AVX2
+    //      AVXVNNI   - EAX bit 4
     //   CORJIT_FLAG_USE_AVX_512 is not currently set, but defined so that it can be used in future without
     //   CORJIT_FLAG_USE_BMI1 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
     //      BMI1 - EBX bit 3
@@ -1385,6 +1388,12 @@ void EEJitManager::SetCpuInfo()
                                         if ((cpuidInfo[EBX] & (1 << 5)) != 0)                               // AVX2
                                         {
                                             CPUCompileFlags.Set(InstructionSet_AVX2);
+
+                                            __cpuidex(cpuidInfo, 0x00000007, 0x00000001);
+                                            if ((cpuidInfo[EAX] & (1 << 4)) != 0)                           // AVX-VNNI
+                                            {
+                                                CPUCompileFlags.Set(InstructionSet_AVXVNNI);
+                                            }
                                         }
                                     }
                                 }
index df1020f..59d0783 100644 (file)
     <type fullname="System.Runtime.Intrinsics.X86.Avx2/X64">
       <method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
     </type>
+    <type fullname="System.Runtime.Intrinsics.X86.AvxVnni">
+      <method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
+    </type>
+    <type fullname="System.Runtime.Intrinsics.X86.AvxVnni/X64">
+      <method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
+    </type>
     <type fullname="System.Runtime.Intrinsics.X86.Bmi1">
       <method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
     </type>
index bbbd804..c899b63 100644 (file)
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Aes.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx2.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\AvxVnni.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Bmi1.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Bmi2.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Fma.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Aes.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx2.PlatformNotSupported.cs" />
+    <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\AvxVnni.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Bmi1.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Bmi2.PlatformNotSupported.cs" />
     <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Fma.PlatformNotSupported.cs" />
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.PlatformNotSupported.cs
new file mode 100644 (file)
index 0000000..2edfd97
--- /dev/null
@@ -0,0 +1,72 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Versioning;
+
+namespace System.Runtime.Intrinsics.X86
+{
+    [CLSCompliant(false)]
+    [RequiresPreviewFeatures]
+    public abstract class AvxVnni : Avx2
+    {
+        internal AvxVnni() { }
+
+        public static new bool IsSupported { [Intrinsic] get { return false; } }
+
+        public new abstract class X64 : Avx2.X64
+        {
+            internal X64() { }
+
+            public static new bool IsSupported { [Intrinsic] get { return false; } }
+        }
+
+        /// <summary>
+        /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b)
+        /// VPDPBUSD xmm, xmm, xmm/m128
+        /// </summary>
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<byte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b)
+        /// VPDPWSSD xmm, xmm, xmm/m128
+        /// </summary>
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b)
+        /// VPDPBUSD ymm, ymm, ymm/m256
+        /// </summary>
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<byte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b)
+        /// VPDPWSSD ymm, ymm, ymm/m256
+        /// </summary>
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b)
+        /// VPDPBUSDS xmm, xmm, xmm/m128
+        /// </summary>
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<byte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b)
+        /// VPDPWSSDS xmm, xmm, xmm/m128
+        /// </summary>
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b)
+        /// VPDPBUSDS ymm, ymm, ymm/m256
+        /// </summary>
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<byte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
+
+        /// <summary>
+        /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b)
+        /// VPDPWSSDS ymm, ymm, ymm/m256
+        /// </summary>
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
+    }
+}
diff --git a/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs b/src/libraries/System.Private.CoreLib/src/System/Runtime/Intrinsics/X86/AvxVnni.cs
new file mode 100644 (file)
index 0000000..d6bb750
--- /dev/null
@@ -0,0 +1,74 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Versioning;
+
+namespace System.Runtime.Intrinsics.X86
+{
+    [Intrinsic]
+    [CLSCompliant(false)]
+    [RequiresPreviewFeatures]
+    public abstract class AvxVnni : Avx2
+    {
+        internal AvxVnni() { }
+
+        public static new bool IsSupported { get => IsSupported; }
+
+        [Intrinsic]
+        public new abstract class X64 : Avx2.X64
+        {
+            internal X64() { }
+
+            public static new bool IsSupported { get => IsSupported; }
+        }
+
+        /// <summary>
+        /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b)
+        /// VPDPBUSD xmm, xmm, xmm/m128
+        /// </summary>
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<byte> left, Vector128<sbyte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        /// <summary>
+        /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b)
+        /// VPDPWSSD xmm, xmm, xmm/m128
+        /// </summary>
+        public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<short> left, Vector128<short> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        /// <summary>
+        /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b)
+        /// VPDPBUSD ymm, ymm, ymm/m256
+        /// </summary>
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<byte> left, Vector256<sbyte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        /// <summary>
+        /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b)
+        /// VPDPWSSD ymm, ymm, ymm/m256
+        /// </summary>
+        public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<short> left, Vector256<short> right) => MultiplyWideningAndAdd(addend, left, right);
+
+        /// <summary>
+        /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b)
+        /// VPDPBUSDS xmm, xmm, xmm/m128
+        /// </summary>
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<byte> left, Vector128<sbyte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        /// <summary>
+        /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b)
+        /// VPDPWSSDS xmm, xmm, xmm/m128
+        /// </summary>
+        public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<short> left, Vector128<short> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        /// <summary>
+        /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b)
+        /// VPDPBUSDS ymm, ymm, ymm/m256
+        /// </summary>
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<byte> left, Vector256<sbyte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+        /// <summary>
+        /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b)
+        /// VPDPWSSDS ymm, ymm, ymm/m256
+        /// </summary>
+        public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<short> left, Vector256<short> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+    }
+}
index 6cf453f..6ad7fc3 100644 (file)
@@ -3,6 +3,7 @@
 // ------------------------------------------------------------------------------
 // Changes to this file must follow the https://aka.ms/api-review process.
 // ------------------------------------------------------------------------------
+using System.Runtime.Versioning;
 
 namespace System.Runtime.Intrinsics
 {
@@ -3368,6 +3369,28 @@ namespace System.Runtime.Intrinsics.X86
             public static new bool IsSupported { get { throw null; } }
         }
     }
+
+    [System.CLSCompliantAttribute(false)]
+    [RequiresPreviewFeatures]
+    public abstract class AvxVnni : System.Runtime.Intrinsics.X86.Avx2
+    {
+        internal AvxVnni() { }
+        public static new bool IsSupported { get { throw null; } }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<sbyte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<byte> left, System.Runtime.Intrinsics.Vector256<sbyte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<short> left, System.Runtime.Intrinsics.Vector256<short> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<sbyte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<byte> left, System.Runtime.Intrinsics.Vector256<sbyte> right) { throw null; }
+        public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<short> left, System.Runtime.Intrinsics.Vector256<short> right) { throw null; }
+        public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64
+        {
+            internal X64() { }
+            public static new bool IsSupported { get { throw null; } }
+        }
+    }
+
     [System.CLSCompliantAttribute(false)]
     public abstract partial class Bmi1 : System.Runtime.Intrinsics.X86.X86Base
     {
index 53df6cb..40df691 100644 (file)
@@ -2082,6 +2082,7 @@ static const IntrinGroup supported_x86_intrinsics [] = {
        { "Aes", MONO_CPU_X86_AES, aes_methods, sizeof (aes_methods) },
        { "Avx", MONO_CPU_X86_AVX, unsupported, sizeof (unsupported) },
        { "Avx2", MONO_CPU_X86_AVX2, unsupported, sizeof (unsupported) },
+       { "AvxVnni", 0, unsupported, sizeof (unsupported) },
        { "Bmi1", MONO_CPU_X86_BMI1, bmi1_methods, sizeof (bmi1_methods) },
        { "Bmi2", MONO_CPU_X86_BMI2, bmi2_methods, sizeof (bmi2_methods) },
        { "Fma", MONO_CPU_X86_FMA, unsupported, sizeof (unsupported) },
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Byte.cs
new file mode 100644 (file)
index 0000000..0a38e01
--- /dev/null
@@ -0,0 +1,501 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MultiplyWideningAndAddByte()
+        {
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                else
+                {
+                    Console.WriteLine("Avx Is Not Supported");
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();  //TODO: this one does not work. Fix it.
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                Console.WriteLine("Test Is Not Supported");
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddByte
+    {
+        private struct DataTable
+        {
+            private byte[] inArray0;
+            private byte[] inArray1;
+            private byte[] inArray2;
+            private byte[] outArray;
+
+            private GCHandle inHandle0;
+            private GCHandle inHandle1;
+            private GCHandle inHandle2;
+            private GCHandle outHandle;
+
+            private ulong alignment;
+
+            public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment)
+            {
+                int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+                int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Byte>();
+                int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<SByte>();
+                int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+                if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)        
+                {
+                    throw new ArgumentException("Invalid value of alighment");
+                }
+
+                this.inArray0 = new byte[alignment * 2];
+                this.inArray1 = new byte[alignment * 2];
+                this.inArray2 = new byte[alignment * 2];
+                this.outArray = new byte[alignment * 2];
+
+                this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+                this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+                this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Byte, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<SByte, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+            }
+
+            public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle0.Free();
+                inHandle1.Free();
+                inHandle2.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+            {
+                return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+            }
+        }
+        private struct TestStruct
+        {
+            public Vector256<Int32> _fld0;
+            public Vector256<Byte> _fld1;
+            public Vector256<SByte> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref testStruct._fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref testStruct._fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddByte testClass)
+            {
+                var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Byte>>() / sizeof(Byte);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<SByte>>() / sizeof(SByte);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data0 = new Int32[Op0ElementCount];
+        private static Byte[] _data1 = new Byte[Op1ElementCount];
+        private static SByte[] _data2 = new SByte[Op2ElementCount];
+
+        private static Vector256<Int32> _clsVar0;
+        private static Vector256<Byte> _clsVar1;
+        private static Vector256<SByte> _clsVar2;
+
+        private Vector256<Int32> _fld0;
+        private Vector256<Byte> _fld1;
+        private Vector256<SByte> _fld2;
+
+        private DataTable _dataTable;
+
+        static SimpleTernaryOpTest__MultiplyWideningAndAddByte()
+        {
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref _clsVar1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref _clsVar2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+        }
+
+        public SimpleTernaryOpTest__MultiplyWideningAndAddByte()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref _fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref _fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); }
+            _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => AvxVnni.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+                Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)));
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+            
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+                                        Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                _clsVar0,
+                _clsVar1,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var first = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr);
+            var second = Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr);
+            var third = Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr);
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte();
+            var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult(Vector256<Int32> addend, Vector256<Byte> left, Vector256<SByte> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Byte[] inArray1 = new Byte[Op1ElementCount];
+            SByte[] inArray2 = new SByte[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Byte[] inArray1 = new Byte[Op1ElementCount];
+            SByte[] inArray2 = new SByte[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            Int32[] outArray = new Int32[RetElementCount];
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                outArray[i] = Math.Clamp((addend[i] + (right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2])
+                                                    + (right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4])), int.MinValue, int.MaxValue);
+            }
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if (result[i] != outArray[i])
+                {
+                    succeeded = false;
+                    break;
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}<Int32>(Vector256<Int32>, Vector256<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"  addend: ({string.Join(", ", addend)})");
+                TestLibrary.TestFramework.LogInformation($"  left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"  right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation($"  valid: ({string.Join(", ", outArray)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd.Int16.cs
new file mode 100644 (file)
index 0000000..4907c18
--- /dev/null
@@ -0,0 +1,500 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MultiplyWideningAndAddInt16()
+        {
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                else
+                {
+                    Console.WriteLine("Avx Is Not Supported");
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();  //TODO: this one does not work. Fix it.
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                Console.WriteLine("Test Is Not Supported");
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddInt16
+    {
+        private struct DataTable
+        {
+            private byte[] inArray0;
+            private byte[] inArray1;
+            private byte[] inArray2;
+            private byte[] outArray;
+
+            private GCHandle inHandle0;
+            private GCHandle inHandle1;
+            private GCHandle inHandle2;
+            private GCHandle outHandle;
+
+            private ulong alignment;
+
+            public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment)
+            {
+                int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+                int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Int16>();
+                int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<Int16>();
+                int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+                if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)        
+                {
+                    throw new ArgumentException("Invalid value of alignment");
+                }
+
+                this.inArray0 = new byte[alignment * 2];
+                this.inArray1 = new byte[alignment * 2];
+                this.inArray2 = new byte[alignment * 2];
+                this.outArray = new byte[alignment * 2];
+
+                this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+                this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+                this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Int16, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<Int16, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+            }
+
+            public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle0.Free();
+                inHandle1.Free();
+                inHandle2.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+            {
+                return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+            }
+        }
+        private struct TestStruct
+        {
+            public Vector256<Int32> _fld0;
+            public Vector256<Int16> _fld1;
+            public Vector256<Int16> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref testStruct._fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref testStruct._fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddInt16 testClass)
+            {
+                var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Int16>>() / sizeof(Int16);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Int16>>() / sizeof(Int16);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data0 = new Int32[Op0ElementCount];
+        private static Int16[] _data1 = new Int16[Op1ElementCount];
+        private static Int16[] _data2 = new Int16[Op2ElementCount];
+
+        private static Vector256<Int32> _clsVar0;
+        private static Vector256<Int16> _clsVar1;
+        private static Vector256<Int16> _clsVar2;
+
+        private Vector256<Int32> _fld0;
+        private Vector256<Int16> _fld1;
+        private Vector256<Int16> _fld2;
+
+        private DataTable _dataTable;
+
+        static SimpleTernaryOpTest__MultiplyWideningAndAddInt16()
+        {
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _clsVar1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _clsVar2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+        }
+
+        public SimpleTernaryOpTest__MultiplyWideningAndAddInt16()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); }
+            _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => AvxVnni.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+                Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)));
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+            
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+                                        Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                _clsVar0,
+                _clsVar1,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var first = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr);
+            var second = Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr);
+            var third = Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr);
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16();
+            var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult(Vector256<Int32> addend, Vector256<Int16> left, Vector256<Int16> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Int16[] inArray1 = new Int16[Op1ElementCount];
+            Int16[] inArray2 = new Int16[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Int16[] inArray1 = new Int16[Op1ElementCount];
+            Int16[] inArray2 = new Int16[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            Int32[] outArray = new Int32[RetElementCount];
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                outArray[i] = Math.Clamp((addend[i] + (right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2])), int.MinValue, int.MaxValue);
+            }
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if (result[i] != outArray[i])
+                {
+                    succeeded = false;
+                    break;
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}<Int32>(Vector256<Int32>, Vector256<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"  addend: ({string.Join(", ", addend)})");
+                TestLibrary.TestFramework.LogInformation($"  left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"  right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation($"  valid: ({string.Join(", ", outArray)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Byte.cs
new file mode 100644 (file)
index 0000000..2e22107
--- /dev/null
@@ -0,0 +1,504 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MultiplyWideningAndAddSaturateByte()
+        {
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                else
+                {
+                    Console.WriteLine("Avx Is Not Supported");
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                Console.WriteLine("Test Is Not Supported");
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte
+    {
+        private struct DataTable
+        {
+            private byte[] inArray0;
+            private byte[] inArray1;
+            private byte[] inArray2;
+            private byte[] outArray;
+
+            private GCHandle inHandle0;
+            private GCHandle inHandle1;
+            private GCHandle inHandle2;
+            private GCHandle outHandle;
+
+            private ulong alignment;
+
+            public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment)
+            {
+                int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+                int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Byte>();
+                int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<SByte>();
+                int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+                if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)        
+                {
+                    throw new ArgumentException("Invalid value of alighment");
+                }
+
+                this.inArray0 = new byte[alignment * 2];
+                this.inArray1 = new byte[alignment * 2];
+                this.inArray2 = new byte[alignment * 2];
+                this.outArray = new byte[alignment * 2];
+
+                this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+                this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+                this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Byte, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<SByte, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+            }
+
+            public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle0.Free();
+                inHandle1.Free();
+                inHandle2.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+            {
+                return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+            }
+        }
+        private struct TestStruct
+        {
+            public Vector256<Int32> _fld0;
+            public Vector256<Byte> _fld1;
+            public Vector256<SByte> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref testStruct._fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref testStruct._fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte testClass)
+            {
+                var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Byte>>() / sizeof(Byte);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<SByte>>() / sizeof(SByte);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data0 = new Int32[Op0ElementCount];
+        private static Byte[] _data1 = new Byte[Op1ElementCount];
+        private static SByte[] _data2 = new SByte[Op2ElementCount];
+
+        private static Vector256<Int32> _clsVar0;
+        private static Vector256<Byte> _clsVar1;
+        private static Vector256<SByte> _clsVar2;
+
+        private Vector256<Int32> _fld0;
+        private Vector256<Byte> _fld1;
+        private Vector256<SByte> _fld2;
+
+        private DataTable _dataTable;
+
+        static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte()
+        {
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref _clsVar1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref _clsVar2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+        }
+
+        public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref _fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref _fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); }
+            _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => AvxVnni.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+                Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)));
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+            
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+                                        Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                _clsVar0,
+                _clsVar1,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var first = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr);
+            var second = Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr);
+            var third = Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr);
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte();
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult(Vector256<Int32> addend, Vector256<Byte> left, Vector256<SByte> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Byte[] inArray1 = new Byte[Op1ElementCount];
+            SByte[] inArray2 = new SByte[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Byte[] inArray1 = new Byte[Op1ElementCount];
+            SByte[] inArray2 = new SByte[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            Int32[] outArray = new Int32[RetElementCount];
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                int addend2 = right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2] + right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4];
+                int value = addend[i] + addend2;
+                int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value;
+                int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp;
+                outArray[i] = c;
+            }
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if (result[i] != outArray[i])
+                {
+                    succeeded = false;
+                    break;
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}<Int32>(Vector256<Int32>, Vector256<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"  addend: ({string.Join(", ", addend)})");
+                TestLibrary.TestFramework.LogInformation($"  left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"  right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation($"  valid: ({string.Join(", ", outArray)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAddSaturate.Int16.cs
new file mode 100644 (file)
index 0000000..3c755d8
--- /dev/null
@@ -0,0 +1,505 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.ComponentModel;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MultiplyWideningAndAddSaturateInt16()
+        {
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                else
+                {
+                    Console.WriteLine("Avx Is Not Supported");
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();  //TODO: this one does not work. Fix it.
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                Console.WriteLine("Test Is Not Supported");
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16
+    {
+        private struct DataTable
+        {
+            private byte[] inArray0;
+            private byte[] inArray1;
+            private byte[] inArray2;
+            private byte[] outArray;
+
+            private GCHandle inHandle0;
+            private GCHandle inHandle1;
+            private GCHandle inHandle2;
+            private GCHandle outHandle;
+
+            private ulong alignment;
+
+            public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment)
+            {
+                int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+                int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Int16>();
+                int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<Int16>();
+                int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+                if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)        
+                {
+                    throw new ArgumentException("Invalid value of alignment");
+                }
+
+                this.inArray0 = new byte[alignment * 2];
+                this.inArray1 = new byte[alignment * 2];
+                this.inArray2 = new byte[alignment * 2];
+                this.outArray = new byte[alignment * 2];
+
+                this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+                this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+                this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Int16, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<Int16, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+            }
+
+            public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle0.Free();
+                inHandle1.Free();
+                inHandle2.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+            {
+                return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+            }
+        }
+        private struct TestStruct
+        {
+            public Vector256<Int32> _fld0;
+            public Vector256<Int16> _fld1;
+            public Vector256<Int16> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref testStruct._fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref testStruct._fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 testClass)
+            {
+                var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Int16>>() / sizeof(Int16);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Int16>>() / sizeof(Int16);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data0 = new Int32[Op0ElementCount];
+        private static Int16[] _data1 = new Int16[Op1ElementCount];
+        private static Int16[] _data2 = new Int16[Op2ElementCount];
+
+        private static Vector256<Int32> _clsVar0;
+        private static Vector256<Int16> _clsVar1;
+        private static Vector256<Int16> _clsVar2;
+
+        private Vector256<Int32> _fld0;
+        private Vector256<Int16> _fld1;
+        private Vector256<Int16> _fld2;
+
+        private DataTable _dataTable;
+
+        static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16()
+        {
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _clsVar1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _clsVar2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+        }
+
+        public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); }
+            _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => AvxVnni.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+                Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)));
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+            
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+                                        Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                _clsVar0,
+                _clsVar1,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var first = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr);
+            var second = Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr);
+            var third = Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr);
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16();
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult(Vector256<Int32> addend, Vector256<Int16> left, Vector256<Int16> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Int16[] inArray1 = new Int16[Op1ElementCount];
+            Int16[] inArray2 = new Int16[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Int16[] inArray1 = new Int16[Op1ElementCount];
+            Int16[] inArray2 = new Int16[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            Int32[] outArray = new Int32[RetElementCount];
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                int addend2 = right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2];
+                int value = addend[i] + addend2;
+                int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value;
+                int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue: tmp;
+                outArray[i] = c;
+            }
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if (result[i] != outArray[i])
+                {
+                    succeeded = false;
+                    break;
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}<Int32>(Vector256<Int32>, Vector256<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"  addend: ({string.Join(", ", addend)})");
+                TestLibrary.TestFramework.LogInformation($"  left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"  right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation($"  valid: ({string.Join(", ", outArray)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_r.csproj
new file mode 100644 (file)
index 0000000..721cd42
--- /dev/null
@@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <!-- It takes a long time to complete (on a non-AVX machine) -->
+    <UnloadabilityIncompatible>true</UnloadabilityIncompatible>
+    <!-- https://github.com/dotnet/runtime/issues/12392 -->
+    <GCStressIncompatible>true</GCStressIncompatible>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="MultiplyWideningAndAdd.Byte.cs" />
+    <Compile Include="MultiplyWideningAndAdd.Int16.cs" />
+    <Compile Include="MultiplyWideningAndAddSaturate.Byte.cs" />
+    <Compile Include="MultiplyWideningAndAddSaturate.Int16.cs" />
+    <Compile Include="Program.AvxVnni.cs" />
+    <Compile Include="..\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/MultiplyWideningAndAdd_ro.csproj
new file mode 100644 (file)
index 0000000..58b4945
--- /dev/null
@@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <!-- It takes a long time to complete (on a non-AVX machine) -->
+    <UnloadabilityIncompatible>true</UnloadabilityIncompatible>
+    <!-- https://github.com/dotnet/runtime/issues/12392 -->
+    <GCStressIncompatible>true</GCStressIncompatible>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize>
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="MultiplyWideningAndAdd.Byte.cs" />
+    <Compile Include="MultiplyWideningAndAdd.Int16.cs" />
+    <Compile Include="MultiplyWideningAndAddSaturate.Byte.cs" />
+    <Compile Include="MultiplyWideningAndAddSaturate.Int16.cs" />
+    <Compile Include="Program.AvxVnni.cs" />
+    <Compile Include="..\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni/Program.AvxVnni.cs
new file mode 100644 (file)
index 0000000..ff68d04
--- /dev/null
@@ -0,0 +1,21 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        static Program()
+        {
+            TestList = new Dictionary<string, Action>() {
+                ["MultiplyWideningAndAdd.Byte"] = MultiplyWideningAndAddByte,
+                ["MultiplyWideningAndAdd.Int16"] = MultiplyWideningAndAddInt16,
+                ["MultiplyWideningAndAddSaturate.Byte"] = MultiplyWideningAndAddSaturateByte,
+                ["MultiplyWideningAndAddSaturate.Int16"] = MultiplyWideningAndAddSaturateInt16,
+            };
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Byte.cs
new file mode 100644 (file)
index 0000000..fe05ed0
--- /dev/null
@@ -0,0 +1,515 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MultiplyWideningAndAddByte()
+        {
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                else
+                {
+                    Console.WriteLine("Avx Is Not Supported");
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();  //TODO: this one does not work. Fix it.
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                Console.WriteLine("Test Is Not Supported");
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddByte
+    {
+        private struct DataTable
+        {
+            private byte[] inArray0;
+            private byte[] inArray1;
+            private byte[] inArray2;
+            private byte[] outArray;
+
+            private GCHandle inHandle0;
+            private GCHandle inHandle1;
+            private GCHandle inHandle2;
+            private GCHandle outHandle;
+
+            private ulong alignment;
+
+            public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment)
+            {
+                int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+                int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Byte>();
+                int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<SByte>();
+                int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+                if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)        
+                {
+                    throw new ArgumentException("Invalid value of alighment");
+                }
+
+                this.inArray0 = new byte[alignment * 2];
+                this.inArray1 = new byte[alignment * 2];
+                this.inArray2 = new byte[alignment * 2];
+                this.outArray = new byte[alignment * 2];
+
+                this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+                this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+                this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Byte, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<SByte, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+            }
+
+            public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle0.Free();
+                inHandle1.Free();
+                inHandle2.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+            {
+                return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+            }
+        }
+        private struct TestStruct
+        {
+            public Vector128<Int32> _fld0;
+            public Vector128<Byte> _fld1;
+            public Vector128<SByte> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref testStruct._fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref testStruct._fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddByte testClass)
+            {
+                var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Byte>>() / sizeof(Byte);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<SByte>>() / sizeof(SByte);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data0 = new Int32[Op0ElementCount];
+        private static Byte[] _data1 = new Byte[Op1ElementCount];
+        private static SByte[] _data2 = new SByte[Op2ElementCount];
+
+        private static Vector128<Int32> _clsVar0;
+        private static Vector128<Byte> _clsVar1;
+        private static Vector128<SByte> _clsVar2;
+
+        private Vector128<Int32> _fld0;
+        private Vector128<Byte> _fld1;
+        private Vector128<SByte> _fld2;
+
+        private DataTable _dataTable;
+
+        static SimpleTernaryOpTest__MultiplyWideningAndAddByte()
+        {
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _clsVar1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _clsVar2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+        }
+
+        public SimpleTernaryOpTest__MultiplyWideningAndAddByte()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); }
+            _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => AvxVnni.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+                Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result1 = AvxVnni.MultiplyWideningAndAdd(
+                Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result1);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+                                        Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                _clsVar0,
+                _clsVar1,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var first = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr);
+            var second = Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr);
+            var third = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte();
+            var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult(Vector128<Int32> addend, Vector128<Byte> left, Vector128<SByte> right, Vector128<Int32> result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Byte[] inArray1 = new Byte[Op1ElementCount];
+            SByte[] inArray2 = new SByte[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), result);
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+        private void ValidateResult(Vector128<Int32> addend, Vector128<Byte> left, Vector128<SByte> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Byte[] inArray1 = new Byte[Op1ElementCount];
+            SByte[] inArray2 = new SByte[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Byte[] inArray1 = new Byte[Op1ElementCount];
+            SByte[] inArray2 = new SByte[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            Int32[] outArray = new Int32[RetElementCount];
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                outArray[i] = Math.Clamp((addend[i] + (right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2])
+                                                    + (right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4])), int.MinValue, int.MaxValue);
+            }
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if (result[i] != outArray[i])
+                {
+                    succeeded = false;
+                    break;
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}<Int32>(Vector128<Int32>, Vector128<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"  addend: ({string.Join(", ", addend)})");
+                TestLibrary.TestFramework.LogInformation($"  left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"  right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation($"  valid: ({string.Join(", ", outArray)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd.Int16.cs
new file mode 100644 (file)
index 0000000..8adf4e5
--- /dev/null
@@ -0,0 +1,500 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MultiplyWideningAndAddInt16()
+        {
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                else
+                {
+                    Console.WriteLine("Avx Is Not Supported");
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                Console.WriteLine("Test Is Not Supported");
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddInt16
+    {
+        private struct DataTable
+        {
+            private byte[] inArray0;
+            private byte[] inArray1;
+            private byte[] inArray2;
+            private byte[] outArray;
+
+            private GCHandle inHandle0;
+            private GCHandle inHandle1;
+            private GCHandle inHandle2;
+            private GCHandle outHandle;
+
+            private ulong alignment;
+
+            public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment)
+            {
+                int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+                int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Int16>();
+                int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<Int16>();
+                int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+                if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)        
+                {
+                    throw new ArgumentException("Invalid value of alighment");
+                }
+
+                this.inArray0 = new byte[alignment * 2];
+                this.inArray1 = new byte[alignment * 2];
+                this.inArray2 = new byte[alignment * 2];
+                this.outArray = new byte[alignment * 2];
+
+                this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+                this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+                this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Int16, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<Int16, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+            }
+
+            public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle0.Free();
+                inHandle1.Free();
+                inHandle2.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+            {
+                return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+            }
+        }
+        private struct TestStruct
+        {
+            public Vector128<Int32> _fld0;
+            public Vector128<Int16> _fld1;
+            public Vector128<Int16> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref testStruct._fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref testStruct._fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddInt16 testClass)
+            {
+                var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Int16>>() / sizeof(Int16);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Int16>>() / sizeof(Int16);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data0 = new Int32[Op0ElementCount];
+        private static Int16[] _data1 = new Int16[Op1ElementCount];
+        private static Int16[] _data2 = new Int16[Op2ElementCount];
+
+        private static Vector128<Int32> _clsVar0;
+        private static Vector128<Int16> _clsVar1;
+        private static Vector128<Int16> _clsVar2;
+
+        private Vector128<Int32> _fld0;
+        private Vector128<Int16> _fld1;
+        private Vector128<Int16> _fld2;
+
+        private DataTable _dataTable;
+
+        static SimpleTernaryOpTest__MultiplyWideningAndAddInt16()
+        {
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _clsVar1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _clsVar2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+        }
+
+        public SimpleTernaryOpTest__MultiplyWideningAndAddInt16()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); }
+            _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => AvxVnni.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+                Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr))
+            );
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+            
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+                                        Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(
+                _clsVar0,
+                _clsVar1,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var first = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr);
+            var second = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr);
+            var third = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr);
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16();
+            var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult(Vector128<Int32> addend, Vector128<Int16> left, Vector128<Int16> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Int16[] inArray1 = new Int16[Op1ElementCount];
+            Int16[] inArray2 = new Int16[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Int16[] inArray1 = new Int16[Op1ElementCount];
+            Int16[] inArray2 = new Int16[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            Int32[] outArray = new Int32[RetElementCount];
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                outArray[i] = Math.Clamp((addend[i] + (right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2])), int.MinValue, int.MaxValue);
+            }
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if (result[i] != outArray[i])
+                {
+                    succeeded = false;
+                    break;
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}<Int32>(Vector128<Int32>, Vector128<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"  addend: ({string.Join(", ", addend)})");
+                TestLibrary.TestFramework.LogInformation($"  left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"  right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation($"  valid: ({string.Join(", ", outArray)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Byte.cs
new file mode 100644 (file)
index 0000000..6b003ef
--- /dev/null
@@ -0,0 +1,503 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MultiplyWideningAndAddSaturateByte()
+        {
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                else
+                {
+                    Console.WriteLine("Avx Is Not Supported");
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();  //TODO: this one does not work. Fix it.
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                Console.WriteLine("Test Is Not Supported");
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte
+    {
+        private struct DataTable
+        {
+            private byte[] inArray0;
+            private byte[] inArray1;
+            private byte[] inArray2;
+            private byte[] outArray;
+
+            private GCHandle inHandle0;
+            private GCHandle inHandle1;
+            private GCHandle inHandle2;
+            private GCHandle outHandle;
+
+            private ulong alignment;
+
+            public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment)
+            {
+                int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+                int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Byte>();
+                int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<SByte>();
+                int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+                if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)        
+                {
+                    throw new ArgumentException("Invalid value of alighment");
+                }
+
+                this.inArray0 = new byte[alignment * 2];
+                this.inArray1 = new byte[alignment * 2];
+                this.inArray2 = new byte[alignment * 2];
+                this.outArray = new byte[alignment * 2];
+
+                this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+                this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+                this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Byte, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<SByte, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+            }
+
+            public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle0.Free();
+                inHandle1.Free();
+                inHandle2.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+            {
+                return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+            }
+        }
+        private struct TestStruct
+        {
+            public Vector128<Int32> _fld0;
+            public Vector128<Byte> _fld1;
+            public Vector128<SByte> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref testStruct._fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref testStruct._fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte testClass)
+            {
+                var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Byte>>() / sizeof(Byte);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<SByte>>() / sizeof(SByte);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data0 = new Int32[Op0ElementCount];
+        private static Byte[] _data1 = new Byte[Op1ElementCount];
+        private static SByte[] _data2 = new SByte[Op2ElementCount];
+
+        private static Vector128<Int32> _clsVar0;
+        private static Vector128<Byte> _clsVar1;
+        private static Vector128<SByte> _clsVar2;
+
+        private Vector128<Int32> _fld0;
+        private Vector128<Byte> _fld1;
+        private Vector128<SByte> _fld2;
+
+        private DataTable _dataTable;
+
+        static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte()
+        {
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _clsVar1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _clsVar2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+        }
+
+        public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); }
+            _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => AvxVnni.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+                Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+                                        Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                _clsVar0,
+                _clsVar1,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var first = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr);
+            var second = Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr);
+            var third = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte();
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult(Vector128<Int32> addend, Vector128<Byte> left, Vector128<SByte> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Byte[] inArray1 = new Byte[Op1ElementCount];
+            SByte[] inArray2 = new SByte[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Byte[] inArray1 = new Byte[Op1ElementCount];
+            SByte[] inArray2 = new SByte[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            Int32[] outArray = new Int32[RetElementCount];
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                int addend2 = right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2] + right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4];
+                int value = addend[i] + addend2;
+                int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value;
+                int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp;
+                outArray[i] = c;
+            }
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if (result[i] != outArray[i])
+                {
+                    succeeded = false;
+                    break;
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}<Int32>(Vector128<Int32>, Vector128<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"  addend: ({string.Join(", ", addend)})");
+                TestLibrary.TestFramework.LogInformation($"  left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"  right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation($"  valid: ({string.Join(", ", outArray)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAddSaturate.Int16.cs
new file mode 100644 (file)
index 0000000..a96951f
--- /dev/null
@@ -0,0 +1,503 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        private static void MultiplyWideningAndAddSaturateInt16()
+        {
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16();
+
+            if (test.IsSupported)
+            {
+                // Validates basic functionality works, using Unsafe.Read
+                test.RunBasicScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates basic functionality works, using Load
+                    test.RunBasicScenario_Load();
+
+                    // Validates basic functionality works, using LoadAligned
+                    test.RunBasicScenario_LoadAligned();
+                }
+
+                else
+                {
+                    Console.WriteLine("Avx Is Not Supported");
+                }
+
+                // Validates calling via reflection works, using Unsafe.Read
+                test.RunReflectionScenario_UnsafeRead();  //TODO: this one does not work. Fix it.
+
+                if (Avx.IsSupported)
+                {
+                    // Validates calling via reflection works, using Load
+                    test.RunReflectionScenario_Load();
+
+                    // Validates calling via reflection works, using LoadAligned
+                    test.RunReflectionScenario_LoadAligned();
+                }
+
+                // Validates passing a static member works
+                test.RunClsVarScenario();
+
+                // Validates passing a local works, using Unsafe.Read
+                test.RunLclVarScenario_UnsafeRead();
+
+                if (Avx.IsSupported)
+                {
+                    // Validates passing a local works, using Load
+                    test.RunLclVarScenario_Load();
+
+                    // Validates passing a local works, using LoadAligned
+                    test.RunLclVarScenario_LoadAligned();
+                }
+
+                // Validates passing the field of a local class works
+                test.RunClassLclFldScenario();
+
+                // Validates passing an instance member of a class works
+                test.RunClassFldScenario();
+
+                // Validates passing the field of a local struct works
+                test.RunStructLclFldScenario();
+
+                // Validates passing an instance member of a struct works
+                test.RunStructFldScenario();
+            }
+            else
+            {
+                Console.WriteLine("Test Is Not Supported");
+                // Validates we throw on unsupported hardware
+                test.RunUnsupportedScenario();
+            }
+
+            if (!test.Succeeded)
+            {
+                throw new Exception("One or more scenarios did not complete as expected.");
+            }
+        }
+    }
+
+    public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16
+    {
+        private struct DataTable
+        {
+            private byte[] inArray0;
+            private byte[] inArray1;
+            private byte[] inArray2;
+            private byte[] outArray;
+
+            private GCHandle inHandle0;
+            private GCHandle inHandle1;
+            private GCHandle inHandle2;
+            private GCHandle outHandle;
+
+            private ulong alignment;
+
+            public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment)
+            {
+                int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+                int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Int16>();
+                int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<Int16>();
+                int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+                if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)        
+                {
+                    throw new ArgumentException("Invalid value of alighment");
+                }
+
+                this.inArray0 = new byte[alignment * 2];
+                this.inArray1 = new byte[alignment * 2];
+                this.inArray2 = new byte[alignment * 2];
+                this.outArray = new byte[alignment * 2];
+
+                this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+                this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+                this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+                this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+                this.alignment = (ulong)alignment;
+
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Int16, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+                Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<Int16, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+            }
+
+            public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+            public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+            public void Dispose()
+            {
+                inHandle0.Free();
+                inHandle1.Free();
+                inHandle2.Free();
+                outHandle.Free();
+            }
+
+            private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+            {
+                return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+            }
+        }
+        private struct TestStruct
+        {
+            public Vector128<Int32> _fld0;
+            public Vector128<Int16> _fld1;
+            public Vector128<Int16> _fld2;
+
+            public static TestStruct Create()
+            {
+                var testStruct = new TestStruct();
+
+                for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+                for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref testStruct._fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+                for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+                Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref testStruct._fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+
+                return testStruct;
+            }
+
+            public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 testClass)
+            {
+                var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+                Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+                testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+            }
+        }
+
+        private static readonly int LargestVectorSize = 32;
+
+        private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+        private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Int16>>() / sizeof(Int16);
+        private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Int16>>() / sizeof(Int16);
+        private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+        private static Int32[] _data0 = new Int32[Op0ElementCount];
+        private static Int16[] _data1 = new Int16[Op1ElementCount];
+        private static Int16[] _data2 = new Int16[Op2ElementCount];
+
+        private static Vector128<Int32> _clsVar0;
+        private static Vector128<Int16> _clsVar1;
+        private static Vector128<Int16> _clsVar2;
+
+        private Vector128<Int32> _fld0;
+        private Vector128<Int16> _fld1;
+        private Vector128<Int16> _fld2;
+
+        private DataTable _dataTable;
+
+        static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16()
+        {
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _clsVar1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _clsVar2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+        }
+
+        public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16()
+        {
+            Succeeded = true;
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+
+            for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+            for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+            for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); }
+            _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+        }
+
+        public bool IsSupported => AvxVnni.IsSupported;
+
+        public bool Succeeded { get; set; }
+
+        public void RunBasicScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+                Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
+                Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr)
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunBasicScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)),
+                Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr))
+            );
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+            
+        }
+
+        public void RunBasicScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+                Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)),
+                Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr))
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+                                        Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
+                                        Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr)
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunReflectionScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+            var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+                                     .Invoke(null, new object[] {
+                                        Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+                                        Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)),
+                                        Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr))
+                                     });
+
+            Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+            ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+        }
+
+        public void RunClsVarScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(
+                _clsVar0,
+                _clsVar1,
+                _clsVar2
+            );
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_UnsafeRead()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+            var first = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr);
+            var second = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr);
+            var third = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr);
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_Load()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+            var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunLclVarScenario_LoadAligned()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+            var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr));
+            var second = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr));
+            var third = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr));
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(first, second, third, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+            var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16();
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunClassFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructLclFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+            var test = TestStruct.Create();
+            var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+            Unsafe.Write(_dataTable.outArrayPtr, result);
+            ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+        }
+
+        public void RunStructFldScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+            var test = TestStruct.Create();
+            test.RunStructFldScenario(this);
+        }
+
+        public void RunUnsupportedScenario()
+        {
+            TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+            bool succeeded = false;
+
+            try
+            {
+                RunBasicScenario_UnsafeRead();
+            }
+            catch (PlatformNotSupportedException)
+            {
+                succeeded = true;
+            }
+
+            if (!succeeded)
+            {
+                Succeeded = false;
+            }
+        }
+
+        private void ValidateResult(Vector128<Int32> addend, Vector128<Int16> left, Vector128<Int16> right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Int16[] inArray1 = new Int16[Op1ElementCount];
+            Int16[] inArray2 = new Int16[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), left);
+            Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), right);
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+        {
+            Int32[] inArray0 = new Int32[Op0ElementCount];
+            Int16[] inArray1 = new Int16[Op1ElementCount];
+            Int16[] inArray2 = new Int16[Op2ElementCount];
+            Int32[] outArray = new Int32[RetElementCount];
+
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+            Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+            ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+        }
+
+        private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "")
+        {
+            bool succeeded = true;
+
+            Int32[] outArray = new Int32[RetElementCount];
+
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                int addend2 = right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2];
+                int value = addend[i] + addend2;
+                int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value;
+                int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp;
+                outArray[i] = c;
+            }
+            for (var i = 0; i < RetElementCount; i++)
+            {
+                if (result[i] != outArray[i])
+                {
+                    succeeded = false;
+                    break;
+                }
+            }
+
+            if (!succeeded)
+            {
+                TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}<Int32>(Vector128<Int32>, Vector128<Int32>): {method} failed:");
+                TestLibrary.TestFramework.LogInformation($"  addend: ({string.Join(", ", addend)})");
+                TestLibrary.TestFramework.LogInformation($"  left: ({string.Join(", ", left)})");
+                TestLibrary.TestFramework.LogInformation($"  right: ({string.Join(", ", right)})");
+                TestLibrary.TestFramework.LogInformation($"  result: ({string.Join(", ", result)})");
+                TestLibrary.TestFramework.LogInformation($"  valid: ({string.Join(", ", outArray)})");
+                TestLibrary.TestFramework.LogInformation(string.Empty);
+
+                Succeeded = false;
+            }
+        }
+    }
+}
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_r.csproj
new file mode 100644 (file)
index 0000000..8274558
--- /dev/null
@@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <!-- It takes a long time to complete (on a non-AVX machine) -->
+    <UnloadabilityIncompatible>true</UnloadabilityIncompatible>
+    <!-- https://github.com/dotnet/runtime/issues/12392 -->
+    <GCStressIncompatible>true</GCStressIncompatible>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize />
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="MultiplyWideningAndAdd.Byte.cs" />
+    <Compile Include="MultiplyWideningAndAdd.Int16.cs" />
+    <Compile Include="MultiplyWideningAndAddSaturate.Byte.cs" />
+    <Compile Include="MultiplyWideningAndAddSaturate.Int16.cs" />
+    <Compile Include="Program.AvxVnni_Vector128.cs" />
+    <Compile Include="..\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/MultiplyWideningAndAdd_ro.csproj
new file mode 100644 (file)
index 0000000..669831c
--- /dev/null
@@ -0,0 +1,22 @@
+<Project Sdk="Microsoft.NET.Sdk">
+  <PropertyGroup>
+    <OutputType>Exe</OutputType>
+    <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+    <!-- It takes a long time to complete (on a non-AVX machine) -->
+    <UnloadabilityIncompatible>true</UnloadabilityIncompatible>
+    <!-- https://github.com/dotnet/runtime/issues/12392 -->
+    <GCStressIncompatible>true</GCStressIncompatible>
+  </PropertyGroup>
+  <PropertyGroup>
+    <DebugType>Embedded</DebugType>
+    <Optimize>True</Optimize> 
+  </PropertyGroup>
+  <ItemGroup>
+    <Compile Include="MultiplyWideningAndAdd.Byte.cs" />
+    <Compile Include="MultiplyWideningAndAdd.Int16.cs" />
+    <Compile Include="MultiplyWideningAndAddSaturate.Byte.cs" />
+    <Compile Include="MultiplyWideningAndAddSaturate.Int16.cs" />
+    <Compile Include="Program.AvxVnni_Vector128.cs" />
+    <Compile Include="..\Shared\Program.cs" />
+  </ItemGroup>
+</Project>
diff --git a/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs b/src/tests/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/Program.AvxVnni_Vector128.cs
new file mode 100644 (file)
index 0000000..ff68d04
--- /dev/null
@@ -0,0 +1,21 @@
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+    public static partial class Program
+    {
+        static Program()
+        {
+            TestList = new Dictionary<string, Action>() {
+                ["MultiplyWideningAndAdd.Byte"] = MultiplyWideningAndAddByte,
+                ["MultiplyWideningAndAdd.Int16"] = MultiplyWideningAndAddInt16,
+                ["MultiplyWideningAndAddSaturate.Byte"] = MultiplyWideningAndAddSaturateByte,
+                ["MultiplyWideningAndAddSaturate.Int16"] = MultiplyWideningAndAddSaturateInt16,
+            };
+        }
+    }
+}
index 884eff7..1d77268 100644 (file)
@@ -72,6 +72,7 @@ namespace JIT.HardwareIntrinsics.X86
             TestLibrary.TestFramework.LogInformation($"  AES:       {Aes.IsSupported}");
             TestLibrary.TestFramework.LogInformation($"  AVX:       {Avx.IsSupported}");
             TestLibrary.TestFramework.LogInformation($"  AVX2:      {Avx2.IsSupported}");
+            TestLibrary.TestFramework.LogInformation($"  AVXVNNI:   {AvxVnni.IsSupported}");
             TestLibrary.TestFramework.LogInformation($"  BMI1:      {Bmi1.IsSupported}");
             TestLibrary.TestFramework.LogInformation($"  BMI2:      {Bmi2.IsSupported}");
             TestLibrary.TestFramework.LogInformation($"  FMA:       {Fma.IsSupported}");
index f1d2ba2..f1cdc5d 100644 (file)
         <ExcludeList Include="$(XunitTestBinBase)/JIT/opt/Devirtualization/Comparer_get_Default/*">
             <Issue>https://github.com/dotnet/runtime/issues/48190</Issue>
         </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/HardwareIntrinsics/X86/AvxVnni/**">
+            <Issue>Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078</Issue>
+        </ExcludeList>
+        <ExcludeList Include="$(XunitTestBinBase)/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/**">
+            <Issue>Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078</Issue>
+        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/DynamicPgo/**">
             <Issue>Mono doesn't have a dynamic pgo or tiered compilation infrastructure</Issue>
         </ExcludeList>