* Add support for AvxVnni instructions under Experimental.
* Add support for AvxVnni instructions
* Add preveiw feature attribute
* Handle operands in lsra
* Undo changes for Experimental
* Update JITEEVersionIdentifier and fix remaining issues
* Resolve Mono CI failure
* Disable tests
* Disable Vector128 tests
* Modify disable tests
Co-authored-by: Tanner Gooding <tagoo@outlook.com>
InstructionSet_POPCNT=16,
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
- InstructionSet_X86Base_X64=19,
- InstructionSet_SSE_X64=20,
- InstructionSet_SSE2_X64=21,
- InstructionSet_SSE3_X64=22,
- InstructionSet_SSSE3_X64=23,
- InstructionSet_SSE41_X64=24,
- InstructionSet_SSE42_X64=25,
- InstructionSet_AVX_X64=26,
- InstructionSet_AVX2_X64=27,
- InstructionSet_AES_X64=28,
- InstructionSet_BMI1_X64=29,
- InstructionSet_BMI2_X64=30,
- InstructionSet_FMA_X64=31,
- InstructionSet_LZCNT_X64=32,
- InstructionSet_PCLMULQDQ_X64=33,
- InstructionSet_POPCNT_X64=34,
+ InstructionSet_AVXVNNI=19,
+ InstructionSet_X86Base_X64=20,
+ InstructionSet_SSE_X64=21,
+ InstructionSet_SSE2_X64=22,
+ InstructionSet_SSE3_X64=23,
+ InstructionSet_SSSE3_X64=24,
+ InstructionSet_SSE41_X64=25,
+ InstructionSet_SSE42_X64=26,
+ InstructionSet_AVX_X64=27,
+ InstructionSet_AVX2_X64=28,
+ InstructionSet_AES_X64=29,
+ InstructionSet_BMI1_X64=30,
+ InstructionSet_BMI2_X64=31,
+ InstructionSet_FMA_X64=32,
+ InstructionSet_LZCNT_X64=33,
+ InstructionSet_PCLMULQDQ_X64=34,
+ InstructionSet_POPCNT_X64=35,
+ InstructionSet_AVXVNNI_X64=36,
#endif // TARGET_AMD64
#ifdef TARGET_X86
InstructionSet_X86Base=1,
InstructionSet_POPCNT=16,
InstructionSet_Vector128=17,
InstructionSet_Vector256=18,
- InstructionSet_X86Base_X64=19,
- InstructionSet_SSE_X64=20,
- InstructionSet_SSE2_X64=21,
- InstructionSet_SSE3_X64=22,
- InstructionSet_SSSE3_X64=23,
- InstructionSet_SSE41_X64=24,
- InstructionSet_SSE42_X64=25,
- InstructionSet_AVX_X64=26,
- InstructionSet_AVX2_X64=27,
- InstructionSet_AES_X64=28,
- InstructionSet_BMI1_X64=29,
- InstructionSet_BMI2_X64=30,
- InstructionSet_FMA_X64=31,
- InstructionSet_LZCNT_X64=32,
- InstructionSet_PCLMULQDQ_X64=33,
- InstructionSet_POPCNT_X64=34,
+ InstructionSet_AVXVNNI=19,
+ InstructionSet_X86Base_X64=20,
+ InstructionSet_SSE_X64=21,
+ InstructionSet_SSE2_X64=22,
+ InstructionSet_SSE3_X64=23,
+ InstructionSet_SSSE3_X64=24,
+ InstructionSet_SSE41_X64=25,
+ InstructionSet_SSE42_X64=26,
+ InstructionSet_AVX_X64=27,
+ InstructionSet_AVX2_X64=28,
+ InstructionSet_AES_X64=29,
+ InstructionSet_BMI1_X64=30,
+ InstructionSet_BMI2_X64=31,
+ InstructionSet_FMA_X64=32,
+ InstructionSet_LZCNT_X64=33,
+ InstructionSet_PCLMULQDQ_X64=34,
+ InstructionSet_POPCNT_X64=35,
+ InstructionSet_AVXVNNI_X64=36,
#endif // TARGET_X86
};
AddInstructionSet(InstructionSet_PCLMULQDQ_X64);
if (HasInstructionSet(InstructionSet_POPCNT))
AddInstructionSet(InstructionSet_POPCNT_X64);
+ if (HasInstructionSet(InstructionSet_AVXVNNI))
+ AddInstructionSet(InstructionSet_AVXVNNI_X64);
#endif // TARGET_AMD64
#ifdef TARGET_X86
#endif // TARGET_X86
resultflags.RemoveInstructionSet(InstructionSet_POPCNT);
if (resultflags.HasInstructionSet(InstructionSet_POPCNT_X64) && !resultflags.HasInstructionSet(InstructionSet_POPCNT))
resultflags.RemoveInstructionSet(InstructionSet_POPCNT_X64);
+ if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64))
+ resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI);
+ if (resultflags.HasInstructionSet(InstructionSet_AVXVNNI_X64) && !resultflags.HasInstructionSet(InstructionSet_AVXVNNI))
+ resultflags.RemoveInstructionSet(InstructionSet_AVXVNNI_X64);
if (resultflags.HasInstructionSet(InstructionSet_SSE) && !resultflags.HasInstructionSet(InstructionSet_X86Base))
resultflags.RemoveInstructionSet(InstructionSet_SSE);
if (resultflags.HasInstructionSet(InstructionSet_SSE2) && !resultflags.HasInstructionSet(InstructionSet_SSE))
return "Vector128";
case InstructionSet_Vector256 :
return "Vector256";
+ case InstructionSet_AVXVNNI :
+ return "AVXVNNI";
+ case InstructionSet_AVXVNNI_X64 :
+ return "AVXVNNI_X64";
#endif // TARGET_AMD64
#ifdef TARGET_X86
case InstructionSet_X86Base :
return "Vector128";
case InstructionSet_Vector256 :
return "Vector256";
+ case InstructionSet_AVXVNNI :
+ return "AVXVNNI";
#endif // TARGET_X86
default:
case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
+ case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
#endif // TARGET_AMD64
#ifdef TARGET_X86
case READYTORUN_INSTRUCTION_X86Base: return InstructionSet_X86Base;
case READYTORUN_INSTRUCTION_Lzcnt: return InstructionSet_LZCNT;
case READYTORUN_INSTRUCTION_Pclmulqdq: return InstructionSet_PCLMULQDQ;
case READYTORUN_INSTRUCTION_Popcnt: return InstructionSet_POPCNT;
+ case READYTORUN_INSTRUCTION_AvxVnni: return InstructionSet_AVXVNNI;
#endif // TARGET_X86
default:
#define GUID_DEFINED
#endif // !GUID_DEFINED
-constexpr GUID JITEEVersionIdentifier = { /* 81a5e384-8ca5-4947-8b2e-1d76556728fd */
- 0x81a5e384,
- 0x8ca5,
- 0x4947,
- {0x8b, 0x2e, 0x1d, 0x76, 0x55, 0x67, 0x28, 0xfd}
-};
+constexpr GUID JITEEVersionIdentifier = { /* 1052f490-cad7-4610-99bb-6f2bd91a1d19 */
+ 0x1052f490,
+ 0xcad7,
+ 0x4610,
+ {0x99, 0xbb, 0x6f, 0x2b, 0xd9, 0x1a, 0x1d, 0x19}
+ };
//////////////////////////////////////////////////////////////////////////////////////////////////////////
//
READYTORUN_INSTRUCTION_X86Base=22,
READYTORUN_INSTRUCTION_Dp=23,
READYTORUN_INSTRUCTION_Rdm=24,
+ READYTORUN_INSTRUCTION_AvxVnni=25,
};
instructionSetFlags.RemoveInstructionSet(InstructionSet_AVX2);
}
+ if (!JitConfig.EnableAVXVNNI())
+ {
+ instructionSetFlags.RemoveInstructionSet(InstructionSet_AVXVNNI);
+ }
+
if (!JitConfig.EnableLZCNT())
{
instructionSetFlags.RemoveInstructionSet(InstructionSet_LZCNT);
return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
}
+bool IsAVXVNNIInstruction(instruction ins)
+{
+ return (ins >= INS_FIRST_AVXVNNI_INSTRUCTION) && (ins <= INS_LAST_AVXVNNI_INSTRUCTION);
+}
+
bool IsBMIInstruction(instruction ins)
{
return (ins >= INS_FIRST_BMI_INSTRUCTION) && (ins <= INS_LAST_BMI_INSTRUCTION);
void emitter::emitIns_SIMD_R_R_R_A(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, GenTreeIndir* indir)
{
- assert(IsFMAInstruction(ins));
+ assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(UseVEXEncoding());
// Ensure we aren't overwriting op2
void emitter::emitIns_SIMD_R_R_R_R(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, regNumber op3Reg)
{
- if (IsFMAInstruction(ins))
+ if (IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins))
{
assert(UseVEXEncoding());
void emitter::emitIns_SIMD_R_R_R_S(
instruction ins, emitAttr attr, regNumber targetReg, regNumber op1Reg, regNumber op2Reg, int varx, int offs)
{
- assert(IsFMAInstruction(ins));
+ assert(IsFMAInstruction(ins) || IsAVXVNNIInstruction(ins));
assert(UseVEXEncoding());
// Ensure we aren't overwriting op2
case INS_vfnmsub132ss:
case INS_vfnmsub213ss:
case INS_vfnmsub231ss:
+ case INS_vpdpbusd: // will be populated when the HW becomes publicly available
+ case INS_vpdpwssd: // will be populated when the HW becomes publicly available
+ case INS_vpdpbusds: // will be populated when the HW becomes publicly available
+ case INS_vpdpwssds: // will be populated when the HW becomes publicly available
// uops.info
result.insThroughput = PERFSCORE_THROUGHPUT_2X;
result.insLatency += PERFSCORE_LATENCY_4C;
genHWIntrinsic_R_R_RM_R(node, ins);
break;
}
+ case NI_AVXVNNI_MultiplyWideningAndAdd:
+ case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
+ {
+ assert(targetReg != REG_NA);
+ assert(op1Reg != REG_NA);
+ assert(op2Reg != REG_NA);
+ genHWIntrinsic_R_R_R_RM(ins, simdSize, targetReg, op1Reg, op2Reg, op3);
+ break;
+ }
default:
{
unreached();
HARDWARE_INTRINSIC(AVX2, UnpackHigh, 32, 2, {INS_punpckhbw, INS_punpckhbw, INS_punpckhwd, INS_punpckhwd, INS_punpckhdq, INS_punpckhdq, INS_punpckhqdq, INS_punpckhqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2, UnpackLow, 32, 2, {INS_punpcklbw, INS_punpcklbw, INS_punpcklwd, INS_punpcklwd, INS_punpckldq, INS_punpckldq, INS_punpcklqdq, INS_punpcklqdq, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2, Xor, 32, 2, {INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_pxor, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
-
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// ISA Function name SIMD size NumArg Instructions Category Flags
+// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
+// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
+// AVXVNNI Intrinsics
+HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAdd, -1, 3, {INS_invalid, INS_vpdpbusd, INS_vpdpwssd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg)
+HARDWARE_INTRINSIC(AVXVNNI, MultiplyWideningAndAddSaturate, -1, 3, {INS_invalid, INS_vpdpbusds, INS_vpdpwssds, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFloatingPointUsed|HW_Flag_BaseTypeFromSecondArg)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// ISA Function name SIMD size NumArg Instructions Category Flags
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
return InstructionSet_AVX_X64;
case InstructionSet_AVX2:
return InstructionSet_AVX2_X64;
+ case InstructionSet_AVXVNNI:
+ return InstructionSet_AVXVNNI_X64;
case InstructionSet_AES:
return InstructionSet_AES_X64;
case InstructionSet_BMI1:
{
return InstructionSet_AVX2;
}
+ if (strcmp(className, "AvxVnni") == 0)
+ {
+ return InstructionSet_AVXVNNI;
+ }
}
else if (className[0] == 'S')
{
case InstructionSet_AVX_X64:
case InstructionSet_AVX2:
case InstructionSet_AVX2_X64:
+ case InstructionSet_AVXVNNI:
+ case InstructionSet_AVXVNNI_X64:
case InstructionSet_BMI1:
case InstructionSet_BMI1_X64:
case InstructionSet_BMI2:
INST3(vfnmsub231ss, "fnmsub231ss", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xBF), INS_Flags_IsDstDstSrcAVXInstruction) //
INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(FIRST_AVXVNNI_INSTRUCTION, "FIRST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+INST3(vpdpbusd, "pdpbusd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x50), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes
+INST3(vpdpwssd, "pdpwssd", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x52), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers
+INST3(vpdpbusds, "pdpbusds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x51), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Unsigned and Signed Bytes with Saturation
+INST3(vpdpwssds, "pdpwssds", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0x53), INS_Flags_IsDstDstSrcAVXInstruction) // Multiply and Add Signed Word Integers with Saturation
+INST3(LAST_AVXVNNI_INSTRUCTION, "LAST_AVXVNNI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
+
// BMI1
INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
INST3(andn, "andn", IUM_WR, BAD_CODE, BAD_CODE, SSE38(0xF2), INS_Flags_IsDstDstSrcAVXInstruction) // Logical AND NOT
CONFIG_INTEGER(EnableSSE42, W("EnableSSE42"), 1) // Enable SSE42
CONFIG_INTEGER(EnableAVX, W("EnableAVX"), 1) // Enable AVX
CONFIG_INTEGER(EnableAVX2, W("EnableAVX2"), 1) // Enable AVX2
+CONFIG_INTEGER(EnableAVXVNNI, W("EnableAVXVNNI"), 1) // Enable AVXVNNI
CONFIG_INTEGER(EnableFMA, W("EnableFMA"), 1) // Enable FMA
CONFIG_INTEGER(EnableAES, W("EnableAES"), 1) // Enable AES
CONFIG_INTEGER(EnableBMI1, W("EnableBMI1"), 1) // Enable BMI1
}
break;
}
-
+ case NI_AVXVNNI_MultiplyWideningAndAdd:
+ case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
+ {
+ if (IsContainableHWIntrinsicOp(node, op3, &supportsRegOptional))
+ {
+ MakeSrcContained(node, op3);
+ }
+ else if (supportsRegOptional)
+ {
+ op3->SetRegOptional();
+ }
+ break;
+ }
case NI_BMI2_MultiplyNoFlags:
case NI_BMI2_X64_MultiplyNoFlags:
{
break;
}
+ case NI_AVXVNNI_MultiplyWideningAndAdd:
+ case NI_AVXVNNI_MultiplyWideningAndAddSaturate:
+ {
+ assert(numArgs == 3);
+
+ tgtPrefUse = BuildUse(op1);
+ srcCount += 1;
+ srcCount += BuildDelayFreeUses(op2, op1);
+ srcCount += op3->isContained() ? BuildOperandUses(op3) : BuildDelayFreeUses(op3, op1);
+
+ buildUses = false;
+ break;
+ }
+
case NI_AVX2_GatherVector128:
case NI_AVX2_GatherVector256:
{
X86Base=22,
Dp=23,
Rdm=24,
+ AvxVnni=25,
}
}
case InstructionSet.X64_POPCNT_X64: return ReadyToRunInstructionSet.Popcnt;
case InstructionSet.X64_Vector128: return null;
case InstructionSet.X64_Vector256: return null;
+ case InstructionSet.X64_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni;
+ case InstructionSet.X64_AVXVNNI_X64: return ReadyToRunInstructionSet.AvxVnni;
default: throw new Exception("Unknown instruction set");
}
case InstructionSet.X86_POPCNT_X64: return null;
case InstructionSet.X86_Vector128: return null;
case InstructionSet.X86_Vector256: return null;
+ case InstructionSet.X86_AVXVNNI: return ReadyToRunInstructionSet.AvxVnni;
+ case InstructionSet.X86_AVXVNNI_X64: return null;
default: throw new Exception("Unknown instruction set");
}
X64_POPCNT=16,
X64_Vector128=17,
X64_Vector256=18,
- X64_X86Base_X64=19,
- X64_SSE_X64=20,
- X64_SSE2_X64=21,
- X64_SSE3_X64=22,
- X64_SSSE3_X64=23,
- X64_SSE41_X64=24,
- X64_SSE42_X64=25,
- X64_AVX_X64=26,
- X64_AVX2_X64=27,
- X64_AES_X64=28,
- X64_BMI1_X64=29,
- X64_BMI2_X64=30,
- X64_FMA_X64=31,
- X64_LZCNT_X64=32,
- X64_PCLMULQDQ_X64=33,
- X64_POPCNT_X64=34,
+ X64_AVXVNNI=19,
+ X64_X86Base_X64=20,
+ X64_SSE_X64=21,
+ X64_SSE2_X64=22,
+ X64_SSE3_X64=23,
+ X64_SSSE3_X64=24,
+ X64_SSE41_X64=25,
+ X64_SSE42_X64=26,
+ X64_AVX_X64=27,
+ X64_AVX2_X64=28,
+ X64_AES_X64=29,
+ X64_BMI1_X64=30,
+ X64_BMI2_X64=31,
+ X64_FMA_X64=32,
+ X64_LZCNT_X64=33,
+ X64_PCLMULQDQ_X64=34,
+ X64_POPCNT_X64=35,
+ X64_AVXVNNI_X64=36,
X86_X86Base=1,
X86_SSE=2,
X86_SSE2=3,
X86_POPCNT=16,
X86_Vector128=17,
X86_Vector256=18,
- X86_X86Base_X64=19,
- X86_SSE_X64=20,
- X86_SSE2_X64=21,
- X86_SSE3_X64=22,
- X86_SSSE3_X64=23,
- X86_SSE41_X64=24,
- X86_SSE42_X64=25,
- X86_AVX_X64=26,
- X86_AVX2_X64=27,
- X86_AES_X64=28,
- X86_BMI1_X64=29,
- X86_BMI2_X64=30,
- X86_FMA_X64=31,
- X86_LZCNT_X64=32,
- X86_PCLMULQDQ_X64=33,
- X86_POPCNT_X64=34,
+ X86_AVXVNNI=19,
+ X86_X86Base_X64=20,
+ X86_SSE_X64=21,
+ X86_SSE2_X64=22,
+ X86_SSE3_X64=23,
+ X86_SSSE3_X64=24,
+ X86_SSE41_X64=25,
+ X86_SSE42_X64=26,
+ X86_AVX_X64=27,
+ X86_AVX2_X64=28,
+ X86_AES_X64=29,
+ X86_BMI1_X64=30,
+ X86_BMI2_X64=31,
+ X86_FMA_X64=32,
+ X86_LZCNT_X64=33,
+ X86_PCLMULQDQ_X64=34,
+ X86_POPCNT_X64=35,
+ X86_AVXVNNI_X64=36,
}
resultflags.AddInstructionSet(InstructionSet.X64_POPCNT_X64);
if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64))
resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
+ if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI))
+ resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
+ if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64))
+ resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
resultflags.AddInstructionSet(InstructionSet.X64_X86Base);
if (resultflags.HasInstructionSet(InstructionSet.X64_SSE2))
resultflags.AddInstructionSet(InstructionSet.X64_PCLMULQDQ);
if (resultflags.HasInstructionSet(InstructionSet.X64_POPCNT_X64))
resultflags.AddInstructionSet(InstructionSet.X64_POPCNT);
+ if (resultflags.HasInstructionSet(InstructionSet.X64_AVXVNNI_X64))
+ resultflags.AddInstructionSet(InstructionSet.X64_AVXVNNI);
if (resultflags.HasInstructionSet(InstructionSet.X64_X86Base))
resultflags.AddInstructionSet(InstructionSet.X64_SSE);
if (resultflags.HasInstructionSet(InstructionSet.X64_SSE))
yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X64_POPCNT, true);
yield return new InstructionSetInfo("Vector128", "", InstructionSet.X64_Vector128, false);
yield return new InstructionSetInfo("Vector256", "", InstructionSet.X64_Vector256, false);
+ yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X64_AVXVNNI, true);
break;
case TargetArchitecture.X86:
yield return new InstructionSetInfo("popcnt", "Popcnt", InstructionSet.X86_POPCNT, true);
yield return new InstructionSetInfo("Vector128", "", InstructionSet.X86_Vector128, false);
yield return new InstructionSetInfo("Vector256", "", InstructionSet.X86_Vector256, false);
+ yield return new InstructionSetInfo("avxvnni", "AvxVnni", InstructionSet.X86_AVXVNNI, true);
break;
}
AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
if (HasInstructionSet(InstructionSet.X64_POPCNT))
AddInstructionSet(InstructionSet.X64_POPCNT_X64);
+ if (HasInstructionSet(InstructionSet.X64_AVXVNNI))
+ AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
break;
case TargetArchitecture.X86:
AddInstructionSet(InstructionSet.X64_LZCNT_X64);
AddInstructionSet(InstructionSet.X64_PCLMULQDQ_X64);
AddInstructionSet(InstructionSet.X64_POPCNT_X64);
+ AddInstructionSet(InstructionSet.X64_AVXVNNI_X64);
break;
case TargetArchitecture.X86:
AddInstructionSet(InstructionSet.X86_LZCNT_X64);
AddInstructionSet(InstructionSet.X86_PCLMULQDQ_X64);
AddInstructionSet(InstructionSet.X86_POPCNT_X64);
+ AddInstructionSet(InstructionSet.X86_AVXVNNI_X64);
break;
}
instructionset ,X86 ,Popcnt , ,15 ,POPCNT ,popcnt
instructionset ,X86 , , , ,Vector128,
instructionset ,X86 , , , ,Vector256,
+instructionset ,X86 ,AvxVnni , ,25 ,AVXVNNI ,avxvnni
instructionset64bit,X86 ,X86Base
instructionset64bit,X86 ,SSE
instructionset64bit,X86 ,LZCNT
instructionset64bit,X86 ,PCLMULQDQ
instructionset64bit,X86 ,POPCNT
+instructionset64bit,X86 ,AVXVNNI
implication ,X86 ,SSE ,X86Base
implication ,X86 ,SSE2 ,SSE
// CORJIT_FLAG_USE_AVX2 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
// CORJIT_FLAG_USE_AVX
// AVX2 - EBX bit 5
+ // CORJIT_FLAG_USE_AVXVNNI if the following feature bit is set (input EAX of 0x07 and input ECX of 1):
+ // CORJIT_FLAG_USE_AVX2
+ // AVXVNNI - EAX bit 4
// CORJIT_FLAG_USE_AVX_512 is not currently set, but defined so that it can be used in future without
// CORJIT_FLAG_USE_BMI1 if the following feature bit is set (input EAX of 0x07 and input ECX of 0):
// BMI1 - EBX bit 3
if ((cpuidInfo[EBX] & (1 << 5)) != 0) // AVX2
{
CPUCompileFlags.Set(InstructionSet_AVX2);
+
+ __cpuidex(cpuidInfo, 0x00000007, 0x00000001);
+ if ((cpuidInfo[EAX] & (1 << 4)) != 0) // AVX-VNNI
+ {
+ CPUCompileFlags.Set(InstructionSet_AVXVNNI);
+ }
}
}
}
<type fullname="System.Runtime.Intrinsics.X86.Avx2/X64">
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
</type>
+ <type fullname="System.Runtime.Intrinsics.X86.AvxVnni">
+ <method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
+ </type>
+ <type fullname="System.Runtime.Intrinsics.X86.AvxVnni/X64">
+ <method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
+ </type>
<type fullname="System.Runtime.Intrinsics.X86.Bmi1">
<method signature="System.Boolean get_IsSupported()" body="stub" value="false" />
</type>
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Aes.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx2.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\AvxVnni.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Bmi1.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Bmi2.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Fma.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Aes.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Avx2.PlatformNotSupported.cs" />
+ <Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\AvxVnni.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Bmi1.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Bmi2.PlatformNotSupported.cs" />
<Compile Include="$(MSBuildThisFileDirectory)System\Runtime\Intrinsics\X86\Fma.PlatformNotSupported.cs" />
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Versioning;
+
+namespace System.Runtime.Intrinsics.X86
+{
+ [CLSCompliant(false)]
+ [RequiresPreviewFeatures]
+ public abstract class AvxVnni : Avx2
+ {
+ internal AvxVnni() { }
+
+ public static new bool IsSupported { [Intrinsic] get { return false; } }
+
+ public new abstract class X64 : Avx2.X64
+ {
+ internal X64() { }
+
+ public static new bool IsSupported { [Intrinsic] get { return false; } }
+ }
+
+ /// <summary>
+ /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b)
+ /// VPDPBUSD xmm, xmm, xmm/m128
+ /// </summary>
+ public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<byte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b)
+ /// VPDPWSSD xmm, xmm, xmm/m128
+ /// </summary>
+ public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b)
+ /// VPDPBUSD ymm, ymm, ymm/m256
+ /// </summary>
+ public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<byte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b)
+ /// VPDPWSSD ymm, ymm, ymm/m256
+ /// </summary>
+ public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b)
+ /// VPDPBUSDS xmm, xmm, xmm/m128
+ /// </summary>
+ public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<byte> left, Vector128<sbyte> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b)
+ /// VPDPWSSDS xmm, xmm, xmm/m128
+ /// </summary>
+ public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<short> left, Vector128<short> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b)
+ /// VPDPBUSDS ymm, ymm, ymm/m256
+ /// </summary>
+ public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<byte> left, Vector256<sbyte> right) { throw new PlatformNotSupportedException(); }
+
+ /// <summary>
+ /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b)
+ /// VPDPWSSDS ymm, ymm, ymm/m256
+ /// </summary>
+ public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<short> left, Vector256<short> right) { throw new PlatformNotSupportedException(); }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System.Runtime.CompilerServices;
+using System.Runtime.Versioning;
+
+namespace System.Runtime.Intrinsics.X86
+{
+ [Intrinsic]
+ [CLSCompliant(false)]
+ [RequiresPreviewFeatures]
+ public abstract class AvxVnni : Avx2
+ {
+ internal AvxVnni() { }
+
+ public static new bool IsSupported { get => IsSupported; }
+
+ [Intrinsic]
+ public new abstract class X64 : Avx2.X64
+ {
+ internal X64() { }
+
+ public static new bool IsSupported { get => IsSupported; }
+ }
+
+ /// <summary>
+ /// __m128i _mm_dpbusd_epi32 (__m128i src, __m128i a, __m128i b)
+ /// VPDPBUSD xmm, xmm, xmm/m128
+ /// </summary>
+ public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<byte> left, Vector128<sbyte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+ /// <summary>
+ /// __m128i _mm_dpwssd_epi32 (__m128i src, __m128i a, __m128i b)
+ /// VPDPWSSD xmm, xmm, xmm/m128
+ /// </summary>
+ public static Vector128<int> MultiplyWideningAndAdd(Vector128<int> addend, Vector128<short> left, Vector128<short> right) => MultiplyWideningAndAdd(addend, left, right);
+
+ /// <summary>
+ /// __m256i _mm256_dpbusd_epi32 (__m256i src, __m256i a, __m256i b)
+ /// VPDPBUSD ymm, ymm, ymm/m256
+ /// </summary>
+ public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<byte> left, Vector256<sbyte> right) => MultiplyWideningAndAdd(addend, left, right);
+
+ /// <summary>
+ /// __m256i _mm256_dpwssd_epi32 (__m256i src, __m256i a, __m256i b)
+ /// VPDPWSSD ymm, ymm, ymm/m256
+ /// </summary>
+ public static Vector256<int> MultiplyWideningAndAdd(Vector256<int> addend, Vector256<short> left, Vector256<short> right) => MultiplyWideningAndAdd(addend, left, right);
+
+ /// <summary>
+ /// __m128i _mm_dpbusds_epi32 (__m128i src, __m128i a, __m128i b)
+ /// VPDPBUSDS xmm, xmm, xmm/m128
+ /// </summary>
+ public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<byte> left, Vector128<sbyte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+ /// <summary>
+ /// __m128i _mm_dpwssds_epi32 (__m128i src, __m128i a, __m128i b)
+ /// VPDPWSSDS xmm, xmm, xmm/m128
+ /// </summary>
+ public static Vector128<int> MultiplyWideningAndAddSaturate(Vector128<int> addend, Vector128<short> left, Vector128<short> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+ /// <summary>
+ /// __m256i _mm256_dpbusds_epi32 (__m256i src, __m256i a, __m256i b)
+ /// VPDPBUSDS ymm, ymm, ymm/m256
+ /// </summary>
+ public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<byte> left, Vector256<sbyte> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+
+ /// <summary>
+ /// __m256i _mm256_dpwssds_epi32 (__m256i src, __m256i a, __m256i b)
+ /// VPDPWSSDS ymm, ymm, ymm/m256
+ /// </summary>
+ public static Vector256<int> MultiplyWideningAndAddSaturate(Vector256<int> addend, Vector256<short> left, Vector256<short> right) => MultiplyWideningAndAddSaturate(addend, left, right);
+ }
+}
// ------------------------------------------------------------------------------
// Changes to this file must follow the https://aka.ms/api-review process.
// ------------------------------------------------------------------------------
+using System.Runtime.Versioning;
namespace System.Runtime.Intrinsics
{
public static new bool IsSupported { get { throw null; } }
}
}
+
+ [System.CLSCompliantAttribute(false)]
+ [RequiresPreviewFeatures]
+ public abstract class AvxVnni : System.Runtime.Intrinsics.X86.Avx2
+ {
+ internal AvxVnni() { }
+ public static new bool IsSupported { get { throw null; } }
+ public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<sbyte> right) { throw null; }
+ public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
+ public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<byte> left, System.Runtime.Intrinsics.Vector256<sbyte> right) { throw null; }
+ public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAdd(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<short> left, System.Runtime.Intrinsics.Vector256<short> right) { throw null; }
+ public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<byte> left, System.Runtime.Intrinsics.Vector128<sbyte> right) { throw null; }
+ public static System.Runtime.Intrinsics.Vector128<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector128<int> addend, System.Runtime.Intrinsics.Vector128<short> left, System.Runtime.Intrinsics.Vector128<short> right) { throw null; }
+ public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<byte> left, System.Runtime.Intrinsics.Vector256<sbyte> right) { throw null; }
+ public static System.Runtime.Intrinsics.Vector256<int> MultiplyWideningAndAddSaturate(System.Runtime.Intrinsics.Vector256<int> addend, System.Runtime.Intrinsics.Vector256<short> left, System.Runtime.Intrinsics.Vector256<short> right) { throw null; }
+ public new abstract partial class X64 : System.Runtime.Intrinsics.X86.Avx2.X64
+ {
+ internal X64() { }
+ public static new bool IsSupported { get { throw null; } }
+ }
+ }
+
[System.CLSCompliantAttribute(false)]
public abstract partial class Bmi1 : System.Runtime.Intrinsics.X86.X86Base
{
{ "Aes", MONO_CPU_X86_AES, aes_methods, sizeof (aes_methods) },
{ "Avx", MONO_CPU_X86_AVX, unsupported, sizeof (unsupported) },
{ "Avx2", MONO_CPU_X86_AVX2, unsupported, sizeof (unsupported) },
+ { "AvxVnni", 0, unsupported, sizeof (unsupported) },
{ "Bmi1", MONO_CPU_X86_BMI1, bmi1_methods, sizeof (bmi1_methods) },
{ "Bmi2", MONO_CPU_X86_BMI2, bmi2_methods, sizeof (bmi2_methods) },
{ "Fma", MONO_CPU_X86_FMA, unsupported, sizeof (unsupported) },
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ private static void MultiplyWideningAndAddByte()
+ {
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte();
+
+ if (test.IsSupported)
+ {
+ // Validates basic functionality works, using Unsafe.Read
+ test.RunBasicScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates basic functionality works, using Load
+ test.RunBasicScenario_Load();
+
+ // Validates basic functionality works, using LoadAligned
+ test.RunBasicScenario_LoadAligned();
+ }
+
+ else
+ {
+ Console.WriteLine("Avx Is Not Supported");
+ }
+
+ // Validates calling via reflection works, using Unsafe.Read
+ test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it.
+
+ if (Avx.IsSupported)
+ {
+ // Validates calling via reflection works, using Load
+ test.RunReflectionScenario_Load();
+
+ // Validates calling via reflection works, using LoadAligned
+ test.RunReflectionScenario_LoadAligned();
+ }
+
+ // Validates passing a static member works
+ test.RunClsVarScenario();
+
+ // Validates passing a local works, using Unsafe.Read
+ test.RunLclVarScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates passing a local works, using Load
+ test.RunLclVarScenario_Load();
+
+ // Validates passing a local works, using LoadAligned
+ test.RunLclVarScenario_LoadAligned();
+ }
+
+ // Validates passing the field of a local class works
+ test.RunClassLclFldScenario();
+
+ // Validates passing an instance member of a class works
+ test.RunClassFldScenario();
+
+ // Validates passing the field of a local struct works
+ test.RunStructLclFldScenario();
+
+ // Validates passing an instance member of a struct works
+ test.RunStructFldScenario();
+ }
+ else
+ {
+ Console.WriteLine("Test Is Not Supported");
+ // Validates we throw on unsupported hardware
+ test.RunUnsupportedScenario();
+ }
+
+ if (!test.Succeeded)
+ {
+ throw new Exception("One or more scenarios did not complete as expected.");
+ }
+ }
+ }
+
+ public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddByte
+ {
+ private struct DataTable
+ {
+ private byte[] inArray0;
+ private byte[] inArray1;
+ private byte[] inArray2;
+ private byte[] outArray;
+
+ private GCHandle inHandle0;
+ private GCHandle inHandle1;
+ private GCHandle inHandle2;
+ private GCHandle outHandle;
+
+ private ulong alignment;
+
+ public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment)
+ {
+ int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+ int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Byte>();
+ int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<SByte>();
+ int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+ if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)
+ {
+ throw new ArgumentException("Invalid value of alighment");
+ }
+
+ this.inArray0 = new byte[alignment * 2];
+ this.inArray1 = new byte[alignment * 2];
+ this.inArray2 = new byte[alignment * 2];
+ this.outArray = new byte[alignment * 2];
+
+ this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+ this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+ this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+ this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+ this.alignment = (ulong)alignment;
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Byte, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<SByte, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+ }
+
+ public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+ public void Dispose()
+ {
+ inHandle0.Free();
+ inHandle1.Free();
+ inHandle2.Free();
+ outHandle.Free();
+ }
+
+ private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+ {
+ return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+ }
+ }
+ private struct TestStruct
+ {
+ public Vector256<Int32> _fld0;
+ public Vector256<Byte> _fld1;
+ public Vector256<SByte> _fld2;
+
+ public static TestStruct Create()
+ {
+ var testStruct = new TestStruct();
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref testStruct._fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref testStruct._fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+
+ return testStruct;
+ }
+
+ public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddByte testClass)
+ {
+ var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+ testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+ }
+ }
+
+ private static readonly int LargestVectorSize = 32;
+
+ private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+ private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Byte>>() / sizeof(Byte);
+ private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<SByte>>() / sizeof(SByte);
+ private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+ private static Int32[] _data0 = new Int32[Op0ElementCount];
+ private static Byte[] _data1 = new Byte[Op1ElementCount];
+ private static SByte[] _data2 = new SByte[Op2ElementCount];
+
+ private static Vector256<Int32> _clsVar0;
+ private static Vector256<Byte> _clsVar1;
+ private static Vector256<SByte> _clsVar2;
+
+ private Vector256<Int32> _fld0;
+ private Vector256<Byte> _fld1;
+ private Vector256<SByte> _fld2;
+
+ private DataTable _dataTable;
+
+ static SimpleTernaryOpTest__MultiplyWideningAndAddByte()
+ {
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref _clsVar1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref _clsVar2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+ }
+
+ public SimpleTernaryOpTest__MultiplyWideningAndAddByte()
+ {
+ Succeeded = true;
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref _fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref _fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); }
+ _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+ }
+
+ public bool IsSupported => AvxVnni.IsSupported;
+
+ public bool Succeeded { get; set; }
+
+ public void RunBasicScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr)
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)));
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+
+ }
+
+ public void RunBasicScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+ .Invoke(null, new object[] {
+ Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr)
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+ .Invoke(null, new object[] {
+ Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+ .Invoke(null, new object[] {
+ Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunClsVarScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ _clsVar0,
+ _clsVar1,
+ _clsVar2
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+ var first = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr);
+ var second = Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr);
+ var third = Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr);
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+ var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+ var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte();
+ var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+ var test = TestStruct.Create();
+ var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+ var test = TestStruct.Create();
+ test.RunStructFldScenario(this);
+ }
+
+ public void RunUnsupportedScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+ bool succeeded = false;
+
+ try
+ {
+ RunBasicScenario_UnsafeRead();
+ }
+ catch (PlatformNotSupportedException)
+ {
+ succeeded = true;
+ }
+
+ if (!succeeded)
+ {
+ Succeeded = false;
+ }
+ }
+
+ private void ValidateResult(Vector256<Int32> addend, Vector256<Byte> left, Vector256<SByte> right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Byte[] inArray1 = new Byte[Op1ElementCount];
+ SByte[] inArray2 = new SByte[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+ Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Byte[] inArray1 = new Byte[Op1ElementCount];
+ SByte[] inArray2 = new SByte[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "")
+ {
+ bool succeeded = true;
+
+ Int32[] outArray = new Int32[RetElementCount];
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ outArray[i] = Math.Clamp((addend[i] + (right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2])
+ + (right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4])), int.MinValue, int.MaxValue);
+ }
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ if (result[i] != outArray[i])
+ {
+ succeeded = false;
+ break;
+ }
+ }
+
+ if (!succeeded)
+ {
+ TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}<Int32>(Vector256<Int32>, Vector256<Int32>): {method} failed:");
+ TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})");
+ TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})");
+ TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})");
+ TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})");
+ TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})");
+ TestLibrary.TestFramework.LogInformation(string.Empty);
+
+ Succeeded = false;
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ private static void MultiplyWideningAndAddInt16()
+ {
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16();
+
+ if (test.IsSupported)
+ {
+ // Validates basic functionality works, using Unsafe.Read
+ test.RunBasicScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates basic functionality works, using Load
+ test.RunBasicScenario_Load();
+
+ // Validates basic functionality works, using LoadAligned
+ test.RunBasicScenario_LoadAligned();
+ }
+
+ else
+ {
+ Console.WriteLine("Avx Is Not Supported");
+ }
+
+ // Validates calling via reflection works, using Unsafe.Read
+ test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it.
+
+ if (Avx.IsSupported)
+ {
+ // Validates calling via reflection works, using Load
+ test.RunReflectionScenario_Load();
+
+ // Validates calling via reflection works, using LoadAligned
+ test.RunReflectionScenario_LoadAligned();
+ }
+
+ // Validates passing a static member works
+ test.RunClsVarScenario();
+
+ // Validates passing a local works, using Unsafe.Read
+ test.RunLclVarScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates passing a local works, using Load
+ test.RunLclVarScenario_Load();
+
+ // Validates passing a local works, using LoadAligned
+ test.RunLclVarScenario_LoadAligned();
+ }
+
+ // Validates passing the field of a local class works
+ test.RunClassLclFldScenario();
+
+ // Validates passing an instance member of a class works
+ test.RunClassFldScenario();
+
+ // Validates passing the field of a local struct works
+ test.RunStructLclFldScenario();
+
+ // Validates passing an instance member of a struct works
+ test.RunStructFldScenario();
+ }
+ else
+ {
+ Console.WriteLine("Test Is Not Supported");
+ // Validates we throw on unsupported hardware
+ test.RunUnsupportedScenario();
+ }
+
+ if (!test.Succeeded)
+ {
+ throw new Exception("One or more scenarios did not complete as expected.");
+ }
+ }
+ }
+
+ public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddInt16
+ {
+ private struct DataTable
+ {
+ private byte[] inArray0;
+ private byte[] inArray1;
+ private byte[] inArray2;
+ private byte[] outArray;
+
+ private GCHandle inHandle0;
+ private GCHandle inHandle1;
+ private GCHandle inHandle2;
+ private GCHandle outHandle;
+
+ private ulong alignment;
+
+ public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment)
+ {
+ int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+ int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Int16>();
+ int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<Int16>();
+ int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+ if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)
+ {
+ throw new ArgumentException("Invalid value of alignment");
+ }
+
+ this.inArray0 = new byte[alignment * 2];
+ this.inArray1 = new byte[alignment * 2];
+ this.inArray2 = new byte[alignment * 2];
+ this.outArray = new byte[alignment * 2];
+
+ this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+ this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+ this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+ this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+ this.alignment = (ulong)alignment;
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Int16, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<Int16, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+ }
+
+ public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+ public void Dispose()
+ {
+ inHandle0.Free();
+ inHandle1.Free();
+ inHandle2.Free();
+ outHandle.Free();
+ }
+
+ private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+ {
+ return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+ }
+ }
+ private struct TestStruct
+ {
+ public Vector256<Int32> _fld0;
+ public Vector256<Int16> _fld1;
+ public Vector256<Int16> _fld2;
+
+ public static TestStruct Create()
+ {
+ var testStruct = new TestStruct();
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref testStruct._fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref testStruct._fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+
+ return testStruct;
+ }
+
+ public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddInt16 testClass)
+ {
+ var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+ testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+ }
+ }
+
+ private static readonly int LargestVectorSize = 32;
+
+ private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+ private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Int16>>() / sizeof(Int16);
+ private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Int16>>() / sizeof(Int16);
+ private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+ private static Int32[] _data0 = new Int32[Op0ElementCount];
+ private static Int16[] _data1 = new Int16[Op1ElementCount];
+ private static Int16[] _data2 = new Int16[Op2ElementCount];
+
+ private static Vector256<Int32> _clsVar0;
+ private static Vector256<Int16> _clsVar1;
+ private static Vector256<Int16> _clsVar2;
+
+ private Vector256<Int32> _fld0;
+ private Vector256<Int16> _fld1;
+ private Vector256<Int16> _fld2;
+
+ private DataTable _dataTable;
+
+ static SimpleTernaryOpTest__MultiplyWideningAndAddInt16()
+ {
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _clsVar1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _clsVar2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ }
+
+ public SimpleTernaryOpTest__MultiplyWideningAndAddInt16()
+ {
+ Succeeded = true;
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); }
+ _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+ }
+
+ public bool IsSupported => AvxVnni.IsSupported;
+
+ public bool Succeeded { get; set; }
+
+ public void RunBasicScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr)
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)));
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+
+ }
+
+ public void RunBasicScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+ .Invoke(null, new object[] {
+ Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr)
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+ .Invoke(null, new object[] {
+ Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+ .Invoke(null, new object[] {
+ Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunClsVarScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ _clsVar0,
+ _clsVar1,
+ _clsVar2
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+ var first = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr);
+ var second = Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr);
+ var third = Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr);
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+ var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+ var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16();
+ var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+ var test = TestStruct.Create();
+ var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+ var test = TestStruct.Create();
+ test.RunStructFldScenario(this);
+ }
+
+ public void RunUnsupportedScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+ bool succeeded = false;
+
+ try
+ {
+ RunBasicScenario_UnsafeRead();
+ }
+ catch (PlatformNotSupportedException)
+ {
+ succeeded = true;
+ }
+
+ if (!succeeded)
+ {
+ Succeeded = false;
+ }
+ }
+
+ private void ValidateResult(Vector256<Int32> addend, Vector256<Int16> left, Vector256<Int16> right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Int16[] inArray1 = new Int16[Op1ElementCount];
+ Int16[] inArray2 = new Int16[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), left);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), right);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Int16[] inArray1 = new Int16[Op1ElementCount];
+ Int16[] inArray2 = new Int16[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "")
+ {
+ bool succeeded = true;
+
+ Int32[] outArray = new Int32[RetElementCount];
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ outArray[i] = Math.Clamp((addend[i] + (right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2])), int.MinValue, int.MaxValue);
+ }
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ if (result[i] != outArray[i])
+ {
+ succeeded = false;
+ break;
+ }
+ }
+
+ if (!succeeded)
+ {
+ TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}<Int32>(Vector256<Int32>, Vector256<Int32>): {method} failed:");
+ TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})");
+ TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})");
+ TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})");
+ TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})");
+ TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})");
+ TestLibrary.TestFramework.LogInformation(string.Empty);
+
+ Succeeded = false;
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ private static void MultiplyWideningAndAddSaturateByte()
+ {
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte();
+
+ if (test.IsSupported)
+ {
+ // Validates basic functionality works, using Unsafe.Read
+ test.RunBasicScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates basic functionality works, using Load
+ test.RunBasicScenario_Load();
+
+ // Validates basic functionality works, using LoadAligned
+ test.RunBasicScenario_LoadAligned();
+ }
+
+ else
+ {
+ Console.WriteLine("Avx Is Not Supported");
+ }
+
+ // Validates calling via reflection works, using Unsafe.Read
+ test.RunReflectionScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates calling via reflection works, using Load
+ test.RunReflectionScenario_Load();
+
+ // Validates calling via reflection works, using LoadAligned
+ test.RunReflectionScenario_LoadAligned();
+ }
+
+ // Validates passing a static member works
+ test.RunClsVarScenario();
+
+ // Validates passing a local works, using Unsafe.Read
+ test.RunLclVarScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates passing a local works, using Load
+ test.RunLclVarScenario_Load();
+
+ // Validates passing a local works, using LoadAligned
+ test.RunLclVarScenario_LoadAligned();
+ }
+
+ // Validates passing the field of a local class works
+ test.RunClassLclFldScenario();
+
+ // Validates passing an instance member of a class works
+ test.RunClassFldScenario();
+
+ // Validates passing the field of a local struct works
+ test.RunStructLclFldScenario();
+
+ // Validates passing an instance member of a struct works
+ test.RunStructFldScenario();
+ }
+ else
+ {
+ Console.WriteLine("Test Is Not Supported");
+ // Validates we throw on unsupported hardware
+ test.RunUnsupportedScenario();
+ }
+
+ if (!test.Succeeded)
+ {
+ throw new Exception("One or more scenarios did not complete as expected.");
+ }
+ }
+ }
+
+ public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte
+ {
+ private struct DataTable
+ {
+ private byte[] inArray0;
+ private byte[] inArray1;
+ private byte[] inArray2;
+ private byte[] outArray;
+
+ private GCHandle inHandle0;
+ private GCHandle inHandle1;
+ private GCHandle inHandle2;
+ private GCHandle outHandle;
+
+ private ulong alignment;
+
+ public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment)
+ {
+ int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+ int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Byte>();
+ int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<SByte>();
+ int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+ if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)
+ {
+ throw new ArgumentException("Invalid value of alighment");
+ }
+
+ this.inArray0 = new byte[alignment * 2];
+ this.inArray1 = new byte[alignment * 2];
+ this.inArray2 = new byte[alignment * 2];
+ this.outArray = new byte[alignment * 2];
+
+ this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+ this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+ this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+ this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+ this.alignment = (ulong)alignment;
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Byte, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<SByte, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+ }
+
+ public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+ public void Dispose()
+ {
+ inHandle0.Free();
+ inHandle1.Free();
+ inHandle2.Free();
+ outHandle.Free();
+ }
+
+ private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+ {
+ return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+ }
+ }
+ private struct TestStruct
+ {
+ public Vector256<Int32> _fld0;
+ public Vector256<Byte> _fld1;
+ public Vector256<SByte> _fld2;
+
+ public static TestStruct Create()
+ {
+ var testStruct = new TestStruct();
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref testStruct._fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref testStruct._fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+
+ return testStruct;
+ }
+
+ public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte testClass)
+ {
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+ testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+ }
+ }
+
+ private static readonly int LargestVectorSize = 32;
+
+ private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+ private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Byte>>() / sizeof(Byte);
+ private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<SByte>>() / sizeof(SByte);
+ private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+ private static Int32[] _data0 = new Int32[Op0ElementCount];
+ private static Byte[] _data1 = new Byte[Op1ElementCount];
+ private static SByte[] _data2 = new SByte[Op2ElementCount];
+
+ private static Vector256<Int32> _clsVar0;
+ private static Vector256<Byte> _clsVar1;
+ private static Vector256<SByte> _clsVar2;
+
+ private Vector256<Int32> _fld0;
+ private Vector256<Byte> _fld1;
+ private Vector256<SByte> _fld2;
+
+ private DataTable _dataTable;
+
+ static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte()
+ {
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref _clsVar1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref _clsVar2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+ }
+
+ public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte()
+ {
+ Succeeded = true;
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Byte>, byte>(ref _fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<SByte>, byte>(ref _fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); }
+ _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+ }
+
+ public bool IsSupported => AvxVnni.IsSupported;
+
+ public bool Succeeded { get; set; }
+
+ public void RunBasicScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr)
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr)));
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+
+ }
+
+ public void RunBasicScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+ .Invoke(null, new object[] {
+ Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr)
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+ .Invoke(null, new object[] {
+ Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Byte>), typeof(Vector256<SByte>) })
+ .Invoke(null, new object[] {
+ Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunClsVarScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ _clsVar0,
+ _clsVar1,
+ _clsVar2
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+ var first = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr);
+ var second = Unsafe.Read<Vector256<Byte>>(_dataTable.inArray1Ptr);
+ var third = Unsafe.Read<Vector256<SByte>>(_dataTable.inArray2Ptr);
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+ var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadVector256((Byte*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadVector256((SByte*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+ var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadAlignedVector256((Byte*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadAlignedVector256((SByte*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte();
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+ var test = TestStruct.Create();
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+ var test = TestStruct.Create();
+ test.RunStructFldScenario(this);
+ }
+
+ public void RunUnsupportedScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+ bool succeeded = false;
+
+ try
+ {
+ RunBasicScenario_UnsafeRead();
+ }
+ catch (PlatformNotSupportedException)
+ {
+ succeeded = true;
+ }
+
+ if (!succeeded)
+ {
+ Succeeded = false;
+ }
+ }
+
+ private void ValidateResult(Vector256<Int32> addend, Vector256<Byte> left, Vector256<SByte> right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Byte[] inArray1 = new Byte[Op1ElementCount];
+ SByte[] inArray2 = new SByte[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+ Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Byte[] inArray1 = new Byte[Op1ElementCount];
+ SByte[] inArray2 = new SByte[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Byte>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<SByte>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "")
+ {
+ bool succeeded = true;
+
+ Int32[] outArray = new Int32[RetElementCount];
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ int addend2 = right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2] + right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4];
+ int value = addend[i] + addend2;
+ int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value;
+ int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp;
+ outArray[i] = c;
+ }
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ if (result[i] != outArray[i])
+ {
+ succeeded = false;
+ break;
+ }
+ }
+
+ if (!succeeded)
+ {
+ TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}<Int32>(Vector256<Int32>, Vector256<Int32>): {method} failed:");
+ TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})");
+ TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})");
+ TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})");
+ TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})");
+ TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})");
+ TestLibrary.TestFramework.LogInformation(string.Empty);
+
+ Succeeded = false;
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.ComponentModel;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ private static void MultiplyWideningAndAddSaturateInt16()
+ {
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16();
+
+ if (test.IsSupported)
+ {
+ // Validates basic functionality works, using Unsafe.Read
+ test.RunBasicScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates basic functionality works, using Load
+ test.RunBasicScenario_Load();
+
+ // Validates basic functionality works, using LoadAligned
+ test.RunBasicScenario_LoadAligned();
+ }
+
+ else
+ {
+ Console.WriteLine("Avx Is Not Supported");
+ }
+
+ // Validates calling via reflection works, using Unsafe.Read
+ test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it.
+
+ if (Avx.IsSupported)
+ {
+ // Validates calling via reflection works, using Load
+ test.RunReflectionScenario_Load();
+
+ // Validates calling via reflection works, using LoadAligned
+ test.RunReflectionScenario_LoadAligned();
+ }
+
+ // Validates passing a static member works
+ test.RunClsVarScenario();
+
+ // Validates passing a local works, using Unsafe.Read
+ test.RunLclVarScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates passing a local works, using Load
+ test.RunLclVarScenario_Load();
+
+ // Validates passing a local works, using LoadAligned
+ test.RunLclVarScenario_LoadAligned();
+ }
+
+ // Validates passing the field of a local class works
+ test.RunClassLclFldScenario();
+
+ // Validates passing an instance member of a class works
+ test.RunClassFldScenario();
+
+ // Validates passing the field of a local struct works
+ test.RunStructLclFldScenario();
+
+ // Validates passing an instance member of a struct works
+ test.RunStructFldScenario();
+ }
+ else
+ {
+ Console.WriteLine("Test Is Not Supported");
+ // Validates we throw on unsupported hardware
+ test.RunUnsupportedScenario();
+ }
+
+ if (!test.Succeeded)
+ {
+ throw new Exception("One or more scenarios did not complete as expected.");
+ }
+ }
+ }
+
+ public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16
+ {
+ private struct DataTable
+ {
+ private byte[] inArray0;
+ private byte[] inArray1;
+ private byte[] inArray2;
+ private byte[] outArray;
+
+ private GCHandle inHandle0;
+ private GCHandle inHandle1;
+ private GCHandle inHandle2;
+ private GCHandle outHandle;
+
+ private ulong alignment;
+
+ public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment)
+ {
+ int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+ int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Int16>();
+ int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<Int16>();
+ int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+ if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)
+ {
+ throw new ArgumentException("Invalid value of alignment");
+ }
+
+ this.inArray0 = new byte[alignment * 2];
+ this.inArray1 = new byte[alignment * 2];
+ this.inArray2 = new byte[alignment * 2];
+ this.outArray = new byte[alignment * 2];
+
+ this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+ this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+ this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+ this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+ this.alignment = (ulong)alignment;
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Int16, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<Int16, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+ }
+
+ public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+ public void Dispose()
+ {
+ inHandle0.Free();
+ inHandle1.Free();
+ inHandle2.Free();
+ outHandle.Free();
+ }
+
+ private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+ {
+ return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+ }
+ }
+ private struct TestStruct
+ {
+ public Vector256<Int32> _fld0;
+ public Vector256<Int16> _fld1;
+ public Vector256<Int16> _fld2;
+
+ public static TestStruct Create()
+ {
+ var testStruct = new TestStruct();
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref testStruct._fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref testStruct._fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+
+ return testStruct;
+ }
+
+ public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 testClass)
+ {
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+ testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+ }
+ }
+
+ private static readonly int LargestVectorSize = 32;
+
+ private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+ private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector256<Int16>>() / sizeof(Int16);
+ private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector256<Int16>>() / sizeof(Int16);
+ private static readonly int RetElementCount = Unsafe.SizeOf<Vector256<Int32>>() / sizeof(Int32);
+
+ private static Int32[] _data0 = new Int32[Op0ElementCount];
+ private static Int16[] _data1 = new Int16[Op1ElementCount];
+ private static Int16[] _data2 = new Int16[Op2ElementCount];
+
+ private static Vector256<Int32> _clsVar0;
+ private static Vector256<Int16> _clsVar1;
+ private static Vector256<Int16> _clsVar2;
+
+ private Vector256<Int32> _fld0;
+ private Vector256<Int16> _fld1;
+ private Vector256<Int16> _fld2;
+
+ private DataTable _dataTable;
+
+ static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16()
+ {
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _clsVar1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _clsVar2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ }
+
+ public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16()
+ {
+ Succeeded = true;
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector256<Int16>, byte>(ref _fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); }
+ _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+ }
+
+ public bool IsSupported => AvxVnni.IsSupported;
+
+ public bool Succeeded { get; set; }
+
+ public void RunBasicScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr)
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr)));
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+
+ }
+
+ public void RunBasicScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+ .Invoke(null, new object[] {
+ Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr)
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+ .Invoke(null, new object[] {
+ Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector256<Int32>), typeof(Vector256<Int16>), typeof(Vector256<Int16>) })
+ .Invoke(null, new object[] {
+ Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector256<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunClsVarScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ _clsVar0,
+ _clsVar1,
+ _clsVar2
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+ var first = Unsafe.Read<Vector256<Int32>>(_dataTable.inArray0Ptr);
+ var second = Unsafe.Read<Vector256<Int16>>(_dataTable.inArray1Ptr);
+ var third = Unsafe.Read<Vector256<Int16>>(_dataTable.inArray2Ptr);
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+ var first= Avx.LoadVector256((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadVector256((Int16*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadVector256((Int16*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+ var first = Avx.LoadAlignedVector256((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadAlignedVector256((Int16*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16();
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+ var test = TestStruct.Create();
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+ var test = TestStruct.Create();
+ test.RunStructFldScenario(this);
+ }
+
+ public void RunUnsupportedScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+ bool succeeded = false;
+
+ try
+ {
+ RunBasicScenario_UnsafeRead();
+ }
+ catch (PlatformNotSupportedException)
+ {
+ succeeded = true;
+ }
+
+ if (!succeeded)
+ {
+ Succeeded = false;
+ }
+ }
+
+ private void ValidateResult(Vector256<Int32> addend, Vector256<Int16> left, Vector256<Int16> right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Int16[] inArray1 = new Int16[Op1ElementCount];
+ Int16[] inArray2 = new Int16[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), left);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), right);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Int16[] inArray1 = new Int16[Op1ElementCount];
+ Int16[] inArray2 = new Int16[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector256<Int16>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector256<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "")
+ {
+ bool succeeded = true;
+
+ Int32[] outArray = new Int32[RetElementCount];
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ int addend2 = right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2];
+ int value = addend[i] + addend2;
+ int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value;
+ int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue: tmp;
+ outArray[i] = c;
+ }
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ if (result[i] != outArray[i])
+ {
+ succeeded = false;
+ break;
+ }
+ }
+
+ if (!succeeded)
+ {
+ TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}<Int32>(Vector256<Int32>, Vector256<Int32>): {method} failed:");
+ TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})");
+ TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})");
+ TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})");
+ TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})");
+ TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})");
+ TestLibrary.TestFramework.LogInformation(string.Empty);
+
+ Succeeded = false;
+ }
+ }
+ }
+}
--- /dev/null
+<Project Sdk="Microsoft.NET.Sdk">
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ <!-- It takes a long time to complete (on a non-AVX machine) -->
+ <UnloadabilityIncompatible>true</UnloadabilityIncompatible>
+ <!-- https://github.com/dotnet/runtime/issues/12392 -->
+ <GCStressIncompatible>true</GCStressIncompatible>
+ </PropertyGroup>
+ <PropertyGroup>
+ <DebugType>Embedded</DebugType>
+ <Optimize />
+ </PropertyGroup>
+ <ItemGroup>
+ <Compile Include="MultiplyWideningAndAdd.Byte.cs" />
+ <Compile Include="MultiplyWideningAndAdd.Int16.cs" />
+ <Compile Include="MultiplyWideningAndAddSaturate.Byte.cs" />
+ <Compile Include="MultiplyWideningAndAddSaturate.Int16.cs" />
+ <Compile Include="Program.AvxVnni.cs" />
+ <Compile Include="..\Shared\Program.cs" />
+ </ItemGroup>
+</Project>
--- /dev/null
+<Project Sdk="Microsoft.NET.Sdk">
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ <!-- It takes a long time to complete (on a non-AVX machine) -->
+ <UnloadabilityIncompatible>true</UnloadabilityIncompatible>
+ <!-- https://github.com/dotnet/runtime/issues/12392 -->
+ <GCStressIncompatible>true</GCStressIncompatible>
+ </PropertyGroup>
+ <PropertyGroup>
+ <DebugType>Embedded</DebugType>
+ <Optimize>True</Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Compile Include="MultiplyWideningAndAdd.Byte.cs" />
+ <Compile Include="MultiplyWideningAndAdd.Int16.cs" />
+ <Compile Include="MultiplyWideningAndAddSaturate.Byte.cs" />
+ <Compile Include="MultiplyWideningAndAddSaturate.Int16.cs" />
+ <Compile Include="Program.AvxVnni.cs" />
+ <Compile Include="..\Shared\Program.cs" />
+ </ItemGroup>
+</Project>
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ static Program()
+ {
+ TestList = new Dictionary<string, Action>() {
+ ["MultiplyWideningAndAdd.Byte"] = MultiplyWideningAndAddByte,
+ ["MultiplyWideningAndAdd.Int16"] = MultiplyWideningAndAddInt16,
+ ["MultiplyWideningAndAddSaturate.Byte"] = MultiplyWideningAndAddSaturateByte,
+ ["MultiplyWideningAndAddSaturate.Int16"] = MultiplyWideningAndAddSaturateInt16,
+ };
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ private static void MultiplyWideningAndAddByte()
+ {
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte();
+
+ if (test.IsSupported)
+ {
+ // Validates basic functionality works, using Unsafe.Read
+ test.RunBasicScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates basic functionality works, using Load
+ test.RunBasicScenario_Load();
+
+ // Validates basic functionality works, using LoadAligned
+ test.RunBasicScenario_LoadAligned();
+ }
+
+ else
+ {
+ Console.WriteLine("Avx Is Not Supported");
+ }
+
+ // Validates calling via reflection works, using Unsafe.Read
+ test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it.
+
+ if (Avx.IsSupported)
+ {
+ // Validates calling via reflection works, using Load
+ test.RunReflectionScenario_Load();
+
+ // Validates calling via reflection works, using LoadAligned
+ test.RunReflectionScenario_LoadAligned();
+ }
+
+ // Validates passing a static member works
+ test.RunClsVarScenario();
+
+ // Validates passing a local works, using Unsafe.Read
+ test.RunLclVarScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates passing a local works, using Load
+ test.RunLclVarScenario_Load();
+
+ // Validates passing a local works, using LoadAligned
+ test.RunLclVarScenario_LoadAligned();
+ }
+
+ // Validates passing the field of a local class works
+ test.RunClassLclFldScenario();
+
+ // Validates passing an instance member of a class works
+ test.RunClassFldScenario();
+
+ // Validates passing the field of a local struct works
+ test.RunStructLclFldScenario();
+
+ // Validates passing an instance member of a struct works
+ test.RunStructFldScenario();
+ }
+ else
+ {
+ Console.WriteLine("Test Is Not Supported");
+ // Validates we throw on unsupported hardware
+ test.RunUnsupportedScenario();
+ }
+
+ if (!test.Succeeded)
+ {
+ throw new Exception("One or more scenarios did not complete as expected.");
+ }
+ }
+ }
+
+ public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddByte
+ {
+ private struct DataTable
+ {
+ private byte[] inArray0;
+ private byte[] inArray1;
+ private byte[] inArray2;
+ private byte[] outArray;
+
+ private GCHandle inHandle0;
+ private GCHandle inHandle1;
+ private GCHandle inHandle2;
+ private GCHandle outHandle;
+
+ private ulong alignment;
+
+ public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment)
+ {
+ int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+ int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Byte>();
+ int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<SByte>();
+ int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+ if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)
+ {
+ throw new ArgumentException("Invalid value of alighment");
+ }
+
+ this.inArray0 = new byte[alignment * 2];
+ this.inArray1 = new byte[alignment * 2];
+ this.inArray2 = new byte[alignment * 2];
+ this.outArray = new byte[alignment * 2];
+
+ this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+ this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+ this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+ this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+ this.alignment = (ulong)alignment;
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Byte, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<SByte, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+ }
+
+ public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+ public void Dispose()
+ {
+ inHandle0.Free();
+ inHandle1.Free();
+ inHandle2.Free();
+ outHandle.Free();
+ }
+
+ private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+ {
+ return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+ }
+ }
+ private struct TestStruct
+ {
+ public Vector128<Int32> _fld0;
+ public Vector128<Byte> _fld1;
+ public Vector128<SByte> _fld2;
+
+ public static TestStruct Create()
+ {
+ var testStruct = new TestStruct();
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref testStruct._fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref testStruct._fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+
+ return testStruct;
+ }
+
+ public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddByte testClass)
+ {
+ var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+ testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+ }
+ }
+
+ private static readonly int LargestVectorSize = 32;
+
+ private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+ private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Byte>>() / sizeof(Byte);
+ private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<SByte>>() / sizeof(SByte);
+ private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+ private static Int32[] _data0 = new Int32[Op0ElementCount];
+ private static Byte[] _data1 = new Byte[Op1ElementCount];
+ private static SByte[] _data2 = new SByte[Op2ElementCount];
+
+ private static Vector128<Int32> _clsVar0;
+ private static Vector128<Byte> _clsVar1;
+ private static Vector128<SByte> _clsVar2;
+
+ private Vector128<Int32> _fld0;
+ private Vector128<Byte> _fld1;
+ private Vector128<SByte> _fld2;
+
+ private DataTable _dataTable;
+
+ static SimpleTernaryOpTest__MultiplyWideningAndAddByte()
+ {
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _clsVar1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _clsVar2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+ }
+
+ public SimpleTernaryOpTest__MultiplyWideningAndAddByte()
+ {
+ Succeeded = true;
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); }
+ _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+ }
+
+ public bool IsSupported => AvxVnni.IsSupported;
+
+ public bool Succeeded { get; set; }
+
+ public void RunBasicScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr)
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+ var result1 = AvxVnni.MultiplyWideningAndAdd(
+ Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result1);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+ .Invoke(null, new object[] {
+ Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr)
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+ .Invoke(null, new object[] {
+ Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+ .Invoke(null, new object[] {
+ Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunClsVarScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ _clsVar0,
+ _clsVar1,
+ _clsVar2
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+ var first = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr);
+ var second = Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr);
+ var third = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+ var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+ var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddByte();
+ var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+ var test = TestStruct.Create();
+ var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+ var test = TestStruct.Create();
+ test.RunStructFldScenario(this);
+ }
+
+ public void RunUnsupportedScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+ bool succeeded = false;
+
+ try
+ {
+ RunBasicScenario_UnsafeRead();
+ }
+ catch (PlatformNotSupportedException)
+ {
+ succeeded = true;
+ }
+
+ if (!succeeded)
+ {
+ Succeeded = false;
+ }
+ }
+
+ private void ValidateResult(Vector128<Int32> addend, Vector128<Byte> left, Vector128<SByte> right, Vector128<Int32> result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Byte[] inArray1 = new Byte[Op1ElementCount];
+ SByte[] inArray2 = new SByte[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+ Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), result);
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+ private void ValidateResult(Vector128<Int32> addend, Vector128<Byte> left, Vector128<SByte> right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Byte[] inArray1 = new Byte[Op1ElementCount];
+ SByte[] inArray2 = new SByte[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+ Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Byte[] inArray1 = new Byte[Op1ElementCount];
+ SByte[] inArray2 = new SByte[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "")
+ {
+ bool succeeded = true;
+
+ Int32[] outArray = new Int32[RetElementCount];
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ outArray[i] = Math.Clamp((addend[i] + (right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2])
+ + (right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4])), int.MinValue, int.MaxValue);
+ }
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ if (result[i] != outArray[i])
+ {
+ succeeded = false;
+ break;
+ }
+ }
+
+ if (!succeeded)
+ {
+ TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}<Int32>(Vector128<Int32>, Vector128<Int32>): {method} failed:");
+ TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})");
+ TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})");
+ TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})");
+ TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})");
+ TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})");
+ TestLibrary.TestFramework.LogInformation(string.Empty);
+
+ Succeeded = false;
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ private static void MultiplyWideningAndAddInt16()
+ {
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16();
+
+ if (test.IsSupported)
+ {
+ // Validates basic functionality works, using Unsafe.Read
+ test.RunBasicScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates basic functionality works, using Load
+ test.RunBasicScenario_Load();
+
+ // Validates basic functionality works, using LoadAligned
+ test.RunBasicScenario_LoadAligned();
+ }
+
+ else
+ {
+ Console.WriteLine("Avx Is Not Supported");
+ }
+
+ // Validates calling via reflection works, using Unsafe.Read
+ test.RunReflectionScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates calling via reflection works, using Load
+ test.RunReflectionScenario_Load();
+
+ // Validates calling via reflection works, using LoadAligned
+ test.RunReflectionScenario_LoadAligned();
+ }
+
+ // Validates passing a static member works
+ test.RunClsVarScenario();
+
+ // Validates passing a local works, using Unsafe.Read
+ test.RunLclVarScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates passing a local works, using Load
+ test.RunLclVarScenario_Load();
+
+ // Validates passing a local works, using LoadAligned
+ test.RunLclVarScenario_LoadAligned();
+ }
+
+ // Validates passing the field of a local class works
+ test.RunClassLclFldScenario();
+
+ // Validates passing an instance member of a class works
+ test.RunClassFldScenario();
+
+ // Validates passing the field of a local struct works
+ test.RunStructLclFldScenario();
+
+ // Validates passing an instance member of a struct works
+ test.RunStructFldScenario();
+ }
+ else
+ {
+ Console.WriteLine("Test Is Not Supported");
+ // Validates we throw on unsupported hardware
+ test.RunUnsupportedScenario();
+ }
+
+ if (!test.Succeeded)
+ {
+ throw new Exception("One or more scenarios did not complete as expected.");
+ }
+ }
+ }
+
+ public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddInt16
+ {
+ private struct DataTable
+ {
+ private byte[] inArray0;
+ private byte[] inArray1;
+ private byte[] inArray2;
+ private byte[] outArray;
+
+ private GCHandle inHandle0;
+ private GCHandle inHandle1;
+ private GCHandle inHandle2;
+ private GCHandle outHandle;
+
+ private ulong alignment;
+
+ public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment)
+ {
+ int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+ int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Int16>();
+ int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<Int16>();
+ int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+ if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)
+ {
+ throw new ArgumentException("Invalid value of alighment");
+ }
+
+ this.inArray0 = new byte[alignment * 2];
+ this.inArray1 = new byte[alignment * 2];
+ this.inArray2 = new byte[alignment * 2];
+ this.outArray = new byte[alignment * 2];
+
+ this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+ this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+ this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+ this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+ this.alignment = (ulong)alignment;
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Int16, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<Int16, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+ }
+
+ public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+ public void Dispose()
+ {
+ inHandle0.Free();
+ inHandle1.Free();
+ inHandle2.Free();
+ outHandle.Free();
+ }
+
+ private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+ {
+ return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+ }
+ }
+ private struct TestStruct
+ {
+ public Vector128<Int32> _fld0;
+ public Vector128<Int16> _fld1;
+ public Vector128<Int16> _fld2;
+
+ public static TestStruct Create()
+ {
+ var testStruct = new TestStruct();
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref testStruct._fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref testStruct._fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+
+ return testStruct;
+ }
+
+ public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddInt16 testClass)
+ {
+ var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+ testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+ }
+ }
+
+ private static readonly int LargestVectorSize = 32;
+
+ private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+ private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Int16>>() / sizeof(Int16);
+ private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Int16>>() / sizeof(Int16);
+ private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+ private static Int32[] _data0 = new Int32[Op0ElementCount];
+ private static Int16[] _data1 = new Int16[Op1ElementCount];
+ private static Int16[] _data2 = new Int16[Op2ElementCount];
+
+ private static Vector128<Int32> _clsVar0;
+ private static Vector128<Int16> _clsVar1;
+ private static Vector128<Int16> _clsVar2;
+
+ private Vector128<Int32> _fld0;
+ private Vector128<Int16> _fld1;
+ private Vector128<Int16> _fld2;
+
+ private DataTable _dataTable;
+
+ static SimpleTernaryOpTest__MultiplyWideningAndAddInt16()
+ {
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _clsVar1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _clsVar2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ }
+
+ public SimpleTernaryOpTest__MultiplyWideningAndAddInt16()
+ {
+ Succeeded = true;
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); }
+ _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+ }
+
+ public bool IsSupported => AvxVnni.IsSupported;
+
+ public bool Succeeded { get; set; }
+
+ public void RunBasicScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr)
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr))
+ );
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+
+ }
+
+ public void RunBasicScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+ .Invoke(null, new object[] {
+ Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr)
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+ .Invoke(null, new object[] {
+ Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAdd), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+ .Invoke(null, new object[] {
+ Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunClsVarScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(
+ _clsVar0,
+ _clsVar1,
+ _clsVar2
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+ var first = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr);
+ var second = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr);
+ var third = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr);
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+ var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+ var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAdd(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddInt16();
+ var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAdd(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+ var test = TestStruct.Create();
+ var result = AvxVnni.MultiplyWideningAndAdd(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+ var test = TestStruct.Create();
+ test.RunStructFldScenario(this);
+ }
+
+ public void RunUnsupportedScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+ bool succeeded = false;
+
+ try
+ {
+ RunBasicScenario_UnsafeRead();
+ }
+ catch (PlatformNotSupportedException)
+ {
+ succeeded = true;
+ }
+
+ if (!succeeded)
+ {
+ Succeeded = false;
+ }
+ }
+
+ private void ValidateResult(Vector128<Int32> addend, Vector128<Int16> left, Vector128<Int16> right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Int16[] inArray1 = new Int16[Op1ElementCount];
+ Int16[] inArray2 = new Int16[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), left);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), right);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Int16[] inArray1 = new Int16[Op1ElementCount];
+ Int16[] inArray2 = new Int16[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "")
+ {
+ bool succeeded = true;
+
+ Int32[] outArray = new Int32[RetElementCount];
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ outArray[i] = Math.Clamp((addend[i] + (right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2])), int.MinValue, int.MaxValue);
+ }
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ if (result[i] != outArray[i])
+ {
+ succeeded = false;
+ break;
+ }
+ }
+
+ if (!succeeded)
+ {
+ TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAdd)}<Int32>(Vector128<Int32>, Vector128<Int32>): {method} failed:");
+ TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})");
+ TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})");
+ TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})");
+ TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})");
+ TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})");
+ TestLibrary.TestFramework.LogInformation(string.Empty);
+
+ Succeeded = false;
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ private static void MultiplyWideningAndAddSaturateByte()
+ {
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte();
+
+ if (test.IsSupported)
+ {
+ // Validates basic functionality works, using Unsafe.Read
+ test.RunBasicScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates basic functionality works, using Load
+ test.RunBasicScenario_Load();
+
+ // Validates basic functionality works, using LoadAligned
+ test.RunBasicScenario_LoadAligned();
+ }
+
+ else
+ {
+ Console.WriteLine("Avx Is Not Supported");
+ }
+
+ // Validates calling via reflection works, using Unsafe.Read
+ test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it.
+
+ if (Avx.IsSupported)
+ {
+ // Validates calling via reflection works, using Load
+ test.RunReflectionScenario_Load();
+
+ // Validates calling via reflection works, using LoadAligned
+ test.RunReflectionScenario_LoadAligned();
+ }
+
+ // Validates passing a static member works
+ test.RunClsVarScenario();
+
+ // Validates passing a local works, using Unsafe.Read
+ test.RunLclVarScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates passing a local works, using Load
+ test.RunLclVarScenario_Load();
+
+ // Validates passing a local works, using LoadAligned
+ test.RunLclVarScenario_LoadAligned();
+ }
+
+ // Validates passing the field of a local class works
+ test.RunClassLclFldScenario();
+
+ // Validates passing an instance member of a class works
+ test.RunClassFldScenario();
+
+ // Validates passing the field of a local struct works
+ test.RunStructLclFldScenario();
+
+ // Validates passing an instance member of a struct works
+ test.RunStructFldScenario();
+ }
+ else
+ {
+ Console.WriteLine("Test Is Not Supported");
+ // Validates we throw on unsupported hardware
+ test.RunUnsupportedScenario();
+ }
+
+ if (!test.Succeeded)
+ {
+ throw new Exception("One or more scenarios did not complete as expected.");
+ }
+ }
+ }
+
+ public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte
+ {
+ private struct DataTable
+ {
+ private byte[] inArray0;
+ private byte[] inArray1;
+ private byte[] inArray2;
+ private byte[] outArray;
+
+ private GCHandle inHandle0;
+ private GCHandle inHandle1;
+ private GCHandle inHandle2;
+ private GCHandle outHandle;
+
+ private ulong alignment;
+
+ public DataTable(Int32[] inArray0, Byte[] inArray1, SByte[] inArray2, Int32[] outArray, int alignment)
+ {
+ int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+ int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Byte>();
+ int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<SByte>();
+ int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+ if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)
+ {
+ throw new ArgumentException("Invalid value of alighment");
+ }
+
+ this.inArray0 = new byte[alignment * 2];
+ this.inArray1 = new byte[alignment * 2];
+ this.inArray2 = new byte[alignment * 2];
+ this.outArray = new byte[alignment * 2];
+
+ this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+ this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+ this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+ this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+ this.alignment = (ulong)alignment;
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Byte, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<SByte, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+ }
+
+ public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+ public void Dispose()
+ {
+ inHandle0.Free();
+ inHandle1.Free();
+ inHandle2.Free();
+ outHandle.Free();
+ }
+
+ private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+ {
+ return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+ }
+ }
+ private struct TestStruct
+ {
+ public Vector128<Int32> _fld0;
+ public Vector128<Byte> _fld1;
+ public Vector128<SByte> _fld2;
+
+ public static TestStruct Create()
+ {
+ var testStruct = new TestStruct();
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref testStruct._fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref testStruct._fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+
+ return testStruct;
+ }
+
+ public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte testClass)
+ {
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+ testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+ }
+ }
+
+ private static readonly int LargestVectorSize = 32;
+
+ private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+ private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Byte>>() / sizeof(Byte);
+ private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<SByte>>() / sizeof(SByte);
+ private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+ private static Int32[] _data0 = new Int32[Op0ElementCount];
+ private static Byte[] _data1 = new Byte[Op1ElementCount];
+ private static SByte[] _data2 = new SByte[Op2ElementCount];
+
+ private static Vector128<Int32> _clsVar0;
+ private static Vector128<Byte> _clsVar1;
+ private static Vector128<SByte> _clsVar2;
+
+ private Vector128<Int32> _fld0;
+ private Vector128<Byte> _fld1;
+ private Vector128<SByte> _fld2;
+
+ private DataTable _dataTable;
+
+ static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte()
+ {
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _clsVar1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _clsVar2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+ }
+
+ public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte()
+ {
+ Succeeded = true;
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Byte>, byte>(ref _fld1), ref Unsafe.As<Byte, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetSByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<SByte>, byte>(ref _fld2), ref Unsafe.As<SByte, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetSByte(); }
+ _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+ }
+
+ public bool IsSupported => AvxVnni.IsSupported;
+
+ public bool Succeeded { get; set; }
+
+ public void RunBasicScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr)
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+ .Invoke(null, new object[] {
+ Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr)
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+ .Invoke(null, new object[] {
+ Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Byte>), typeof(Vector128<SByte>) })
+ .Invoke(null, new object[] {
+ Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunClsVarScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ _clsVar0,
+ _clsVar1,
+ _clsVar2
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+ var first = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr);
+ var second = Unsafe.Read<Vector128<Byte>>(_dataTable.inArray1Ptr);
+ var third = Unsafe.Read<Vector128<SByte>>(_dataTable.inArray2Ptr);
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+ var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadVector128((Byte*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadVector128((SByte*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+ var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadAlignedVector128((Byte*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadAlignedVector128((SByte*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateByte();
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+ var test = TestStruct.Create();
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+ var test = TestStruct.Create();
+ test.RunStructFldScenario(this);
+ }
+
+ public void RunUnsupportedScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+ bool succeeded = false;
+
+ try
+ {
+ RunBasicScenario_UnsafeRead();
+ }
+ catch (PlatformNotSupportedException)
+ {
+ succeeded = true;
+ }
+
+ if (!succeeded)
+ {
+ Succeeded = false;
+ }
+ }
+
+ private void ValidateResult(Vector128<Int32> addend, Vector128<Byte> left, Vector128<SByte> right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Byte[] inArray1 = new Byte[Op1ElementCount];
+ SByte[] inArray2 = new SByte[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), left);
+ Unsafe.WriteUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), right);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Byte[] inArray1 = new Byte[Op1ElementCount];
+ SByte[] inArray2 = new SByte[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Byte, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Byte>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<SByte, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<SByte>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(Int32[] addend, Byte[] left, SByte[] right, Int32[] result, [CallerMemberName] string method = "")
+ {
+ bool succeeded = true;
+
+ Int32[] outArray = new Int32[RetElementCount];
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ int addend2 = right[i * 4 + 3] * left[i * 4 + 3] + right[i * 4 + 2] * left[i * 4 + 2] + right[i * 4 + 1] * left[i * 4 + 1] + right[i * 4] * left[i * 4];
+ int value = addend[i] + addend2;
+ int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value;
+ int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp;
+ outArray[i] = c;
+ }
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ if (result[i] != outArray[i])
+ {
+ succeeded = false;
+ break;
+ }
+ }
+
+ if (!succeeded)
+ {
+ TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}<Int32>(Vector128<Int32>, Vector128<Int32>): {method} failed:");
+ TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})");
+ TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})");
+ TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})");
+ TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})");
+ TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})");
+ TestLibrary.TestFramework.LogInformation(string.Empty);
+
+ Succeeded = false;
+ }
+ }
+ }
+}
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Runtime.CompilerServices;
+using System.Runtime.InteropServices;
+using System.Runtime.Intrinsics;
+using System.Runtime.Intrinsics.X86;
+using System.Text.RegularExpressions;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ private static void MultiplyWideningAndAddSaturateInt16()
+ {
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16();
+
+ if (test.IsSupported)
+ {
+ // Validates basic functionality works, using Unsafe.Read
+ test.RunBasicScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates basic functionality works, using Load
+ test.RunBasicScenario_Load();
+
+ // Validates basic functionality works, using LoadAligned
+ test.RunBasicScenario_LoadAligned();
+ }
+
+ else
+ {
+ Console.WriteLine("Avx Is Not Supported");
+ }
+
+ // Validates calling via reflection works, using Unsafe.Read
+ test.RunReflectionScenario_UnsafeRead(); //TODO: this one does not work. Fix it.
+
+ if (Avx.IsSupported)
+ {
+ // Validates calling via reflection works, using Load
+ test.RunReflectionScenario_Load();
+
+ // Validates calling via reflection works, using LoadAligned
+ test.RunReflectionScenario_LoadAligned();
+ }
+
+ // Validates passing a static member works
+ test.RunClsVarScenario();
+
+ // Validates passing a local works, using Unsafe.Read
+ test.RunLclVarScenario_UnsafeRead();
+
+ if (Avx.IsSupported)
+ {
+ // Validates passing a local works, using Load
+ test.RunLclVarScenario_Load();
+
+ // Validates passing a local works, using LoadAligned
+ test.RunLclVarScenario_LoadAligned();
+ }
+
+ // Validates passing the field of a local class works
+ test.RunClassLclFldScenario();
+
+ // Validates passing an instance member of a class works
+ test.RunClassFldScenario();
+
+ // Validates passing the field of a local struct works
+ test.RunStructLclFldScenario();
+
+ // Validates passing an instance member of a struct works
+ test.RunStructFldScenario();
+ }
+ else
+ {
+ Console.WriteLine("Test Is Not Supported");
+ // Validates we throw on unsupported hardware
+ test.RunUnsupportedScenario();
+ }
+
+ if (!test.Succeeded)
+ {
+ throw new Exception("One or more scenarios did not complete as expected.");
+ }
+ }
+ }
+
+ public sealed unsafe class SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16
+ {
+ private struct DataTable
+ {
+ private byte[] inArray0;
+ private byte[] inArray1;
+ private byte[] inArray2;
+ private byte[] outArray;
+
+ private GCHandle inHandle0;
+ private GCHandle inHandle1;
+ private GCHandle inHandle2;
+ private GCHandle outHandle;
+
+ private ulong alignment;
+
+ public DataTable(Int32[] inArray0, Int16[] inArray1, Int16[] inArray2, Int32[] outArray, int alignment)
+ {
+ int sizeOfinArray0 = inArray0.Length * Unsafe.SizeOf<Int32>();
+ int sizeOfinArray1 = inArray1.Length * Unsafe.SizeOf<Int16>();
+ int sizeOfinArray2 = inArray2.Length * Unsafe.SizeOf<Int16>();
+ int sizeOfoutArray = outArray.Length * Unsafe.SizeOf<Int32>();
+
+ if((alignment != 32 && alignment != 16) || (alignment *2) < sizeOfinArray0 || (alignment * 2) < sizeOfinArray1 || (alignment * 2) < sizeOfinArray2 || (alignment * 2) < sizeOfoutArray)
+ {
+ throw new ArgumentException("Invalid value of alighment");
+ }
+
+ this.inArray0 = new byte[alignment * 2];
+ this.inArray1 = new byte[alignment * 2];
+ this.inArray2 = new byte[alignment * 2];
+ this.outArray = new byte[alignment * 2];
+
+ this.inHandle0 = GCHandle.Alloc(this.inArray0, GCHandleType.Pinned);
+ this.inHandle1 = GCHandle.Alloc(this.inArray1, GCHandleType.Pinned);
+ this.inHandle2 = GCHandle.Alloc(this.inArray2, GCHandleType.Pinned);
+ this.outHandle = GCHandle.Alloc(this.outArray, GCHandleType.Pinned);
+
+ this.alignment = (ulong)alignment;
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray0Ptr), ref Unsafe.As<Int32, byte>(ref inArray0[0]), (uint)sizeOfinArray0);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray1Ptr), ref Unsafe.As<Int16, byte>(ref inArray1[0]), (uint)sizeOfinArray1);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.AsRef<byte>(inArray2Ptr), ref Unsafe.As<Int16, byte>(ref inArray2[0]), (uint)sizeOfinArray2);
+ }
+
+ public void* inArray0Ptr => Align((byte*)(inHandle0.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray1Ptr => Align((byte*)(inHandle1.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* inArray2Ptr => Align((byte*)(inHandle2.AddrOfPinnedObject().ToPointer()), alignment);
+ public void* outArrayPtr => Align((byte*)(outHandle.AddrOfPinnedObject().ToPointer()), alignment);
+
+ public void Dispose()
+ {
+ inHandle0.Free();
+ inHandle1.Free();
+ inHandle2.Free();
+ outHandle.Free();
+ }
+
+ private static unsafe void* Align(byte* buffer, ulong expectedAlighment)
+ {
+ return (void*)(((ulong)buffer + expectedAlighment -1) & ~(expectedAlighment - 1));
+ }
+ }
+ private struct TestStruct
+ {
+ public Vector128<Int32> _fld0;
+ public Vector128<Int16> _fld1;
+ public Vector128<Int16> _fld2;
+
+ public static TestStruct Create()
+ {
+ var testStruct = new TestStruct();
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref testStruct._fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetByte(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref testStruct._fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref testStruct._fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+
+ return testStruct;
+ }
+
+ public void RunStructFldScenario(SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16 testClass)
+ {
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(testClass._dataTable.outArrayPtr, result);
+ testClass.ValidateResult(_fld0, _fld1, _fld2, testClass._dataTable.outArrayPtr);
+ }
+ }
+
+ private static readonly int LargestVectorSize = 32;
+
+ private static readonly int Op0ElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+ private static readonly int Op1ElementCount = Unsafe.SizeOf<Vector128<Int16>>() / sizeof(Int16);
+ private static readonly int Op2ElementCount = Unsafe.SizeOf<Vector128<Int16>>() / sizeof(Int16);
+ private static readonly int RetElementCount = Unsafe.SizeOf<Vector128<Int32>>() / sizeof(Int32);
+
+ private static Int32[] _data0 = new Int32[Op0ElementCount];
+ private static Int16[] _data1 = new Int16[Op1ElementCount];
+ private static Int16[] _data2 = new Int16[Op2ElementCount];
+
+ private static Vector128<Int32> _clsVar0;
+ private static Vector128<Int16> _clsVar1;
+ private static Vector128<Int16> _clsVar2;
+
+ private Vector128<Int32> _fld0;
+ private Vector128<Int16> _fld1;
+ private Vector128<Int16> _fld2;
+
+ private DataTable _dataTable;
+
+ static SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16()
+ {
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _clsVar0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _clsVar1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _clsVar2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ }
+
+ public SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16()
+ {
+ Succeeded = true;
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int32>, byte>(ref _fld0), ref Unsafe.As<Int32, byte>(ref _data0[0]), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _fld1), ref Unsafe.As<Int16, byte>(ref _data1[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = (sbyte)TestLibrary.Generator.GetInt16(); }
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Vector128<Int16>, byte>(ref _fld2), ref Unsafe.As<Int16, byte>(ref _data2[0]), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+
+ for (var i = 0; i < Op0ElementCount; i++) { _data0[i] = TestLibrary.Generator.GetInt32(); }
+ for (var i = 0; i < Op1ElementCount; i++) { _data1[i] = TestLibrary.Generator.GetInt16(); }
+ for (var i = 0; i < Op2ElementCount; i++) { _data2[i] = TestLibrary.Generator.GetInt16(); }
+ _dataTable = new DataTable(_data0, _data1, _data2, new Int32[RetElementCount], LargestVectorSize);
+ }
+
+ public bool IsSupported => AvxVnni.IsSupported;
+
+ public bool Succeeded { get; set; }
+
+ public void RunBasicScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_UnsafeRead));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr)
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunBasicScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_Load));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr))
+ );
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+
+ }
+
+ public void RunBasicScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunBasicScenario_LoadAligned));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr))
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_UnsafeRead));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+ .Invoke(null, new object[] {
+ Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr),
+ Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr),
+ Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr)
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_Load));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+ .Invoke(null, new object[] {
+ Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunReflectionScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunReflectionScenario_LoadAligned));
+
+ var result = typeof(AvxVnni).GetMethod(nameof(AvxVnni.MultiplyWideningAndAddSaturate), new Type[] { typeof(Vector128<Int32>), typeof(Vector128<Int16>), typeof(Vector128<Int16>) })
+ .Invoke(null, new object[] {
+ Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr)),
+ Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr)),
+ Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr))
+ });
+
+ Unsafe.Write(_dataTable.outArrayPtr, (Vector128<Int32>)(result));
+ ValidateResult(_dataTable.inArray0Ptr, _dataTable.inArray1Ptr, _dataTable.inArray2Ptr, _dataTable.outArrayPtr);
+ }
+
+ public void RunClsVarScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClsVarScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(
+ _clsVar0,
+ _clsVar1,
+ _clsVar2
+ );
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_clsVar0, _clsVar1, _clsVar2, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_UnsafeRead()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_UnsafeRead));
+
+ var first = Unsafe.Read<Vector128<Int32>>(_dataTable.inArray0Ptr);
+ var second = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray1Ptr);
+ var third = Unsafe.Read<Vector128<Int16>>(_dataTable.inArray2Ptr);
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_Load()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_Load));
+
+ var first= Avx.LoadVector128((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadVector128((Int16*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadVector128((Int16*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunLclVarScenario_LoadAligned()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunLclVarScenario_LoadAligned));
+
+ var first = Avx.LoadAlignedVector128((Int32*)(_dataTable.inArray0Ptr));
+ var second = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray1Ptr));
+ var third = Avx.LoadAlignedVector128((Int16*)(_dataTable.inArray2Ptr));
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(first, second, third);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(first, second, third, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassLclFldScenario));
+
+ var test = new SimpleTernaryOpTest__MultiplyWideningAndAddSaturateInt16();
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunClassFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunClassFldScenario));
+
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(_fld0, _fld1, _fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(_fld0, _fld1, _fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructLclFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructLclFldScenario));
+
+ var test = TestStruct.Create();
+ var result = AvxVnni.MultiplyWideningAndAddSaturate(test._fld0, test._fld1, test._fld2);
+
+ Unsafe.Write(_dataTable.outArrayPtr, result);
+ ValidateResult(test._fld0, test._fld1, test._fld2, _dataTable.outArrayPtr);
+ }
+
+ public void RunStructFldScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunStructFldScenario));
+
+ var test = TestStruct.Create();
+ test.RunStructFldScenario(this);
+ }
+
+ public void RunUnsupportedScenario()
+ {
+ TestLibrary.TestFramework.BeginScenario(nameof(RunUnsupportedScenario));
+
+ bool succeeded = false;
+
+ try
+ {
+ RunBasicScenario_UnsafeRead();
+ }
+ catch (PlatformNotSupportedException)
+ {
+ succeeded = true;
+ }
+
+ if (!succeeded)
+ {
+ Succeeded = false;
+ }
+ }
+
+ private void ValidateResult(Vector128<Int32> addend, Vector128<Int16> left, Vector128<Int16> right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Int16[] inArray1 = new Int16[Op1ElementCount];
+ Int16[] inArray2 = new Int16[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), addend);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), left);
+ Unsafe.WriteUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), right);
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(void* addend, void* left, void* right, void* result, [CallerMemberName] string method = "")
+ {
+ Int32[] inArray0 = new Int32[Op0ElementCount];
+ Int16[] inArray1 = new Int16[Op1ElementCount];
+ Int16[] inArray2 = new Int16[Op2ElementCount];
+ Int32[] outArray = new Int32[RetElementCount];
+
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref inArray0[0]), ref Unsafe.AsRef<byte>(addend), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray1[0]), ref Unsafe.AsRef<byte>(left), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int16, byte>(ref inArray2[0]), ref Unsafe.AsRef<byte>(right), (uint)Unsafe.SizeOf<Vector128<Int16>>());
+ Unsafe.CopyBlockUnaligned(ref Unsafe.As<Int32, byte>(ref outArray[0]), ref Unsafe.AsRef<byte>(result), (uint)Unsafe.SizeOf<Vector128<Int32>>());
+
+ ValidateResult(inArray0, inArray1, inArray2, outArray, method);
+ }
+
+ private void ValidateResult(Int32[] addend, Int16[] left, Int16[] right, Int32[] result, [CallerMemberName] string method = "")
+ {
+ bool succeeded = true;
+
+ Int32[] outArray = new Int32[RetElementCount];
+
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ int addend2 = right[i * 2 + 1] * left[i * 2 + 1] + right[i * 2] * left[i * 2];
+ int value = addend[i] + addend2;
+ int tmp = (value & ~(addend2 | addend[i])) < 0 ? int.MaxValue : value;
+ int c = (~value & (addend2 & addend[i])) < 0 ? int.MinValue : tmp;
+ outArray[i] = c;
+ }
+ for (var i = 0; i < RetElementCount; i++)
+ {
+ if (result[i] != outArray[i])
+ {
+ succeeded = false;
+ break;
+ }
+ }
+
+ if (!succeeded)
+ {
+ TestLibrary.TestFramework.LogInformation($"{nameof(AvxVnni)}.{nameof(AvxVnni.MultiplyWideningAndAddSaturate)}<Int32>(Vector128<Int32>, Vector128<Int32>): {method} failed:");
+ TestLibrary.TestFramework.LogInformation($" addend: ({string.Join(", ", addend)})");
+ TestLibrary.TestFramework.LogInformation($" left: ({string.Join(", ", left)})");
+ TestLibrary.TestFramework.LogInformation($" right: ({string.Join(", ", right)})");
+ TestLibrary.TestFramework.LogInformation($" result: ({string.Join(", ", result)})");
+ TestLibrary.TestFramework.LogInformation($" valid: ({string.Join(", ", outArray)})");
+ TestLibrary.TestFramework.LogInformation(string.Empty);
+
+ Succeeded = false;
+ }
+ }
+ }
+}
--- /dev/null
+<Project Sdk="Microsoft.NET.Sdk">
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ <!-- It takes a long time to complete (on a non-AVX machine) -->
+ <UnloadabilityIncompatible>true</UnloadabilityIncompatible>
+ <!-- https://github.com/dotnet/runtime/issues/12392 -->
+ <GCStressIncompatible>true</GCStressIncompatible>
+ </PropertyGroup>
+ <PropertyGroup>
+ <DebugType>Embedded</DebugType>
+ <Optimize />
+ </PropertyGroup>
+ <ItemGroup>
+ <Compile Include="MultiplyWideningAndAdd.Byte.cs" />
+ <Compile Include="MultiplyWideningAndAdd.Int16.cs" />
+ <Compile Include="MultiplyWideningAndAddSaturate.Byte.cs" />
+ <Compile Include="MultiplyWideningAndAddSaturate.Int16.cs" />
+ <Compile Include="Program.AvxVnni_Vector128.cs" />
+ <Compile Include="..\Shared\Program.cs" />
+ </ItemGroup>
+</Project>
--- /dev/null
+<Project Sdk="Microsoft.NET.Sdk">
+ <PropertyGroup>
+ <OutputType>Exe</OutputType>
+ <AllowUnsafeBlocks>true</AllowUnsafeBlocks>
+ <!-- It takes a long time to complete (on a non-AVX machine) -->
+ <UnloadabilityIncompatible>true</UnloadabilityIncompatible>
+ <!-- https://github.com/dotnet/runtime/issues/12392 -->
+ <GCStressIncompatible>true</GCStressIncompatible>
+ </PropertyGroup>
+ <PropertyGroup>
+ <DebugType>Embedded</DebugType>
+ <Optimize>True</Optimize>
+ </PropertyGroup>
+ <ItemGroup>
+ <Compile Include="MultiplyWideningAndAdd.Byte.cs" />
+ <Compile Include="MultiplyWideningAndAdd.Int16.cs" />
+ <Compile Include="MultiplyWideningAndAddSaturate.Byte.cs" />
+ <Compile Include="MultiplyWideningAndAddSaturate.Int16.cs" />
+ <Compile Include="Program.AvxVnni_Vector128.cs" />
+ <Compile Include="..\Shared\Program.cs" />
+ </ItemGroup>
+</Project>
--- /dev/null
+// Licensed to the .NET Foundation under one or more agreements.
+// The .NET Foundation licenses this file to you under the MIT license.
+
+using System;
+using System.Collections.Generic;
+
+namespace JIT.HardwareIntrinsics.X86
+{
+ public static partial class Program
+ {
+ static Program()
+ {
+ TestList = new Dictionary<string, Action>() {
+ ["MultiplyWideningAndAdd.Byte"] = MultiplyWideningAndAddByte,
+ ["MultiplyWideningAndAdd.Int16"] = MultiplyWideningAndAddInt16,
+ ["MultiplyWideningAndAddSaturate.Byte"] = MultiplyWideningAndAddSaturateByte,
+ ["MultiplyWideningAndAddSaturate.Int16"] = MultiplyWideningAndAddSaturateInt16,
+ };
+ }
+ }
+}
TestLibrary.TestFramework.LogInformation($" AES: {Aes.IsSupported}");
TestLibrary.TestFramework.LogInformation($" AVX: {Avx.IsSupported}");
TestLibrary.TestFramework.LogInformation($" AVX2: {Avx2.IsSupported}");
+ TestLibrary.TestFramework.LogInformation($" AVXVNNI: {AvxVnni.IsSupported}");
TestLibrary.TestFramework.LogInformation($" BMI1: {Bmi1.IsSupported}");
TestLibrary.TestFramework.LogInformation($" BMI2: {Bmi2.IsSupported}");
TestLibrary.TestFramework.LogInformation($" FMA: {Fma.IsSupported}");
<ExcludeList Include="$(XunitTestBinBase)/JIT/opt/Devirtualization/Comparer_get_Default/*">
<Issue>https://github.com/dotnet/runtime/issues/48190</Issue>
</ExcludeList>
+ <ExcludeList Include="$(XunitTestBinBase)/JIT/HardwareIntrinsics/X86/AvxVnni/**">
+ <Issue>Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078</Issue>
+ </ExcludeList>
+ <ExcludeList Include="$(XunitTestBinBase)/JIT/HardwareIntrinsics/X86/AvxVnni_Vector128/**">
+ <Issue>Mono crashes when new unsupported intrinsic groups are added, https://github.com/dotnet/runtime/issues/53078</Issue>
+ </ExcludeList>
<ExcludeList Include="$(XunitTestBinBase)/JIT/Directed/DynamicPgo/**">
<Issue>Mono doesn't have a dynamic pgo or tiered compilation infrastructure</Issue>
</ExcludeList>