regNumber targetReg = node->gtRegNum;
var_types targetType = node->TypeGet();
var_types baseType = node->gtSIMDBaseType;
- instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
+ instruction ins = INS_invalid;
regNumber op1Reg = REG_NA;
regNumber op2Reg = REG_NA;
emitter* emit = getEmitter();
assert(op2 != nullptr);
assert(baseType == TYP_DOUBLE);
+ ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
op2Reg = op2->gtRegNum;
ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
emit->emitIns_SIMD_R_R_R_I(ins, emitTypeSize(TYP_SIMD16), targetReg, op1Reg, op2Reg, ival);
break;
}
+ case NI_SSE2_LoadFence:
+ {
+ assert(baseType == TYP_VOID);
+ assert(op1 == nullptr);
+ assert(op2 == nullptr);
+ emit->emitIns(INS_lfence);
+ break;
+ }
+ case NI_SSE2_MemoryFence:
+ {
+ assert(baseType == TYP_VOID);
+ assert(op1 == nullptr);
+ assert(op2 == nullptr);
+ emit->emitIns(INS_mfence);
+ break;
+ }
+
case NI_SSE2_MoveMask:
{
assert(op2 == nullptr);
assert(baseType == TYP_BYTE || baseType == TYP_UBYTE || baseType == TYP_DOUBLE);
+ ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
emit->emitIns_R_R(ins, emitTypeSize(TYP_INT), targetReg, op1Reg);
break;
}
assert(op1 == nullptr);
assert(op2 == nullptr);
+ ins = Compiler::insOfHWIntrinsic(intrinsicID, baseType);
emit->emitIns_SIMD_R_R_R(ins, emitTypeSize(TYP_SIMD16), targetReg, targetReg, targetReg);
break;
}
HARDWARE_INTRINSIC(SSE2_ConvertToVector128Single, "ConvertToVector128Single", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromArg)
HARDWARE_INTRINSIC(SSE2_Divide, "Divide", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_LoadAlignedVector128, "LoadAlignedVector128", SSE2, -1, 16, 1, {INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_movdqa, INS_invalid, INS_movapd}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(SSE2_LoadFence, "LoadFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_LoadScalarVector128, "LoadScalarVector128", SSE2, -1, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_movd, INS_movq, INS_movq, INS_invalid, INS_movsdsse2}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_LoadVector128, "LoadVector128", SSE2, -1, 16, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_invalid, INS_movupd}, HW_Category_MemoryLoad, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_Max, "Max", SSE2, -1, 16, 2, {INS_invalid, INS_pmaxub, INS_pmaxsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_maxpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
+HARDWARE_INTRINSIC(SSE2_MemoryFence, "MemoryFence", SSE2, -1, 0, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Special, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_Min, "Min", SSE2, -1, 16, 2, {INS_invalid, INS_pminub, INS_pminsw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_minpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative)
HARDWARE_INTRINSIC(SSE2_MoveMask, "MoveMask", SSE2, -1, 16, 1, {INS_pmovmskb, INS_pmovmskb, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movmskpd}, HW_Category_Special, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2_Multiply, "Multiply", SSE2, -1, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_pmuludq, INS_invalid, INS_invalid, INS_invalid, INS_mulpd}, HW_Category_SimpleSIMD, HW_Flag_Commutative|HW_Flag_BaseTypeFromArg)
INST3( punpckhdq, "punpckhdq" , 0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, PCKDBL(0x6A))
+INST3( lfence, "lfence" , 0, IUM_RD, 0, 0, 0x000FE8AE, BAD_CODE, BAD_CODE)
+INST3( mfence, "mfence" , 0, IUM_RD, 0, 0, 0x000FF0AE, BAD_CODE, BAD_CODE)
INST3( prefetchnta, "prefetchnta" , 0, IUM_RD, 0, 0, 0x000F0018, BAD_CODE, BAD_CODE)
INST3( prefetcht0, "prefetcht0" , 0, IUM_RD, 0, 0, 0x000F0818, BAD_CODE, BAD_CODE)
INST3( prefetcht1, "prefetcht1" , 0, IUM_RD, 0, 0, 0x000F1018, BAD_CODE, BAD_CODE)