* Delete `mov_i2xmm` and `mov_xmm2i` in favor of a single `INS_movd`.
* Delete `ins_CopyFloatToInt`.
* Delete `ins_CopyIntToFloat`.
* Address review feedback.
instruction ins_Copy(var_types dstType);
instruction ins_Copy(regNumber srcReg, var_types dstType);
- instruction ins_CopyIntToFloat(var_types srcType, var_types dstTyp);
- instruction ins_CopyFloatToInt(var_types srcType, var_types dstTyp);
static instruction ins_FloatStore(var_types type = TYP_DOUBLE);
static instruction ins_FloatCopy(var_types type = TYP_DOUBLE);
instruction ins_FloatConv(var_types to, var_types from);
}
#endif
instruction copyIns = ins_Copy(regNum, destMemType);
-#if defined(TARGET_XARCH)
- // For INS_mov_xmm2i, the source xmm reg comes first.
- if (copyIns == INS_mov_xmm2i)
- {
- GetEmitter()->emitIns_R_R(copyIns, size, regNum, destRegNum);
- }
- else
-#endif // TARGET_XARCH
- {
- GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum);
- }
+ GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum);
#ifdef USING_SCOPE_INFO
psiMoveToReg(varNum);
#endif // USING_SCOPE_INFO
}
return;
}
+
+ regNumber srcReg = genConsumeReg(op1);
var_types targetType = treeNode->TypeGet();
regNumber targetReg = treeNode->GetRegNum();
+ assert(srcReg != REG_NA);
assert(targetReg != REG_NA);
assert(targetType != TYP_STRUCT);
- // Check whether this node and the node from which we're copying the value have
- // different register types. This can happen if (currently iff) we have a SIMD
- // vector type that fits in an integer register, in which case it is passed as
- // an argument, or returned from a call, in an integer register and must be
- // copied if it's in an xmm register.
-
- bool srcFltReg = (varTypeUsesFloatReg(op1));
- bool tgtFltReg = (varTypeUsesFloatReg(treeNode));
- if (srcFltReg != tgtFltReg)
- {
- instruction ins;
- regNumber fpReg;
- regNumber intReg;
- if (tgtFltReg)
- {
- ins = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
- fpReg = targetReg;
- intReg = op1->GetRegNum();
- }
- else
- {
- ins = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
- intReg = targetReg;
- fpReg = op1->GetRegNum();
- }
- inst_RV_RV(ins, fpReg, intReg, targetType);
- }
- else
- {
- inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
- }
+ inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType);
if (op1->IsLocal())
{
}
else
{
- emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, srcXmmReg, srcIntReg);
+ emit->emitIns_R_R(INS_movd, EA_PTRSIZE, srcXmmReg, srcIntReg);
emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg);
#ifdef TARGET_X86
// For x86, we need one more to convert it from 8 bytes to 16 bytes.
// integer and floating point registers so, let's do that.
if (call->IsVarargs() && varTypeIsFloating(argNode))
{
- regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum());
- instruction ins = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
- inst_RV_RV(ins, argNode->GetRegNum(), targetReg);
+ regNumber srcReg = argNode->GetRegNum();
+ regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum());
+ inst_RV_RV(ins_Copy(srcReg, TYP_LONG), targetReg, srcReg);
}
#endif // FEATURE_VARARG
}
if (varTypeIsFloating(loadType))
{
- intArgReg = compiler->getCallArgIntRegister(argReg);
- instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
- inst_RV_RV(ins, argReg, intArgReg, loadType);
+ intArgReg = compiler->getCallArgIntRegister(argReg);
+ inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType);
}
else
{
regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
if (remainingIntArgMask != RBM_NONE)
{
- instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
GetEmitter()->emitDisableGC();
for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
{
// also load it in corresponding float arg reg
regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
- inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
+ inst_RV_RV(ins_Copy(argReg, TYP_DOUBLE), floatReg, argReg);
}
argOffset += REGSIZE_BYTES;
// Copy the floating-point value to an integer register. If we copied a float to a long, then
// right-shift the value so the high 32 bits of the floating-point value sit in the low 32
// bits of the integer register.
- instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
- inst_RV_RV(ins, op1->GetRegNum(), tmpReg, targetType);
+ regNumber srcReg = op1->GetRegNum();
+ var_types targetIntType = ((targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
+ inst_RV_RV(ins_Copy(srcReg, targetIntType), tmpReg, srcReg, targetType);
if (targetType == TYP_DOUBLE)
{
// right shift by 32 bits to get to exponent.
// Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
// value, no matter the floating-point type.
- inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
+ inst_RV_RV(ins_Copy(copyToTmpSrcReg, TYP_INT), tmpReg, copyToTmpSrcReg, TYP_FLOAT);
// Mask exponent with all 1's and check if the exponent is all 1's
inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
assert(dstFltReg == genIsValidFloatReg(targetReg));
if (srcFltReg != dstFltReg)
{
- instruction ins;
- regNumber fltReg;
- regNumber intReg;
- if (dstFltReg)
- {
- ins = ins_CopyIntToFloat(srcType, targetType);
- fltReg = targetReg;
- intReg = srcReg;
- }
- else
- {
- ins = ins_CopyFloatToInt(srcType, targetType);
- intReg = targetReg;
- fltReg = srcReg;
- }
- inst_RV_RV(ins, fltReg, intReg, targetType);
+ inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType);
}
else if (targetReg != srcReg)
{
#if FEATURE_VARARG
if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
{
- regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
- instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
- inst_RV_RV(ins, argReg, intArgReg, loadType);
+ regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
+ inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType);
}
#endif // FEATURE_VARARG
}
{
switch (ins)
{
+ case INS_movd: // TODO-Cleanup: replace with movq, https://github.com/dotnet/runtime/issues/47943.
case INS_andn:
case INS_bextr:
case INS_blsi:
case INS_cvtss2si:
case INS_cvtsi2sd:
case INS_cvtsi2ss:
- case INS_mov_xmm2i:
- case INS_mov_i2xmm:
case INS_movnti:
case INS_mulx:
case INS_pdep:
case INS_cvtsd2si:
case INS_cvtss2si:
case INS_extractps:
- case INS_mov_xmm2i:
+ case INS_movd:
case INS_movmskpd:
case INS_movmskps:
case INS_mulx:
case IF_RRD_RRD:
case IF_RWR_RRD:
case IF_RRW_RRD:
- if (ins == INS_mov_i2xmm)
- {
- printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
- }
- else if (ins == INS_mov_xmm2i)
- {
- printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
- }
- else if (ins == INS_pmovmskb)
+ if (ins == INS_pmovmskb)
{
printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
}
regNumber reg2 = id->idReg2();
emitAttr size = id->idOpSize();
- // Get the 'base' opcode
- code = insCodeRM(ins);
- code = AddVexPrefixIfNeeded(ins, code, size);
if (IsSSEOrAVXInstruction(ins))
{
+ assert((ins != INS_movd) || (isFloatReg(reg1) != isFloatReg(reg2)));
+
+ if ((ins != INS_movd) || isFloatReg(reg1))
+ {
+ code = insCodeRM(ins);
+ }
+ else
+ {
+ code = insCodeMR(ins);
+ }
+ code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code);
if (TakesRexWPrefix(ins, size))
}
else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
{
+ assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
+ code = insCodeRM(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
#ifdef TARGET_AMD64
}
else if (ins == INS_movsxd)
{
+ assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
+ code = insCodeRM(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code);
#endif // TARGET_AMD64
else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) ||
(ins == INS_tzcnt))
{
+ assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
+ code = insCodeRM(ins);
+ code = AddVexPrefixIfNeeded(ins, code, size);
code = insEncodeRMreg(ins, code);
if ((ins == INS_crc32) && (size > EA_1BYTE))
{
#endif // FEATURE_HW_INTRINSICS
else
{
- code = insEncodeMRreg(ins, insCodeMR(ins));
+ assert(!TakesVexPrefix(ins));
+ code = insCodeMR(ins);
+ code = insEncodeMRreg(ins, code);
if (ins != INS_test)
{
}
}
- regNumber reg345 = REG_NA;
+ regNumber regFor012Bits = reg2;
+ regNumber regFor345Bits = REG_NA;
if (IsBMIInstruction(ins))
{
- reg345 = getBmiRegNumber(ins);
+ regFor345Bits = getBmiRegNumber(ins);
+ }
+ if (regFor345Bits == REG_NA)
+ {
+ regFor345Bits = reg1;
}
- if (reg345 == REG_NA)
+ if (ins == INS_movd)
{
- reg345 = id->idReg1();
+ assert(isFloatReg(reg1) != isFloatReg(reg2));
+ if (isFloatReg(reg2))
+ {
+ std::swap(regFor012Bits, regFor345Bits);
+ }
}
- unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
- regCode |= insEncodeReg012(ins, reg2, size, &code);
+
+ unsigned regCode = insEncodeReg345(ins, regFor345Bits, size, &code);
+ regCode |= insEncodeReg012(ins, regFor012Bits, size, &code);
if (TakesVexPrefix(ins))
{
}
}
- emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ emitGCregLiveUpd(id->idGCref(), reg1, dst);
break;
case IF_RRW_RRD:
*/
case INS_xor:
- assert(id->idReg1() == id->idReg2());
- emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+ assert(reg1 == reg2);
+ emitGCregLiveUpd(id->idGCref(), reg1, dst);
break;
case INS_or:
case INS_and:
- emitGCregDeadUpd(id->idReg1(), dst);
+ emitGCregDeadUpd(reg1, dst);
break;
case INS_add:
((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
#endif
// Mark r1 as holding a byref
- emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+ emitGCregLiveUpd(GCT_BYREF, reg1, dst);
break;
default:
case IF_RWR_RRD:
case IF_RRW_RRD:
case IF_RWR_RRD_RRD:
- // INS_movxmm2i writes to reg2.
- if (ins == INS_mov_xmm2i)
- {
- emitGCregDeadUpd(id->idReg2(), dst);
- }
- else
- {
- emitGCregDeadUpd(id->idReg1(), dst);
- }
+ emitGCregDeadUpd(reg1, dst);
break;
default:
result.insThroughput = PERFSCORE_THROUGHPUT_25C;
break;
- case INS_mov_xmm2i:
- // movd reg, xmm
- result.insThroughput = PERFSCORE_THROUGHPUT_1C;
- result.insLatency = PERFSCORE_LATENCY_2C;
- break;
-
- case INS_mov_i2xmm:
- // movd xmm, reg
- result.insThroughput = PERFSCORE_THROUGHPUT_1C;
- result.insLatency = PERFSCORE_LATENCY_1C;
- break;
-
case INS_movd:
if (memAccessKind == PERFSCORE_MEMORY_NONE)
{
{
assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG);
op1Reg = op1->GetRegNum();
- emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
+ emit->emitIns_R_R(ins, emitActualTypeSize(baseType), targetReg, op1Reg);
}
else
{
{
// extract instructions return to GP-registers, so it needs int size as the emitsize
inst_RV_TT_IV(ins, emitTypeSize(TYP_INT), tmpTargetReg, op1, i);
- emit->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, targetReg, tmpTargetReg);
+ emit->emitIns_R_R(INS_movd, EA_4BYTE, targetReg, tmpTargetReg);
}
else
{
assert(numArgs == 1);
assert((baseType == TYP_INT) || (baseType == TYP_UINT));
instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
- emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
+ emit->emitIns_R_R(ins, emitActualTypeSize(baseType), targetReg, op1Reg);
break;
}
HARDWARE_INTRINSIC(Vector128, AsVector4, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, AsVector128, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, Create, 16, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128, CreateScalarUnsafe, 16, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector128, Dot, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen)
// The instruction generated for float/double depends on which ISAs are supported
HARDWARE_INTRINSIC(Vector128, get_AllBitsSet, 16, 0, {INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_pcmpeqd, INS_cmpps, INS_cmppd}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, get_Count, 32, 0, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, get_Zero, 32, 0, {INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps, INS_xorps}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, Create, 32, -1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_mov_i2xmm, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256, CreateScalarUnsafe, 32, 1, {INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movd, INS_movss, INS_movsdsse2}, HW_Category_SIMDScalar, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(Vector256, Dot, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_NoCodeGen)
HARDWARE_INTRINSIC(Vector256, GetElement, 32, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_Helper, HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(Vector256, GetLower, 32, 1, {INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movdqu, INS_movups, INS_movupd}, HW_Category_SimpleSIMD, HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, CompareScalarOrdered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2, CompareUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmppd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2, CompareScalarUnordered, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cmpsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2, ConvertToInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, ConvertToInt32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2, ConvertToUInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, ConvertToVector128Double, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2pd, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2pd, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd, INS_invalid, INS_invalid, INS_invalid, INS_cvtss2sd, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromSecondArg)
HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtps2dq, INS_cvtpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Int32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, ConvertToVector128Int32WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttps2dq, INS_cvttpd2dq}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, ConvertToVector128Single, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtdq2ps, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtpd2ps}, HW_Category_SimpleSIMD, HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128Single, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsd2ss, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2, ConvertScalarToVector128UInt32, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2, Divide, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divpd}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(SSE2, DivideScalar, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_divsd}, HW_Category_SIMDScalar, HW_Flag_CopyUpperBits)
HARDWARE_INTRINSIC(SSE2, Extract, 16, 2, {INS_invalid, INS_invalid, INS_pextrw, INS_pextrw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
// {TYP_BYTE, TYP_UBYTE, TYP_SHORT, TYP_USHORT, TYP_INT, TYP_UINT, TYP_LONG, TYP_ULONG, TYP_FLOAT, TYP_DOUBLE}
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
// SSE2 64-bit-only Intrinsics
-HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_cvtsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_X64, ConvertToInt64WithTruncation, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvttsd2si}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64, ConvertToUInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Double, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_cvtsi2sd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_i2xmm, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128Int64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_X64, ConvertScalarToVector128UInt64, 16, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
HARDWARE_INTRINSIC(SSE2_X64, StoreNonTemporal, 16, 2, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movnti, INS_movnti, INS_invalid, INS_invalid}, HW_Category_MemoryStore, HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
// ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
HARDWARE_INTRINSIC(AVX2, CompareGreaterThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2, CompareLessThan, 32, 2, {INS_pcmpgtb, INS_invalid, INS_pcmpgtw, INS_invalid, INS_pcmpgtd, INS_invalid, INS_pcmpgtq, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_NoFlag)
HARDWARE_INTRINSIC(AVX2, ExtractVector128, 32, 2, {INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_vextracti128, INS_invalid, INS_invalid}, HW_Category_IMM, HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_mov_xmm2i, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2, ConvertToInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2, ConvertToUInt32, 32, 1, {INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_movd, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SIMDScalar, HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int16, 32, 1, {INS_pmovsxbw, INS_pmovzxbw, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int32, 32, 1, {INS_pmovsxbd, INS_pmovzxbd, INS_pmovsxwd, INS_pmovzxwd, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
HARDWARE_INTRINSIC(AVX2, ConvertToVector256Int64, 32, 1, {INS_pmovsxbq, INS_pmovzxbq, INS_pmovsxwq, INS_pmovzxwq, INS_pmovsxdq, INS_pmovzxdq, INS_invalid, INS_invalid, INS_invalid, INS_invalid}, HW_Category_SimpleSIMD, HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
return ins_Copy(dstType);
}
#if defined(TARGET_XARCH)
- if (dstIsFloatReg)
- {
- return INS_mov_i2xmm;
- }
- else
- {
- return INS_mov_xmm2i;
- }
+ return INS_movd;
#elif defined(TARGET_ARM64)
if (dstIsFloatReg)
{
return (type == TYP_FLOAT) ? INS_ucomiss : INS_ucomisd;
}
-instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
-{
- // On SSE2/AVX - the same instruction is used for moving double/quad word to XMM/YMM register.
- assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
-
-#if !defined(TARGET_64BIT)
- // No 64-bit registers on x86.
- assert((srcType != TYP_LONG) && (srcType != TYP_ULONG));
-#endif // !defined(TARGET_64BIT)
-
- return INS_mov_i2xmm;
-}
-
-instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
-{
- // On SSE2/AVX - the same instruction is used for moving double/quad word of XMM/YMM to an integer register.
- assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
-
-#if !defined(TARGET_64BIT)
- // No 64-bit registers on x86.
- assert((dstType != TYP_LONG) && (dstType != TYP_ULONG));
-#endif // !defined(TARGET_64BIT)
-
- return INS_mov_xmm2i;
-}
-
instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type)
{
switch (oper)
return INS_vmov;
}
-instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
-{
- assert((dstType == TYP_FLOAT) || (dstType == TYP_DOUBLE));
- assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
-
- if ((srcType == TYP_LONG) || (srcType == TYP_ULONG))
- {
- return INS_vmov_i2d;
- }
- else
- {
- return INS_vmov_i2f;
- }
-}
-
-instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
-{
- assert((srcType == TYP_FLOAT) || (srcType == TYP_DOUBLE));
- assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
-
- if ((dstType == TYP_LONG) || (dstType == TYP_ULONG))
- {
- return INS_vmov_d2i;
- }
- else
- {
- return INS_vmov_f2i;
- }
-}
-
instruction CodeGen::ins_FloatCompare(var_types type)
{
// Not used and not implemented
unreached();
}
-#elif defined(TARGET_ARM64)
-instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
-{
- assert((dstType == TYP_FLOAT) || (dstType == TYP_DOUBLE));
- assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
-
- return INS_mov;
-}
-
-instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
-{
- assert((srcType == TYP_FLOAT) || (srcType == TYP_DOUBLE));
- assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
-
- return INS_mov;
-}
-
-#endif // TARGET_ARM64
+#endif // TARGET_ARM
/*****************************************************************************
*
INST3(FIRST_SSE_INSTRUCTION, "FIRST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
// These are the SSE instructions used on x86
-INST3(mov_i2xmm, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg
-INST3(mov_xmm2i, "movd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x7E), INS_FLAGS_None) // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg
INST3(pmovmskb, "pmovmskb", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0xD7), INS_FLAGS_None) // Move the MSB bits of all bytes in a xmm reg to an int reg
INST3(movmskpd, "movmskpd", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x50), INS_FLAGS_None) // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros.
-INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None)
-INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_FLAGS_None)
+INST3(movd, "movd", IUM_WR, PCKDBL(0x7E), BAD_CODE, PCKDBL(0x6E), INS_FLAGS_None) // Move Double/Quadword between mm regs <-> memory/r32/r64 regs, cleanup https://github.com/dotnet/runtime/issues/47943
+INST3(movq, "movq", IUM_WR, PCKDBL(0xD6), BAD_CODE, SSEFLT(0x7E), INS_FLAGS_None) // Move Quadword between memory/mm <-> regs, cleanup https://github.com/dotnet/runtime/issues/47943
INST3(movsdsse2, "movsd", IUM_WR, SSEDBL(0x11), BAD_CODE, SSEDBL(0x10), INS_Flags_IsDstSrcSrcAVXInstruction)
INST3(punpckldq, "punpckldq", IUM_WR, BAD_CODE, BAD_CODE, PCKDBL(0x62), INS_Flags_IsDstDstSrcAVXInstruction)
// For AVX2, move it to all 4 of the 64-bit lanes using:
// vpbroadcastq targetReg, targetReg
- instruction ins;
-
regNumber op1loReg = genConsumeReg(op1lo);
- ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT);
- inst_RV_RV(ins, targetReg, op1loReg, TYP_INT, emitTypeSize(TYP_INT));
+ inst_RV_RV(ins_Copy(op1loReg, TYP_FLOAT), targetReg, op1loReg, TYP_INT);
regNumber tmpReg = simdNode->GetSingleTempReg();
regNumber op1hiReg = genConsumeReg(op1hi);
- ins = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT);
- inst_RV_RV(ins, tmpReg, op1hiReg, TYP_INT, emitTypeSize(TYP_INT));
+ inst_RV_RV(ins_Copy(op1hiReg, TYP_FLOAT), tmpReg, op1hiReg, TYP_INT);
- ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16);
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16);
GetEmitter()->emitIns_R_I(ins, EA_16BYTE, tmpReg, 4); // shift left by 4 bytes
regNumber srcReg = genConsumeReg(op1);
if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
{
- ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
- assert(ins != INS_invalid);
- inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType));
+ inst_RV_RV(ins_Copy(srcReg, TYP_FLOAT), targetReg, srcReg, baseType, emitTypeSize(baseType));
srcReg = targetReg;
}
{
if (op1Reg != targetReg)
{
- if (varTypeIsFloating(baseType))
- {
- ins = ins_Copy(targetType);
- }
- else if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
- {
- ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
- }
-
- assert(ins != INS_invalid);
- inst_RV_RV(ins, targetReg, op1Reg, baseType, emitTypeSize(baseType));
+ inst_RV_RV(ins_Copy(op1Reg, TYP_FLOAT), targetReg, op1Reg, baseType, emitTypeSize(baseType));
}
}
// prepare mask
#ifdef TARGET_AMD64
GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X5300000053000000);
- inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG);
+ inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG);
#else
if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
{
GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X53000000);
- inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+ inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
}
else
{
instruction ins = getOpForSIMDIntrinsic(intrinsicID, baseType);
if (intrinsicID == SIMDIntrinsicConvertToDouble)
{
- // Note that for mov_xmm2i, the int register is always in the reg2 position
- inst_RV_RV(INS_mov_xmm2i, tmpReg, tmpIntReg, TYP_LONG);
+ inst_RV_RV(INS_movd, tmpIntReg, tmpReg, TYP_LONG);
inst_RV_RV(ins, targetReg, tmpIntReg, baseType, emitActualTypeSize(baseType));
}
else
{
inst_RV_RV(ins, tmpIntReg, tmpReg, baseType, emitActualTypeSize(baseType));
- inst_RV_RV(INS_mov_i2xmm, targetReg, tmpIntReg, TYP_LONG);
+ inst_RV_RV(INS_movd, targetReg, tmpIntReg, TYP_LONG);
}
}
// prepare mask for converting upper 32 bits
#ifdef TARGET_AMD64
GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X4530000000000000);
- inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG);
+ inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG);
#else
GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X45300000);
- inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+ inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
#endif
if (level == SIMD_AVX2_Supported)
// prepare mask for converting lower 32 bits
#ifdef TARGET_AMD64
GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X4330000000000000);
- inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG);
+ inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG);
#else
GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X43300000);
- inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+ inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
#endif
if (level == SIMD_AVX2_Supported)
// prepare mask for converting upper 32 bits
GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X45300000);
- inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+ inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
if (level == SIMD_AVX2_Supported)
// prepare mask for converting lower 32 bits
GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X43300000);
- inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+ inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
if (level == SIMD_AVX2_Supported)
{
// We need a temp xmm register if the baseType is not floating point and
// accessing non-zero'th element.
- instruction ins;
-
if (byteShiftCnt != 0)
{
assert(tmpReg != REG_NA);
}
assert((byteShiftCnt > 0) && (byteShiftCnt <= 32));
- ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16);
+ instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16);
GetEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), tmpReg, byteShiftCnt);
}
else
}
assert(tmpReg != REG_NA);
- ins = ins_CopyFloatToInt(TYP_FLOAT, baseType);
- // (Note that for mov_xmm2i, the int register is always in the reg2 position.)
- inst_RV_RV(ins, tmpReg, targetReg, baseType);
+ inst_RV_RV(ins_Copy(tmpReg, baseType), targetReg, tmpReg, baseType);
}
}
assert(genIsValidIntReg(tmpReg));
// Move the value from xmm reg to an int reg
- instruction ins = ins_CopyFloatToInt(TYP_FLOAT, TYP_INT);
- // (Note that for mov_xmm2i, the int register is always in the reg2 position.
- inst_RV_RV(ins, op2Reg, tmpReg, baseType);
+ inst_RV_RV(ins_Copy(op2Reg, TYP_INT), tmpReg, op2Reg, baseType);
assert((index >= 0) && (index <= 15));