Delete mov_i2xmm and mov_xmm2i. (#47843)
author Sergey Andreenko <seandree@microsoft.com>
Tue, 9 Feb 2021 05:27:08 +0000 (21:27 -0800)
committer GitHub <noreply@github.com>
Tue, 9 Feb 2021 05:27:08 +0000 (21:27 -0800)
* Delete mov_i2xmm and mov_xmm2i.

* Delete `ins_CopyFloatToInt`.

* Delete `ins_CopyIntToFloat`.

* review feedback

src/coreclr/jit/codegen.h
src/coreclr/jit/codegencommon.cpp
src/coreclr/jit/codegenxarch.cpp
src/coreclr/jit/emitxarch.cpp
src/coreclr/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/jit/hwintrinsiclistxarch.h
src/coreclr/jit/instr.cpp
src/coreclr/jit/instrsxarch.h
src/coreclr/jit/simdcodegenxarch.cpp

index 7f40326..2ac972c 100644 (file)
@@ -1450,8 +1450,6 @@ public:
 
     instruction ins_Copy(var_types dstType);
     instruction ins_Copy(regNumber srcReg, var_types dstType);
-    instruction ins_CopyIntToFloat(var_types srcType, var_types dstTyp);
-    instruction ins_CopyFloatToInt(var_types srcType, var_types dstTyp);
     static instruction ins_FloatStore(var_types type = TYP_DOUBLE);
     static instruction ins_FloatCopy(var_types type = TYP_DOUBLE);
     instruction ins_FloatConv(var_types to, var_types from);
index 90c7500..c6fc52a 100644 (file)
@@ -4437,17 +4437,7 @@ void CodeGen::genFnPrologCalleeRegArgs(regNumber xtraReg, bool* pXtraRegClobbere
                 }
 #endif
                 instruction copyIns = ins_Copy(regNum, destMemType);
-#if defined(TARGET_XARCH)
-                // For INS_mov_xmm2i, the source xmm reg comes first.
-                if (copyIns == INS_mov_xmm2i)
-                {
-                    GetEmitter()->emitIns_R_R(copyIns, size, regNum, destRegNum);
-                }
-                else
-#endif // TARGET_XARCH
-                {
-                    GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum);
-                }
+                GetEmitter()->emitIns_R_R(copyIns, size, destRegNum, regNum);
 #ifdef USING_SCOPE_INFO
                 psiMoveToReg(varNum);
 #endif // USING_SCOPE_INFO
@@ -12067,42 +12057,15 @@ void CodeGen::genRegCopy(GenTree* treeNode)
         }
         return;
     }
+
+    regNumber srcReg     = genConsumeReg(op1);
     var_types targetType = treeNode->TypeGet();
     regNumber targetReg  = treeNode->GetRegNum();
+    assert(srcReg != REG_NA);
     assert(targetReg != REG_NA);
     assert(targetType != TYP_STRUCT);
 
-    // Check whether this node and the node from which we're copying the value have
-    // different register types. This can happen if (currently iff) we have a SIMD
-    // vector type that fits in an integer register, in which case it is passed as
-    // an argument, or returned from a call, in an integer register and must be
-    // copied if it's in an xmm register.
-
-    bool srcFltReg = (varTypeUsesFloatReg(op1));
-    bool tgtFltReg = (varTypeUsesFloatReg(treeNode));
-    if (srcFltReg != tgtFltReg)
-    {
-        instruction ins;
-        regNumber   fpReg;
-        regNumber   intReg;
-        if (tgtFltReg)
-        {
-            ins    = ins_CopyIntToFloat(op1->TypeGet(), treeNode->TypeGet());
-            fpReg  = targetReg;
-            intReg = op1->GetRegNum();
-        }
-        else
-        {
-            ins    = ins_CopyFloatToInt(op1->TypeGet(), treeNode->TypeGet());
-            intReg = targetReg;
-            fpReg  = op1->GetRegNum();
-        }
-        inst_RV_RV(ins, fpReg, intReg, targetType);
-    }
-    else
-    {
-        inst_RV_RV(ins_Copy(targetType), targetReg, genConsumeReg(op1), targetType);
-    }
+    inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType);
 
     if (op1->IsLocal())
     {
index c4e184b..f4467fe 100644 (file)
@@ -2743,7 +2743,7 @@ void CodeGen::genCodeForInitBlkUnroll(GenTreeBlk* node)
         }
         else
         {
-            emit->emitIns_R_R(INS_mov_i2xmm, EA_PTRSIZE, srcXmmReg, srcIntReg);
+            emit->emitIns_R_R(INS_movd, EA_PTRSIZE, srcXmmReg, srcIntReg);
             emit->emitIns_R_R(INS_punpckldq, EA_16BYTE, srcXmmReg, srcXmmReg);
 #ifdef TARGET_X86
             // For x86, we need one more to convert it from 8 bytes to 16 bytes.
@@ -5039,9 +5039,9 @@ void CodeGen::genCallInstruction(GenTreeCall* call)
         // integer and floating point registers so, let's do that.
         if (call->IsVarargs() && varTypeIsFloating(argNode))
         {
-            regNumber   targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum());
-            instruction ins       = ins_CopyFloatToInt(argNode->TypeGet(), TYP_LONG);
-            inst_RV_RV(ins, argNode->GetRegNum(), targetReg);
+            regNumber srcReg    = argNode->GetRegNum();
+            regNumber targetReg = compiler->getCallArgIntRegister(argNode->GetRegNum());
+            inst_RV_RV(ins_Copy(srcReg, TYP_LONG), targetReg, srcReg);
         }
 #endif // FEATURE_VARARG
     }
@@ -5783,9 +5783,8 @@ void CodeGen::genJmpMethod(GenTree* jmp)
 
             if (varTypeIsFloating(loadType))
             {
-                intArgReg       = compiler->getCallArgIntRegister(argReg);
-                instruction ins = ins_CopyFloatToInt(loadType, TYP_LONG);
-                inst_RV_RV(ins, argReg, intArgReg, loadType);
+                intArgReg = compiler->getCallArgIntRegister(argReg);
+                inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType);
             }
             else
             {
@@ -5824,7 +5823,6 @@ void CodeGen::genJmpMethod(GenTree* jmp)
         regMaskTP remainingIntArgMask = RBM_ARG_REGS & ~fixedIntArgMask;
         if (remainingIntArgMask != RBM_NONE)
         {
-            instruction insCopyIntToFloat = ins_CopyIntToFloat(TYP_LONG, TYP_DOUBLE);
             GetEmitter()->emitDisableGC();
             for (int argNum = 0, argOffset = 0; argNum < MAX_REG_ARG; ++argNum)
             {
@@ -5838,7 +5836,7 @@ void CodeGen::genJmpMethod(GenTree* jmp)
 
                     // also load it in corresponding float arg reg
                     regNumber floatReg = compiler->getCallArgFloatRegister(argReg);
-                    inst_RV_RV(insCopyIntToFloat, floatReg, argReg);
+                    inst_RV_RV(ins_Copy(argReg, TYP_DOUBLE), floatReg, argReg);
                 }
 
                 argOffset += REGSIZE_BYTES;
@@ -6591,8 +6589,9 @@ void CodeGen::genCkfinite(GenTree* treeNode)
     // Copy the floating-point value to an integer register. If we copied a float to a long, then
     // right-shift the value so the high 32 bits of the floating-point value sit in the low 32
     // bits of the integer register.
-    instruction ins = ins_CopyFloatToInt(targetType, (targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
-    inst_RV_RV(ins, op1->GetRegNum(), tmpReg, targetType);
+    regNumber srcReg        = op1->GetRegNum();
+    var_types targetIntType = ((targetType == TYP_FLOAT) ? TYP_INT : TYP_LONG);
+    inst_RV_RV(ins_Copy(srcReg, targetIntType), tmpReg, srcReg, targetType);
     if (targetType == TYP_DOUBLE)
     {
         // right shift by 32 bits to get to exponent.
@@ -6661,7 +6660,7 @@ void CodeGen::genCkfinite(GenTree* treeNode)
 
     // Copy only the low 32 bits. This will be the high order 32 bits of the floating-point
     // value, no matter the floating-point type.
-    inst_RV_RV(ins_CopyFloatToInt(TYP_FLOAT, TYP_INT), copyToTmpSrcReg, tmpReg, TYP_FLOAT);
+    inst_RV_RV(ins_Copy(copyToTmpSrcReg, TYP_INT), tmpReg, copyToTmpSrcReg, TYP_FLOAT);
 
     // Mask exponent with all 1's and check if the exponent is all 1's
     inst_RV_IV(INS_and, tmpReg, expMask, EA_4BYTE);
@@ -7082,22 +7081,7 @@ void CodeGen::genBitCast(var_types targetType, regNumber targetReg, var_types sr
     assert(dstFltReg == genIsValidFloatReg(targetReg));
     if (srcFltReg != dstFltReg)
     {
-        instruction ins;
-        regNumber   fltReg;
-        regNumber   intReg;
-        if (dstFltReg)
-        {
-            ins    = ins_CopyIntToFloat(srcType, targetType);
-            fltReg = targetReg;
-            intReg = srcReg;
-        }
-        else
-        {
-            ins    = ins_CopyFloatToInt(srcType, targetType);
-            intReg = targetReg;
-            fltReg = srcReg;
-        }
-        inst_RV_RV(ins, fltReg, intReg, targetType);
+        inst_RV_RV(ins_Copy(srcReg, targetType), targetReg, srcReg, targetType);
     }
     else if (targetReg != srcReg)
     {
@@ -8760,9 +8744,8 @@ void CodeGen::genProfilingEnterCallback(regNumber initReg, bool* pInitRegZeroed)
 #if FEATURE_VARARG
         if (compiler->info.compIsVarArgs && varTypeIsFloating(loadType))
         {
-            regNumber   intArgReg = compiler->getCallArgIntRegister(argReg);
-            instruction ins       = ins_CopyFloatToInt(loadType, TYP_LONG);
-            inst_RV_RV(ins, argReg, intArgReg, loadType);
+            regNumber intArgReg = compiler->getCallArgIntRegister(argReg);
+            inst_RV_RV(ins_Copy(argReg, TYP_LONG), intArgReg, argReg, loadType);
         }
 #endif //  FEATURE_VARARG
     }
index 0e78dac..a6b3d4a 100644 (file)
@@ -506,6 +506,7 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
     {
         switch (ins)
         {
+            case INS_movd: // TODO-Cleanup: replace with movq, https://github.com/dotnet/runtime/issues/47943.
             case INS_andn:
             case INS_bextr:
             case INS_blsi:
@@ -518,8 +519,6 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
             case INS_cvtss2si:
             case INS_cvtsi2sd:
             case INS_cvtsi2ss:
-            case INS_mov_xmm2i:
-            case INS_mov_i2xmm:
             case INS_movnti:
             case INS_mulx:
             case INS_pdep:
@@ -1239,7 +1238,7 @@ bool emitter::emitInsCanOnlyWriteSSE2OrAVXReg(instrDesc* id)
         case INS_cvtsd2si:
         case INS_cvtss2si:
         case INS_extractps:
-        case INS_mov_xmm2i:
+        case INS_movd:
         case INS_movmskpd:
         case INS_movmskps:
         case INS_mulx:
@@ -8837,15 +8836,7 @@ void emitter::emitDispIns(
         case IF_RRD_RRD:
         case IF_RWR_RRD:
         case IF_RRW_RRD:
-            if (ins == INS_mov_i2xmm)
-            {
-                printf("%s, %s", emitRegName(id->idReg1(), EA_16BYTE), emitRegName(id->idReg2(), attr));
-            }
-            else if (ins == INS_mov_xmm2i)
-            {
-                printf("%s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), EA_16BYTE));
-            }
-            else if (ins == INS_pmovmskb)
+            if (ins == INS_pmovmskb)
             {
                 printf("%s, %s", emitRegName(id->idReg1(), EA_4BYTE), emitRegName(id->idReg2(), attr));
             }
@@ -11447,11 +11438,19 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
     regNumber   reg2 = id->idReg2();
     emitAttr    size = id->idOpSize();
 
-    // Get the 'base' opcode
-    code = insCodeRM(ins);
-    code = AddVexPrefixIfNeeded(ins, code, size);
     if (IsSSEOrAVXInstruction(ins))
     {
+        assert((ins != INS_movd) || (isFloatReg(reg1) != isFloatReg(reg2)));
+
+        if ((ins != INS_movd) || isFloatReg(reg1))
+        {
+            code = insCodeRM(ins);
+        }
+        else
+        {
+            code = insCodeMR(ins);
+        }
+        code = AddVexPrefixIfNeeded(ins, code, size);
         code = insEncodeRMreg(ins, code);
 
         if (TakesRexWPrefix(ins, size))
@@ -11461,6 +11460,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
     }
     else if ((ins == INS_movsx) || (ins == INS_movzx) || (insIsCMOV(ins)))
     {
+        assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
+        code = insCodeRM(ins);
+        code = AddVexPrefixIfNeeded(ins, code, size);
         code = insEncodeRMreg(ins, code) | (int)(size == EA_2BYTE);
 #ifdef TARGET_AMD64
 
@@ -11472,6 +11474,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
     }
     else if (ins == INS_movsxd)
     {
+        assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
+        code = insCodeRM(ins);
+        code = AddVexPrefixIfNeeded(ins, code, size);
         code = insEncodeRMreg(ins, code);
 
 #endif // TARGET_AMD64
@@ -11480,6 +11485,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
     else if ((ins == INS_bsf) || (ins == INS_bsr) || (ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) ||
              (ins == INS_tzcnt))
     {
+        assert(hasCodeRM(ins) && !hasCodeMI(ins) && !hasCodeMR(ins));
+        code = insCodeRM(ins);
+        code = AddVexPrefixIfNeeded(ins, code, size);
         code = insEncodeRMreg(ins, code);
         if ((ins == INS_crc32) && (size > EA_1BYTE))
         {
@@ -11499,7 +11507,9 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
 #endif // FEATURE_HW_INTRINSICS
     else
     {
-        code = insEncodeMRreg(ins, insCodeMR(ins));
+        assert(!TakesVexPrefix(ins));
+        code = insCodeMR(ins);
+        code = insEncodeMRreg(ins, code);
 
         if (ins != INS_test)
         {
@@ -11543,17 +11553,27 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
         }
     }
 
-    regNumber reg345 = REG_NA;
+    regNumber regFor012Bits = reg2;
+    regNumber regFor345Bits = REG_NA;
     if (IsBMIInstruction(ins))
     {
-        reg345 = getBmiRegNumber(ins);
+        regFor345Bits = getBmiRegNumber(ins);
+    }
+    if (regFor345Bits == REG_NA)
+    {
+        regFor345Bits = reg1;
     }
-    if (reg345 == REG_NA)
+    if (ins == INS_movd)
     {
-        reg345 = id->idReg1();
+        assert(isFloatReg(reg1) != isFloatReg(reg2));
+        if (isFloatReg(reg2))
+        {
+            std::swap(regFor012Bits, regFor345Bits);
+        }
     }
-    unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
-    regCode |= insEncodeReg012(ins, reg2, size, &code);
+
+    unsigned regCode = insEncodeReg345(ins, regFor345Bits, size, &code);
+    regCode |= insEncodeReg012(ins, regFor012Bits, size, &code);
 
     if (TakesVexPrefix(ins))
     {
@@ -11648,7 +11668,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
                     }
                 }
 
-                emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+                emitGCregLiveUpd(id->idGCref(), reg1, dst);
                 break;
 
             case IF_RRW_RRD:
@@ -11668,13 +11688,13 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
 
                     */
                     case INS_xor:
-                        assert(id->idReg1() == id->idReg2());
-                        emitGCregLiveUpd(id->idGCref(), id->idReg1(), dst);
+                        assert(reg1 == reg2);
+                        emitGCregLiveUpd(id->idGCref(), reg1, dst);
                         break;
 
                     case INS_or:
                     case INS_and:
-                        emitGCregDeadUpd(id->idReg1(), dst);
+                        emitGCregDeadUpd(reg1, dst);
                         break;
 
                     case INS_add:
@@ -11691,7 +11711,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
                                ((regMask & emitThisByrefRegs) && (ins == INS_add || ins == INS_sub)));
 #endif
                         // Mark r1 as holding a byref
-                        emitGCregLiveUpd(GCT_BYREF, id->idReg1(), dst);
+                        emitGCregLiveUpd(GCT_BYREF, reg1, dst);
                         break;
 
                     default:
@@ -11773,15 +11793,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
                 case IF_RWR_RRD:
                 case IF_RRW_RRD:
                 case IF_RWR_RRD_RRD:
-                    // INS_movxmm2i writes to reg2.
-                    if (ins == INS_mov_xmm2i)
-                    {
-                        emitGCregDeadUpd(id->idReg2(), dst);
-                    }
-                    else
-                    {
-                        emitGCregDeadUpd(id->idReg1(), dst);
-                    }
+                    emitGCregDeadUpd(reg1, dst);
                     break;
 
                 default:
@@ -14681,18 +14693,6 @@ emitter::insExecutionCharacteristics emitter::getInsExecutionCharacteristics(ins
             result.insThroughput = PERFSCORE_THROUGHPUT_25C;
             break;
 
-        case INS_mov_xmm2i:
-            // movd  reg, xmm
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_2C;
-            break;
-
-        case INS_mov_i2xmm:
-            // movd  xmm, reg
-            result.insThroughput = PERFSCORE_THROUGHPUT_1C;
-            result.insLatency    = PERFSCORE_LATENCY_1C;
-            break;
-
         case INS_movd:
             if (memAccessKind == PERFSCORE_MEMORY_NONE)
             {
index ddca579..a805128 100644 (file)
@@ -1451,7 +1451,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             {
                 assert(baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG);
                 op1Reg = op1->GetRegNum();
-                emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
+                emit->emitIns_R_R(ins, emitActualTypeSize(baseType), targetReg, op1Reg);
             }
             else
             {
@@ -1562,7 +1562,7 @@ void CodeGen::genSSE41Intrinsic(GenTreeHWIntrinsic* node)
                 {
                     // extract instructions return to GP-registers, so it needs int size as the emitsize
                     inst_RV_TT_IV(ins, emitTypeSize(TYP_INT), tmpTargetReg, op1, i);
-                    emit->emitIns_R_R(INS_mov_i2xmm, EA_4BYTE, targetReg, tmpTargetReg);
+                    emit->emitIns_R_R(INS_movd, EA_4BYTE, targetReg, tmpTargetReg);
                 }
                 else
                 {
@@ -1688,7 +1688,7 @@ void CodeGen::genAvxOrAvx2Intrinsic(GenTreeHWIntrinsic* node)
             assert(numArgs == 1);
             assert((baseType == TYP_INT) || (baseType == TYP_UINT));
             instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
-            emit->emitIns_R_R(ins, emitActualTypeSize(baseType), op1Reg, targetReg);
+            emit->emitIns_R_R(ins, emitActualTypeSize(baseType), targetReg, op1Reg);
             break;
         }
 
index dde116c..a9c4696 100644 (file)
@@ -43,7 +43,7 @@ HARDWARE_INTRINSIC(Vector128,       AsVector3,
 HARDWARE_INTRINSIC(Vector128,       AsVector4,                                  16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       AsVector128,                                16,             1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       Create,                                     16,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector128,       CreateScalarUnsafe,                         16,             1,      {INS_mov_i2xmm,         INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_movss,              INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector128,       CreateScalarUnsafe,                         16,             1,      {INS_movd,              INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movss,              INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector128,       Dot,                                        16,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_NoCodeGen)
 // The instruction generated for float/double depends on which ISAs are supported
 HARDWARE_INTRINSIC(Vector128,       get_AllBitsSet,                             16,             0,      {INS_pcmpeqd,           INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_pcmpeqd,            INS_cmpps,              INS_cmppd},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
@@ -80,7 +80,7 @@ HARDWARE_INTRINSIC(Vector256,       get_AllBitsSet,
 HARDWARE_INTRINSIC(Vector256,       get_Count,                                  32,             0,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       get_Zero,                                   32,             0,      {INS_xorps,             INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps,              INS_xorps},             HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       Create,                                     32,            -1,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoCodeGen)
-HARDWARE_INTRINSIC(Vector256,       CreateScalarUnsafe,                         32,             1,      {INS_mov_i2xmm,         INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_mov_i2xmm,          INS_movss,              INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(Vector256,       CreateScalarUnsafe,                         32,             1,      {INS_movd,              INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movd,               INS_movss,              INS_movsdsse2},         HW_Category_SIMDScalar,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(Vector256,       Dot,                                        32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_NoCodeGen)
 HARDWARE_INTRINSIC(Vector256,       GetElement,                                 32,             2,      {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_Helper,                 HW_Flag_SpecialImport|HW_Flag_NoContainment|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(Vector256,       GetLower,                                   32,             1,      {INS_movdqu,            INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movdqu,             INS_movups,             INS_movupd},            HW_Category_SimpleSIMD,             HW_Flag_SpecialImport|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
@@ -251,17 +251,17 @@ HARDWARE_INTRINSIC(SSE2,            CompareOrdered,
 HARDWARE_INTRINSIC(SSE2,            CompareScalarOrdered,                       16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2,            CompareUnordered,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmppd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2,            CompareScalarUnordered,                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cmpsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
-HARDWARE_INTRINSIC(SSE2,            ConvertToInt32,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertToInt32,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToInt32WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2,            ConvertToUInt32,                            16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertToUInt32,                            16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Double,                   16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2pd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2pd,           INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Double,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtss2sd,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromSecondArg)
 HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Int32,                    16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtps2dq,           INS_cvtpd2dq},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Int32,              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Int32,              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Int32WithTruncation,      16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttps2dq,          INS_cvttpd2dq},         HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertToVector128Single,                   16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtdq2ps,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtpd2ps},          HW_Category_SimpleSIMD,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128Single,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsd2ss,           INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128UInt32,             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2,            ConvertScalarToVector128UInt32,             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2,            Divide,                                     16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divpd},             HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(SSE2,            DivideScalar,                               16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_divsd},             HW_Category_SIMDScalar,             HW_Flag_CopyUpperBits)
 HARDWARE_INTRINSIC(SSE2,            Extract,                                    16,              2,     {INS_invalid,           INS_invalid,            INS_pextrw,             INS_pextrw,             INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM|HW_Flag_BaseTypeFromFirstArg|HW_Flag_NoRMWSemantics)
@@ -318,12 +318,12 @@ HARDWARE_INTRINSIC(SSE2,            Xor,
 //                                                                                                      {TYP_BYTE,              TYP_UBYTE,              TYP_SHORT,              TYP_USHORT,             TYP_INT,                TYP_UINT,               TYP_LONG,               TYP_ULONG,              TYP_FLOAT,              TYP_DOUBLE}
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  SSE2 64-bit-only Intrinsics
-HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64,                             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_cvtsd2si},          HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertToInt64WithTruncation,               16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvttsd2si},         HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(SSE2_X64,        ConvertToUInt64,                            16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertToUInt64,                            16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Double,             16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_cvtsi2sd,           INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
-HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Int64,              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
-HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128UInt64,             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_i2xmm,          INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128Int64,              16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
+HARDWARE_INTRINSIC(SSE2_X64,        ConvertScalarToVector128UInt64,             16,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen)
 HARDWARE_INTRINSIC(SSE2_X64,        StoreNonTemporal,                           16,              2,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movnti,             INS_movnti,             INS_invalid,            INS_invalid},           HW_Category_MemoryStore,            HW_Flag_NoRMWSemantics|HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromSecondArg)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
@@ -519,8 +519,8 @@ HARDWARE_INTRINSIC(AVX2,            CompareEqual,
 HARDWARE_INTRINSIC(AVX2,            CompareGreaterThan,                         32,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            CompareLessThan,                            32,              2,     {INS_pcmpgtb,           INS_invalid,            INS_pcmpgtw,            INS_invalid,            INS_pcmpgtd,            INS_invalid,            INS_pcmpgtq,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_NoFlag)
 HARDWARE_INTRINSIC(AVX2,            ExtractVector128,                           32,              2,     {INS_vextracti128,      INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_vextracti128,       INS_invalid,            INS_invalid},           HW_Category_IMM,                    HW_Flag_FullRangeIMM)
-HARDWARE_INTRINSIC(AVX2,            ConvertToInt32,                             32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(AVX2,            ConvertToUInt32,                            32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_mov_xmm2i,          INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2,            ConvertToInt32,                             32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(AVX2,            ConvertToUInt32,                            32,              1,     {INS_invalid,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_movd,               INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SIMDScalar,             HW_Flag_BaseTypeFromFirstArg|HW_Flag_SpecialCodeGen|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int16,                    32,              1,     {INS_pmovsxbw,          INS_pmovzxbw,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int32,                    32,              1,     {INS_pmovsxbd,          INS_pmovzxbd,           INS_pmovsxwd,           INS_pmovzxwd,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
 HARDWARE_INTRINSIC(AVX2,            ConvertToVector256Int64,                    32,              1,     {INS_pmovsxbq,          INS_pmovzxbq,           INS_pmovsxwq,           INS_pmovzxwq,           INS_pmovsxdq,           INS_pmovzxdq,           INS_invalid,            INS_invalid,            INS_invalid,            INS_invalid},           HW_Category_SimpleSIMD,             HW_Flag_SpecialCodeGen|HW_Flag_BaseTypeFromFirstArg)
index 06ef501..a1135a3 100644 (file)
@@ -1836,14 +1836,7 @@ instruction CodeGen::ins_Copy(regNumber srcReg, var_types dstType)
         return ins_Copy(dstType);
     }
 #if defined(TARGET_XARCH)
-    if (dstIsFloatReg)
-    {
-        return INS_mov_i2xmm;
-    }
-    else
-    {
-        return INS_mov_xmm2i;
-    }
+    return INS_movd;
 #elif defined(TARGET_ARM64)
     if (dstIsFloatReg)
     {
@@ -2021,32 +2014,6 @@ instruction CodeGen::ins_FloatCompare(var_types type)
     return (type == TYP_FLOAT) ? INS_ucomiss : INS_ucomisd;
 }
 
-instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
-{
-    // On SSE2/AVX - the same instruction is used for moving double/quad word to XMM/YMM register.
-    assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
-
-#if !defined(TARGET_64BIT)
-    // No 64-bit registers on x86.
-    assert((srcType != TYP_LONG) && (srcType != TYP_ULONG));
-#endif // !defined(TARGET_64BIT)
-
-    return INS_mov_i2xmm;
-}
-
-instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
-{
-    // On SSE2/AVX - the same instruction is used for moving double/quad word of XMM/YMM to an integer register.
-    assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
-
-#if !defined(TARGET_64BIT)
-    // No 64-bit registers on x86.
-    assert((dstType != TYP_LONG) && (dstType != TYP_ULONG));
-#endif // !defined(TARGET_64BIT)
-
-    return INS_mov_xmm2i;
-}
-
 instruction CodeGen::ins_MathOp(genTreeOps oper, var_types type)
 {
     switch (oper)
@@ -2166,36 +2133,6 @@ instruction CodeGen::ins_FloatCopy(var_types type)
     return INS_vmov;
 }
 
-instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
-{
-    assert((dstType == TYP_FLOAT) || (dstType == TYP_DOUBLE));
-    assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
-
-    if ((srcType == TYP_LONG) || (srcType == TYP_ULONG))
-    {
-        return INS_vmov_i2d;
-    }
-    else
-    {
-        return INS_vmov_i2f;
-    }
-}
-
-instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
-{
-    assert((srcType == TYP_FLOAT) || (srcType == TYP_DOUBLE));
-    assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
-
-    if ((dstType == TYP_LONG) || (dstType == TYP_ULONG))
-    {
-        return INS_vmov_d2i;
-    }
-    else
-    {
-        return INS_vmov_f2i;
-    }
-}
-
 instruction CodeGen::ins_FloatCompare(var_types type)
 {
     // Not used and not implemented
@@ -2308,24 +2245,7 @@ instruction CodeGen::ins_FloatConv(var_types to, var_types from)
     unreached();
 }
 
-#elif defined(TARGET_ARM64)
-instruction CodeGen::ins_CopyIntToFloat(var_types srcType, var_types dstType)
-{
-    assert((dstType == TYP_FLOAT) || (dstType == TYP_DOUBLE));
-    assert((srcType == TYP_INT) || (srcType == TYP_UINT) || (srcType == TYP_LONG) || (srcType == TYP_ULONG));
-
-    return INS_mov;
-}
-
-instruction CodeGen::ins_CopyFloatToInt(var_types srcType, var_types dstType)
-{
-    assert((srcType == TYP_FLOAT) || (srcType == TYP_DOUBLE));
-    assert((dstType == TYP_INT) || (dstType == TYP_UINT) || (dstType == TYP_LONG) || (dstType == TYP_ULONG));
-
-    return INS_mov;
-}
-
-#endif // TARGET_ARM64
+#endif // TARGET_ARM
 
 /*****************************************************************************
  *
index c6a5069..25e5de7 100644 (file)
@@ -181,12 +181,10 @@ INSTMUL(imul_15,        "imul",             IUM_RD, BAD_CODE,     0x4400003868,
 
 INST3(FIRST_SSE_INSTRUCTION, "FIRST_SSE_INSTRUCTION", IUM_WR, BAD_CODE, BAD_CODE, BAD_CODE, INS_FLAGS_None)
 // These are the SSE instructions used on x86
-INST3(mov_i2xmm,        "movd",             IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x6E),                            INS_FLAGS_None)    // Move int reg to a xmm reg. reg1=xmm reg, reg2=int reg
-INST3(mov_xmm2i,        "movd",             IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x7E),                            INS_FLAGS_None)    // Move xmm reg to an int reg. reg1=xmm reg, reg2=int reg
 INST3(pmovmskb,         "pmovmskb",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0xD7),                            INS_FLAGS_None)    // Move the MSB bits of all bytes in a xmm reg to an int reg
 INST3(movmskpd,         "movmskpd",         IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x50),                            INS_FLAGS_None)    // Extract 2-bit sign mask from xmm and store in reg. The upper bits of r32 or r64 are filled with zeros.
-INST3(movd,             "movd",             IUM_WR, PCKDBL(0x7E), BAD_CODE,     PCKDBL(0x6E),                            INS_FLAGS_None)
-INST3(movq,             "movq",             IUM_WR, PCKDBL(0xD6), BAD_CODE,     SSEFLT(0x7E),                            INS_FLAGS_None)
+INST3(movd,             "movd",             IUM_WR, PCKDBL(0x7E), BAD_CODE,     PCKDBL(0x6E),                            INS_FLAGS_None)    // Move Doubleword/Quadword between xmm regs <-> memory/r32/r64 regs, cleanup https://github.com/dotnet/runtime/issues/47943
+INST3(movq,             "movq",             IUM_WR, PCKDBL(0xD6), BAD_CODE,     SSEFLT(0x7E),                            INS_FLAGS_None)    // Move Quadword between xmm regs <-> memory/xmm regs, cleanup https://github.com/dotnet/runtime/issues/47943
 INST3(movsdsse2,        "movsd",            IUM_WR, SSEDBL(0x11), BAD_CODE,     SSEDBL(0x10),                            INS_Flags_IsDstSrcSrcAVXInstruction)
 
 INST3(punpckldq,        "punpckldq",        IUM_WR, BAD_CODE,     BAD_CODE,     PCKDBL(0x62),                            INS_Flags_IsDstDstSrcAVXInstruction)
index f1bafb9..6897dd0 100644 (file)
@@ -531,17 +531,13 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
             // For AVX2, move it to all 4 of the 64-bit lanes using:
             //     vpbroadcastq targetReg, targetReg
 
-            instruction ins;
-
             regNumber op1loReg = genConsumeReg(op1lo);
-            ins                = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT);
-            inst_RV_RV(ins, targetReg, op1loReg, TYP_INT, emitTypeSize(TYP_INT));
+            inst_RV_RV(ins_Copy(op1loReg, TYP_FLOAT), targetReg, op1loReg, TYP_INT);
 
             regNumber tmpReg = simdNode->GetSingleTempReg();
 
             regNumber op1hiReg = genConsumeReg(op1hi);
-            ins                = ins_CopyIntToFloat(TYP_INT, TYP_FLOAT);
-            inst_RV_RV(ins, tmpReg, op1hiReg, TYP_INT, emitTypeSize(TYP_INT));
+            inst_RV_RV(ins_Copy(op1hiReg, TYP_FLOAT), tmpReg, op1hiReg, TYP_INT);
 
             ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftLeftInternal, TYP_SIMD16);
             GetEmitter()->emitIns_R_I(ins, EA_16BYTE, tmpReg, 4); // shift left by 4 bytes
@@ -600,9 +596,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
         regNumber srcReg = genConsumeReg(op1);
         if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
         {
-            ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
-            assert(ins != INS_invalid);
-            inst_RV_RV(ins, targetReg, srcReg, baseType, emitTypeSize(baseType));
+            inst_RV_RV(ins_Copy(srcReg, TYP_FLOAT), targetReg, srcReg, baseType, emitTypeSize(baseType));
             srcReg = targetReg;
         }
 
@@ -653,17 +647,7 @@ void CodeGen::genSIMDIntrinsicInit(GenTreeSIMD* simdNode)
         {
             if (op1Reg != targetReg)
             {
-                if (varTypeIsFloating(baseType))
-                {
-                    ins = ins_Copy(targetType);
-                }
-                else if (baseType == TYP_INT || baseType == TYP_UINT || baseType == TYP_LONG || baseType == TYP_ULONG)
-                {
-                    ins = ins_CopyIntToFloat(baseType, TYP_FLOAT);
-                }
-
-                assert(ins != INS_invalid);
-                inst_RV_RV(ins, targetReg, op1Reg, baseType, emitTypeSize(baseType));
+                inst_RV_RV(ins_Copy(op1Reg, TYP_FLOAT), targetReg, op1Reg, baseType, emitTypeSize(baseType));
             }
         }
 
@@ -845,12 +829,12 @@ void CodeGen::genSIMDIntrinsic32BitConvert(GenTreeSIMD* simdNode)
 // prepare mask
 #ifdef TARGET_AMD64
         GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X5300000053000000);
-        inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG);
+        inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG);
 #else
         if (compiler->getSIMDSupportLevel() == SIMD_AVX2_Supported)
         {
             GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X53000000);
-            inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+            inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
         }
         else
         {
@@ -910,14 +894,13 @@ void CodeGen::genSIMDLo64BitConvert(SIMDIntrinsicID intrinsicID,
     instruction ins = getOpForSIMDIntrinsic(intrinsicID, baseType);
     if (intrinsicID == SIMDIntrinsicConvertToDouble)
     {
-        // Note that for mov_xmm2i, the int register is always in the reg2 position
-        inst_RV_RV(INS_mov_xmm2i, tmpReg, tmpIntReg, TYP_LONG);
+        inst_RV_RV(INS_movd, tmpIntReg, tmpReg, TYP_LONG);
         inst_RV_RV(ins, targetReg, tmpIntReg, baseType, emitActualTypeSize(baseType));
     }
     else
     {
         inst_RV_RV(ins, tmpIntReg, tmpReg, baseType, emitActualTypeSize(baseType));
-        inst_RV_RV(INS_mov_i2xmm, targetReg, tmpIntReg, TYP_LONG);
+        inst_RV_RV(INS_movd, targetReg, tmpIntReg, TYP_LONG);
     }
 }
 
@@ -1008,10 +991,10 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
 // prepare mask for converting upper 32 bits
 #ifdef TARGET_AMD64
         GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X4530000000000000);
-        inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG);
+        inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG);
 #else
         GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X45300000);
-        inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+        inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
         GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
 #endif
         if (level == SIMD_AVX2_Supported)
@@ -1030,10 +1013,10 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
 // prepare mask for converting lower 32 bits
 #ifdef TARGET_AMD64
         GetEmitter()->emitIns_R_I(INS_mov, EA_8BYTE, tmpIntReg, (ssize_t)0X4330000000000000);
-        inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_ULONG);
+        inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_ULONG);
 #else
         GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X43300000);
-        inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+        inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
         GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
 #endif
         if (level == SIMD_AVX2_Supported)
@@ -1123,7 +1106,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
 
         // prepare mask for converting upper 32 bits
         GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X45300000);
-        inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+        inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
         GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
 
         if (level == SIMD_AVX2_Supported)
@@ -1141,7 +1124,7 @@ void CodeGen::genSIMDIntrinsic64BitConvert(GenTreeSIMD* simdNode)
 
         // prepare mask for converting lower 32 bits
         GetEmitter()->emitIns_R_I(INS_mov, EA_4BYTE, tmpIntReg, (ssize_t)0X43300000);
-        inst_RV_RV(INS_mov_i2xmm, tmpReg, tmpIntReg, TYP_UINT);
+        inst_RV_RV(INS_movd, tmpReg, tmpIntReg, TYP_UINT);
         GetEmitter()->emitIns_R_I(INS_pslldq, EA_16BYTE, tmpReg, 4);
 
         if (level == SIMD_AVX2_Supported)
@@ -1883,8 +1866,6 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
         {
             // We need a temp xmm register if the baseType is not floating point and
             // accessing non-zero'th element.
-            instruction ins;
-
             if (byteShiftCnt != 0)
             {
                 assert(tmpReg != REG_NA);
@@ -1895,7 +1876,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
                 }
 
                 assert((byteShiftCnt > 0) && (byteShiftCnt <= 32));
-                ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16);
+                instruction ins = getOpForSIMDIntrinsic(SIMDIntrinsicShiftRightInternal, TYP_SIMD16);
                 GetEmitter()->emitIns_R_I(ins, emitActualTypeSize(simdType), tmpReg, byteShiftCnt);
             }
             else
@@ -1904,9 +1885,7 @@ void CodeGen::genSIMDIntrinsicGetItem(GenTreeSIMD* simdNode)
             }
 
             assert(tmpReg != REG_NA);
-            ins = ins_CopyFloatToInt(TYP_FLOAT, baseType);
-            // (Note that for mov_xmm2i, the int register is always in the reg2 position.)
-            inst_RV_RV(ins, tmpReg, targetReg, baseType);
+            inst_RV_RV(ins_Copy(tmpReg, baseType), targetReg, tmpReg, baseType);
         }
     }
 
@@ -1987,9 +1966,7 @@ void CodeGen::genSIMDIntrinsicSetItem(GenTreeSIMD* simdNode)
         assert(genIsValidIntReg(tmpReg));
 
         // Move the value from xmm reg to an int reg
-        instruction ins = ins_CopyFloatToInt(TYP_FLOAT, TYP_INT);
-        // (Note that for mov_xmm2i, the int register is always in the reg2 position.
-        inst_RV_RV(ins, op2Reg, tmpReg, baseType);
+        inst_RV_RV(ins_Copy(op2Reg, TYP_INT), tmpReg, op2Reg, baseType);
 
         assert((index >= 0) && (index <= 15));