Updating most of the SSE Compare intrinsics to support containment
author Tanner Gooding <tagoo@outlook.com>
Sat, 13 Jan 2018 04:47:07 +0000 (20:47 -0800)
committer Tanner Gooding <tagoo@outlook.com>
Wed, 17 Jan 2018 00:30:10 +0000 (16:30 -0800)
src/jit/codegenlinear.h
src/jit/emitfmtsxarch.h
src/jit/emitxarch.cpp
src/jit/emitxarch.h
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/lowerxarch.cpp

index 91ee30c..6321e0c 100644 (file)
@@ -117,6 +117,7 @@ void genPutArgStkSIMD12(GenTree* treeNode);
 #if FEATURE_HW_INTRINSICS && defined(_TARGET_XARCH_)
 void genHWIntrinsic(GenTreeHWIntrinsic* node);
 void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins);
+void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins);
 void genSSEIntrinsic(GenTreeHWIntrinsic* node);
 void genSSE2Intrinsic(GenTreeHWIntrinsic* node);
 void genSSE3Intrinsic(GenTreeHWIntrinsic* node);
index d0aef50..953103a 100644 (file)
@@ -125,6 +125,8 @@ IF_DEF(RRW_MRD,     IS_GM_RD|IS_R1_RW,          DSP)      // r/w    reg , read [
 IF_DEF(RRW_MRD_CNS, IS_GM_RD|IS_R1_RW,          DSP_CNS)  // r/w    reg , read [mem], const
 
 IF_DEF(RWR_RRD_MRD, IS_GM_RD|IS_R1_WR|IS_R2_RD, DSP)      // write  reg , read reg2 , read [mem]
+IF_DEF(RWR_MRD_CNS, IS_GM_RD|IS_R1_WR,          DSP_CNS)  // write  reg , read [mem], const
+IF_DEF(RWR_RRD_MRD_CNS, IS_GM_RD|IS_R1_WR|IS_R2_RD, DSP_CNS) // write  reg , read reg2 , read [mem], const
 IF_DEF(RWR_MRD_OFF, IS_GM_RD|IS_R1_WR,          DSP)      // write  reg , offset mem
 
 IF_DEF(MRD_RRD,     IS_GM_RD|IS_R1_RD,          DSP)      // read  [mem], read  reg
@@ -151,6 +153,8 @@ IF_DEF(RRW_SRD,     IS_SF_RD|IS_R1_RW,          NONE)     // r/w    reg , read [
 IF_DEF(RRW_SRD_CNS, IS_SF_RD|IS_R1_RW,          CNS )     // r/w    reg , read [stk], const
 
 IF_DEF(RWR_RRD_SRD, IS_SF_RD|IS_R1_WR|IS_R2_RD, NONE)     // write  reg , read  reg2, read [stk]
+IF_DEF(RWR_SRD_CNS, IS_SF_RD|IS_R1_WR,          CNS )     // write  reg , read [stk], const
+IF_DEF(RWR_RRD_SRD_CNS, IS_SF_RD|IS_R1_WR|IS_R2_RD, CNS ) // write  reg , read  reg2, read [stk], const
 
 IF_DEF(SRD_RRD,     IS_SF_RD|IS_R1_RD,          NONE)     // read  [stk], read  reg
 IF_DEF(SWR_RRD,     IS_SF_WR|IS_R1_RD,          NONE)     // write [stk], read  reg
@@ -177,6 +181,8 @@ IF_DEF(RRW_ARD,     IS_AM_RD|IS_R1_RW,          AMD )     // r/w    reg , read [
 IF_DEF(RRW_ARD_CNS, IS_AM_RD|IS_R1_RW,          AMD_CNS)  // r/w    reg , read [adr], const
 
 IF_DEF(RWR_RRD_ARD, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD )     // write  reg , read  reg2, read [adr]
+IF_DEF(RWR_ARD_CNS, IS_AM_RD|IS_R1_WR,          AMD_CNS)  // write  reg , read [adr], const
+IF_DEF(RWR_RRD_ARD_CNS, IS_AM_RD|IS_R1_WR|IS_R2_RD, AMD_CNS) // write  reg , read  reg2, read [adr], const
 
 IF_DEF(ARD_RRD,     IS_AM_RD|IS_R1_RD,          AMD )     // read  [adr], read  reg
 IF_DEF(AWR_RRD,     IS_AM_WR|IS_R1_RD,          AMD )     // write [adr], read  reg
index f380abb..8881092 100644 (file)
@@ -2597,12 +2597,16 @@ emitter::insFormat emitter::emitMapFmtAtoM(insFormat fmt)
             return IF_RRD_MRD;
         case IF_RWR_ARD:
             return IF_RWR_MRD;
+        case IF_RWR_ARD_CNS:
+            return IF_RWR_MRD_CNS;
         case IF_RRW_ARD:
             return IF_RRW_MRD;
         case IF_RRW_ARD_CNS:
             return IF_RRW_MRD_CNS;
         case IF_RWR_RRD_ARD:
             return IF_RWR_RRD_MRD;
+        case IF_RWR_RRD_ARD_CNS:
+            return IF_RWR_RRD_MRD_CNS;
 
         case IF_ARD_RRD:
             return IF_MRD_RRD;
@@ -3926,13 +3930,16 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT
 {
     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
 
+    assert(IsSSEOrAVXInstruction(ins));
+    assert(IsThreeOperandAVXInstruction(ins));
+
     ssize_t    offs = indir->Offset();
     instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival);
 
     id->idIns(ins);
     id->idReg1(reg1);
 
-    emitHandleMemOp(indir, id, IF_RRW_ARD_CNS, ins);
+    emitHandleMemOp(indir, id, IF_RWR_ARD_CNS, ins);
 
     // Plus one for the 1-byte immediate (ival)
     UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
@@ -3945,35 +3952,26 @@ void emitter::emitIns_R_A_I(instruction ins, emitAttr attr, regNumber reg1, GenT
 void emitter::emitIns_R_C_I(
     instruction ins, emitAttr attr, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
 {
+    noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
+
+    assert(IsSSEOrAVXInstruction(ins));
+    assert(IsThreeOperandAVXInstruction(ins));
+
     // Static always need relocs
     if (!jitStaticFldIsGlobAddr(fldHnd))
     {
         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
     }
 
-    noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
-
     instrDesc*     id = emitNewInstrCnsDsp(attr, ival, offs);
-    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
-
-    // Plus one for the 1 byte immediate (ival)
-    sz += 1;
-
-    // VEX prefix
-    sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
-
-    // REX prefix
-    if (IsExtendedReg(reg1, attr))
-    {
-        sz += emitGetRexPrefixSize(ins);
-    }
+    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
 
     id->idIns(ins);
-    id->idInsFmt(IF_RRW_MRD_CNS);
+    id->idInsFmt(IF_RWR_RRD_MRD_CNS);
     id->idReg1(reg1);
     id->idAddr()->iiaFieldHnd = fldHnd;
-    id->idCodeSize(sz);
 
+    id->idCodeSize(sz);
     dispIns(id);
     emitCurIGsize += sz;
 }
@@ -3982,30 +3980,24 @@ void emitter::emitIns_R_S_I(instruction ins, emitAttr attr, regNumber reg1, int
 {
     noway_assert(emitVerifyEncodable(ins, EA_SIZE(attr), reg1));
 
-    instrDesc*     id = emitNewInstrCns(attr, ival);
-    UNATIVE_OFFSET sz = emitInsSizeSV(insCodeRM(ins), varx, offs);
-
-    // Plus one for the 1 byte immediate (ival)
-    sz += 1;
-
-    // VEX prefix
-    sz += emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins));
+    assert(IsSSEOrAVXInstruction(ins));
+    assert(IsThreeOperandAVXInstruction(ins));
 
-    // REX prefix
-    if (IsExtendedReg(reg1, attr))
-    {
-        sz += emitGetRexPrefixSize(ins);
-    }
+    instrDesc*     id = emitNewInstrCns(attr, ival);
+    UNATIVE_OFFSET sz =
+        emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
 
     id->idIns(ins);
-    id->idInsFmt(IF_RRW_SRD_CNS);
+    id->idInsFmt(IF_RWR_RRD_SRD_CNS);
     id->idReg1(reg1);
     id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
-    id->idCodeSize(sz);
+
 #ifdef DEBUG
     id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs;
 #endif
 
+    id->idCodeSize(sz);
+
     dispIns(id);
     emitCurIGsize += sz;
 }
@@ -4110,6 +4102,54 @@ void emitter::emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regN
     emitCurIGsize += sz;
 }
 
+void emitter::emitIns_R_R_A_I(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt)
+{ // Emit 'ins reg1, reg2, [indir], ival': two registers, an address-mode memory operand and an immediate.
+    assert(IsSSEOrAVXInstruction(ins));
+    assert(IsThreeOperandAVXInstruction(ins));
+
+    ssize_t    offs = indir->Offset();
+    instrDesc* id   = emitNewInstrAmdCns(attr, offs, ival); // allocated with the indir offset and the immediate
+
+    id->idIns(ins);
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+
+    emitHandleMemOp(indir, id, fmt, ins); // presumably records the address mode and 'fmt' on id - TODO confirm
+
+    UNATIVE_OFFSET sz = emitInsSizeAM(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+    id->idCodeSize(sz); // sz includes one extra byte for the immediate (ival)
+
+    dispIns(id);
+    emitCurIGsize += sz; // account for this instruction in the current instruction group's size
+}
+
+void emitter::emitIns_R_R_C_I(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival)
+{ // Emit 'ins reg1, reg2, [fldHnd + offs], ival': two registers, a static-field operand and an immediate.
+    assert(IsSSEOrAVXInstruction(ins));
+    assert(IsThreeOperandAVXInstruction(ins));
+
+    // Static always need relocs
+    if (!jitStaticFldIsGlobAddr(fldHnd))
+    {
+        attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
+    }
+
+    instrDesc*     id = emitNewInstrCnsDsp(attr, ival, offs); // allocated with the immediate and the field offset
+    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins)) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+
+    id->idIns(ins);
+    id->idInsFmt(IF_RWR_RRD_MRD_CNS); // write reg1, read reg2, read [mem], const
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idAddr()->iiaFieldHnd = fldHnd;
+
+    id->idCodeSize(sz); // sz includes one extra byte for the immediate (ival)
+    dispIns(id);
+    emitCurIGsize += sz;
+}
+
 /**********************************************************************************
 * emitIns_R_R_R_I: Add an instruction with three register operands and an immediate.
 *
@@ -4144,6 +4184,31 @@ void emitter::emitIns_R_R_R_I(
     emitCurIGsize += sz;
 }
 
+void emitter::emitIns_R_R_S_I(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival)
+{ // Emit 'ins reg1, reg2, [varx + offs], ival': two registers, a stack-frame operand and an immediate.
+    assert(IsSSEOrAVXInstruction(ins));
+    assert(IsThreeOperandAVXInstruction(ins));
+
+    instrDesc*     id = emitNewInstrCns(attr, ival); // descriptor allocated with the immediate
+    UNATIVE_OFFSET sz =
+        emitInsSizeSV(insCodeRM(ins), varx, offs) + emitGetVexPrefixAdjustedSize(ins, attr, insCodeRM(ins)) + 1;
+
+    id->idIns(ins);
+    id->idInsFmt(IF_RWR_RRD_SRD_CNS); // write reg1, read reg2, read [stk], const
+    id->idReg1(reg1);
+    id->idReg2(reg2);
+    id->idAddr()->iiaLclVar.initLclVarAddr(varx, offs);
+
+#ifdef DEBUG
+    id->idDebugOnlyInfo()->idVarRefOffs = emitVarRefOffs; // read back by emitDispIns when printing the frame ref
+#endif
+
+    id->idCodeSize(sz); // sz includes one extra byte for the immediate (ival)
+    dispIns(id);
+    emitCurIGsize += sz;
+}
+
 /*****************************************************************************
  *
  *  Add an instruction with a register + static member operands.
@@ -5101,7 +5166,7 @@ void emitter::emitIns_SIMD_R_R_A(
         {
             emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
         }
-        emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RRW_ARD);
+        emitIns_R_A(ins, emitTypeSize(simdtype), reg, indir, IF_RWR_ARD);
     }
 }
 
@@ -5154,6 +5219,40 @@ void emitter::emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1,
     }
 }
 
+void emitter::emitIns_SIMD_R_R_A_I(
+    instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival, var_types simdtype)
+{ // Emit 'reg = ins(reg1, [indir], ival)', choosing the three-operand form when VEX encoding is in use.
+    if (UseVEXEncoding())
+    {
+        emitIns_R_R_A_I(ins, emitTypeSize(simdtype), reg, reg1, indir, ival, IF_RWR_RRD_ARD_CNS);
+    }
+    else
+    {
+        if (reg1 != reg) // the two-operand emitter below only takes 'reg', so copy reg1 into it first
+        {
+            emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
+        }
+        emitIns_R_A_I(ins, emitTypeSize(simdtype), reg, indir, ival);
+    }
+}
+
+void emitter::emitIns_SIMD_R_R_C_I(
+    instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival, var_types simdtype)
+{ // Emit 'reg = ins(reg1, [fldHnd + offs], ival)', choosing the three-operand form when VEX encoding is in use.
+    if (UseVEXEncoding())
+    {
+        emitIns_R_R_C_I(ins, emitTypeSize(simdtype), reg, reg1, fldHnd, offs, ival);
+    }
+    else
+    {
+        if (reg1 != reg) // the two-operand emitter below only takes 'reg', so copy reg1 into it first
+        {
+            emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
+        }
+        emitIns_R_C_I(ins, emitTypeSize(simdtype), reg, fldHnd, offs, ival);
+    }
+}
+
 void emitter::emitIns_SIMD_R_R_R_I(
     instruction ins, regNumber reg, regNumber reg1, regNumber reg2, int ival, var_types simdtype)
 {
@@ -5170,6 +5269,23 @@ void emitter::emitIns_SIMD_R_R_R_I(
         emitIns_R_R_I(ins, emitTypeSize(simdtype), reg, reg2, ival);
     }
 }
+
+void emitter::emitIns_SIMD_R_R_S_I(
+    instruction ins, regNumber reg, regNumber reg1, int varx, int offs, int ival, var_types simdtype)
+{ // Emit 'reg = ins(reg1, [varx + offs], ival)', choosing the three-operand form when VEX encoding is in use.
+    if (UseVEXEncoding())
+    {
+        emitIns_R_R_S_I(ins, emitTypeSize(simdtype), reg, reg1, varx, offs, ival);
+    }
+    else
+    {
+        if (reg1 != reg) // the two-operand emitter below only takes 'reg', so copy reg1 into it first
+        {
+            emitIns_R_R(INS_movaps, emitTypeSize(simdtype), reg, reg1);
+        }
+        emitIns_R_S_I(ins, emitTypeSize(simdtype), reg, varx, offs, ival);
+    }
+}
 #endif
 
 /*****************************************************************************
@@ -7185,14 +7301,15 @@ void emitter::emitDispIns(
             emitDispAddrMode(id);
             break;
 
-        case IF_RRW_ARD_CNS:
+        case IF_RWR_ARD_CNS:
+        {
             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
             emitDispAddrMode(id);
-
             emitGetInsAmdCns(id, &cnsVal);
-            val = cnsVal.cnsVal;
 
+            val = cnsVal.cnsVal;
             printf(", ");
+
             if (cnsVal.cnsReloc)
             {
                 emitDispReloc(val);
@@ -7202,12 +7319,34 @@ void emitter::emitDispIns(
                 goto PRINT_CONSTANT;
             }
             break;
+        }
 
         case IF_RWR_RRD_ARD:
             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
             emitDispAddrMode(id);
             break;
 
+        case IF_RWR_RRD_ARD_CNS:
+        {
+            printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
+            emitDispAddrMode(id);
+            emitGetInsAmdCns(id, &cnsVal);
+
+            val = cnsVal.cnsVal;
+            printf(", ");
+
+            if (cnsVal.cnsReloc)
+            {
+                emitDispReloc(val);
+            }
+            else
+            {
+                goto PRINT_CONSTANT;
+            }
+            
+            break;
+        }
+
         case IF_ARD_RRD:
         case IF_AWR_RRD:
         case IF_ARW_RRD:
@@ -7351,15 +7490,16 @@ void emitter::emitDispIns(
 
             break;
 
-        case IF_RRW_SRD_CNS:
+        case IF_RWR_SRD_CNS:
+        {
             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
             emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
-
             emitGetInsCns(id, &cnsVal);
-            val = cnsVal.cnsVal;
 
+            val = cnsVal.cnsVal;
             printf(", ");
+
             if (cnsVal.cnsReloc)
             {
                 emitDispReloc(val);
@@ -7369,6 +7509,7 @@ void emitter::emitDispIns(
                 goto PRINT_CONSTANT;
             }
             break;
+        }
 
         case IF_RWR_RRD_SRD:
             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
@@ -7376,6 +7517,27 @@ void emitter::emitDispIns(
                              id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
             break;
 
+        case IF_RWR_RRD_SRD_CNS:
+        {
+            printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
+            emitDispFrameRef(id->idAddr()->iiaLclVar.lvaVarNum(), id->idAddr()->iiaLclVar.lvaOffset(),
+                             id->idDebugOnlyInfo()->idVarRefOffs, asmfm);
+            emitGetInsCns(id, &cnsVal);
+
+            val = cnsVal.cnsVal;
+            printf(", ");
+
+            if (cnsVal.cnsReloc)
+            {
+                emitDispReloc(val);
+            }
+            else
+            {
+                goto PRINT_CONSTANT;
+            }
+            break;
+        }
+
         case IF_RRD_RRD:
         case IF_RWR_RRD:
         case IF_RRW_RRD:
@@ -7504,15 +7666,16 @@ void emitter::emitDispIns(
             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
             break;
 
-        case IF_RRW_MRD_CNS:
+        case IF_RWR_MRD_CNS:
+        {
             printf("%s, %s", emitRegName(id->idReg1(), attr), sstr);
             offs = emitGetInsDsp(id);
             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
-
             emitGetInsDcmCns(id, &cnsVal);
-            val = cnsVal.cnsVal;
 
+            val = cnsVal.cnsVal;
             printf(", ");
+
             if (cnsVal.cnsReloc)
             {
                 emitDispReloc(val);
@@ -7522,6 +7685,7 @@ void emitter::emitDispIns(
                 goto PRINT_CONSTANT;
             }
             break;
+        }
 
         case IF_RWR_RRD_MRD:
             printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
@@ -7529,6 +7693,27 @@ void emitter::emitDispIns(
             emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
             break;
 
+        case IF_RWR_RRD_MRD_CNS:
+        {
+            printf("%s, %s, %s", emitRegName(id->idReg1(), attr), emitRegName(id->idReg2(), attr), sstr);
+            offs = emitGetInsDsp(id);
+            emitDispClsVar(id->idAddr()->iiaFieldHnd, offs, ID_INFO_DSP_RELOC);
+            emitGetInsDcmCns(id, &cnsVal);
+
+            val = cnsVal.cnsVal;
+            printf(", ");
+
+            if (cnsVal.cnsReloc)
+            {
+                emitDispReloc(val);
+            }
+            else
+            {
+                goto PRINT_CONSTANT;
+            }
+            break;
+        }
+
         case IF_RWR_MRD_OFF:
 
             printf("%s, %s", emitRegName(id->idReg1(), attr), "offset");
@@ -7980,7 +8165,7 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
     {
         regNumber src1 = id->idReg2();
 
-        if (id->idInsFmt() != IF_RWR_RRD_ARD)
+        if ((id->idInsFmt() != IF_RWR_RRD_ARD) && (id->idInsFmt() != IF_RWR_RRD_ARD_CNS))
         {
             src1 = id->idReg1();
         }
@@ -11549,6 +11734,18 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             sz      = emitSizeOfInsDsc(id);
             break;
 
+        case IF_RWR_ARD_CNS:
+        case IF_RWR_RRD_ARD_CNS:
+        {
+            emitGetInsAmdCns(id, &cnsVal);
+            code    = insCodeRM(ins);
+            code    = AddVexPrefixIfNeeded(ins, code, size);
+            regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+            dst     = emitOutputAM(dst, id, code | regcode, &cnsVal);
+            sz      = emitSizeOfInsDsc(id);
+            break;
+        }
+
         case IF_ARD_RRD:
         case IF_AWR_RRD:
         case IF_ARW_RRD:
@@ -11699,6 +11896,34 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             }
             break;
 
+        case IF_RWR_SRD_CNS:
+        case IF_RWR_RRD_SRD_CNS:
+        {
+            emitGetInsCns(id, &cnsVal);
+            code = insCodeRM(ins);
+
+            // 4-byte AVX instructions are special cased inside emitOutputSV
+            // since they do not have space to encode ModRM byte.
+            if (Is4ByteAVXInstruction(ins))
+            {
+                dst = emitOutputSV(dst, id, code, &cnsVal);
+            }
+            else
+            {
+                code = AddVexPrefixIfNeeded(ins, code, size);
+
+                if (IsDstDstSrcAVXInstruction(ins))
+                {
+                    // encode source operand reg in 'vvvv' bits in 1's compliement form
+                    code = insEncodeReg3456(ins, id->idReg1(), size, code);
+                }
+
+                regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+                dst     = emitOutputSV(dst, id, code | regcode, &cnsVal);
+            }
+            break;
+        }
+
         case IF_SRD_RRD:
         case IF_SWR_RRD:
         case IF_SRW_RRD:
@@ -11815,6 +12040,34 @@ size_t emitter::emitOutputInstr(insGroup* ig, instrDesc* id, BYTE** dp)
             sz = emitSizeOfInsDsc(id);
             break;
 
+        case IF_RWR_MRD_CNS:
+        case IF_RWR_RRD_MRD_CNS:
+        {
+            emitGetInsCns(id, &cnsVal);
+            code = insCodeRM(ins);
+
+            // Special case 4-byte AVX instructions
+            if (Is4ByteAVXInstruction(ins))
+            {
+                dst = emitOutputCV(dst, id, code, &cnsVal);
+            }
+            else
+            {
+                code = AddVexPrefixIfNeeded(ins, code, size);
+
+                if (IsDstDstSrcAVXInstruction(ins))
+                {
+                    // encode source operand reg in 'vvvv' bits in 1's compliement form
+                    code = insEncodeReg3456(ins, id->idReg1(), size, code);
+                }
+
+                regcode = (insEncodeReg345(ins, id->idReg1(), size, &code) << 8);
+                dst     = emitOutputCV(dst, id, code | regcode | 0x0500, &cnsVal);
+            }
+            sz = emitSizeOfInsDsc(id);
+            break;
+        }
+
         case IF_RWR_MRD_OFF:
             code = insCode(ins);
             code = AddVexPrefixIfNeeded(ins, code, size);
index 5bcba76..885629d 100644 (file)
@@ -384,8 +384,16 @@ void emitIns_R_R_S(instruction ins, emitAttr attr, regNumber reg1, regNumber reg
 
 void emitIns_R_R_R(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3);
 
+void emitIns_R_R_A_I(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, GenTreeIndir* indir, int ival, insFormat fmt);
+
+void emitIns_R_R_C_I(
+    instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, CORINFO_FIELD_HANDLE fldHnd, int offs, int ival);
+
 void emitIns_R_R_R_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, regNumber reg3, int ival);
 
+void emitIns_R_R_S_I(instruction ins, emitAttr attr, regNumber reg1, regNumber reg2, int varx, int offs, int ival);
+
 void emitIns_S(instruction ins, emitAttr attr, int varx, int offs);
 
 void emitIns_S_R(instruction ins, emitAttr attr, regNumber ireg, int varx, int offs);
@@ -447,7 +455,18 @@ void emitIns_SIMD_R_R_C(
     instruction ins, regNumber reg, regNumber reg1, CORINFO_FIELD_HANDLE fldHnd, int offs, var_types simdtype);
 void emitIns_SIMD_R_R_R(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, var_types simdtype);
 void emitIns_SIMD_R_R_S(instruction ins, regNumber reg, regNumber reg1, int varx, int offs, var_types simdtype);
+void emitIns_SIMD_R_R_A_I(
+    instruction ins, regNumber reg, regNumber reg1, GenTreeIndir* indir, int ival, var_types simdtype);
+void emitIns_SIMD_R_R_C_I(instruction          ins,
+                          regNumber            reg,
+                          regNumber            reg1,
+                          CORINFO_FIELD_HANDLE fldHnd,
+                          int                  offs,
+                          int                  ival,
+                          var_types            simdtype);
 void emitIns_SIMD_R_R_R_I(instruction ins, regNumber reg, regNumber reg1, regNumber reg2, int ival, var_types simdtype);
+void emitIns_SIMD_R_R_S_I(
+    instruction ins, regNumber reg, regNumber reg1, int varx, int offs, int ival, var_types simdtype);
 #endif
 
 #if FEATURE_STACK_FP_X87
index 35c6eb3..9c60b27 100644 (file)
@@ -188,6 +188,115 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
     }
 }
 
+void CodeGen::genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins)
+{ // Emit 'ins target, op1, op2, ival' where op2 is a register, a stack slot, a static field or an address mode.
+    var_types targetType = node->TypeGet();
+    regNumber targetReg  = node->gtRegNum;
+    GenTree*  op1        = node->gtGetOp1();
+    GenTree*  op2        = node->gtGetOp2();
+    int       ival       = Compiler::ivalOfHWIntrinsic(node->gtHWIntrinsicId); // immediate associated with the intrinsic
+    emitter*  emit       = getEmitter();
+
+    // TODO-XArch-CQ: Commutative operations can have op1 be contained
+    // TODO-XArch-CQ: Non-VEX encoded instructions can have both ops contained
+
+    regNumber op1Reg = op1->gtRegNum;
+
+    assert(targetReg != REG_NA);
+    assert(op1Reg != REG_NA);
+
+    if (op2->isContained() || op2->isUsedFromSpillTemp()) // op2 is consumed from memory rather than a register
+    {
+        TempDsc* tmpDsc = nullptr;
+        unsigned varNum = BAD_VAR_NUM;
+        unsigned offset = (unsigned)-1; // sentinel; must be overwritten before the stack form is emitted
+
+        if (op2->isUsedFromSpillTemp())
+        {
+            assert(op2->IsRegOptional());
+
+            tmpDsc = getSpillTempDsc(op2);
+            varNum = tmpDsc->tdTempNum();
+            offset = 0;
+
+            compiler->tmpRlsTemp(tmpDsc);
+        }
+        else if (op2->isIndir())
+        {
+            GenTreeIndir* memIndir = op2->AsIndir();
+            GenTree*      memBase  = memIndir->gtOp1;
+
+            switch (memBase->OperGet())
+            {
+                case GT_LCL_VAR_ADDR: // indirection of a local's address: handled by the stack form emitted below
+                {
+                    varNum = memBase->AsLclVarCommon()->GetLclNum();
+                    offset = 0;
+
+                    // Ensure that all the GenTreeIndir values are set to their defaults.
+                    assert(memBase->gtRegNum == REG_NA);
+                    assert(!memIndir->HasIndex());
+                    assert(memIndir->Scale() == 1);
+                    assert(memIndir->Offset() == 0);
+
+                    break;
+                }
+
+                case GT_CLS_VAR_ADDR: // indirection of a static field address: emit the field-handle form and return
+                {
+                    emit->emitIns_SIMD_R_R_C_I(ins, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0, ival,
+                                               targetType);
+                    return;
+                }
+
+                default: // any other base: emit the general address-mode form and return
+                {
+                    emit->emitIns_SIMD_R_R_A_I(ins, targetReg, op1Reg, memIndir, ival, targetType);
+                    return;
+                }
+            }
+        }
+        else
+        {
+            switch (op2->OperGet())
+            {
+                case GT_LCL_FLD:
+                {
+                    GenTreeLclFld* lclField = op2->AsLclFld();
+
+                    varNum = lclField->GetLclNum();
+                    offset = lclField->gtLclFld.gtLclOffs;
+                    break;
+                }
+
+                case GT_LCL_VAR:
+                {
+                    assert(op2->IsRegOptional() || !compiler->lvaTable[op2->gtLclVar.gtLclNum].lvIsRegCandidate());
+                    varNum = op2->AsLclVar()->GetLclNum();
+                    offset = 0;
+                    break;
+                }
+
+                default:
+                    unreached();
+                    break;
+            }
+        }
+
+        // Ensure we got a good varNum and offset.
+        // We also need to check for `tmpDsc != nullptr` since spill temp numbers
+        // are negative and start with -1, which also happens to be BAD_VAR_NUM.
+        assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
+        assert(offset != (unsigned)-1);
+
+        emit->emitIns_SIMD_R_R_S_I(ins, targetReg, op1Reg, varNum, offset, ival, targetType); // stack-slot form
+    }
+    else
+    {
+        emit->emitIns_SIMD_R_R_R_I(ins, targetReg, op1Reg, op2->gtRegNum, ival, targetType); // all-register form
+    }
+}
+
 void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
 {
     NamedIntrinsic intrinsicID = node->gtHWIntrinsicId;
@@ -239,6 +348,7 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
         {
             assert(node->TypeGet() == TYP_SIMD16);
             assert(node->gtSIMDBaseType == TYP_FLOAT);
+            assert(Compiler::ivalOfHWIntrinsic(intrinsicID) == -1);
 
             instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
             genHWIntrinsic_R_R_RM(node, ins);
@@ -270,12 +380,12 @@ void CodeGen::genSSEIntrinsic(GenTreeHWIntrinsic* node)
         case NI_SSE_CompareUnordered:
         case NI_SSE_CompareUnorderedScalar:
         {
-            assert(baseType == TYP_FLOAT);
-            op2Reg = op2->gtRegNum;
+            assert(node->TypeGet() == TYP_SIMD16);
+            assert(node->gtSIMDBaseType == TYP_FLOAT);
+            assert(Compiler::ivalOfHWIntrinsic(intrinsicID) != -1);
 
-            instruction ins  = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
-            int         ival = Compiler::ivalOfHWIntrinsic(intrinsicID);
-            emit->emitIns_SIMD_R_R_R_I(ins, targetReg, op1Reg, op2Reg, ival, TYP_SIMD16);
+            instruction ins = Compiler::insOfHWIntrinsic(intrinsicID, node->gtSIMDBaseType);
+            genHWIntrinsic_R_R_RM_I(node, ins);
             break;
         }
 
index fb49d36..a6f8073 100644 (file)
@@ -2314,6 +2314,30 @@ void Lowering::ContainCheckHWIntrinsic(GenTreeHWIntrinsic* node)
         case NI_SSE_AddScalar:
         case NI_SSE_And:
         case NI_SSE_AndNot:
+        case NI_SSE_CompareEqual:
+        case NI_SSE_CompareEqualScalar:
+        case NI_SSE_CompareGreaterThan:
+        case NI_SSE_CompareGreaterThanScalar:
+        case NI_SSE_CompareGreaterThanOrEqual:
+        case NI_SSE_CompareGreaterThanOrEqualScalar:
+        case NI_SSE_CompareLessThan:
+        case NI_SSE_CompareLessThanScalar:
+        case NI_SSE_CompareLessThanOrEqual:
+        case NI_SSE_CompareLessThanOrEqualScalar:
+        case NI_SSE_CompareNotEqual:
+        case NI_SSE_CompareNotEqualScalar:
+        case NI_SSE_CompareNotGreaterThan:
+        case NI_SSE_CompareNotGreaterThanScalar:
+        case NI_SSE_CompareNotGreaterThanOrEqual:
+        case NI_SSE_CompareNotGreaterThanOrEqualScalar:
+        case NI_SSE_CompareNotLessThan:
+        case NI_SSE_CompareNotLessThanScalar:
+        case NI_SSE_CompareNotLessThanOrEqual:
+        case NI_SSE_CompareNotLessThanOrEqualScalar:
+        case NI_SSE_CompareOrdered:
+        case NI_SSE_CompareOrderedScalar:
+        case NI_SSE_CompareUnordered:
+        case NI_SSE_CompareUnorderedScalar:
         case NI_SSE_ConvertToVector128SingleScalar:
         case NI_SSE_Divide:
         case NI_SSE_DivideScalar: