Implementing the AndNot, ExtractLowestSetBit, GetMaskUpToLowestSetBit, and ResetLowestSetBit BMI1 intrinsics
author: Tanner Gooding <tagoo@outlook.com>
Fri, 6 Jul 2018 06:34:58 +0000 (23:34 -0700)
committer: Tanner Gooding <tagoo@outlook.com>
Mon, 9 Jul 2018 23:21:18 +0000 (16:21 -0700)
Commit migrated from https://github.com/dotnet/coreclr/commit/a27def0c523fab5468bd25d9f72b069b07fc3317

src/coreclr/src/jit/codegenlinear.h
src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/emitxarch.cpp
src/coreclr/src/jit/gentree.cpp
src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/src/jit/hwintrinsiclistxarch.h
src/coreclr/src/jit/hwintrinsicxarch.cpp
src/coreclr/src/jit/instrsxarch.h

index 6383985..dfa4bf0 100644 (file)
@@ -117,7 +117,7 @@ void genHWIntrinsic(GenTreeHWIntrinsic* node);
 #if defined(_TARGET_XARCH_)
 void genHWIntrinsic_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
 void genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, int8_t ival);
-void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins);
+void genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr);
 void genHWIntrinsic_R_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, int8_t ival);
 void genHWIntrinsic_R_R_RM_R(GenTreeHWIntrinsic* node, instruction ins);
 void genHWIntrinsic_R_R_R_RM(
index ec1e2ac..ca2d426 100644 (file)
@@ -2172,6 +2172,11 @@ public:
                                                    GenTree*       op1,
                                                    GenTree*       op2,
                                                    NamedIntrinsic hwIntrinsicID);
+    GenTreeHWIntrinsic* gtNewScalarHWIntrinsicNode(var_types      type,
+                                                   GenTree*       op1,
+                                                   GenTree*       op2,
+                                                   GenTree*       op3,
+                                                   NamedIntrinsic hwIntrinsicID);
     GenTree* gtNewMustThrowException(unsigned helper, var_types type, CORINFO_CLASS_HANDLE clsHnd);
     CORINFO_CLASS_HANDLE gtGetStructHandleForHWSIMD(var_types simdType, var_types simdBaseType);
 #endif // FEATURE_HW_INTRINSICS
index f12efef..c52d710 100644 (file)
@@ -50,6 +50,38 @@ bool IsFMAInstruction(instruction ins)
     return (ins >= INS_FIRST_FMA_INSTRUCTION) && (ins <= INS_LAST_FMA_INSTRUCTION);
 }
 
+// IsBMIInstruction: reports whether 'ins' lies between the
+// INS_FIRST_BMI_INSTRUCTION and INS_LAST_BMI_INSTRUCTION markers (inclusive).
+bool IsBMIInstruction(instruction ins)
+{
+    const bool atOrAfterFirst = (ins >= INS_FIRST_BMI_INSTRUCTION);
+    const bool atOrBeforeLast = (ins <= INS_LAST_BMI_INSTRUCTION);
+    return atOrAfterFirst && atOrBeforeLast;
+}
+
+// getBmiRegNumber: maps a BMI instruction to the fixed value that the
+// emitOutput* routines encode into the reg345 field in place of the
+// instruction's first register operand.
+//
+// Returns REG_NA when 'ins' has no such fixed value; callers then fall back
+// to encoding idReg1() as usual.
+//
+// NOTE(review): the constants look like the ModRM "/digit" opcode extensions
+// (blsr /1, blsmsk /2, blsi /3) -- confirm against the Intel SDM.
+regNumber getBmiRegNumber(instruction ins)
+{
+    if (ins == INS_blsr)
+    {
+        return (regNumber)1;
+    }
+
+    if (ins == INS_blsmsk)
+    {
+        return (regNumber)2;
+    }
+
+    if (ins == INS_blsi)
+    {
+        return (regNumber)3;
+    }
+
+    // Anything else must still be a BMI instruction; it just has no fixed value.
+    assert(IsBMIInstruction(ins));
+    return REG_NA;
+}
+
 regNumber getSseShiftRegNumber(instruction ins)
 {
     switch (ins)
@@ -113,12 +145,16 @@ bool emitter::IsDstDstSrcAVXInstruction(instruction ins)
         case INS_addss:
         case INS_addsubpd:
         case INS_addsubps:
+        case INS_andn:
         case INS_andnpd:
         case INS_andnps:
         case INS_andpd:
         case INS_andps:
         case INS_blendpd:
         case INS_blendps:
+        case INS_blsi:
+        case INS_blsmsk:
+        case INS_blsr:
         case INS_cmppd:
         case INS_cmpps:
         case INS_cmpsd:
@@ -571,6 +607,10 @@ bool TakesRexWPrefix(instruction ins, emitAttr attr)
     {
         switch (ins)
         {
+            case INS_andn:
+            case INS_blsi:
+            case INS_blsmsk:
+            case INS_blsr:
             case INS_cvttsd2si:
             case INS_cvttss2si:
             case INS_cvtsd2si:
@@ -799,7 +839,7 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c
             // 4-byte opcode: with the bytes ordered as 0x22114433
             // check for a prefix in the 11 position
             BYTE sizePrefix = (code >> 16) & 0xFF;
-            if (sizePrefix != 0 && isPrefix(sizePrefix))
+            if ((sizePrefix != 0) && isPrefix(sizePrefix))
             {
                 // 'pp' bits in byte2 of VEX prefix allows us to encode SIMD size prefixes as two bits
                 //
@@ -810,7 +850,7 @@ unsigned emitter::emitOutputRexOrVexPrefixIfNeeded(instruction ins, BYTE* dst, c
                 switch (sizePrefix)
                 {
                     case 0x66:
-                        vexPrefix |= 0x01;
+                        vexPrefix |= IsBMIInstruction(ins) ? 0x00 : 0x01;
                         break;
                     case 0xF3:
                         vexPrefix |= 0x02;
@@ -4236,8 +4276,7 @@ void emitter::emitIns_R_R_C(
         attr = EA_SET_FLG(attr, EA_DSP_RELOC_FLG);
     }
 
-    instrDesc*     id = emitNewInstrDsp(attr, offs);
-    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
+    instrDesc* id = emitNewInstrDsp(attr, offs);
 
     id->idIns(ins);
     id->idInsFmt(IF_RWR_RRD_MRD);
@@ -4245,6 +4284,7 @@ void emitter::emitIns_R_R_C(
     id->idReg2(reg2);
     id->idAddr()->iiaFieldHnd = fldHnd;
 
+    UNATIVE_OFFSET sz = emitInsSizeCV(id, insCodeRM(ins));
     id->idCodeSize(sz);
 
     dispIns(id);
@@ -9356,7 +9396,17 @@ BYTE* emitter::emitOutputAM(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
             }
         }
 
-        unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+        regNumber reg345 = REG_NA;
+        if (IsBMIInstruction(ins))
+        {
+            reg345 = getBmiRegNumber(ins);
+        }
+        if (reg345 == REG_NA)
+        {
+            reg345 = id->idReg1();
+        }
+        unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
+
         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
 
         if (UseVEXEncoding() && (ins != INS_crc32))
@@ -10098,7 +10148,21 @@ BYTE* emitter::emitOutputSV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
             }
         }
 
-        unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+        regNumber reg345 = REG_NA;
+        if (IsBMIInstruction(ins))
+        {
+            reg345 = getBmiRegNumber(ins);
+        }
+        if (reg345 == REG_NA)
+        {
+            reg345 = id->idReg1();
+        }
+        else
+        {
+            code = insEncodeReg3456(ins, id->idReg1(), size, code);
+        }
+        unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
+
         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
 
         if (UseVEXEncoding() && (ins != INS_crc32))
@@ -10548,7 +10612,21 @@ BYTE* emitter::emitOutputCV(BYTE* dst, instrDesc* id, code_t code, CnsVal* addc)
             }
         }
 
-        unsigned regcode = insEncodeReg345(ins, id->idReg1(), size, &code);
+        regNumber reg345 = REG_NA;
+        if (IsBMIInstruction(ins))
+        {
+            reg345 = getBmiRegNumber(ins);
+        }
+        if (reg345 == REG_NA)
+        {
+            reg345 = id->idReg1();
+        }
+        else
+        {
+            code = insEncodeReg3456(ins, id->idReg1(), size, code);
+        }
+        unsigned regcode = insEncodeReg345(ins, reg345, size, &code);
+
         dst += emitOutputRexOrVexPrefixIfNeeded(ins, dst, code);
 
         if (UseVEXEncoding() && (ins != INS_crc32))
@@ -11201,7 +11279,16 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
         }
     }
 
-    unsigned regCode = insEncodeReg345(ins, reg1, size, &code);
+    regNumber reg345 = REG_NA;
+    if (IsBMIInstruction(ins))
+    {
+        reg345 = getBmiRegNumber(ins);
+    }
+    if (reg345 == REG_NA)
+    {
+        reg345 = id->idReg1();
+    }
+    unsigned regCode = insEncodeReg345(ins, reg345, size, &code);
     regCode |= insEncodeReg012(ins, reg2, size, &code);
 
     if (TakesVexPrefix(ins))
index dd2c6dd..d2e06f9 100644 (file)
@@ -17512,6 +17512,20 @@ GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types      type,
     return new (this, GT_HWIntrinsic) GenTreeHWIntrinsic(type, op1, op2, hwIntrinsicID, TYP_UNKNOWN, 0);
 }
 
+//---------------------------------------------------------------------------------------
+// gtNewScalarHWIntrinsicNode:
+//    Create a GT_HWIntrinsic node for a scalar intrinsic that takes three operands.
+//
+// Arguments:
+//    type          - the type passed to the new node's constructor
+//    op1, op2, op3 - the operands; each is passed to SetOpLclRelatedToSIMDIntrinsic
+//                    and then packed, in this order, into a single argument list
+//    hwIntrinsicID - the intrinsic the node represents
+//
+// Return Value:
+//    The newly allocated GenTreeHWIntrinsic node.
+//
+// Notes:
+//    TYP_UNKNOWN and 0 are passed as the trailing constructor arguments, as in the
+//    two-operand overload (presumably the SIMD base type and SIMD size, which a
+//    scalar node does not use -- TODO confirm).
+//
+GenTreeHWIntrinsic* Compiler::gtNewScalarHWIntrinsicNode(var_types      type,
+                                                         GenTree*       op1,
+                                                         GenTree*       op2,
+                                                         GenTree*       op3,
+                                                         NamedIntrinsic hwIntrinsicID)
+{
+    SetOpLclRelatedToSIMDIntrinsic(op1);
+    SetOpLclRelatedToSIMDIntrinsic(op2);
+    SetOpLclRelatedToSIMDIntrinsic(op3);
+
+    return new (this, GT_HWIntrinsic)
+        GenTreeHWIntrinsic(type, gtNewArgList(op1, op2, op3), hwIntrinsicID, TYP_UNKNOWN, 0);
+}
+
 //---------------------------------------------------------------------------------------
 // gtNewMustThrowException:
 //    create a throw node (calling into JIT helper) that must be thrown.
index 17c2d10..edb3c30 100644 (file)
@@ -203,7 +203,7 @@ void CodeGen::genHWIntrinsic(GenTreeHWIntrinsic* node)
                 }
                 else
                 {
-                    genHWIntrinsic_R_R_RM(node, ins);
+                    genHWIntrinsic_R_R_RM(node, ins, EA_ATTR(node->gtSIMDSize));
                 }
                 break;
             }
@@ -607,13 +607,12 @@ void CodeGen::genHWIntrinsic_R_RM_I(GenTreeHWIntrinsic* node, instruction ins, i
 //    node - The hardware intrinsic node
 //    ins  - The instruction being generated
 //
-void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
+void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins, emitAttr attr)
 {
     var_types targetType = node->TypeGet();
     regNumber targetReg  = node->gtRegNum;
     GenTree*  op1        = node->gtGetOp1();
     GenTree*  op2        = node->gtGetOp2();
-    emitAttr  simdSize   = EA_ATTR(node->gtSIMDSize);
     emitter*  emit       = getEmitter();
 
     // TODO-XArch-CQ: Commutative operations can have op1 be contained
@@ -645,7 +644,7 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
         }
         else if (op2->OperIsHWIntrinsic())
         {
-            emit->emitIns_SIMD_R_R_AR(ins, simdSize, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum);
+            emit->emitIns_SIMD_R_R_AR(ins, attr, targetReg, op1Reg, op2->gtGetOp1()->gtRegNum);
             return;
         }
         else if (op2->isIndir())
@@ -670,13 +669,13 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
 
                 case GT_CLS_VAR_ADDR:
                 {
-                    emit->emitIns_SIMD_R_R_C(ins, simdSize, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0);
+                    emit->emitIns_SIMD_R_R_C(ins, attr, targetReg, op1Reg, memBase->gtClsVar.gtClsVarHnd, 0);
                     return;
                 }
 
                 default:
                 {
-                    emit->emitIns_SIMD_R_R_A(ins, simdSize, targetReg, op1Reg, memIndir);
+                    emit->emitIns_SIMD_R_R_A(ins, attr, targetReg, op1Reg, memIndir);
                     return;
                 }
             }
@@ -714,7 +713,7 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
         assert((varNum != BAD_VAR_NUM) || (tmpDsc != nullptr));
         assert(offset != (unsigned)-1);
 
-        emit->emitIns_SIMD_R_R_S(ins, simdSize, targetReg, op1Reg, varNum, offset);
+        emit->emitIns_SIMD_R_R_S(ins, attr, targetReg, op1Reg, varNum, offset);
     }
     else
     {
@@ -734,7 +733,7 @@ void CodeGen::genHWIntrinsic_R_R_RM(GenTreeHWIntrinsic* node, instruction ins)
             op1Reg = targetReg;
         }
 
-        emit->emitIns_SIMD_R_R_R(ins, simdSize, targetReg, op1Reg, op2Reg);
+        emit->emitIns_SIMD_R_R_R(ins, attr, targetReg, op1Reg, op2Reg);
     }
 }
 
@@ -1552,7 +1551,7 @@ void CodeGen::genSSE2Intrinsic(GenTreeHWIntrinsic* node)
             assert(op1 != nullptr);
             assert(op2 != nullptr);
             instruction ins = HWIntrinsicInfo::lookupIns(intrinsicId, baseType);
-            genHWIntrinsic_R_R_RM(node, ins);
+            genHWIntrinsic_R_R_RM(node, ins, EA_ATTR(node->gtSIMDSize));
             break;
         }
 
@@ -2145,6 +2144,18 @@ void CodeGen::genBMI1Intrinsic(GenTreeHWIntrinsic* node)
 
     switch (intrinsicId)
     {
+        case NI_BMI1_AndNot:
+        {
+            assert(op2 != nullptr);
+            assert(op1->TypeGet() == op2->TypeGet());
+            assert((targetType == TYP_INT) || (targetType == TYP_LONG));
+            genHWIntrinsic_R_R_RM(node, ins, emitTypeSize(node->TypeGet()));
+            break;
+        }
+
+        case NI_BMI1_ExtractLowestSetBit:
+        case NI_BMI1_GetMaskUpToLowestSetBit:
+        case NI_BMI1_ResetLowestSetBit:
         case NI_BMI1_TrailingZeroCount:
         {
             assert(op2 == nullptr);
index 0804de6..ed00c05 100644 (file)
@@ -462,6 +462,10 @@ HARDWARE_INTRINSIC(AES_IsSupported,                                 "get_IsSuppo
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  BMI1 Intrinsics
 HARDWARE_INTRINSIC(BMI1_IsSupported,                                "get_IsSupported",                              BMI1,         -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(BMI1_AndNot,                                     "AndNot",                                       BMI1,         -1,               0,           2,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_andn,           INS_andn,           INS_andn,           INS_andn,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_ExtractLowestSetBit,                        "ExtractLowestSetBit",                          BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsi,           INS_blsi,           INS_blsi,           INS_blsi,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_GetMaskUpToLowestSetBit,                    "GetMaskUpToLowestSetBit",                      BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsmsk,         INS_blsmsk,         INS_blsmsk,         INS_blsmsk,         INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_ResetLowestSetBit,                          "ResetLowestSetBit",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsr,           INS_blsr,           INS_blsr,           INS_blsr,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(BMI1_TrailingZeroCount,                          "TrailingZeroCount",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
index 994ea3c..0a5e891 100644 (file)
@@ -1291,6 +1291,19 @@ GenTree* Compiler::impBMI1Intrinsic(NamedIntrinsic        intrinsic,
 
     switch (intrinsic)
     {
+        case NI_BMI1_AndNot:
+        {
+            assert(sig->numArgs == 2);
+
+            GenTree*  op2      = impPopStack().val;
+            GenTree*  op1      = impPopStack().val;
+
+            return gtNewScalarHWIntrinsicNode(callType, op1, op2, intrinsic);
+        }
+
+        case NI_BMI1_ExtractLowestSetBit:
+        case NI_BMI1_GetMaskUpToLowestSetBit:
+        case NI_BMI1_ResetLowestSetBit:
         case NI_BMI1_TrailingZeroCount:
         {
             assert(sig->numArgs == 1);
index cf3f45d..477420e 100644 (file)
@@ -564,6 +564,15 @@ INST3(vfnmsub213ss,   "fmnsub213ss",   0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE,
 INST3(vfnmsub231ss,   "fmnsub231ss",   0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0xBF))   //
 INST3(LAST_FMA_INSTRUCTION, "LAST_FMA_INSTRUCTION",  0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
 
+// BMI1
+INST3(FIRST_BMI_INSTRUCTION, "FIRST_BMI_INSTRUCTION",  0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+INST3(andn,           "andn",          0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0xF2))   // Logical AND NOT
+INST3(blsi,           "blsi",          0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0xF3))   // Extract Lowest Set Isolated Bit
+INST3(blsmsk,         "blsmsk",        0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0xF3))   // Get Mask Up to Lowest Set Bit
+INST3(blsr,           "blsr",          0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSE38(0xF3))   // Reset Lowest Set Bit
+
+INST3(LAST_BMI_INSTRUCTION, "LAST_BMI_INSTRUCTION",  0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
+
 INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION",  0, IUM_WR, 0, 0, BAD_CODE, BAD_CODE, BAD_CODE)
 
 // Scalar instructions in SSE4.2