Implementing the x86 TrailingZeroCount HWIntrinsic
author Tanner Gooding <tagoo@outlook.com>
Wed, 4 Jul 2018 23:32:27 +0000 (16:32 -0700)
committer Tanner Gooding <tagoo@outlook.com>
Mon, 9 Jul 2018 23:21:18 +0000 (16:21 -0700)
Commit migrated from https://github.com/dotnet/coreclr/commit/a58184ff85db3c60014dbc5d05831131c2355fb0

src/coreclr/src/jit/emitxarch.cpp
src/coreclr/src/jit/hwintrinsiccodegenxarch.cpp
src/coreclr/src/jit/hwintrinsiclistxarch.h
src/coreclr/src/jit/hwintrinsicxarch.cpp
src/coreclr/src/jit/instrsxarch.h

index 0be0dae..f12efef 100644 (file)
@@ -11136,7 +11136,7 @@ BYTE* emitter::emitOutputRR(BYTE* dst, instrDesc* id)
 #endif // _TARGET_AMD64_
     }
 #ifdef FEATURE_HW_INTRINSICS
-    else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt))
+    else if ((ins == INS_crc32) || (ins == INS_lzcnt) || (ins == INS_popcnt) || (ins == INS_tzcnt))
     {
         code = insEncodeRMreg(ins, code);
         if ((ins == INS_crc32) && (size > EA_1BYTE))
index ce58f8b..17c2d10 100644 (file)
@@ -2126,7 +2126,41 @@ void CodeGen::genAESIntrinsic(GenTreeHWIntrinsic* node)
 //
 void CodeGen::genBMI1Intrinsic(GenTreeHWIntrinsic* node)
 {
-    NYI("Implement BMI1 intrinsic code generation");
+    NamedIntrinsic intrinsicId = node->gtHWIntrinsicId;
+    regNumber      targetReg   = node->gtRegNum;
+    GenTree*       op1         = node->gtGetOp1();
+    GenTree*       op2         = node->gtGetOp2();
+    var_types      baseType    = node->gtSIMDBaseType;
+    var_types      targetType  = node->TypeGet();
+    instruction    ins         = HWIntrinsicInfo::lookupIns(intrinsicId, targetType);
+    emitter*       emit        = getEmitter();
+
+    assert(targetReg != REG_NA);
+    assert(op1 != nullptr);
+
+    if (!op1->OperIsList())
+    {
+        genConsumeOperands(node);
+    }
+
+    switch (intrinsicId)
+    {
+        case NI_BMI1_TrailingZeroCount:
+        {
+            assert(op2 == nullptr);
+            assert((targetType == TYP_INT) || (targetType == TYP_LONG));
+            genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()));
+            break;
+        }
+
+        default:
+        {
+            unreached();
+            break;
+        }
+    }
+
+    genProduceReg(node);
 }
 
 //------------------------------------------------------------------------
index e2fd071..0804de6 100644 (file)
@@ -462,6 +462,7 @@ HARDWARE_INTRINSIC(AES_IsSupported,                                 "get_IsSuppo
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  BMI1 Intrinsics
 HARDWARE_INTRINSIC(BMI1_IsSupported,                                "get_IsSupported",                              BMI1,         -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
+HARDWARE_INTRINSIC(BMI1_TrailingZeroCount,                          "TrailingZeroCount",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
index 1fe22cf..994ea3c 100644 (file)
@@ -379,7 +379,6 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa)
     {
         // These ISAs have no implementation
         case InstructionSet_AES:
-        case InstructionSet_BMI1:
         case InstructionSet_BMI2:
         case InstructionSet_PCLMULQDQ:
         {
@@ -389,6 +388,7 @@ bool HWIntrinsicInfo::isFullyImplementedIsa(InstructionSet isa)
         // These ISAs are partially implemented
         case InstructionSet_AVX:
         case InstructionSet_AVX2:
+        case InstructionSet_BMI1:
         case InstructionSet_SSE42:
         {
             return true;
@@ -1287,7 +1287,23 @@ GenTree* Compiler::impBMI1Intrinsic(NamedIntrinsic        intrinsic,
                                     CORINFO_SIG_INFO*     sig,
                                     bool                  mustExpand)
 {
-    return nullptr;
+    var_types callType = JITtype2varType(sig->retType);
+
+    switch (intrinsic)
+    {
+        case NI_BMI1_TrailingZeroCount:
+        {
+            assert(sig->numArgs == 1);
+            GenTree* op1 = impPopStack().val;
+            return gtNewScalarHWIntrinsicNode(callType, op1, intrinsic);
+        }
+
+        default:
+        {
+            unreached();
+            return nullptr;
+        }
+    }
 }
 
 GenTree* Compiler::impBMI2Intrinsic(NamedIntrinsic        intrinsic,
index 489baa8..cf3f45d 100644 (file)
@@ -569,6 +569,9 @@ INST3(LAST_AVX_INSTRUCTION, "LAST_AVX_INSTRUCTION",  0, IUM_WR, 0, 0, BAD_CODE,
 // Scalar instructions in SSE4.2
 INST3( crc32,        "crc32"       , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, PACK4(0xF2, 0x0F, 0x38, 0xF0))
 
+// BMI1
+INST3( tzcnt,        "tzcnt"       , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEFLT(0xBC))    // Count the Number of Trailing Zero Bits
+
 // LZCNT
 INST3( lzcnt,        "lzcnt"       , 0, IUM_WR, 0, 0, BAD_CODE,     BAD_CODE, SSEFLT(0xBD))