Fix target register false dependency of lzcnt, tzcnt, and popcnt
author Fei Peng <fei.peng@intel.com>
Thu, 30 Aug 2018 20:23:37 +0000 (13:23 -0700)
committer Fei Peng <fei.peng@intel.com>
Thu, 30 Aug 2018 20:23:37 +0000 (13:23 -0700)
src/jit/hwintrinsiccodegenxarch.cpp
src/jit/hwintrinsiclistxarch.h
src/jit/lsraxarch.cpp

index 474cba5..0a9dfb3 100644 (file)
@@ -2179,10 +2179,21 @@ void CodeGen::genBMI1Intrinsic(GenTreeHWIntrinsic* node)
         case NI_BMI1_ExtractLowestSetBit:
         case NI_BMI1_GetMaskUpToLowestSetBit:
         case NI_BMI1_ResetLowestSetBit:
+        {
+            assert(op2 == nullptr);
+            assert((targetType == TYP_INT) || (targetType == TYP_LONG));
+            genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()));
+            break;
+        }
+
         case NI_BMI1_TrailingZeroCount:
         {
             assert(op2 == nullptr);
             assert((targetType == TYP_INT) || (targetType == TYP_LONG));
+            // tzcnt has a false dependency on the target register on Intel Sandy Bridge and Haswell processors,
+            // so insert an `XOR target, target` to break the dependency, since XOR triggers register renaming.
+            regNumber targetReg = node->gtRegNum;
+            getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
             genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet()));
             break;
         }
@@ -2353,6 +2364,10 @@ void CodeGen::genLZCNTIntrinsic(GenTreeHWIntrinsic* node)
     assert(node->gtHWIntrinsicId == NI_LZCNT_LeadingZeroCount);
 
     genConsumeOperands(node);
+    // lzcnt has a false dependency on the target register on Intel Sandy Bridge and Haswell processors,
+    // so insert an `XOR target, target` to break the dependency, since XOR triggers register renaming.
+    regNumber targetReg = node->gtRegNum;
+    getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
     genHWIntrinsic_R_RM(node, INS_lzcnt, emitTypeSize(node->TypeGet()));
     genProduceReg(node);
 }
@@ -2379,6 +2394,10 @@ void CodeGen::genPOPCNTIntrinsic(GenTreeHWIntrinsic* node)
     assert(node->gtHWIntrinsicId == NI_POPCNT_PopCount);
 
     genConsumeOperands(node);
+    // popcnt has a false dependency on the target register on Intel Sandy Bridge, Haswell, and Skylake processors,
+    // so insert an `XOR target, target` to break the dependency, since XOR triggers register renaming.
+    regNumber targetReg = node->gtRegNum;
+    getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg);
     genHWIntrinsic_R_RM(node, INS_popcnt, emitTypeSize(node->TypeGet()));
     genProduceReg(node);
 }
index b7be02f..341da9b 100644 (file)
@@ -480,7 +480,7 @@ HARDWARE_INTRINSIC(BMI1_AndNot,                                     "AndNot",
 HARDWARE_INTRINSIC(BMI1_ExtractLowestSetBit,                        "ExtractLowestSetBit",                          BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsi,           INS_blsi,           INS_blsi,           INS_blsi,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(BMI1_GetMaskUpToLowestSetBit,                    "GetMaskUpToLowestSetBit",                      BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsmsk,         INS_blsmsk,         INS_blsmsk,         INS_blsmsk,         INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
 HARDWARE_INTRINSIC(BMI1_ResetLowestSetBit,                          "ResetLowestSetBit",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_blsr,           INS_blsr,           INS_blsr,           INS_blsr,           INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
-HARDWARE_INTRINSIC(BMI1_TrailingZeroCount,                          "TrailingZeroCount",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(BMI1_TrailingZeroCount,                          "TrailingZeroCount",                            BMI1,         -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_tzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
@@ -514,7 +514,7 @@ HARDWARE_INTRINSIC(FMA_MultiplySubtractNegatedScalar,               "MultiplySub
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  LZCNT Intrinsics
 HARDWARE_INTRINSIC(LZCNT_IsSupported,                               "get_IsSupported",                              LZCNT,        -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount,                          "LeadingZeroCount",                             LZCNT,        -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_lzcnt,          INS_invalid,        INS_lzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(LZCNT_LeadingZeroCount,                          "LeadingZeroCount",                             LZCNT,        -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_lzcnt,          INS_invalid,        INS_lzcnt,          INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
 
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //                 Intrinsic ID                                     Function name                                   ISA         ival        SIMD size       NumArg                                                                                                     instructions                                                                                                     Category                            Flags
@@ -529,7 +529,7 @@ HARDWARE_INTRINSIC(PCLMULQDQ_IsSupported,                           "get_IsSuppo
 // ***************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************************
 //  POPCNT Intrinsics
 HARDWARE_INTRINSIC(POPCNT_IsSupported,                              "get_IsSupported",                              POPCNT,       -1,               0,           0,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_IsSupportedProperty,    HW_Flag_NoFlag)
-HARDWARE_INTRINSIC(POPCNT_PopCount,                                 "PopCount",                                     POPCNT,       -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_popcnt,         INS_invalid,        INS_popcnt,         INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics)
+HARDWARE_INTRINSIC(POPCNT_PopCount,                                 "PopCount",                                     POPCNT,       -1,               0,           1,     {INS_invalid,           INS_invalid,        INS_invalid,        INS_invalid,        INS_popcnt,         INS_invalid,        INS_popcnt,         INS_invalid,        INS_invalid,        INS_invalid},           HW_Category_Scalar,                 HW_Flag_NoFloatingPointUsed|HW_Flag_NoRMWSemantics|HW_Flag_MultiIns)
 #endif // FEATURE_HW_INTRINSIC
 
 #undef HARDWARE_INTRINSIC
index 56e3cc7..8f012c0 100644 (file)
@@ -2584,6 +2584,16 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree)
                 break;
             }
 
+            case NI_BMI1_TrailingZeroCount:
+            case NI_LZCNT_LeadingZeroCount:
+            case NI_POPCNT_PopCount:
+            {
+                assert(numArgs == 1);
+                srcCount += BuildDelayFreeUses(op1);
+                buildUses = false;
+                break;
+            }
+
             default:
             {
                 assert((intrinsicId > NI_HW_INTRINSIC_START) && (intrinsicId < NI_HW_INTRINSIC_END));