From 6cd754ad87db79d7dcfbde0374b2e10ab8d3d616 Mon Sep 17 00:00:00 2001 From: Pent Ploompuu Date: Mon, 3 Sep 2018 22:48:58 +0300 Subject: [PATCH] Improved fix for target register false dependency of lzcnt, tzcnt, and popcnt --- src/jit/codegen.h | 1 + src/jit/hwintrinsiccodegenxarch.cpp | 64 ++++++++++++++++++++++++++++--------- src/jit/lsraxarch.cpp | 10 ------ 3 files changed, 50 insertions(+), 25 deletions(-) diff --git a/src/jit/codegen.h b/src/jit/codegen.h index 9eb1ba3..0f06205 100644 --- a/src/jit/codegen.h +++ b/src/jit/codegen.h @@ -888,6 +888,7 @@ protected: void genLZCNTIntrinsic(GenTreeHWIntrinsic* node); void genPCLMULQDQIntrinsic(GenTreeHWIntrinsic* node); void genPOPCNTIntrinsic(GenTreeHWIntrinsic* node); + void genXCNTIntrinsic(GenTreeHWIntrinsic* node, instruction ins); template void genHWIntrinsicJumpTableFallback(NamedIntrinsic intrinsic, regNumber nonConstImmReg, diff --git a/src/jit/hwintrinsiccodegenxarch.cpp b/src/jit/hwintrinsiccodegenxarch.cpp index 0a9dfb3..f3fb366 100644 --- a/src/jit/hwintrinsiccodegenxarch.cpp +++ b/src/jit/hwintrinsiccodegenxarch.cpp @@ -2190,11 +2190,7 @@ void CodeGen::genBMI1Intrinsic(GenTreeHWIntrinsic* node) { assert(op2 == nullptr); assert((targetType == TYP_INT) || (targetType == TYP_LONG)); - // tzcnt has false dependency on the target register on Intel Sandy Bridge and Haswell processors, - // so insert a `XOR target, target` to break the dependency via XOR triggering register renaming. 
- regNumber targetReg = node->gtRegNum; - getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); - genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet())); + genXCNTIntrinsic(node, ins); break; } @@ -2364,11 +2360,7 @@ void CodeGen::genLZCNTIntrinsic(GenTreeHWIntrinsic* node) assert(node->gtHWIntrinsicId == NI_LZCNT_LeadingZeroCount); genConsumeOperands(node); - // lzcnt has false dependency on the target register on Intel Sandy Bridge and Haswell processors, - // so insert a `XOR target, target` to break the dependency via XOR triggering register renaming. - regNumber targetReg = node->gtRegNum; - getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); - genHWIntrinsic_R_RM(node, INS_lzcnt, emitTypeSize(node->TypeGet())); + genXCNTIntrinsic(node, INS_lzcnt); genProduceReg(node); } @@ -2394,12 +2386,54 @@ void CodeGen::genPOPCNTIntrinsic(GenTreeHWIntrinsic* node) assert(node->gtHWIntrinsicId == NI_POPCNT_PopCount); genConsumeOperands(node); - // popcnt has false dependency on the target register on Intel Sandy Bridge, Haswell, and Skylake processors, - // so insert a `XOR target, target` to break the dependency via XOR triggering register renaming. 
- regNumber targetReg = node->gtRegNum; - getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); - genHWIntrinsic_R_RM(node, INS_popcnt, emitTypeSize(node->TypeGet())); + genXCNTIntrinsic(node, INS_popcnt); genProduceReg(node); } +//------------------------------------------------------------------------ +// genXCNTIntrinsic: Generates the code for a lzcnt/tzcnt/popcnt hardware intrinsic node and breaks false +// dependencies on the target register +// +// Arguments: +// node - The hardware intrinsic node +// ins - The instruction to emit for the node (passed through to genHWIntrinsic_R_RM) +// +void CodeGen::genXCNTIntrinsic(GenTreeHWIntrinsic* node, instruction ins) +{ + // LZCNT/TZCNT/POPCNT have a false dependency on the target register on Intel Sandy Bridge, Haswell, and Skylake + // (POPCNT only) processors, so insert a `XOR target, target` to break the dependency via XOR triggering register + // renaming, but only if it's not an actual dependency (i.e. the target is not also a source register). + + GenTree* op1 = node->gtGetOp1(); + regNumber sourceReg1 = REG_NA; + regNumber sourceReg2 = REG_NA; + + if (!op1->isContained()) + { + sourceReg1 = op1->gtRegNum; + } + else if (op1->isIndir()) + { + GenTreeIndir* indir = op1->AsIndir(); + GenTree* memBase = indir->Base(); + + if (memBase != nullptr) + { + sourceReg1 = memBase->gtRegNum; + } + + if (indir->HasIndex()) + { + sourceReg2 = indir->Index()->gtRegNum; + } + } + + regNumber targetReg = node->gtRegNum; + if ((targetReg != sourceReg1) && (targetReg != sourceReg2)) + { + getEmitter()->emitIns_R_R(INS_xor, EA_4BYTE, targetReg, targetReg); + } + genHWIntrinsic_R_RM(node, ins, emitTypeSize(node->TypeGet())); +} + #endif // FEATURE_HW_INTRINSICS diff --git a/src/jit/lsraxarch.cpp b/src/jit/lsraxarch.cpp index 8f012c0..56e3cc7 100644 --- a/src/jit/lsraxarch.cpp +++ b/src/jit/lsraxarch.cpp @@ -2584,16 +2584,6 @@ int LinearScan::BuildHWIntrinsic(GenTreeHWIntrinsic* intrinsicTree) break; } - case NI_BMI1_TrailingZeroCount: - case NI_LZCNT_LeadingZeroCount: - case NI_POPCNT_PopCount: - { - 
assert(numArgs == 1); - srcCount += BuildDelayFreeUses(op1); - buildUses = false; - break; - } - default: { assert((intrinsicId > NI_HW_INTRINSIC_START) && (intrinsicId < NI_HW_INTRINSIC_END)); -- 2.7.4