From 8a6e132f8b11c60d775987e9a4f400822e571267 Mon Sep 17 00:00:00 2001 From: Mike Danes Date: Sat, 15 Apr 2017 21:43:14 +0300 Subject: [PATCH] Do unsigned magic division Commit migrated from https://github.com/dotnet/coreclr/commit/64a4d4969363dd8a96afece3ff48e1b20cccba68 --- src/coreclr/src/jit/codegenxarch.cpp | 4 - src/coreclr/src/jit/jitstd/type_traits.h | 27 ++++ src/coreclr/src/jit/lower.cpp | 207 +++++++++++++++++++++++++++---- src/coreclr/src/jit/lower.h | 2 +- src/coreclr/src/jit/utils.cpp | 177 ++++++++++++++++++++++++++ src/coreclr/src/jit/utils.h | 8 ++ 6 files changed, 393 insertions(+), 32 deletions(-) diff --git a/src/coreclr/src/jit/codegenxarch.cpp b/src/coreclr/src/jit/codegenxarch.cpp index 6bb1242..e2f3187 100644 --- a/src/coreclr/src/jit/codegenxarch.cpp +++ b/src/coreclr/src/jit/codegenxarch.cpp @@ -535,10 +535,6 @@ void CodeGen::genCodeForNegNot(GenTree* tree) // Generate code to get the high N bits of a N*N=2N bit multiplication result void CodeGen::genCodeForMulHi(GenTreeOp* treeNode) { - if (treeNode->OperGet() == GT_MULHI) - { - assert(!(treeNode->gtFlags & GTF_UNSIGNED)); - } assert(!treeNode->gtOverflowEx()); regNumber targetReg = treeNode->gtRegNum; diff --git a/src/coreclr/src/jit/jitstd/type_traits.h b/src/coreclr/src/jit/jitstd/type_traits.h index f0f8518..06c8ad1 100644 --- a/src/coreclr/src/jit/jitstd/type_traits.h +++ b/src/coreclr/src/jit/jitstd/type_traits.h @@ -194,4 +194,31 @@ struct make_unsigned<__int64> typedef unsigned __int64 type; }; +template +struct make_signed +{ +}; + +template<> +struct make_signed +{ + typedef signed int type; +}; + +#ifndef _HOST_UNIX_ + +template<> +struct make_signed +{ + typedef signed long type; +}; + +#endif // !_HOST_UNIX_ + +template<> +struct make_signed +{ + typedef signed __int64 type; +}; + } // namespace jit_std diff --git a/src/coreclr/src/jit/lower.cpp b/src/coreclr/src/jit/lower.cpp index 5c46d3d..70b46f0 100644 --- a/src/coreclr/src/jit/lower.cpp +++ b/src/coreclr/src/jit/lower.cpp @@ -144,7 +144,7 @@ GenTree* Lowering::LowerNode(GenTree* node) case GT_UDIV: case GT_UMOD: - LowerUnsignedDivOrMod(node); + return LowerUnsignedDivOrMod(node->AsOp()); break; case GT_DIV: @@ -3979,46 +3979,199 @@ GenTree* Lowering::LowerAdd(GenTree* node) } //------------------------------------------------------------------------ -// LowerUnsignedDivOrMod: transform GT_UDIV/GT_UMOD nodes with a const power of 2 -// divisor into GT_RSZ/GT_AND nodes. +// LowerUnsignedDivOrMod: Lowers a GT_UDIV/GT_UMOD node. // // Arguments: -// node - pointer to the GT_UDIV/GT_UMOD node to be lowered +// divMod - pointer to the GT_UDIV/GT_UMOD node to be lowered // -void Lowering::LowerUnsignedDivOrMod(GenTree* node) +// Notes: +// - Transform UDIV/UMOD by power of 2 into RSZ/AND +// - Transform UDIV by constant >= 2^(N-1) into GE +// - Transform UDIV/UMOD by constant >= 3 into "magic division" + +GenTree* Lowering::LowerUnsignedDivOrMod(GenTreeOp* divMod) { - assert((node->OperGet() == GT_UDIV) || (node->OperGet() == GT_UMOD)); + assert(divMod->OperIs(GT_UDIV, GT_UMOD)); - GenTree* divisor = node->gtGetOp2(); - GenTree* dividend = node->gtGetOp1(); + GenTree* next = divMod->gtNext; + GenTree* dividend = divMod->gtGetOp1(); + GenTree* divisor = divMod->gtGetOp2(); - if (divisor->IsCnsIntOrI() -#ifdef _TARGET_X86_ - && (dividend->OperGet() != GT_LONG) +#if !defined(_TARGET_64BIT_) + if (dividend->OperIs(GT_LONG)) + { + return next; + } #endif - ) + + if (!divisor->IsCnsIntOrI()) { - size_t divisorValue = static_cast(divisor->gtIntCon.IconValue()); + return next; + } - if (isPow2(divisorValue)) + if (dividend->IsCnsIntOrI()) + { + // We shouldn't see a divmod with constant operands here but if we do then it's likely + // because optimizations are disabled or it's a case that's supposed to throw an exception. + // Don't optimize this. + return next; + } + + const var_types type = divMod->TypeGet(); + assert((type == TYP_INT) || (type == TYP_I_IMPL)); + + size_t divisorValue = static_cast(divisor->AsIntCon()->IconValue()); + + if (type == TYP_INT) + { + // Clear up the upper 32 bits of the value, they may be set to 1 because constants + // are treated as signed and stored in ssize_t which is 64 bit in size on 64 bit targets. + divisorValue &= UINT32_MAX; + } + + if (divisorValue == 0) + { + return next; + } + + const bool isDiv = divMod->OperIs(GT_UDIV); + + if (isPow2(divisorValue)) + { + genTreeOps newOper; + + if (isDiv) + { + newOper = GT_RSZ; + divisorValue = genLog2(divisorValue); + } + else { - genTreeOps newOper; + newOper = GT_AND; + divisorValue -= 1; + } - if (node->OperGet() == GT_UDIV) - { - newOper = GT_RSZ; - divisorValue = genLog2(divisorValue); - } - else - { - newOper = GT_AND; - divisorValue -= 1; - } + divMod->SetOper(newOper); + divisor->AsIntCon()->SetIconValue(divisorValue); + + return next; + } + + if (isDiv) + { + // If the divisor is greater or equal than 2^(N - 1) then the result is 1 + // iff the dividend is greater or equal than the divisor. + if (((type == TYP_INT) && (divisorValue > (UINT32_MAX / 2))) || + ((type == TYP_LONG) && (divisorValue > (UINT64_MAX / 2)))) + { + divMod->SetOper(GT_GE); + divMod->gtFlags |= GTF_UNSIGNED; + return next; + } + } + +// TODO-ARM-CQ: Currently there's no GT_MULHI for ARM32/64 +#ifdef _TARGET_XARCH_ + if (!comp->opts.MinOpts() && (divisorValue >= 3)) + { + size_t magic; + bool add; + int shift; - node->SetOper(newOper); - divisor->gtIntCon.SetIconValue(divisorValue); + if (type == TYP_INT) + { + magic = MagicDivide::GetUnsigned32Magic(static_cast(divisorValue), &add, &shift); } + else + { +#ifdef _TARGET_64BIT_ + magic = MagicDivide::GetUnsigned64Magic(static_cast(divisorValue), &add, &shift); +#else + unreached(); +#endif + } + + // Depending on the "add" flag returned by GetUnsignedMagicNumberForDivide we need to generate: + // add == false (when divisor == 3 for example): + // div = (dividend MULHI magic) RSZ shift + // add == true (when divisor == 7 for example): + // mulhi = dividend MULHI magic + // div = (((dividend SUB mulhi) RSZ 1) ADD mulhi)) RSZ (shift - 1) + const bool requiresAdjustment = add; + const bool requiresDividendMultiuse = requiresAdjustment || !isDiv; + const unsigned curBBWeight = m_block->getBBWeight(comp); + unsigned dividendLclNum = BAD_VAR_NUM; + + if (requiresDividendMultiuse) + { + LIR::Use dividendUse(BlockRange(), &divMod->gtOp1, divMod); + dividendLclNum = dividendUse.ReplaceWithLclVar(comp, curBBWeight); + dividend = divMod->gtGetOp1(); + } + + // Insert a new GT_MULHI node before the existing GT_UDIV/GT_UMOD node. + // The existing node will later be transformed into a GT_RSZ/GT_SUB that + // computes the final result. This way don't need to find and change the use + // of the existing node. + GenTree* mulhi = comp->gtNewOperNode(GT_MULHI, type, dividend, divisor); + mulhi->gtFlags |= GTF_UNSIGNED; + divisor->AsIntCon()->SetIconValue(magic); + BlockRange().InsertBefore(divMod, mulhi); + + if (requiresAdjustment) + { + GenTree* dividend = comp->gtNewLclvNode(dividendLclNum, type); + GenTree* sub = comp->gtNewOperNode(GT_SUB, type, dividend, mulhi); + BlockRange().InsertBefore(divMod, dividend, sub); + comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp); + + GenTree* one = comp->gtNewIconNode(1, TYP_INT); + GenTree* rsz = comp->gtNewOperNode(GT_RSZ, type, sub, one); + BlockRange().InsertBefore(divMod, one, rsz); + + LIR::Use mulhiUse(BlockRange(), &sub->gtOp.gtOp2, sub); + unsigned mulhiLclNum = mulhiUse.ReplaceWithLclVar(comp, curBBWeight); + + GenTree* mulhiCopy = comp->gtNewLclvNode(mulhiLclNum, type); + GenTree* add = comp->gtNewOperNode(GT_ADD, type, rsz, mulhiCopy); + BlockRange().InsertBefore(divMod, mulhiCopy, add); + comp->lvaTable[mulhiLclNum].incRefCnts(curBBWeight, comp); + + mulhi = add; + shift -= 1; + } + + GenTree* shiftBy = comp->gtNewIconNode(shift, TYP_INT); + BlockRange().InsertBefore(divMod, shiftBy); + + if (isDiv) + { + divMod->SetOper(GT_RSZ); + divMod->gtOp1 = mulhi; + divMod->gtOp2 = shiftBy; + } + else + { + GenTree* div = comp->gtNewOperNode(GT_RSZ, type, mulhi, shiftBy); + + // divisor UMOD dividend = dividend SUB (div MUL divisor) + GenTree* divisor = comp->gtNewIconNode(divisorValue, type); + GenTree* mul = comp->gtNewOperNode(GT_MUL, type, div, divisor); + GenTree* dividend = comp->gtNewLclvNode(dividendLclNum, type); + + divMod->SetOper(GT_SUB); + divMod->gtOp1 = dividend; + divMod->gtOp2 = mul; + + BlockRange().InsertBefore(divMod, div, divisor, mul, dividend); + comp->lvaTable[dividendLclNum].incRefCnts(curBBWeight, comp); + } + + return mulhi; } +#endif + + return next; } //------------------------------------------------------------------------ diff --git a/src/coreclr/src/jit/lower.h b/src/coreclr/src/jit/lower.h index d9bb357..73d2be6 100644 --- a/src/coreclr/src/jit/lower.h +++ b/src/coreclr/src/jit/lower.h @@ -237,7 +237,7 @@ private: // Per tree node member functions void LowerStoreInd(GenTree* node); GenTree* LowerAdd(GenTree* node); - void LowerUnsignedDivOrMod(GenTree* node); + GenTree* LowerUnsignedDivOrMod(GenTreeOp* divMod); GenTree* LowerSignedDivOrMod(GenTree* node); void LowerBlockStore(GenTreeBlk* blkNode); diff --git a/src/coreclr/src/jit/utils.cpp b/src/coreclr/src/jit/utils.cpp index 9fbe394..c8f770f 100644 --- a/src/coreclr/src/jit/utils.cpp +++ b/src/coreclr/src/jit/utils.cpp @@ -1808,3 +1808,180 @@ float FloatingPointUtils::round(float x) return _copysignf(flrTempVal, x); } + +namespace MagicDivide +{ +template +static const Magic* TryGetMagic(const Magic (&table)[TableSize], typename Magic::DivisorType index) +{ + if ((index < TableBase) || (TableBase + TableSize <= index)) + { + return nullptr; + } + + const Magic* p = &table[index - TableBase]; + + if (p->magic == 0) + { + return nullptr; + } + + return p; +}; + +template +struct UnsignedMagic +{ + typedef T DivisorType; + + T magic; + bool add; + int shift; +}; + +template +const UnsignedMagic* TryGetUnsignedMagic(T divisor) +{ + return nullptr; +} + +template <> +const UnsignedMagic* TryGetUnsignedMagic(uint32_t divisor) +{ + static const UnsignedMagic table[]{ + {0xaaaaaaab, false, 1}, // 3 + {}, + {0xcccccccd, false, 2}, // 5 + {0xaaaaaaab, false, 2}, // 6 + {0x24924925, true, 3}, // 7 + {}, + {0x38e38e39, false, 1}, // 9 + {0xcccccccd, false, 3}, // 10 + {0xba2e8ba3, false, 3}, // 11 + {0xaaaaaaab, false, 3}, // 12 + }; + + return TryGetMagic<3>(table, divisor); +} + +template <> +const UnsignedMagic* TryGetUnsignedMagic(uint64_t divisor) +{ + static const UnsignedMagic table[]{ + {0xaaaaaaaaaaaaaaab, false, 1}, // 3 + {}, + {0xcccccccccccccccd, false, 2}, // 5 + {0xaaaaaaaaaaaaaaab, false, 2}, // 6 + {0x2492492492492493, true, 3}, // 7 + {}, + {0xe38e38e38e38e38f, false, 3}, // 9 + {0xcccccccccccccccd, false, 3}, // 10 + {0x2e8ba2e8ba2e8ba3, false, 1}, // 11 + {0xaaaaaaaaaaaaaaab, false, 3}, // 12 + }; + + return TryGetMagic<3>(table, divisor); +} + +//------------------------------------------------------------------------ +// GetUnsignedMagic: Generates a magic number and shift amount for the magic +// number unsigned division optimization. +// +// Arguments: +// d - The divisor +// add - Pointer to a flag indicating the kind of code to generate +// shift - Pointer to the shift value to be returned +// +// Returns: +// The magic number. +// +// Notes: +// This code is adapted from _The_PowerPC_Compiler_Writer's_Guide_, pages 57-58. +// The paper is based on "Division by invariant integers using multiplication" +// by Torbjorn Granlund and Peter L. Montgomery in PLDI 94 + +template +T GetUnsignedMagic(T d, bool* add /*out*/, int* shift /*out*/) +{ + assert((d >= 3) && !isPow2(d)); + + const UnsignedMagic* magic = TryGetUnsignedMagic(d); + + if (magic != nullptr) + { + *shift = magic->shift; + *add = magic->add; + return magic->magic; + } + + typedef typename jitstd::make_signed::type ST; + + const unsigned bits = sizeof(T) * 8; + const unsigned bitsMinus1 = bits - 1; + const T twoNMinus1 = T(1) << bitsMinus1; + + *add = false; + const T nc = -ST(1) - -ST(d) % ST(d); + unsigned p = bitsMinus1; + T q1 = twoNMinus1 / nc; + T r1 = twoNMinus1 - (q1 * nc); + T q2 = (twoNMinus1 - 1) / d; + T r2 = (twoNMinus1 - 1) - (q2 * d); + T delta; + + do + { + p++; + + if (r1 >= (nc - r1)) + { + q1 = 2 * q1 + 1; + r1 = 2 * r1 - nc; + } + else + { + q1 = 2 * q1; + r1 = 2 * r1; + } + + if ((r2 + 1) >= (d - r2)) + { + if (q2 >= (twoNMinus1 - 1)) + { + *add = true; + } + + q2 = 2 * q2 + 1; + r2 = 2 * r2 + 1 - d; + } + else + { + if (q2 >= twoNMinus1) + { + *add = true; + } + + q2 = 2 * q2; + r2 = 2 * r2 + 1; + } + + delta = d - 1 - r2; + + } while ((p < (bits * 2)) && ((q1 < delta) || ((q1 == delta) && (r1 == 0)))); + + *shift = p - bits; // resulting shift + return q2 + 1; // resulting magic number +} + +uint32_t GetUnsigned32Magic(uint32_t d, bool* add /*out*/, int* shift /*out*/) +{ + return GetUnsignedMagic(d, add, shift); +} + +#ifdef _TARGET_64BIT_ +uint64_t GetUnsigned64Magic(uint64_t d, bool* add /*out*/, int* shift /*out*/) +{ + return GetUnsignedMagic(d, add, shift); +} +#endif +} diff --git a/src/coreclr/src/jit/utils.h b/src/coreclr/src/jit/utils.h index b41cf84..fe785aa 100644 --- a/src/coreclr/src/jit/utils.h +++ b/src/coreclr/src/jit/utils.h @@ -718,4 +718,12 @@ private: CritSecHolder& operator=(const CritSecHolder&) = delete; }; +namespace MagicDivide +{ +uint32_t GetUnsigned32Magic(uint32_t d, bool* add /*out*/, int* shift /*out*/); +#ifdef _TARGET_64BIT_ +uint64_t GetUnsigned64Magic(uint64_t d, bool* add /*out*/, int* shift /*out*/); +#endif +} + #endif // _UTILS_H_ -- 2.7.4