From 982c332d06f3b0b722ca05ef5f486c7e57a88389 Mon Sep 17 00:00:00 2001 From: Bruce Forstall Date: Mon, 17 Apr 2017 16:32:36 -0700 Subject: [PATCH] Use isInternalRegDelayFree to avoid temp reg overallocation (dotnet/coreclr#11025) Use isInternalRegDelayFree to avoid temp reg overallocation Commit migrated from https://github.com/dotnet/coreclr/commit/e440f484375f06038f20d615591fa2b98ad3a319 --- src/coreclr/src/jit/codegenarmarch.cpp | 14 ++------------ src/coreclr/src/jit/emitarm64.cpp | 12 +----------- src/coreclr/src/jit/lsraarm.cpp | 15 ++++++--------- src/coreclr/src/jit/lsraarm64.cpp | 22 +++++++--------------- 4 files changed, 16 insertions(+), 47 deletions(-) diff --git a/src/coreclr/src/jit/codegenarmarch.cpp b/src/coreclr/src/jit/codegenarmarch.cpp index c0ef471..3144343 100644 --- a/src/coreclr/src/jit/codegenarmarch.cpp +++ b/src/coreclr/src/jit/codegenarmarch.cpp @@ -707,18 +707,8 @@ void CodeGen::genCodeForArrIndex(GenTreeArrIndex* arrIndex) noway_assert(tgtReg != REG_NA); // We will use a temp register to load the lower bound and dimension size values. - // - // This should be simply: - // regNumber tmpReg = arrIndex->GetSingleTempReg(); - // - // However, since LSRA might give us an internal temp register that is the same as the dst - // register, and the codegen here reuses the temp register after a definition of the target - // register, we requested two internal registers. If one is the target register, we simply - // use the other one. We can use ExtractTempReg() since it only asserts that there is at - // least one available temporary register (not that there is exactly one, for instance). - // Here, masking out tgtReg, there will be either 1 or 2. - - regNumber tmpReg = arrIndex->ExtractTempReg(~genRegMask(tgtReg)); + + regNumber tmpReg = arrIndex->GetSingleTempReg(); assert(tgtReg != tmpReg); unsigned dim = arrIndex->gtCurrDim; diff --git a/src/coreclr/src/jit/emitarm64.cpp b/src/coreclr/src/jit/emitarm64.cpp index 7ebf420..0328cb6 100644 --- a/src/coreclr/src/jit/emitarm64.cpp +++ b/src/coreclr/src/jit/emitarm64.cpp @@ -11072,17 +11072,7 @@ regNumber emitter::emitInsTernary(instruction ins, emitAttr attr, GenTree* dst, { if (isMulOverflow) { - // This should be simply: - // regNumber extraReg = dst->GetSingleTempReg(); - // - // However, since LSRA might give us an internal temp register that is the same as the dst - // register, and the codegen here reuses the temp register after a definition of the target - // register, we requested two internal registers. If one is the target register, we simply - // use the other one. We can use ExtractTempReg() since it only asserts that there is at - // least one available temporary register (not that there is exactly one, for instance). - // Here, masking out tgtReg, there will be either 1 or 2. - - regNumber extraReg = dst->ExtractTempReg(~genRegMask(dst->gtRegNum)); + regNumber extraReg = dst->GetSingleTempReg(); assert(extraReg != dst->gtRegNum); if ((dst->gtFlags & GTF_UNSIGNED) != 0) diff --git a/src/coreclr/src/jit/lsraarm.cpp b/src/coreclr/src/jit/lsraarm.cpp index 1dd167c..e83f50c 100644 --- a/src/coreclr/src/jit/lsraarm.cpp +++ b/src/coreclr/src/jit/lsraarm.cpp @@ -444,7 +444,8 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) if (tree->gtOverflow()) { // Need a register different from target reg to check for overflow. - info->internalIntCount = 1; + info->internalIntCount = 1; + info->isInternalRegDelayFree = true; } __fallthrough; @@ -541,14 +542,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_ARR_INDEX: - info->srcCount = 2; - info->dstCount = 1; - - // We need one internal register when generating code for GT_ARR_INDEX, however the - // register allocator always may just give us the same one as it gives us for the 'dst' - // as a workaround we will just ask for two internal registers. - // - info->internalIntCount = 2; + info->srcCount = 2; + info->dstCount = 1; + info->internalIntCount = 1; + info->isInternalRegDelayFree = true; // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple // times while the result is being computed. diff --git a/src/coreclr/src/jit/lsraarm64.cpp b/src/coreclr/src/jit/lsraarm64.cpp index 87ad4ec..6de00f4 100644 --- a/src/coreclr/src/jit/lsraarm64.cpp +++ b/src/coreclr/src/jit/lsraarm64.cpp @@ -288,12 +288,9 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) case GT_MUL: if (tree->gtOverflow()) { - // Need a register different from target reg to check for overflow; - // code generation requires the temp reg to live beyond the definition - // of the target reg. Since we have no way to tell LSRA that, we request - // two temp registers, and use one that is not the target reg. - // TODO-ARM64-CQ: Figure out a way to only reserve one. - info->internalIntCount = 2; + // Need a register different from target reg to check for overflow. + info->internalIntCount = 1; + info->isInternalRegDelayFree = true; } __fallthrough; @@ -606,15 +603,10 @@ void Lowering::TreeNodeInfoInit(GenTree* tree) break; case GT_ARR_INDEX: - info->srcCount = 2; - info->dstCount = 1; - - // We need one internal register when generating code for GT_ARR_INDEX. However, the - // register allocator may give us the same one it gives us for 'dst'. - // As a workaround we will just ask for two internal registers. - // TODO-ARM64-CQ: Figure out a way to only reserve one. - // - info->internalIntCount = 2; + info->srcCount = 2; + info->dstCount = 1; + info->internalIntCount = 1; + info->isInternalRegDelayFree = true; // For GT_ARR_INDEX, the lifetime of the arrObj must be extended because it is actually used multiple // times while the result is being computed. -- 2.7.4