From 3ffe695710db2ac25ad5b58d6f8d8cbe7e69b407 Mon Sep 17 00:00:00 2001 From: Sergey Andreenko Date: Thu, 27 Apr 2017 18:06:34 -0700 Subject: [PATCH] Fix lsra memory consumption (dotnet/coreclr#11233) * Rename ClearD to OldStyleClearD * create fast ClearD function. The new ClearD reuses existing buffer and assumes that epoch wasn't changed. * Reuse existing buffer for the predSet in lsra. * Use fast ClearD in LSRA. * Use fast ClearD in liveness * Fix the comment * delete the test from issues Commit migrated from https://github.com/dotnet/coreclr/commit/70014f8863209853a29d010219e36b7eae2d4cd8 --- src/coreclr/src/jit/assertionprop.cpp | 2 +- src/coreclr/src/jit/bitset.h | 11 +++++++++- src/coreclr/src/jit/bitsetasshortlong.h | 32 +++++++++++++++++++++++++++-- src/coreclr/src/jit/bitsetasuint64.h | 5 +++++ src/coreclr/src/jit/bitsetasuint64inclass.h | 17 ++++++++++++--- src/coreclr/src/jit/bitsetops.h | 1 + src/coreclr/src/jit/copyprop.cpp | 2 +- src/coreclr/src/jit/emit.cpp | 6 +++--- src/coreclr/src/jit/flowgraph.cpp | 8 ++++---- src/coreclr/src/jit/liveness.cpp | 3 +++ src/coreclr/src/jit/lsra.cpp | 20 ++++++++++++------ src/coreclr/src/jit/lsra.h | 2 +- src/coreclr/src/jit/regalloc.cpp | 4 ++-- src/coreclr/tests/issues.targets | 6 ------ 14 files changed, 89 insertions(+), 30 deletions(-) diff --git a/src/coreclr/src/jit/assertionprop.cpp b/src/coreclr/src/jit/assertionprop.cpp index 767d63a..04f2fbe 100644 --- a/src/coreclr/src/jit/assertionprop.cpp +++ b/src/coreclr/src/jit/assertionprop.cpp @@ -4556,7 +4556,7 @@ ASSERT_TP* Compiler::optInitAssertionDataflowFlags() } // Compute the data flow values for all tracked expressions // IN and OUT never change for the initial basic block B1 - BitVecOps::ClearD(apTraits, fgFirstBB->bbAssertionIn); + BitVecOps::OldStyleClearD(apTraits, fgFirstBB->bbAssertionIn); return jumpDestOut; } diff --git a/src/coreclr/src/jit/bitset.h b/src/coreclr/src/jit/bitset.h index 4ecb2fc..a4b0091 100644 --- a/src/coreclr/src/jit/bitset.h +++ b/src/coreclr/src/jit/bitset.h @@ -205,9 +205,13 @@ class BitSetOps // Destructively set "bs" to be the empty set. This method is unique, in that it does *not* // require "bs" to be a bitset of the current epoch. It ensures that it is after, however. // (If the representation is indirect, this requires allocating a new, empty representation. - // If this is a performance issue, we could provide a new version of ClearD that assumes/asserts + // If this is a performance issue, we could provide a new version of OldStyleClearD that assumes/asserts // that the rep is for the current epoch -- this would be useful if a given bitset were repeatedly // cleared within an epoch.) + // TODO #11263: delete it. + static void OldStyleClearD(Env env, BitSetType& bs); + + // Destructively set "bs" to be the empty set. static void ClearD(Env env, BitSetType& bs); // Returns a copy of "bs". If the representation of "bs" involves a level of indirection, the data @@ -326,6 +330,11 @@ public: BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AssignNocopy); BSO::AssignNoCopy(env, lhs, rhs); } + static void OldStyleClearD(Env env, BitSetType& bs) + { + BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_OldStyleClearD); + BSO::OldStyleClearD(env, bs); + } static void ClearD(Env env, BitSetType& bs) { BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_ClearD); diff --git a/src/coreclr/src/jit/bitsetasshortlong.h b/src/coreclr/src/jit/bitsetasshortlong.h index 163cb36..962a8bb 100644 --- a/src/coreclr/src/jit/bitsetasshortlong.h +++ b/src/coreclr/src/jit/bitsetasshortlong.h @@ -43,6 +43,7 @@ private: static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2); static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i); static void RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i); + static void OldStyleClearDLong(Env env, BitSetShortLongRep& bs); static void ClearDLong(Env env, BitSetShortLongRep& bs); static BitSetShortLongRep MakeUninitArrayBits(Env env); static BitSetShortLongRep MakeEmptyArrayBits(Env env); @@ -122,6 +123,19 @@ public: lhs = rhs; } + static void OldStyleClearD(Env env, BitSetShortLongRep& bs) + { + if (IsShort(env)) + { + bs = (BitSetShortLongRep) nullptr; + } + else + { + assert(bs != UninitVal()); + OldStyleClearDLong(env, bs); + } + } + static void ClearD(Env env, BitSetShortLongRep& bs) { if (IsShort(env)) @@ -661,15 +675,29 @@ template void BitSetOps::ClearDLong(Env env, BitSetShortLongRep& bs) + /*BitSetTraits*/ BitSetTraits>::OldStyleClearDLong(Env env, BitSetShortLongRep& bs) { assert(!IsShort(env)); - // Recall that ClearD does *not* require "bs" to be of the current epoch. + // Recall that OldStyleClearD does *not* require "bs" to be of the current epoch. // Therefore, we must allocate a new representation. bs = MakeEmptyArrayBits(env); } template +void BitSetOps::ClearDLong(Env env, BitSetShortLongRep& bs) +{ + assert(!IsShort(env)); + unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t)); + for (unsigned i = 0; i < len; i++) + { + bs[i] = 0; + } +} + +template BitSetShortLongRep BitSetOpsbbLiveIn); for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext) { - VarSetOps::ClearD(this, optCopyPropKillSet); + VarSetOps::OldStyleClearD(this, optCopyPropKillSet); // Walk the tree to find if any local variable can be replaced with current live definitions. for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext) diff --git a/src/coreclr/src/jit/emit.cpp b/src/coreclr/src/jit/emit.cpp index 3b765b9..d2aa29f 100644 --- a/src/coreclr/src/jit/emit.cpp +++ b/src/coreclr/src/jit/emit.cpp @@ -1472,8 +1472,8 @@ void emitter::emitBegProlog() /* Nothing is live on entry to the prolog */ // These were initialized to Empty at the start of compilation. - VarSetOps::ClearD(emitComp, emitInitGCrefVars); - VarSetOps::ClearD(emitComp, emitPrevGCrefVars); + VarSetOps::OldStyleClearD(emitComp, emitInitGCrefVars); + VarSetOps::OldStyleClearD(emitComp, emitPrevGCrefVars); emitInitGCrefRegs = RBM_NONE; emitPrevGCrefRegs = RBM_NONE; emitInitByrefRegs = RBM_NONE; @@ -4564,7 +4564,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp, /* Assume no live GC ref variables on entry */ - VarSetOps::ClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen. + VarSetOps::OldStyleClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen. emitThisGCrefRegs = emitThisByrefRegs = RBM_NONE; emitThisGCrefVset = true; diff --git a/src/coreclr/src/jit/flowgraph.cpp b/src/coreclr/src/jit/flowgraph.cpp index 36f080d..1582179 100644 --- a/src/coreclr/src/jit/flowgraph.cpp +++ b/src/coreclr/src/jit/flowgraph.cpp @@ -1815,9 +1815,9 @@ void Compiler::fgComputeReachabilitySets() for (block = fgFirstBB; block != nullptr; block = block->bbNext) { - // Initialize the per-block bbReach sets. (Note that we can't just call BlockSetOps::ClearD() - // when re-running this computation, because if the epoch changes, the size and representation of the - // sets might change). + // Initialize the per-block bbReach sets. It creates a new empty set, + // because the block epoch could change since the previous initialization + // and the old set could have wrong size. block->bbReach = BlockSetOps::MakeEmpty(this); /* Mark block as reaching itself */ @@ -10011,7 +10011,7 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext) if (fgDomsComputed && block->bbNum > fgDomBBcount) { BlockSetOps::Assign(this, block->bbReach, bNext->bbReach); - BlockSetOps::ClearD(this, bNext->bbReach); + BlockSetOps::OldStyleClearD(this, bNext->bbReach); block->bbIDom = bNext->bbIDom; bNext->bbIDom = nullptr; diff --git a/src/coreclr/src/jit/liveness.cpp b/src/coreclr/src/jit/liveness.cpp index d498a6f..73f72e7 100644 --- a/src/coreclr/src/jit/liveness.cpp +++ b/src/coreclr/src/jit/liveness.cpp @@ -418,6 +418,8 @@ void Compiler::fgPerBlockLocalVarLiveness() } #endif // DEBUG + unsigned livenessVarEpoch = GetCurLVEpoch(); + BasicBlock* block; #if CAN_DISABLE_DFA @@ -587,6 +589,7 @@ void Compiler::fgPerBlockLocalVarLiveness() block->bbMemoryLiveIn = emptyMemoryKindSet; } + noway_assert(livenessVarEpoch == GetCurLVEpoch()); #ifdef DEBUG if (verbose) { diff --git a/src/coreclr/src/jit/lsra.cpp b/src/coreclr/src/jit/lsra.cpp index d558bcc..3bc2464 100644 --- a/src/coreclr/src/jit/lsra.cpp +++ b/src/coreclr/src/jit/lsra.cpp @@ -1317,6 +1317,8 @@ void LinearScan::setBlockSequence() compiler->EnsureBasicBlockEpoch(); bbVisitedSet = BlockSetOps::MakeEmpty(compiler); BlockSet BLOCKSET_INIT_NOCOPY(readySet, BlockSetOps::MakeEmpty(compiler)); + BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler)); + assert(blockSequence == nullptr && bbSeqCount == 0); blockSequence = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount]; bbNumMaxBeforeResolution = compiler->fgBBNumMax; @@ -1400,7 +1402,7 @@ void LinearScan::setBlockSequence() // (i.e. pred-first or random, since layout order is handled above). if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum)) { - addToBlockSequenceWorkList(readySet, succ); + addToBlockSequenceWorkList(readySet, succ, predSet); BlockSetOps::AddElemD(compiler, readySet, succ->bbNum); } } @@ -1433,7 +1435,7 @@ void LinearScan::setBlockSequence() { if (!isBlockVisited(block)) { - addToBlockSequenceWorkList(readySet, block); + addToBlockSequenceWorkList(readySet, block, predSet); BlockSetOps::AddElemD(compiler, readySet, block->bbNum); } } @@ -1442,7 +1444,7 @@ void LinearScan::setBlockSequence() { if (!isBlockVisited(block)) { - addToBlockSequenceWorkList(readySet, block); + addToBlockSequenceWorkList(readySet, block, predSet); BlockSetOps::AddElemD(compiler, readySet, block->bbNum); } } @@ -1540,6 +1542,9 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block // Arguments: // sequencedBlockSet - the set of blocks that are already sequenced // block - the new block to be added +// predSet - the buffer to save predecessors set. A block set allocated by the caller used here as a +// temporary block set for constructing a predecessor set. Allocated by the caller to avoid reallocating a new block +// set with every call to this function // // Return Value: // None. @@ -1561,13 +1566,13 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block // Note also that, when random traversal order is implemented, this method // should insert the blocks into the list in random order, so that we can always // simply select the first block in the list. -void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block) +void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet) { // The block that is being added is not already sequenced assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum)); // Get predSet of block - BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler)); + BlockSetOps::ClearD(compiler, predSet); flowList* pred; for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext) { @@ -1723,6 +1728,8 @@ void LinearScan::doLinearScan() } #endif // DEBUG + unsigned lsraBlockEpoch = compiler->GetCurBasicBlockEpoch(); + splitBBNumToTargetBBNumMap = nullptr; // This is complicated by the fact that physical registers have refs associated @@ -1738,7 +1745,7 @@ void LinearScan::doLinearScan() DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals")); - BlockSetOps::ClearD(compiler, bbVisitedSet); + clearVisitedBlocks(); initVarRegMaps(); allocateRegisters(); compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC); @@ -1759,6 +1766,7 @@ void LinearScan::doLinearScan() DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST)); compiler->compLSRADone = true; + noway_assert(lsraBlockEpoch = compiler->GetCurBasicBlockEpoch()); } //------------------------------------------------------------------------ diff --git a/src/coreclr/src/jit/lsra.h b/src/coreclr/src/jit/lsra.h index b6f8379..1eeb380 100644 --- a/src/coreclr/src/jit/lsra.h +++ b/src/coreclr/src/jit/lsra.h @@ -1131,7 +1131,7 @@ private: int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights); BasicBlockList* blockSequenceWorkList; bool blockSequencingDone; - void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block); + void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet); void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode); BasicBlock* getNextCandidateFromWorkList(); diff --git a/src/coreclr/src/jit/regalloc.cpp b/src/coreclr/src/jit/regalloc.cpp index 938f8e8..38967a4 100644 --- a/src/coreclr/src/jit/regalloc.cpp +++ b/src/coreclr/src/jit/regalloc.cpp @@ -1340,7 +1340,7 @@ RET: while (iter.NextElem(this, &varNum)) { // We'll need this for one of the calls... - VarSetOps::ClearD(this, varAsSet); + VarSetOps::OldStyleClearD(this, varAsSet); VarSetOps::AddElemD(this, varAsSet, varNum); // If this varBit and lastUse? @@ -6348,7 +6348,7 @@ void Compiler::rpPredictRegUse() /* Zero the variable/register interference graph */ for (unsigned i = 0; i < REG_COUNT; i++) { - VarSetOps::ClearD(this, raLclRegIntf[i]); + VarSetOps::OldStyleClearD(this, raLclRegIntf[i]); } // if there are PInvoke calls and compLvFrameListRoot is enregistered, diff --git a/src/coreclr/tests/issues.targets b/src/coreclr/tests/issues.targets index 0d2fb68..594d43d 100644 --- a/src/coreclr/tests/issues.targets +++ b/src/coreclr/tests/issues.targets @@ -210,9 +210,6 @@ 7163, fails on both legacy backend and RyuJIT - - The test is too large for x86 and causes OutOfMemory exception. - @@ -1133,9 +1130,6 @@ needs triage - - 11142 - needs triage -- 2.7.4