Fix lsra memory consumption (dotnet/coreclr#11233)
authorSergey Andreenko <seandree@microsoft.com>
Fri, 28 Apr 2017 01:06:34 +0000 (18:06 -0700)
committerGitHub <noreply@github.com>
Fri, 28 Apr 2017 01:06:34 +0000 (18:06 -0700)
* Rename ClearD to OldStyleClearD

* create fast ClearD function.

The new ClearD reuses existing buffer and assumes that epoch wasn't
changed.

* Reuse existing buffer for the predSet in lsra.

* Use fast ClearD in LSRA.

* Use fast ClearD in liveness

* Fix the comment

* delete the test from issues

Commit migrated from https://github.com/dotnet/coreclr/commit/70014f8863209853a29d010219e36b7eae2d4cd8

14 files changed:
src/coreclr/src/jit/assertionprop.cpp
src/coreclr/src/jit/bitset.h
src/coreclr/src/jit/bitsetasshortlong.h
src/coreclr/src/jit/bitsetasuint64.h
src/coreclr/src/jit/bitsetasuint64inclass.h
src/coreclr/src/jit/bitsetops.h
src/coreclr/src/jit/copyprop.cpp
src/coreclr/src/jit/emit.cpp
src/coreclr/src/jit/flowgraph.cpp
src/coreclr/src/jit/liveness.cpp
src/coreclr/src/jit/lsra.cpp
src/coreclr/src/jit/lsra.h
src/coreclr/src/jit/regalloc.cpp
src/coreclr/tests/issues.targets

index 767d63a..04f2fbe 100644 (file)
@@ -4556,7 +4556,7 @@ ASSERT_TP* Compiler::optInitAssertionDataflowFlags()
     }
     // Compute the data flow values for all tracked expressions
     // IN and OUT never change for the initial basic block B1
-    BitVecOps::ClearD(apTraits, fgFirstBB->bbAssertionIn);
+    BitVecOps::OldStyleClearD(apTraits, fgFirstBB->bbAssertionIn);
     return jumpDestOut;
 }
 
index 4ecb2fc..a4b0091 100644 (file)
@@ -205,9 +205,13 @@ class BitSetOps
     // Destructively set "bs" to be the empty set.  This method is unique, in that it does *not*
     // require "bs" to be a bitset of the current epoch.  It ensures that it is after, however.
     // (If the representation is indirect, this requires allocating a new, empty representation.
-    // If this is a performance issue, we could provide a new version of ClearD that assumes/asserts
+    // If this is a performance issue, we could provide a new version of OldStyleClearD that assumes/asserts
     // that the rep is for the current epoch -- this would be useful if a given bitset were repeatedly
     // cleared within an epoch.)
+    // TODO #11263: delete it.
+    static void OldStyleClearD(Env env, BitSetType& bs);
+
+    // Destructively set "bs" to be the empty set.
     static void ClearD(Env env, BitSetType& bs);
 
     // Returns a copy of "bs".  If the representation of "bs" involves a level of indirection, the data
@@ -326,6 +330,11 @@ public:
         BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_AssignNocopy);
         BSO::AssignNoCopy(env, lhs, rhs);
     }
+    static void OldStyleClearD(Env env, BitSetType& bs)
+    {
+        BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_OldStyleClearD);
+        BSO::OldStyleClearD(env, bs);
+    }
     static void ClearD(Env env, BitSetType& bs)
     {
         BitSetTraits::GetOpCounter(env)->RecordOp(BitSetSupport::BSOP_ClearD);
index 163cb36..962a8bb 100644 (file)
@@ -43,6 +43,7 @@ private:
     static void DiffDLong(Env env, BitSetShortLongRep& bs1, BitSetShortLongRep bs2);
     static void AddElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
     static void RemoveElemDLong(Env env, BitSetShortLongRep& bs, unsigned i);
+    static void OldStyleClearDLong(Env env, BitSetShortLongRep& bs);
     static void ClearDLong(Env env, BitSetShortLongRep& bs);
     static BitSetShortLongRep MakeUninitArrayBits(Env env);
     static BitSetShortLongRep MakeEmptyArrayBits(Env env);
@@ -122,6 +123,19 @@ public:
         lhs = rhs;
     }
 
+    static void OldStyleClearD(Env env, BitSetShortLongRep& bs)
+    {
+        if (IsShort(env))
+        {
+            bs = (BitSetShortLongRep) nullptr;
+        }
+        else
+        {
+            assert(bs != UninitVal());
+            OldStyleClearDLong(env, bs);
+        }
+    }
+
     static void ClearD(Env env, BitSetShortLongRep& bs)
     {
         if (IsShort(env))
@@ -661,15 +675,29 @@ template <typename Env, typename BitSetTraits>
 void BitSetOps</*BitSetType*/ BitSetShortLongRep,
                /*Brand*/ BSShortLong,
                /*Env*/ Env,
-               /*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs)
+               /*BitSetTraits*/ BitSetTraits>::OldStyleClearDLong(Env env, BitSetShortLongRep& bs)
 {
     assert(!IsShort(env));
-    // Recall that ClearD does *not* require "bs" to be of the current epoch.
+    // Recall that OldStyleClearD does *not* require "bs" to be of the current epoch.
     // Therefore, we must allocate a new representation.
     bs = MakeEmptyArrayBits(env);
 }
 
 template <typename Env, typename BitSetTraits>
+void BitSetOps</*BitSetType*/ BitSetShortLongRep,
+               /*Brand*/ BSShortLong,
+               /*Env*/ Env,
+               /*BitSetTraits*/ BitSetTraits>::ClearDLong(Env env, BitSetShortLongRep& bs)
+{
+    assert(!IsShort(env));
+    unsigned len = BitSetTraits::GetArrSize(env, sizeof(size_t));
+    for (unsigned i = 0; i < len; i++)
+    {
+        bs[i] = 0;
+    }
+}
+
+template <typename Env, typename BitSetTraits>
 BitSetShortLongRep BitSetOps</*BitSetType*/ BitSetShortLongRep,
                              /*Brand*/ BSShortLong,
                              /*Env*/ Env,
index 243e9e3..aec4d05 100644 (file)
@@ -44,6 +44,11 @@ public:
         lhs = rhs;
     }
 
+    static void OldStyleClearD(Env env, UINT64& bs)
+    {
+        bs = 0;
+    }
+
     static void ClearD(Env env, UINT64& bs)
     {
         bs = 0;
index be92624..ffa99d3 100644 (file)
@@ -178,16 +178,22 @@ private:
         return res;
     }
 
-    inline void ClearD(Env env)
+    inline void OldStyleClearD(Env env)
     {
-        // Recall that ClearD does *not* require "*this" to be of the current epoch.
-        Uint64BitSetOps::ClearD(env, m_bits);
+        // Recall that OldStyleClearD does *not* require "*this" to be of the current epoch.
+        Uint64BitSetOps::OldStyleClearD(env, m_bits);
 #ifdef DEBUG
         // But it updates it to of the current epoch.
         m_epoch = BitSetTraits::GetEpoch(env);
 #endif
     }
 
+    inline void ClearD(Env env)
+    {
+        assert(m_epoch == BitSetTraits::GetEpoch(env));
+        Uint64BitSetOps::ClearD(env, m_bits);
+    }
+
     inline bool IsEmpty(Env env) const
     {
         CheckEpoch(env);
@@ -369,6 +375,11 @@ public:
         lhs = rhs;
     }
 
+    static void OldStyleClearD(Env env, BST& bs)
+    {
+        bs.OldStyleClearD(env);
+    }
+
     static void ClearD(Env env, BST& bs)
     {
         bs.ClearD(env);
index edf39ea..bb4db9d 100644 (file)
@@ -5,6 +5,7 @@
 BSOPNAME(BSOP_Assign)
 BSOPNAME(BSOP_AssignAllowUninitRhs)
 BSOPNAME(BSOP_AssignNocopy)
+BSOPNAME(BSOP_OldStyleClearD)
 BSOPNAME(BSOP_ClearD)
 BSOPNAME(BSOP_MakeSingleton)
 BSOPNAME(BSOP_MakeEmpty)
index bf714f0..b17956d 100644 (file)
@@ -296,7 +296,7 @@ void Compiler::optBlockCopyProp(BasicBlock* block, LclNumToGenTreePtrStack* curS
     VarSetOps::Assign(this, compCurLife, block->bbLiveIn);
     for (GenTreePtr stmt = block->bbTreeList; stmt; stmt = stmt->gtNext)
     {
-        VarSetOps::ClearD(this, optCopyPropKillSet);
+        VarSetOps::OldStyleClearD(this, optCopyPropKillSet);
 
         // Walk the tree to find if any local variable can be replaced with current live definitions.
         for (GenTreePtr tree = stmt->gtStmt.gtStmtList; tree; tree = tree->gtNext)
index 3b765b9..d2aa29f 100644 (file)
@@ -1472,8 +1472,8 @@ void emitter::emitBegProlog()
     /* Nothing is live on entry to the prolog */
 
     // These were initialized to Empty at the start of compilation.
-    VarSetOps::ClearD(emitComp, emitInitGCrefVars);
-    VarSetOps::ClearD(emitComp, emitPrevGCrefVars);
+    VarSetOps::OldStyleClearD(emitComp, emitInitGCrefVars);
+    VarSetOps::OldStyleClearD(emitComp, emitPrevGCrefVars);
     emitInitGCrefRegs = RBM_NONE;
     emitPrevGCrefRegs = RBM_NONE;
     emitInitByrefRegs = RBM_NONE;
@@ -4564,7 +4564,7 @@ unsigned emitter::emitEndCodeGen(Compiler* comp,
 
     /* Assume no live GC ref variables on entry */
 
-    VarSetOps::ClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen.
+    VarSetOps::OldStyleClearD(emitComp, emitThisGCrefVars); // This is initialized to Empty at the start of codegen.
     emitThisGCrefRegs = emitThisByrefRegs = RBM_NONE;
     emitThisGCrefVset                     = true;
 
index 36f080d..1582179 100644 (file)
@@ -1815,9 +1815,9 @@ void Compiler::fgComputeReachabilitySets()
 
     for (block = fgFirstBB; block != nullptr; block = block->bbNext)
     {
-        // Initialize the per-block bbReach sets. (Note that we can't just call BlockSetOps::ClearD()
-        // when re-running this computation, because if the epoch changes, the size and representation of the
-        // sets might change).
+        // Initialize the per-block bbReach sets. It creates a new empty set,
+        // because the block epoch could change since the previous initialization
+        // and the old set could have wrong size.
         block->bbReach = BlockSetOps::MakeEmpty(this);
 
         /* Mark block as reaching itself */
@@ -10011,7 +10011,7 @@ void Compiler::fgCompactBlocks(BasicBlock* block, BasicBlock* bNext)
     if (fgDomsComputed && block->bbNum > fgDomBBcount)
     {
         BlockSetOps::Assign(this, block->bbReach, bNext->bbReach);
-        BlockSetOps::ClearD(this, bNext->bbReach);
+        BlockSetOps::OldStyleClearD(this, bNext->bbReach);
 
         block->bbIDom = bNext->bbIDom;
         bNext->bbIDom = nullptr;
index d498a6f..73f72e7 100644 (file)
@@ -418,6 +418,8 @@ void Compiler::fgPerBlockLocalVarLiveness()
     }
 #endif // DEBUG
 
+    unsigned livenessVarEpoch = GetCurLVEpoch();
+
     BasicBlock* block;
 
 #if CAN_DISABLE_DFA
@@ -587,6 +589,7 @@ void Compiler::fgPerBlockLocalVarLiveness()
         block->bbMemoryLiveIn = emptyMemoryKindSet;
     }
 
+    noway_assert(livenessVarEpoch == GetCurLVEpoch());
 #ifdef DEBUG
     if (verbose)
     {
index d558bcc..3bc2464 100644 (file)
@@ -1317,6 +1317,8 @@ void LinearScan::setBlockSequence()
     compiler->EnsureBasicBlockEpoch();
     bbVisitedSet = BlockSetOps::MakeEmpty(compiler);
     BlockSet BLOCKSET_INIT_NOCOPY(readySet, BlockSetOps::MakeEmpty(compiler));
+    BlockSet BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler));
+
     assert(blockSequence == nullptr && bbSeqCount == 0);
     blockSequence            = new (compiler, CMK_LSRA) BasicBlock*[compiler->fgBBcount];
     bbNumMaxBeforeResolution = compiler->fgBBNumMax;
@@ -1400,7 +1402,7 @@ void LinearScan::setBlockSequence()
             // (i.e. pred-first or random, since layout order is handled above).
             if (!BlockSetOps::IsMember(compiler, readySet, succ->bbNum))
             {
-                addToBlockSequenceWorkList(readySet, succ);
+                addToBlockSequenceWorkList(readySet, succ, predSet);
                 BlockSetOps::AddElemD(compiler, readySet, succ->bbNum);
             }
         }
@@ -1433,7 +1435,7 @@ void LinearScan::setBlockSequence()
                 {
                     if (!isBlockVisited(block))
                     {
-                        addToBlockSequenceWorkList(readySet, block);
+                        addToBlockSequenceWorkList(readySet, block, predSet);
                         BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
                     }
                 }
@@ -1442,7 +1444,7 @@ void LinearScan::setBlockSequence()
                 {
                     if (!isBlockVisited(block))
                     {
-                        addToBlockSequenceWorkList(readySet, block);
+                        addToBlockSequenceWorkList(readySet, block, predSet);
                         BlockSetOps::AddElemD(compiler, readySet, block->bbNum);
                     }
                 }
@@ -1540,6 +1542,9 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block
 // Arguments:
 //    sequencedBlockSet - the set of blocks that are already sequenced
 //    block             - the new block to be added
+//    predSet           - the buffer to save predecessors set. A block set allocated by the caller used here as a
+//    temporary block set for constructing a predecessor set. Allocated by the caller to avoid reallocating a new block
+//    set with every call to this function
 //
 // Return Value:
 //    None.
@@ -1561,13 +1566,13 @@ int LinearScan::compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block
 //    Note also that, when random traversal order is implemented, this method
 //    should insert the blocks into the list in random order, so that we can always
 //    simply select the first block in the list.
-void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block)
+void LinearScan::addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet)
 {
     // The block that is being added is not already sequenced
     assert(!BlockSetOps::IsMember(compiler, sequencedBlockSet, block->bbNum));
 
     // Get predSet of block
-    BlockSet  BLOCKSET_INIT_NOCOPY(predSet, BlockSetOps::MakeEmpty(compiler));
+    BlockSetOps::ClearD(compiler, predSet);
     flowList* pred;
     for (pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
     {
@@ -1723,6 +1728,8 @@ void LinearScan::doLinearScan()
     }
 #endif // DEBUG
 
+    unsigned lsraBlockEpoch = compiler->GetCurBasicBlockEpoch();
+
     splitBBNumToTargetBBNumMap = nullptr;
 
     // This is complicated by the fact that physical registers have refs associated
@@ -1738,7 +1745,7 @@ void LinearScan::doLinearScan()
 
     DBEXEC(VERBOSE, lsraDumpIntervals("after buildIntervals"));
 
-    BlockSetOps::ClearD(compiler, bbVisitedSet);
+    clearVisitedBlocks();
     initVarRegMaps();
     allocateRegisters();
     compiler->EndPhase(PHASE_LINEAR_SCAN_ALLOC);
@@ -1759,6 +1766,7 @@ void LinearScan::doLinearScan()
     DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));
 
     compiler->compLSRADone = true;
+    noway_assert(lsraBlockEpoch = compiler->GetCurBasicBlockEpoch());
 }
 
 //------------------------------------------------------------------------
index b6f8379..1eeb380 100644 (file)
@@ -1131,7 +1131,7 @@ private:
     int compareBlocksForSequencing(BasicBlock* block1, BasicBlock* block2, bool useBlockWeights);
     BasicBlockList* blockSequenceWorkList;
     bool            blockSequencingDone;
-    void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block);
+    void addToBlockSequenceWorkList(BlockSet sequencedBlockSet, BasicBlock* block, BlockSet& predSet);
     void removeFromBlockSequenceWorkList(BasicBlockList* listNode, BasicBlockList* prevNode);
     BasicBlock* getNextCandidateFromWorkList();
 
index 938f8e8..38967a4 100644 (file)
@@ -1340,7 +1340,7 @@ RET:
         while (iter.NextElem(this, &varNum))
         {
             // We'll need this for one of the calls...
-            VarSetOps::ClearD(this, varAsSet);
+            VarSetOps::OldStyleClearD(this, varAsSet);
             VarSetOps::AddElemD(this, varAsSet, varNum);
 
             // If this varBit and lastUse?
@@ -6348,7 +6348,7 @@ void Compiler::rpPredictRegUse()
         /*  Zero the variable/register interference graph */
         for (unsigned i = 0; i < REG_COUNT; i++)
         {
-            VarSetOps::ClearD(this, raLclRegIntf[i]);
+            VarSetOps::OldStyleClearD(this, raLclRegIntf[i]);
         }
 
         // if there are PInvoke calls and compLvFrameListRoot is enregistered,
index 0d2fb68..594d43d 100644 (file)
         <ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\Dev11\External\dev11_239804\ShowLocallocAlignment\ShowLocallocAlignment.cmd">
             <Issue>7163, fails on both legacy backend and RyuJIT</Issue>
         </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\JitBlue\DevDiv_255294\DevDiv_255294\DevDiv_255294.cmd">
-            <Issue>The test is too large for x86 and causes OutOfMemory exception.</Issue>
-        </ExcludeList>
     </ItemGroup>
 
     <!-- Tests that need to be triaged for vararg usage as that is not supported -->
         <ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\Dev11\dev10_865840\dev10_865840\dev10_865840.cmd">
             <Issue>needs triage</Issue>
         </ExcludeList>
-        <ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\JITBlue\DevDiv_255294\DevDiv_255294\DevDiv_255294.cmd">
-            <Issue>11142</Issue>
-        </ExcludeList>
         <ExcludeList Include="$(XunitTestBinBase)\JIT\Regression\VS-ia64-JIT\V1.2-Beta1\b302509\b302509\b302509.cmd">
             <Issue>needs triage</Issue>
         </ExcludeList>