Dump LSRA stats.
author sivarv <sivarv@microsoft.com>
Tue, 11 Oct 2016 23:20:39 +0000 (16:20 -0700)
committer sivarv <sivarv@microsoft.com>
Wed, 12 Oct 2016 00:37:15 +0000 (17:37 -0700)
Commit migrated from https://github.com/dotnet/coreclr/commit/2a2f751cf952c49e436a5a9a7ed15d5820fd0b23

src/coreclr/src/jit/compiler.h
src/coreclr/src/jit/jit.h
src/coreclr/src/jit/jitconfigvalues.h
src/coreclr/src/jit/lsra.cpp
src/coreclr/src/jit/lsra.h

index 146f553..b53f524 100644 (file)
@@ -6275,7 +6275,7 @@ public:
 
     BOOL eeIsValueClass(CORINFO_CLASS_HANDLE clsHnd);
 
-#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD)
+#if defined(DEBUG) || defined(FEATURE_JIT_METHOD_PERF) || defined(FEATURE_SIMD) || defined(TRACK_LSRA_STATS)
 
     bool IsSuperPMIException(unsigned code)
     {
index 8ef0dcf..d9d0ba2 100644 (file)
@@ -483,9 +483,11 @@ typedef ptrdiff_t ssize_t;
 #ifdef DEBUG
 #define MEASURE_MEM_ALLOC 1 // Collect memory allocation stats.
 #define LOOP_HOIST_STATS 1  // Collect loop hoisting stats.
+#define TRACK_LSRA_STATS 1  // Collect LSRA stats
 #else
 #define MEASURE_MEM_ALLOC 0 // You can set this to 1 to get memory stats in retail, as well
 #define LOOP_HOIST_STATS 0  // You can set this to 1 to get loop hoist stats in retail, as well
+#define TRACK_LSRA_STATS 0  // You can set this to 1 to get LSRA stats in retail, as well
 #endif
 
 // Timing calls to clr.dll is only available under certain conditions.
index 55f976b..de340fa 100644 (file)
@@ -17,9 +17,10 @@ CONFIG_INTEGER(DebugBreakOnVerificationFailure, W("DebugBreakOnVerificationFailu
                                                                                          // verification failure
 CONFIG_INTEGER(DiffableDasm, W("JitDiffableDasm"), 0)            // Make the disassembly diff-able
 CONFIG_INTEGER(DisplayLoopHoistStats, W("JitLoopHoistStats"), 0) // Display JIT loop hoisting statistics
-CONFIG_INTEGER(DumpJittedMethods, W("DumpJittedMethods"), 0)     // Prints all jitted methods to the console
-CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1)      // Whether absolute addr be encoded as PC-rel offset by
-                                                                 // RyuJIT where possible
+CONFIG_INTEGER(DisplayLsraStats, W("JitLsraStats"), 0)       // Display JIT Linear Scan Register Allocator statistics
+CONFIG_INTEGER(DumpJittedMethods, W("DumpJittedMethods"), 0) // Prints all jitted methods to the console
+CONFIG_INTEGER(EnablePCRelAddr, W("JitEnablePCRelAddr"), 1)  // Whether absolute addr be encoded as PC-rel offset by
+                                                             // RyuJIT where possible
 CONFIG_INTEGER(InterpreterFallback, W("InterpreterFallback"), 0) // Fallback to the interpreter when the JIT compiler
                                                                  // fails
 CONFIG_INTEGER(JitAssertOnMaxRAPasses, W("JitAssertOnMaxRAPasses"), 0)
index 5f32b3b..e427b5d 100644 (file)
@@ -1111,12 +1111,14 @@ LinearScan::LinearScan(Compiler* theCompiler)
 #endif
 
     dumpTerse = (JitConfig.JitDumpTerseLsra() != 0);
-
 #endif // DEBUG
+
     availableIntRegs = (RBM_ALLINT & ~compiler->codeGen->regSet.rsMaskResvd);
+
 #if ETW_EBP_FRAMED
     availableIntRegs &= ~RBM_FPBASE;
 #endif // ETW_EBP_FRAMED
+
     availableFloatRegs  = RBM_ALLFLOAT;
     availableDoubleRegs = RBM_ALLDOUBLE;
 
@@ -1328,6 +1330,13 @@ void LinearScan::setBlockSequence()
         blockInfo[block->bbNum].hasCriticalOutEdge = false;
         blockInfo[block->bbNum].weight             = block->bbWeight;
 
+#if TRACK_LSRA_STATS
+        blockInfo[block->bbNum].spillCount         = 0;
+        blockInfo[block->bbNum].copyRegCount       = 0;
+        blockInfo[block->bbNum].resolutionMovCount = 0;
+        blockInfo[block->bbNum].splitEdgeCount     = 0;
+#endif // TRACK_LSRA_STATS
+
         if (block->GetUniquePred(compiler) == nullptr)
         {
             for (flowList* pred = block->bbPreds; pred != nullptr; pred = pred->flNext)
@@ -1720,6 +1729,17 @@ void LinearScan::doLinearScan()
     resolveRegisters();
     compiler->EndPhase(PHASE_LINEAR_SCAN_RESOLVE);
 
+#if TRACK_LSRA_STATS
+    if ((JitConfig.DisplayLsraStats() != 0)
+#ifdef DEBUG
+        || VERBOSE
+#endif
+        )
+    {
+        dumpLsraStats(jitstdout);
+    }
+#endif // TRACK_LSRA_STATS
+
     DBEXEC(VERBOSE, TupleStyleDump(LSRA_DUMP_POST));
 
     compiler->compLSRADone = true;
@@ -5957,6 +5977,8 @@ void LinearScan::spillInterval(Interval* interval, RefPosition* fromRefPosition,
     }
 #endif // DEBUG
 
+    INTRACK_STATS(updateLsraStat(LSRA_STAT_SPILL, fromRefPosition->bbNum));
+
     interval->isActive  = false;
     interval->isSpilled = true;
 
@@ -7704,6 +7726,7 @@ void LinearScan::writeRegisters(RefPosition* currentRefPosition, GenTree* tree)
 //   than the one it was spilled from (GT_RELOAD).
 //
 // Arguments:
+//    block             - basic block in which GT_COPY/GT_RELOAD is inserted.
 //    tree              - This is the node to copy or reload.
 //                        Insert copy or reload node between this node and its parent.
 //    multiRegIdx       - register position of tree node for which copy or reload is needed.
@@ -7772,6 +7795,10 @@ void LinearScan::insertCopyOrReload(BasicBlock* block, GenTreePtr tree, unsigned
     else
     {
         oper = GT_COPY;
+
+#if TRACK_LSRA_STATS
+        updateLsraStat(LSRA_STAT_COPY_REG, block->bbNum);
+#endif
     }
 
     // If the parent is a reload/copy node, then tree must be a multi-reg call node
@@ -8895,6 +8922,8 @@ void LinearScan::addResolution(
     {
         interval->isSplit = true;
     }
+
+    INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
 }
 
 //------------------------------------------------------------------------
@@ -9368,6 +9397,9 @@ void LinearScan::resolveEdge(BasicBlock*      fromBlock,
             // in resolveEdges(), after all the edge resolution has been done (by calling this
             // method for each edge).
             block = compiler->fgSplitEdge(fromBlock, toBlock);
+
+            // Split edges are counted against fromBlock.
+            INTRACK_STATS(updateLsraStat(LSRA_STAT_SPLIT_EDGE, fromBlock->bbNum));
             break;
         default:
             unreached();
@@ -9605,6 +9637,8 @@ void LinearScan::resolveEdge(BasicBlock*      fromBlock,
                                    sourceIntervals[sourceReg]->varNum, fromReg);
                         location[sourceReg]              = REG_NA;
                         location[source[otherTargetReg]] = (regNumberSmall)fromReg;
+
+                        INTRACK_STATS(updateLsraStat(LSRA_STAT_RESOLUTION_MOV, block->bbNum));
                     }
                     else
                     {
@@ -9736,6 +9770,126 @@ void TreeNodeInfo::addInternalCandidates(LinearScan* lsra, regMaskTP mask)
     internalCandsIndex = (unsigned char)i;
 }
 
+#if TRACK_LSRA_STATS
+// ----------------------------------------------------------
+// updateLsraStat: Bump one per-block LSRA statistics counter.
+//
+// Arguments:
+//    stat      -   Selects which counter to increment.
+//    bbNum     -   Number of the basic block the event is charged
+//                  to; used as the index into blockInfo.
+//
+void LinearScan::updateLsraStat(LsraStat stat, unsigned bbNum)
+{
+    // Blocks numbered above bbNumMaxBeforeResolution were created as part
+    // of resolution. They hold resolution moves that are already accounted,
+    // so nothing is recorded for them.
+    if (bbNum > bbNumMaxBeforeResolution)
+    {
+        return;
+    }
+
+    if (stat == LSRA_STAT_SPILL)
+    {
+        blockInfo[bbNum].spillCount++;
+    }
+    else if (stat == LSRA_STAT_COPY_REG)
+    {
+        blockInfo[bbNum].copyRegCount++;
+    }
+    else if (stat == LSRA_STAT_RESOLUTION_MOV)
+    {
+        blockInfo[bbNum].resolutionMovCount++;
+    }
+    else if (stat == LSRA_STAT_SPLIT_EDGE)
+    {
+        blockInfo[bbNum].splitEdgeCount++;
+    }
+    else
+    {
+        // Any other stat value is ignored.
+    }
+}
+
+// -----------------------------------------------------------
+// dumpLsraStats - Write per-block and method-total LSRA stats to a file.
+//
+// Arguments:
+//    file    -  Stream the report is written to.
+//
+void LinearScan::dumpLsraStats(FILE* file)
+{
+    unsigned sumSpillCount         = 0;
+    unsigned sumCopyRegCount       = 0;
+    unsigned sumResolutionMovCount = 0;
+    unsigned sumSplitEdgeCount     = 0;
+    UINT64   wtdSpillCount         = 0;
+    UINT64   wtdCopyRegCount       = 0;
+    UINT64   wtdResolutionMovCount = 0;
+
+    fprintf(file, "----------\n");
+    fprintf(file, "LSRA Stats");
+#ifdef DEBUG
+    if (!VERBOSE)
+    {
+        fprintf(file, " : %s\n", compiler->info.compFullName);
+    }
+    else
+    {
+        // In verbose mode there is no need to print the full method name
+        // along with the LSRA stats; just terminate the heading line.
+        fprintf(file, "\n");
+    }
+#else
+    fprintf(file, " : %s\n", compiler->eeGetMethodFullName(compiler->info.compCompHnd));
+#endif
+
+    fprintf(file, "----------\n");
+
+    for (BasicBlock* block = compiler->fgFirstBB; block != nullptr; block = block->bbNext)
+    {
+        if (block->bbNum > bbNumMaxBeforeResolution)
+        {
+            continue; // Same cutoff as updateLsraStat(): no stats are recorded for these blocks.
+        }
+
+        unsigned spillCount         = blockInfo[block->bbNum].spillCount;
+        unsigned copyRegCount       = blockInfo[block->bbNum].copyRegCount;
+        unsigned resolutionMovCount = blockInfo[block->bbNum].resolutionMovCount;
+        unsigned splitEdgeCount     = blockInfo[block->bbNum].splitEdgeCount;
+
+        // List every block that recorded at least one event, including one whose only event is a split edge.
+        if (spillCount != 0 || copyRegCount != 0 || resolutionMovCount != 0 || splitEdgeCount != 0)
+        {
+            fprintf(file, "BB%02u [%8d]: SpillCount = %u, ResolutionMovs = %u, SplitEdges = %u, CopyReg = %u\n",
+                    block->bbNum, block->bbWeight, spillCount, resolutionMovCount, splitEdgeCount, copyRegCount);
+        }
+
+        sumSpillCount += spillCount;
+        sumCopyRegCount += copyRegCount;
+        sumResolutionMovCount += resolutionMovCount;
+        sumSplitEdgeCount += splitEdgeCount;
+
+        wtdSpillCount += (UINT64)spillCount * block->bbWeight;
+        wtdCopyRegCount += (UINT64)copyRegCount * block->bbWeight;
+        wtdResolutionMovCount += (UINT64)resolutionMovCount * block->bbWeight;
+    }
+
+    fprintf(file, "Total Spill Count: %u    Weighted: %I64u\n", sumSpillCount, wtdSpillCount);
+    fprintf(file, "Total CopyReg Count: %u   Weighted: %I64u\n", sumCopyRegCount, wtdCopyRegCount);
+    fprintf(file, "Total ResolutionMov Count: %u    Weighted: %I64u\n", sumResolutionMovCount, wtdResolutionMovCount);
+    fprintf(file, "Total number of split edges: %u\n", sumSplitEdgeCount);
+
+    // Compute the total number of spill temps created by summing maxSpill over all types.
+    unsigned numSpillTemps = 0;
+    for (int i = 0; i < TYP_COUNT; i++)
+    {
+        numSpillTemps += maxSpill[i];
+    }
+    fprintf(file, "Total Number of spill temps created: %u\n\n", numSpillTemps);
+}
+#endif // TRACK_LSRA_STATS
+
 #ifdef DEBUG
 void dumpRegMask(regMaskTP regs)
 {
index 70610b6..193effa 100644 (file)
@@ -73,6 +73,25 @@ struct LsraBlockInfo
     unsigned int         predBBNum;
     bool                 hasCriticalInEdge;
     bool                 hasCriticalOutEdge;
+
+#if TRACK_LSRA_STATS
+    // Per block maintained LSRA statistics.
+
+    // Number of spills of local vars or tree temps in this basic block.
+    unsigned spillCount;
+
+    // Number of GT_COPY nodes inserted in this basic block while allocating regs.
+    // Note that GT_COPY nodes are also inserted as part of basic block boundary
+    // resolution, which are accounted against resolutionMovCount but not
+    // against copyRegCount.
+    unsigned copyRegCount;
+
+    // Number of resolution moves inserted in this basic block.
+    unsigned resolutionMovCount;
+
+    // Number of critical edges from this block that are split.
+    unsigned splitEdgeCount;
+#endif // TRACK_LSRA_STATS
 };
 
 // This is sort of a bit mask
@@ -1027,6 +1046,20 @@ private:
     void validateIntervals();
 #endif // DEBUG
 
+#if TRACK_LSRA_STATS
+    enum LsraStat{ // Selects which per-block counter updateLsraStat() increments.
+        LSRA_STAT_SPILL, LSRA_STAT_COPY_REG, LSRA_STAT_RESOLUTION_MOV, LSRA_STAT_SPLIT_EDGE,
+    };
+
+    void updateLsraStat(LsraStat stat, unsigned currentBBNum);
+
+    void dumpLsraStats(FILE* file);
+
+#define INTRACK_STATS(x) x
+#else // !TRACK_LSRA_STATS
+#define INTRACK_STATS(x)
+#endif // !TRACK_LSRA_STATS
+
     Compiler* compiler;
 
 private: