Improve the speed of using COMPlus_JitTimeLogCsv (#49798)
authorBruce Forstall <brucefo@microsoft.com>
Fri, 19 Mar 2021 00:21:15 +0000 (17:21 -0700)
committerGitHub <noreply@github.com>
Fri, 19 Mar 2021 00:21:15 +0000 (17:21 -0700)
* Improve the speed of using COMPlus_JitTimeLogCsv

I noticed that setting this and running a superpmi replay
was super slow. (Btw, note that you must use `--sequential`
in the scenario to avoid parallel writes to the log file).

This change fixes this in a number of ways:
1. The `CycleTimer::CyclesPerSecond()` function does not-trivial
computation to compute its answer. So cache the result.
2. It's expensive to constantly open and close the output file
(once per function). Partially, this is because anti-virus seems to
get involved. So open it once, and keep it open until the JIT
process is shut down.
3. Avoid calling `eeGetMethodFullName()` in DEBUG builds. (Note that
JitTimeLogCsv collects interesting information beyond just per-phase
timings that are also useful for extracting from a CSV file.)

* Fix CachedCyclesPerSecond logging

* Fix clang build break

src/coreclr/jit/compiler.cpp
src/coreclr/jit/compiler.h
src/coreclr/jit/inline.cpp
src/coreclr/jit/utils.cpp
src/coreclr/jit/utils.h

index 3570d37..625b27b 100644 (file)
@@ -1467,6 +1467,8 @@ void Compiler::compShutdown()
             fclose(jitTimeLogFile);
         }
     }
+
+    JitTimer::Shutdown();
 #endif // FEATURE_JIT_METHOD_PERF
 
 #if COUNT_AST_OPERS
@@ -7592,7 +7594,7 @@ void Compiler::compDispCallArgStats(FILE* fout)
 CritSecObject       CompTimeSummaryInfo::s_compTimeSummaryLock;
 CompTimeSummaryInfo CompTimeSummaryInfo::s_compTimeSummary;
 #if MEASURE_CLRAPI_CALLS
-double JitTimer::s_cyclesPerSec = CycleTimer::CyclesPerSecond();
+double JitTimer::s_cyclesPerSec = CachedCyclesPerSecond();
 #endif
 #endif // FEATURE_JIT_METHOD_PERF
 
@@ -7787,7 +7789,7 @@ void CompTimeSummaryInfo::Print(FILE* f)
         return;
     }
     // Otherwise...
-    double countsPerSec = CycleTimer::CyclesPerSecond();
+    double countsPerSec = CachedCyclesPerSecond();
     if (countsPerSec == 0.0)
     {
         fprintf(f, "Processor does not have a high-frequency timer.\n");
@@ -8181,6 +8183,10 @@ void JitTimer::CLRApiCallLeave(unsigned apix)
 
 CritSecObject JitTimer::s_csvLock;
 
+// It's expensive to constantly open and close the file, so open it once and close it
+// when the process exits. This should be accessed under the s_csvLock.
+FILE* JitTimer::s_csvFile = nullptr;
+
 LPCWSTR Compiler::JitTimeLogCsv()
 {
     LPCWSTR jitTimeLogCsv = JitConfig.JitTimeLogCsv();
@@ -8197,40 +8203,45 @@ void JitTimer::PrintCsvHeader()
 
     CritSecHolder csvLock(s_csvLock);
 
-    FILE* fp = _wfopen(jitTimeLogCsv, W("a"));
-    if (fp != nullptr)
+    if (s_csvFile == nullptr)
+    {
+        s_csvFile = _wfopen(jitTimeLogCsv, W("a"));
+    }
+    if (s_csvFile != nullptr)
     {
         // Seek to the end of the file s.t. `ftell` doesn't lie to us on Windows
-        fseek(fp, 0, SEEK_END);
+        fseek(s_csvFile, 0, SEEK_END);
 
         // Write the header if the file is empty
-        if (ftell(fp) == 0)
+        if (ftell(s_csvFile) == 0)
         {
-            fprintf(fp, "\"Method Name\",");
-            fprintf(fp, "\"Assembly or SPMI Index\",");
-            fprintf(fp, "\"IL Bytes\",");
-            fprintf(fp, "\"Basic Blocks\",");
-            fprintf(fp, "\"Min Opts\",");
-            fprintf(fp, "\"Loops Cloned\",");
+            fprintf(s_csvFile, "\"Method Name\",");
+            fprintf(s_csvFile, "\"Assembly or SPMI Index\",");
+            fprintf(s_csvFile, "\"IL Bytes\",");
+            fprintf(s_csvFile, "\"Basic Blocks\",");
+            fprintf(s_csvFile, "\"Min Opts\",");
+            fprintf(s_csvFile, "\"Loops\",");
+            fprintf(s_csvFile, "\"Loops Cloned\",");
 
             for (int i = 0; i < PHASE_NUMBER_OF; i++)
             {
-                fprintf(fp, "\"%s\",", PhaseNames[i]);
+                fprintf(s_csvFile, "\"%s\",", PhaseNames[i]);
                 if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[i])
                 {
-                    fprintf(fp, "\"Node Count After %s\",", PhaseNames[i]);
+                    fprintf(s_csvFile, "\"Node Count After %s\",", PhaseNames[i]);
                 }
             }
 
-            InlineStrategy::DumpCsvHeader(fp);
+            InlineStrategy::DumpCsvHeader(s_csvFile);
+
+            fprintf(s_csvFile, "\"Executable Code Bytes\",");
+            fprintf(s_csvFile, "\"GC Info Bytes\",");
+            fprintf(s_csvFile, "\"Total Bytes Allocated\",");
+            fprintf(s_csvFile, "\"Total Cycles\",");
+            fprintf(s_csvFile, "\"CPS\"\n");
 
-            fprintf(fp, "\"Executable Code Bytes\",");
-            fprintf(fp, "\"GC Info Bytes\",");
-            fprintf(fp, "\"Total Bytes Allocated\",");
-            fprintf(fp, "\"Total Cycles\",");
-            fprintf(fp, "\"CPS\"\n");
+            fflush(s_csvFile);
         }
-        fclose(fp);
     }
 }
 
@@ -8244,8 +8255,14 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
         return;
     }
 
-    // eeGetMethodFullName uses locks, so don't enter crit sec before this call.
-    const char* methName = comp->eeGetMethodFullName(comp->info.compMethodHnd);
+// eeGetMethodFullName uses locks, so don't enter crit sec before this call.
+#if defined(DEBUG) || defined(LATE_DISASM)
+    // If we already have computed the name because for some reason we're generating the CSV
+    // for a DEBUG build (presumably not for the time info), just re-use it.
+    const char* methName = comp->info.compFullName;
+#else
+    const char*          methName  = comp->eeGetMethodFullName(comp->info.compMethodHnd);
+#endif
 
     // Try and access the SPMI index to report in the data set.
     //
@@ -8258,22 +8275,27 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
 
     CritSecHolder csvLock(s_csvLock);
 
-    FILE* fp = _wfopen(jitTimeLogCsv, W("a"));
-    fprintf(fp, "\"%s\",", methName);
+    if (s_csvFile == nullptr)
+    {
+        return;
+    }
+
+    fprintf(s_csvFile, "\"%s\",", methName);
     if (index != 0)
     {
-        fprintf(fp, "%d,", index);
+        fprintf(s_csvFile, "%d,", index);
     }
     else
     {
         const char* methodAssemblyName = comp->info.compCompHnd->getAssemblyName(
             comp->info.compCompHnd->getModuleAssembly(comp->info.compCompHnd->getClassModule(comp->info.compClassHnd)));
-        fprintf(fp, "\"%s\",", methodAssemblyName);
+        fprintf(s_csvFile, "\"%s\",", methodAssemblyName);
     }
-    fprintf(fp, "%u,", comp->info.compILCodeSize);
-    fprintf(fp, "%u,", comp->fgBBcount);
-    fprintf(fp, "%u,", comp->opts.MinOpts());
-    fprintf(fp, "%u,", comp->optLoopsCloned);
+    fprintf(s_csvFile, "%u,", comp->info.compILCodeSize);
+    fprintf(s_csvFile, "%u,", comp->fgBBcount);
+    fprintf(s_csvFile, "%u,", comp->opts.MinOpts());
+    fprintf(s_csvFile, "%u,", comp->optLoopCount);
+    fprintf(s_csvFile, "%u,", comp->optLoopsCloned);
     unsigned __int64 totCycles = 0;
     for (int i = 0; i < PHASE_NUMBER_OF; i++)
     {
@@ -8281,22 +8303,35 @@ void JitTimer::PrintCsvMethodStats(Compiler* comp)
         {
             totCycles += m_info.m_cyclesByPhase[i];
         }
-        fprintf(fp, "%I64u,", m_info.m_cyclesByPhase[i]);
+        fprintf(s_csvFile, "%I64u,", m_info.m_cyclesByPhase[i]);
 
         if ((JitConfig.JitMeasureIR() != 0) && PhaseReportsIRSize[i])
         {
-            fprintf(fp, "%u,", m_info.m_nodeCountAfterPhase[i]);
+            fprintf(s_csvFile, "%u,", m_info.m_nodeCountAfterPhase[i]);
         }
     }
 
-    comp->m_inlineStrategy->DumpCsvData(fp);
+    comp->m_inlineStrategy->DumpCsvData(s_csvFile);
+
+    fprintf(s_csvFile, "%u,", comp->info.compNativeCodeSize);
+    fprintf(s_csvFile, "%Iu,", comp->compInfoBlkSize);
+    fprintf(s_csvFile, "%Iu,", comp->compGetArenaAllocator()->getTotalBytesAllocated());
+    fprintf(s_csvFile, "%I64u,", m_info.m_totalCycles);
+    fprintf(s_csvFile, "%f\n", CachedCyclesPerSecond());
 
-    fprintf(fp, "%u,", comp->info.compNativeCodeSize);
-    fprintf(fp, "%Iu,", comp->compInfoBlkSize);
-    fprintf(fp, "%Iu,", comp->compGetArenaAllocator()->getTotalBytesAllocated());
-    fprintf(fp, "%I64u,", m_info.m_totalCycles);
-    fprintf(fp, "%f\n", CycleTimer::CyclesPerSecond());
-    fclose(fp);
+    fflush(s_csvFile);
+}
+
+// Perform process shutdown actions.
+//
+// static
+void JitTimer::Shutdown()
+{
+    CritSecHolder csvLock(s_csvLock);
+    if (s_csvFile != nullptr)
+    {
+        fclose(s_csvFile);
+    }
 }
 
 // Completes the timing of the current method, and adds it to "sum".
index 43f6fd2..a1ac034 100644 (file)
@@ -1319,6 +1319,7 @@ class JitTimer
     CompTimeInfo m_info; // The CompTimeInfo for this compilation.
 
     static CritSecObject s_csvLock; // Lock to protect the time log file.
+    static FILE*         s_csvFile; // The time log file handle.
     void PrintCsvMethodStats(Compiler* comp);
 
 private:
@@ -1363,6 +1364,8 @@ public:
         }
         return res;
     }
+
+    static void Shutdown();
 };
 #endif // FEATURE_JIT_METHOD_PERF
 
index a2eee7d..e8c8fac 100644 (file)
@@ -1472,7 +1472,7 @@ void InlineStrategy::DumpDataContents(FILE* file)
     unsigned __int64 compCycles               = m_Compiler->getInlineCycleCount();
     if (compCycles > 0)
     {
-        double countsPerSec      = CycleTimer::CyclesPerSecond();
+        double countsPerSec      = CachedCyclesPerSecond();
         double counts            = (double)compCycles;
         microsecondsSpentJitting = (unsigned)((counts / countsPerSec) * 1000 * 1000);
     }
@@ -1567,7 +1567,7 @@ void InlineStrategy::DumpXml(FILE* file, unsigned indent)
     unsigned __int64 compCycles               = m_Compiler->getInlineCycleCount();
     if (compCycles > 0)
     {
-        double countsPerSec      = CycleTimer::CyclesPerSecond();
+        double countsPerSec      = CachedCyclesPerSecond();
         double counts            = (double)compCycles;
         microsecondsSpentJitting = (unsigned)((counts / countsPerSec) * 1000 * 1000);
     }
index dafa816..cf1494c 100644 (file)
@@ -1792,8 +1792,47 @@ bool MethodSet::IsActiveMethod(const char* methodName, int methodHash)
     return false;
 }
 
+//------------------------------------------------------------------------
+// CachedCyclesPerSecond - Return the cached value of CycleTimer::CyclesPerSecond().
+//
+// Calling CycleTimer::CyclesPerSecond() can be expensive: it runs a loop of non-empty
+// code to compute its value. So call it once and cache the result.
+//
+double CachedCyclesPerSecond()
+{
+    static volatile LONG s_CachedCyclesPerSecondInitialized = 0;
+    static double        s_CachedCyclesPerSecond            = 0.0;
+    static CritSecObject s_CachedCyclesPerSecondLock;
+
+    if (s_CachedCyclesPerSecondInitialized == 1)
+    {
+        return s_CachedCyclesPerSecond;
+    }
+
+    // It wasn't initialized yet, so initialize it. There might be a race,
+    // so lock the update.
+
+    CritSecHolder cachedCyclesPerSecondLock(s_CachedCyclesPerSecondLock);
+
+    if (s_CachedCyclesPerSecondInitialized == 1)
+    {
+        // Someone else initialized it first.
+        return s_CachedCyclesPerSecond;
+    }
+
+    s_CachedCyclesPerSecond = CycleTimer::CyclesPerSecond();
+
+    LONG originalInitializedValue = InterlockedCompareExchange(&s_CachedCyclesPerSecondInitialized, 1, 0);
+    if (originalInitializedValue == 1)
+    {
+        // This is unexpected; the critical section should have protected us.
+    }
+
+    return s_CachedCyclesPerSecond;
+}
+
 #ifdef FEATURE_JIT_METHOD_PERF
-CycleCount::CycleCount() : cps(CycleTimer::CyclesPerSecond())
+CycleCount::CycleCount() : cps(CachedCyclesPerSecond())
 {
 }
 
index 3f08f3e..e22320b 100644 (file)
@@ -766,4 +766,10 @@ int64_t GetSigned64Magic(int64_t d, int* shift /*out*/);
 #endif
 }
 
+//
+// Profiling helpers
+//
+
+double CachedCyclesPerSecond();
+
 #endif // _UTILS_H_