From b0daee58d37d64369adebd3350ee1e5683032b1b Mon Sep 17 00:00:00 2001 From: Koundinya Veluri Date: Mon, 10 Jun 2019 23:27:02 -0700 Subject: [PATCH] Add optimization tiers to the Linux perf maps for perfcollect (dotnet/coreclr#24967) Add optimization tiers to the Linux perf maps for perfcollect Fixes https://github.com/dotnet/coreclr/issues/23222: - It looks like module unloads are currently not taken into account. Once they are taken into account, although we have method JIT events from `lttng` with the code address and optimization tier, samples can only be associated with method JIT events by associating the time range when the module is loaded with the times of samples, and the event times from `lttng` would not necessarily correspond with the times of samples taken by `perf`. - Updated to include the optimization tier in the perf map for each jitted or R2R method code address - Refactored common code between eventtrace and perfmap for getting jit tiers Commit migrated from https://github.com/dotnet/coreclr/commit/d5906ecce405c8ff9476a1ea3d9a297bf5d9991c --- src/coreclr/src/inc/clrconfigvalues.h | 1 + src/coreclr/src/inc/eventtracebase.h | 11 ------ src/coreclr/src/vm/eventtrace.cpp | 52 +++-------------------------- src/coreclr/src/vm/method.hpp | 17 ++++++++++ src/coreclr/src/vm/perfmap.cpp | 44 ++++++++++++++++++------ src/coreclr/src/vm/perfmap.h | 9 +++-- src/coreclr/src/vm/prestub.cpp | 63 ++++++++++++++++++++++++++++++++++- 7 files changed, 124 insertions(+), 73 deletions(-) diff --git a/src/coreclr/src/inc/clrconfigvalues.h b/src/coreclr/src/inc/clrconfigvalues.h index 03c6569..7713af5 100644 --- a/src/coreclr/src/inc/clrconfigvalues.h +++ b/src/coreclr/src/inc/clrconfigvalues.h @@ -569,6 +569,7 @@ RETAIL_CONFIG_DWORD_INFO(UNSUPPORTED_ProfAPI_ValidateNGENInstrumentation, W("Pro #ifdef FEATURE_PERFMAP RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PerfMapEnabled, W("PerfMapEnabled"), 0, "This flag is used on Linux to enable writing /tmp/perf-$pid.map. 
It is disabled by default", CLRConfig::REGUTIL_default) RETAIL_CONFIG_DWORD_INFO_EX(EXTERNAL_PerfMapIgnoreSignal, W("PerfMapIgnoreSignal"), 0, "When perf map is enabled, this option will configure the specified signal to be accepted and ignored as a marker in the perf logs. It is disabled by default", CLRConfig::REGUTIL_default) +RETAIL_CONFIG_DWORD_INFO(EXTERNAL_PerfMapShowOptimizationTiers, W("PerfMapShowOptimizationTiers"), 1, "Shows optimization tiers in the perf map for methods, as part of the symbol name. Useful for seeing separate stack frames for different optimization tiers of each method.") #endif RETAIL_CONFIG_STRING_INFO(EXTERNAL_StartupDelayMS, W("StartupDelayMS"), "") diff --git a/src/coreclr/src/inc/eventtracebase.h b/src/coreclr/src/inc/eventtracebase.h index b4be4df..43cc99a 100644 --- a/src/coreclr/src/inc/eventtracebase.h +++ b/src/coreclr/src/inc/eventtracebase.h @@ -607,17 +607,6 @@ namespace ETW }MethodStructs; - enum class JitOptimizationTier - { - Unknown, // to identify older runtimes that would send this value - MinOptJitted, - Optimized, - QuickJitted, - OptimizedTier1, - - Count - }; - static const UINT8 MethodFlagsJitOptimizationTierShift = 7; static const unsigned int MethodFlagsJitOptimizationTierLowMask = 0x7; diff --git a/src/coreclr/src/vm/eventtrace.cpp b/src/coreclr/src/vm/eventtrace.cpp index 8489fff..400fdf2 100644 --- a/src/coreclr/src/vm/eventtrace.cpp +++ b/src/coreclr/src/vm/eventtrace.cpp @@ -6322,7 +6322,6 @@ VOID ETW::MethodLog::SendMethodEvent(MethodDesc *pMethodDesc, DWORD dwEventOptio if(pMethodDesc->GetMethodTable_NoLogging()) bIsGenericMethod = pMethodDesc->HasClassOrMethodInstantiation_NoLogging(); - int jitOptimizationTier = -1; NativeCodeVersionId nativeCodeId = 0; ulMethodFlags = ulMethodFlags | (bHasSharedGenericCode ? 
ETW::MethodLog::MethodStructs::SharedGenericCode : 0) | @@ -6340,59 +6339,16 @@ VOID ETW::MethodLog::SendMethodEvent(MethodDesc *pMethodDesc, DWORD dwEventOptio ulMethodFlags |= ETW::MethodLog::MethodStructs::ReadyToRunRejectedPrecompiledCode; } - if (pConfig->JitSwitchedToMinOpt()) - { - jitOptimizationTier = (int)JitOptimizationTier::MinOptJitted; - } -#ifdef FEATURE_TIERED_COMPILATION - else if (pConfig->JitSwitchedToOptimized()) - { - _ASSERTE(pMethodDesc->IsEligibleForTieredCompilation()); - _ASSERTE(pConfig->GetCodeVersion().GetOptimizationTier() == NativeCodeVersion::OptimizationTierOptimized); - jitOptimizationTier = (int)JitOptimizationTier::Optimized; - } - else if (pMethodDesc->IsEligibleForTieredCompilation()) - { - switch (pConfig->GetCodeVersion().GetOptimizationTier()) - { - case NativeCodeVersion::OptimizationTier0: - jitOptimizationTier = (int)JitOptimizationTier::QuickJitted; - break; - - case NativeCodeVersion::OptimizationTier1: - jitOptimizationTier = (int)JitOptimizationTier::OptimizedTier1; - break; - - case NativeCodeVersion::OptimizationTierOptimized: - jitOptimizationTier = (int)JitOptimizationTier::Optimized; - break; - - default: - UNREACHABLE(); - } - } -#endif - #ifdef FEATURE_CODE_VERSIONING nativeCodeId = pConfig->GetCodeVersion().GetVersionId(); #endif } - if (jitOptimizationTier < 0) - { - if (pMethodDesc->IsJitOptimizationDisabled()) - { - jitOptimizationTier = (int)JitOptimizationTier::MinOptJitted; - } - else - { - jitOptimizationTier = (int)JitOptimizationTier::Optimized; - } - } - static_assert_no_msg((unsigned int)JitOptimizationTier::Count - 1 <= MethodFlagsJitOptimizationTierLowMask); - _ASSERTE((unsigned int)jitOptimizationTier <= MethodFlagsJitOptimizationTierLowMask); + unsigned int jitOptimizationTier = (unsigned int)PrepareCodeConfig::GetJitOptimizationTier(pConfig, pMethodDesc); + static_assert_no_msg((unsigned int)PrepareCodeConfig::JitOptimizationTier::Count - 1 <= MethodFlagsJitOptimizationTierLowMask); + 
_ASSERTE(jitOptimizationTier <= MethodFlagsJitOptimizationTierLowMask); _ASSERTE(((ulMethodFlags >> MethodFlagsJitOptimizationTierShift) & MethodFlagsJitOptimizationTierLowMask) == 0); - ulMethodFlags |= (unsigned int)jitOptimizationTier << MethodFlagsJitOptimizationTierShift; + ulMethodFlags |= jitOptimizationTier << MethodFlagsJitOptimizationTierShift; // Intentionally set the extent flags (cold vs. hot) only after all the other common // flags (above) have been set. diff --git a/src/coreclr/src/vm/method.hpp b/src/coreclr/src/vm/method.hpp index 3122e4e..e77ff9ce 100644 --- a/src/coreclr/src/vm/method.hpp +++ b/src/coreclr/src/vm/method.hpp @@ -2068,6 +2068,21 @@ public: void SetReadyToRunRejectedPrecompiledCode(); #ifndef CROSSGEN_COMPILE +public: + enum class JitOptimizationTier : UINT8 + { + Unknown, // to identify older runtimes that would send this value + MinOptJitted, + Optimized, + QuickJitted, + OptimizedTier1, + + Count + }; + + static JitOptimizationTier GetJitOptimizationTier(PrepareCodeConfig *config, MethodDesc *methodDesc); + static const char *GetJitOptimizationTierStr(PrepareCodeConfig *config, MethodDesc *methodDesc); + bool JitSwitchedToMinOpt() const { LIMITED_METHOD_CONTRACT; @@ -2085,6 +2100,7 @@ public: } #ifdef FEATURE_TIERED_COMPILATION +public: bool JitSwitchedToOptimized() const { LIMITED_METHOD_CONTRACT; @@ -2102,6 +2118,7 @@ public: } #endif +public: PrepareCodeConfig *GetNextInSameThread() const { LIMITED_METHOD_CONTRACT; diff --git a/src/coreclr/src/vm/perfmap.cpp b/src/coreclr/src/vm/perfmap.cpp index 593ab8d..968e937 100644 --- a/src/coreclr/src/vm/perfmap.cpp +++ b/src/coreclr/src/vm/perfmap.cpp @@ -23,6 +23,7 @@ #endif PerfMap * PerfMap::s_Current = nullptr; +bool PerfMap::s_ShowOptimizationTiers = false; // Initialize the map for the process - called from EEStartupHelper. 
void PerfMap::Initialize() @@ -44,6 +45,11 @@ void PerfMap::Initialize() { PAL_IgnoreProfileSignal(signalNum); } + + if (CLRConfig::GetConfigValue(CLRConfig::EXTERNAL_PerfMapShowOptimizationTiers) != 0) + { + s_ShowOptimizationTiers = true; + } } } @@ -156,7 +162,7 @@ void PerfMap::WriteLine(SString& line) } // Log a method to the map. -void PerfMap::LogMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize) +void PerfMap::LogMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, const char *optimizationTier) { CONTRACTL{ THROWS; @@ -183,7 +189,15 @@ void PerfMap::LogMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize) // Build the map file line. StackScratchBuffer scratch; SString line; - line.Printf(FMT_CODE_ADDR " %x %s\n", pCode, codeSize, fullMethodSignature.GetANSI(scratch)); + line.Printf(FMT_CODE_ADDR " %x %s", pCode, codeSize, fullMethodSignature.GetANSI(scratch)); + if (optimizationTier != nullptr && s_ShowOptimizationTiers) + { + line.AppendPrintf("[%s]\n", optimizationTier); + } + else + { + line.Append(W('\n')); + } // Write the line. WriteLine(line); @@ -229,14 +243,24 @@ void PerfMap::LogImage(PEFile * pFile) // Log a method to the map. -void PerfMap::LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize) +void PerfMap::LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, PrepareCodeConfig *pConfig) { LIMITED_METHOD_CONTRACT; - if (s_Current != nullptr) + if (s_Current == nullptr) + { + return; + } + + const char *optimizationTier = nullptr; +#ifndef CROSSGEN_COMPILE + if (s_ShowOptimizationTiers) { - s_Current->LogMethod(pMethod, pCode, codeSize); + optimizationTier = PrepareCodeConfig::GetJitOptimizationTierStr(pConfig, pMethod); } +#endif // CROSSGEN_COMPILE + + s_Current->LogMethod(pMethod, pCode, codeSize, optimizationTier); } // Log a set of stub to the map. 
@@ -333,7 +357,7 @@ void NativeImagePerfMap::LogDataForModule(Module * pModule) MethodDesc *hotDesc = mi.GetMethodDesc(); hotDesc->CheckRestore(); - LogPreCompiledMethod(hotDesc, mi.GetMethodStartAddress(), baseAddr); + LogPreCompiledMethod(hotDesc, mi.GetMethodStartAddress(), baseAddr, nullptr); } return; } @@ -344,12 +368,12 @@ void NativeImagePerfMap::LogDataForModule(Module * pModule) { MethodDesc* hotDesc = mi.GetMethodDesc(); - LogPreCompiledMethod(hotDesc, mi.GetMethodStartAddress(), baseAddr); + LogPreCompiledMethod(hotDesc, mi.GetMethodStartAddress(), baseAddr, "ReadyToRun"); } } // Log a pre-compiled method to the perfmap. -void NativeImagePerfMap::LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode, SIZE_T baseAddr) +void NativeImagePerfMap::LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode, SIZE_T baseAddr, const char *optimizationTier) { STANDARD_VM_CONTRACT; @@ -364,12 +388,12 @@ void NativeImagePerfMap::LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode, // Emit an entry for each section if it is used. if (methodRegionInfo.hotSize > 0) { - LogMethod(pMethod, (PCODE)methodRegionInfo.hotStartAddress - baseAddr, methodRegionInfo.hotSize); + LogMethod(pMethod, (PCODE)methodRegionInfo.hotStartAddress - baseAddr, methodRegionInfo.hotSize, optimizationTier); } if (methodRegionInfo.coldSize > 0) { - LogMethod(pMethod, (PCODE)methodRegionInfo.coldStartAddress - baseAddr, methodRegionInfo.coldSize); + LogMethod(pMethod, (PCODE)methodRegionInfo.coldStartAddress - baseAddr, methodRegionInfo.coldSize, optimizationTier); } } diff --git a/src/coreclr/src/vm/perfmap.h b/src/coreclr/src/vm/perfmap.h index 1f06bd4..23f1812 100644 --- a/src/coreclr/src/vm/perfmap.h +++ b/src/coreclr/src/vm/perfmap.h @@ -19,6 +19,9 @@ private: // The one and only PerfMap for the process. 
static PerfMap * s_Current; + // Indicates whether optimization tiers should be shown for methods in perf maps + static bool s_ShowOptimizationTiers; + // The file stream to write the map to. CFileStream * m_FileStream; @@ -49,7 +52,7 @@ protected: void OpenFile(SString& path); // Does the actual work to log a method to the map. - void LogMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize); + void LogMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, const char *optimizationTier); // Does the actual work to log an image void LogImage(PEFile * pFile); @@ -65,7 +68,7 @@ public: static void LogImageLoad(PEFile * pFile); // Log a JIT compiled method to the map. - static void LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize); + static void LogJITCompiledMethod(MethodDesc * pMethod, PCODE pCode, size_t codeSize, PrepareCodeConfig *pConfig); // Log a set of stub to the map. static void LogStubs(const char* stubType, const char* stubOwner, PCODE pCode, size_t codeSize); @@ -79,7 +82,7 @@ class NativeImagePerfMap : PerfMap { private: // Log a pre-compiled method to the map. - void LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode, SIZE_T baseAddr); + void LogPreCompiledMethod(MethodDesc * pMethod, PCODE pCode, SIZE_T baseAddr, const char *optimizationTier); public: // Construct a new map for a native image. diff --git a/src/coreclr/src/vm/prestub.cpp b/src/coreclr/src/vm/prestub.cpp index 056fdb7..d83c419 100644 --- a/src/coreclr/src/vm/prestub.cpp +++ b/src/coreclr/src/vm/prestub.cpp @@ -867,7 +867,7 @@ PCODE MethodDesc::JitCompileCodeLockedEventWrapper(PrepareCodeConfig* pConfig, J { #ifdef FEATURE_PERFMAP // Save the JIT'd method information so that perf can resolve JIT'd call frames. 
- PerfMap::LogJITCompiledMethod(this, pCode, sizeOfCode); + PerfMap::LogJITCompiledMethod(this, pCode, sizeOfCode, pConfig); #endif } @@ -1143,6 +1143,67 @@ BOOL PrepareCodeConfig::MayUsePrecompiledCode() return m_mayUsePrecompiledCode; } +PrepareCodeConfig::JitOptimizationTier PrepareCodeConfig::GetJitOptimizationTier( + PrepareCodeConfig *config, + MethodDesc *methodDesc) +{ + WRAPPER_NO_CONTRACT; + _ASSERTE(methodDesc != nullptr); + _ASSERTE(config == nullptr || methodDesc == config->GetMethodDesc()); + + if (config != nullptr) + { + if (config->JitSwitchedToMinOpt()) + { + return JitOptimizationTier::MinOptJitted; + } + #ifdef FEATURE_TIERED_COMPILATION + else if (config->JitSwitchedToOptimized()) + { + _ASSERTE(methodDesc->IsEligibleForTieredCompilation()); + _ASSERTE(config->GetCodeVersion().GetOptimizationTier() == NativeCodeVersion::OptimizationTierOptimized); + return JitOptimizationTier::Optimized; + } + else if (methodDesc->IsEligibleForTieredCompilation()) + { + switch (config->GetCodeVersion().GetOptimizationTier()) + { + case NativeCodeVersion::OptimizationTier0: + return JitOptimizationTier::QuickJitted; + + case NativeCodeVersion::OptimizationTier1: + return JitOptimizationTier::OptimizedTier1; + + case NativeCodeVersion::OptimizationTierOptimized: + return JitOptimizationTier::Optimized; + + default: + UNREACHABLE(); + } + } + #endif + } + + return methodDesc->IsJitOptimizationDisabled() ? 
JitOptimizationTier::MinOptJitted : JitOptimizationTier::Optimized; +} + +const char *PrepareCodeConfig::GetJitOptimizationTierStr(PrepareCodeConfig *config, MethodDesc *methodDesc) +{ + WRAPPER_NO_CONTRACT; + + switch (GetJitOptimizationTier(config, methodDesc)) + { + case JitOptimizationTier::Unknown: return "Unknown"; + case JitOptimizationTier::MinOptJitted: return "MinOptJitted"; + case JitOptimizationTier::Optimized: return "Optimized"; + case JitOptimizationTier::QuickJitted: return "QuickJitted"; + case JitOptimizationTier::OptimizedTier1: return "OptimizedTier1"; + + default: + UNREACHABLE(); + } +} + #ifdef FEATURE_CODE_VERSIONING VersionedPrepareCodeConfig::VersionedPrepareCodeConfig() {} -- 2.7.4