Use SuspendRuntime()/ResumeRuntime() together with asynchronous DoStackSnapshot() ICorProfilerInfo API calls in the
sampling thread to collect samples for all managed threads, instead of sending a Linux signal to every managed thread
and gathering the sample inside the signal handler.
The alternative CPU sampling mode also disables the Enter/Leave hooks and allows usage of native images (ni.dll).
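
A minimal sketch of that pattern, assuming an ICorProfilerInfo10 pointer and a list of live managed ThreadIDs are already at hand (names are illustrative, error handling is elided; the actual implementation is CommonTrace::DoSampleManually() below):

    // Requires the CoreCLR profiling headers (corprof.h) and <vector>.
    static HRESULT __stdcall CollectFrame(FunctionID funcId, UINT_PTR /* ip */,
                                          COR_PRF_FRAME_INFO, ULONG32, BYTE[],
                                          void *clientData)
    {
        // DoStackSnapshot() reports frames leaf-to-root; funcId == 0 marks
        // an unmanaged frame.
        static_cast<std::vector<FunctionID> *>(clientData)->push_back(funcId);
        return S_OK;
    }

    void SampleOnce(ICorProfilerInfo10 *info, const std::vector<ThreadID> &threads)
    {
        if (FAILED(info->SuspendRuntime()))
            return; // e.g. the runtime is already suspended by a GC; retry later

        for (ThreadID tid : threads)
        {
            std::vector<FunctionID> frames;
            // Asynchronous walk of a foreign thread: no seed context is needed.
            info->DoStackSnapshot(tid, CollectFrame, COR_PRF_SNAPSHOT_DEFAULT,
                                  &frames, nullptr, 0);
            // ... record `frames` as one sample of this thread ...
        }

        info->ResumeRuntime();
    }
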
of this subsystem is to track changes of the execution stack.
`PROF_EXECUTION_TRACE` defaults to `false`.
+ - `PROF_USE_ALTERNATIVE_SAMPLING_EXECUTION_TRACING` - use the real sampling profiling API (async `DoStackSnapshot`) instead of instrumentation (Enter/Leave hooks) for execution tracing; see the example after this list. Default value is `false` (alternative sampling is disabled).
+
- `PROF_MEMORY_TRACE` - enables the Memory Trace subsystem. This subsystem
  collects data about allocations. `PROF_MEMORY_TRACE` defaults to
  `false`.
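
For example, the alternative sampling mode is only honored when execution tracing is enabled, memory tracing is disabled, and the sampling collection method is selected (the configuration checks below enforce this), so a typical environment for the profiled process sets at least:

    PROF_EXECUTION_TRACE=true
    PROF_USE_ALTERNATIVE_SAMPLING_EXECUTION_TRACING=true
    PROF_MEMORY_TRACE=false
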
FetchValue("PROF_CPU_TRACE_TIMEOUT", new_config.CpuTraceTimeoutMs);
FetchValue("PROF_EXECUTION_TRACE", new_config.ExecutionTraceEnabled);
+ FetchValue("PROF_USE_ALTERNATIVE_SAMPLING_EXECUTION_TRACING", new_config.AlternativeSamplingExecutionTraceEnabled);
FetchValue("PROF_MEMORY_TRACE", new_config.MemoryTraceEnabled);
if (new_config.MemoryTraceEnabled &&
new_config.ExecutionTraceEnabled &&
, CpuTraceThreadEnabled(false)
, CpuTraceTimeoutMs(10)
, ExecutionTraceEnabled(false)
+ , AlternativeSamplingExecutionTraceEnabled(false)
, MemoryTraceEnabled(false)
, StackTrackingEnabled(false)
, MemoryMinSizeLimit(0)
}
}
+ if (!ExecutionTraceEnabled)
+ {
+ if (AlternativeSamplingExecutionTraceEnabled)
+ {
+ warnings.push_back(
+ "real sampling requires execution tracing");
+ AlternativeSamplingExecutionTraceEnabled = false;
+ }
+ }
+ if (MemoryTraceEnabled || CollectionMethod != CollectionMethod::Sampling)
+ {
+ if (AlternativeSamplingExecutionTraceEnabled)
+ {
+ warnings.push_back(
+ "real sampling is disabled if memory tracing is required or instrumentation mode is used");
+ AlternativeSamplingExecutionTraceEnabled = false;
+ }
+ }
+
if (!ExecutionTraceEnabled && !MemoryTraceEnabled)
{
// When all traces are disabled.
//
bool ExecutionTraceEnabled;
+ bool AlternativeSamplingExecutionTraceEnabled;
//
// Memory Trace features.
, m_pProfilerInfo7(nullptr)
, m_pProfilerInfo8(nullptr)
, m_pProfilerInfo9(nullptr)
+#ifdef __ICorProfilerInfo10_INTERFACE_DEFINED__
, m_pProfilerInfo10(nullptr)
+#endif
, m_version(0)
{
}
HRESULT hr = S_OK; // for case when Initialize() called twice
+#ifdef __ICorProfilerInfo10_INTERFACE_DEFINED__
if (m_pProfilerInfo10 == nullptr)
{
hr = pICorProfilerInfoUnk->QueryInterface(
m_pProfilerInfo10);
}
}
+#endif
if (m_pProfilerInfo9 == nullptr)
{
}
}
+#ifdef __ICorProfilerInfo10_INTERFACE_DEFINED__
_ASSERTE(m_version < 10 || m_pProfilerInfo10 != nullptr);
+#endif
+
_ASSERTE(m_version < 9 || m_pProfilerInfo9 != nullptr);
_ASSERTE(m_version < 8 || m_pProfilerInfo8 != nullptr);
_ASSERTE(m_version < 7 || m_pProfilerInfo7 != nullptr);
m_pProfilerInfo7 = nullptr;
m_pProfilerInfo8 = nullptr;
m_pProfilerInfo9 = nullptr;
+#ifdef __ICorProfilerInfo10_INTERFACE_DEFINED__
m_pProfilerInfo10 = nullptr;
+#endif
m_version = 0;
}
}
return m_pProfilerInfo9;
}
+#ifdef __ICorProfilerInfo10_INTERFACE_DEFINED__
ICorProfilerInfo10 *v10() const noexcept
{
_ASSERTE(m_version >= 10);
return m_pProfilerInfo10;
}
+#endif
private:
// Pointers to the implementation of the ProfilerInfo interface(s).
ICorProfilerInfo7 *m_pProfilerInfo7;
ICorProfilerInfo8 *m_pProfilerInfo8;
ICorProfilerInfo9 *m_pProfilerInfo9;
+#ifdef __ICorProfilerInfo10_INTERFACE_DEFINED__
ICorProfilerInfo10 *m_pProfilerInfo10;
+#endif
// Version of the Profiler Info API.
unsigned int m_version;
}
}
+ if (config.AlternativeSamplingExecutionTraceEnabled)
+ {
+#ifndef __ICorProfilerInfo10_INTERFACE_DEFINED__
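+ // SuspendRuntime()/ResumeRuntime() require ICorProfilerInfo10, which is not available in the profiling headers of this build.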
+ config.AlternativeSamplingExecutionTraceEnabled = false;
+ LOG().Warn() << "Real CPU sampling option is disabled";
+#endif
+ }
+
//
// Starting service threads.
//
// These events are required for tracing of call stack dynamics.
- if (config.LineTraceEnabled)
+ if (config.LineTraceEnabled || config.AlternativeSamplingExecutionTraceEnabled)
{
events |= COR_PRF_ENABLE_STACK_SNAPSHOT;
}
m_sampleAction.Release();
}
+#ifdef __ICorProfilerInfo10_INTERFACE_DEFINED__
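+// Callback for ICorProfilerInfo2::DoStackSnapshot(): records each reported frame (leaf-to-root order) into the vector passed through clientData.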
+static HRESULT __stdcall StackCallback(FunctionID funcId, UINT_PTR ip, COR_PRF_FRAME_INFO frameInfo, ULONG32 contextSize, BYTE context[], void *clientData)
+{
+ std::vector< std::pair<FunctionID, UINT_PTR> > *frames = reinterpret_cast<std::vector< std::pair<FunctionID, UINT_PTR> > *>(clientData);
+
+ frames->emplace_back(funcId, ip);
+
+ return S_OK;
+}
+#endif
+
+// return true on successful sample, false otherwise
+bool CommonTrace::DoSampleManually() noexcept
+{
+#ifdef __ICorProfilerInfo10_INTERFACE_DEFINED__
+ // 1. suspend runtime
+ HRESULT hr = m_info.v10()->SuspendRuntime();
+ if (FAILED(hr))
+ {
+ // Can't suspend the runtime for some reason, so skip this sample
+ // (this can happen when the runtime is already suspended, e.g. by a GC).
+ // Don't sleep in this case; retry immediately so the sample is taken as close as possible to the intended time.
+ return false;
+ }
+
+ // 2. check all already registered threads
+ ExecutionTrace &executionTrace = m_profiler.GetExecutionTrace();
+
+ std::vector< std::pair<FunctionID, UINT_PTR> > frames_reverse;
+ std::vector< std::pair<FunctionInfo*, UINT_PTR> > frames;
+
+ auto storage_lock = m_threadStorage.lock_shared();
+ for (ThreadInfo &threadInfo : storage_lock->GetLiveRange())
+ {
+ frames_reverse.clear();
+ frames.clear();
+
+ // 3. do DoStackSnapshot for thread and fill all frames
+ hr = m_info.v2()->DoStackSnapshot(threadInfo.id, StackCallback, COR_PRF_SNAPSHOT_DEFAULT, &frames_reverse, nullptr, 0);
+ if (FAILED(hr))
+ {
+ // Ignore failures here; a sample with zero frames is still recorded.
+ }
+
+ // 4. Write new frames to EventChannel of thread
+ std::vector< std::pair<FunctionID, UINT_PTR> >::reverse_iterator it = frames_reverse.rbegin();
+ for (; it != frames_reverse.rend(); ++it)
+ {
+ FunctionInfo *funcInfo = nullptr;
+
+ if (it->first == 0)
+ {
+ // unmanaged frame, insert special UnmanagedFunctionInfo if this was managed->unmanaged transition
+ if (it != frames_reverse.rbegin())
+ {
+ std::vector< std::pair<FunctionID, UINT_PTR> >::reverse_iterator prev = std::prev(it);
+
+ if (prev->first)
+ {
+ // managed->unmanaged transition
+ funcInfo = executionTrace.getUnmanagedFuncInfo();
+ }
+ }
+ }
+ else
+ {
+ // managed frame
+ BOOL bHookFunction;
+ funcInfo = reinterpret_cast<FunctionInfo*>(executionTrace.FunctionIDMap(it->first, &bHookFunction));
+ }
+
+ if (funcInfo)
+ {
+ if (m_profiler.GetConfig().StackDepthLimit && // In case `StackDepthLimit` == 0 limit is disabled.
+ frames.size() > m_profiler.GetConfig().StackDepthLimit)
+ {
+ break;
+ }
+
+ UINT_PTR ip = 0;
+ if (m_profiler.GetConfig().LineTraceEnabled)
+ {
+ ip = it->second;
+ }
+ frames.emplace_back(funcInfo, ip);
+ }
+ }
+
+ // insert jit frames
+ size_t jitDepth = threadInfo.eventChannel.getJitDepth();
+ for (size_t i = 0; i < jitDepth; ++i)
+ {
+ if (m_profiler.GetConfig().StackDepthLimit && // In case `StackDepthLimit` == 0 limit is disabled.
+ frames.size() > m_profiler.GetConfig().StackDepthLimit)
+ {
+ break;
+ }
+ frames.emplace_back(executionTrace.getJitFuncInfo(), 0);
+ }
+
+ threadInfo.eventChannel.UpdateFully(frames);
+
+ // 5. create new sample for thread
+ if (threadInfo.eventChannel.Sample(m_profiler.GetTickCountFromInit(), 1))
+ {
+ this->SendDoLog(threadInfo);
+ }
+ }
+
+ // 6. resume runtime
+ hr = m_info.v10()->ResumeRuntime();
+ if (FAILED(hr))
+ {
+ // Unable to resume the runtime for some reason.
+ // Still count this as a successful sample and let the next SuspendRuntime() call deal with the situation.
+ return true;
+ }
+
+ return true;
+#else
+ _ASSERTE(false);
+ return false;
+#endif
+}
+
__forceinline void CommonTrace::SendDoSample(ThreadInfo &thrInfo) noexcept
{
union sigval val;
{
struct timespec ts;
- if (itThrInfo == endThrInfo)
+ if (!m_profiler.GetConfig().AlternativeSamplingExecutionTraceEnabled)
{
+ if (itThrInfo == endThrInfo)
{
- auto storage_lock = this->GetThreadStorageShared();
- liveThreads = storage_lock->GetLiveContainer();
+ {
+ auto storage_lock = this->GetThreadStorageShared();
+ liveThreads = storage_lock->GetLiveContainer();
+ }
+ itThrInfo = liveThreads.begin();
+ endThrInfo = liveThreads.end();
+ splitter.Reset(liveThreads.size());
}
- itThrInfo = liveThreads.begin();
- endThrInfo = liveThreads.end();
- splitter.Reset(liveThreads.size());
- }
-
- if (!liveThreads.empty())
- {
- auto storage_lock = this->GetThreadStorageShared();
- ThreadInfo &thrInfo = *itThrInfo++;
- // We update all live threads if they are attached to OS
- // threads.
- if (thrInfo.id != 0 && thrInfo.nativeHandle != 0)
+ if (!liveThreads.empty())
{
- thrInfo.genTicks++; // OK with unsigned overflows.
- if (m_profiler.GetConfig().HighGranularityEnabled)
+ auto storage_lock = this->GetThreadStorageShared();
+ ThreadInfo &thrInfo = *itThrInfo++;
+
+ // We update all live threads if they are attached to OS
+ // threads.
+ if (thrInfo.id != 0 && thrInfo.nativeHandle != 0)
{
- this->SendDoSample(thrInfo);
+ thrInfo.genTicks++; // OK with unsigned overflows.
+ if (m_profiler.GetConfig().HighGranularityEnabled)
+ {
+ this->SendDoSample(thrInfo);
+ }
}
- }
- ts = MsToTS(splitter.GetNext());
+ ts = MsToTS(splitter.GetNext());
+ }
+ else
+ {
+ ts = MsToTS(m_profiler.GetConfig().SamplingTimeoutMs);
+ }
}
else
{
- ts = MsToTS(m_profiler.GetConfig().SamplingTimeoutMs);
+ bool successful = this->DoSampleManually();
+ if (successful)
+ {
+ ts = MsToTS(m_profiler.GetConfig().SamplingTimeoutMs);
+ }
+ else
+ {
+ ts.tv_sec = 0;
+ ts.tv_nsec = 0;
+ }
}
// NOTE: Sleep() function has better precision so we use it
- // for short pauses.
- if (ts.tv_sec == 0)
+ // for short pauses (but not for zero-length pauses).
+ if (ts.tv_sec == 0 && ts.tv_nsec != 0)
{
Sleep(ts.tv_nsec / 1000000);
ts.tv_nsec = 0;
InternalID threadIid =
m_threadStorage.lock()->Place(threadId).first.internalId;
TRACE().DumpThreadCreated(threadId, threadIid);
+
+ ThreadID currentThreadId = 0;
+ m_info.v1()->GetCurrentThreadID(&currentThreadId);
+
+ if (currentThreadId == threadId)
+ {
+ // Initialize the thread-local cache if we are on that thread (it is used by the JIT-related callbacks).
+ ThreadInfo *pThreadInfo = m_profiler.GetCommonTrace().GetThreadInfo();
+ }
}
catch (const std::exception &e)
{
threadIid = thrInfo.internalId;
}
TRACE().DumpThreadAssignedToOSThread(threadIid, osThreadId);
+
+ ThreadID currentThreadId = 0;
+ m_info.v1()->GetCurrentThreadID(&currentThreadId);
+
+ if (currentThreadId == managedThreadId)
+ {
+ // Initialize the thread-local cache if we are on that thread (it is used by the JIT-related callbacks).
+ ThreadInfo *pThreadInfo = m_profiler.GetCommonTrace().GetThreadInfo();
+ }
}
catch (const std::exception &e)
{
SAMPLING_EVENT_RESUME,
};
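+ // Suspends the runtime, walks every live managed thread with DoStackSnapshot() and emits one sample per thread; returns false if the runtime could not be suspended.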
+ bool DoSampleManually() noexcept;
+
void SendDoSample(ThreadInfo &thrInfo) noexcept;
void SendDoLog(ThreadInfo &thrInfo) noexcept;
, m_buffer(EVENT_CHANNEL_START_CAP)
, m_mutex()
, m_bufferCapacityIncreaseIsPlanned(false)
+ , m_jitDepth(0)
{}
private:
// Writer methods.
//
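+ // Replaces the whole shadow stack with the frames collected by CommonTrace::DoSampleManually().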
+ void UpdateFully(std::vector< std::pair<FunctionInfo*, UINT_PTR> > &frames) noexcept
+ {
+ size_t size = m_stack.size();
+ for (size_t i = 0; i < size; ++i)
+ {
+ Pop();
+ }
+
+ for (size_t i = 0; i < frames.size(); ++i)
+ {
+ Push(*frames[i].first, frames[i].second);
+ }
+ }
+
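+ // Push() overload that also records the frame's native instruction pointer.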
+ void Push(const FunctionInfo &funcInfo, UINT_PTR ip) noexcept
+ {
+ assert(m_currentState.matchPrefixSize <= m_stack.size());
+ m_stack.push_back(Frame{&funcInfo, ip});
+
+ // XXX: exception in this call will terminate process!
+ this->EnsureBufferCapacity(); // Perform planned reallocation.
+ }
+
void Push(const FunctionInfo &funcInfo) noexcept
{
assert(m_currentState.matchPrefixSize <= m_stack.size());
m_mutex.unlock();
}
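+ // Number of JIT-compilation callbacks currently active on the owning thread; in the alternative sampling mode the sampler appends this many synthetic JIT frames to each collected stack.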
+ void increaseJitDepth() noexcept
+ {
+ ++m_jitDepth;
+ }
+
+ void decreaseJitDepth() noexcept
+ {
+ assert(m_jitDepth != 0);
+ --m_jitDepth;
+ }
+
+ size_t getJitDepth() noexcept
+ {
+ return m_jitDepth;
+ }
+
private:
Stack m_stack;
EventSummary m_currentState;
ring_buffer<EventSummary> m_buffer;
std::mutex m_mutex;
bool m_bufferCapacityIncreaseIsPlanned;
+
+ std::atomic<size_t> m_jitDepth;
};
#endif // _EVENT_CHANNEL_H_
m_info.v3()->SetFunctionIDMapper2(FunctionIDMapStub, this);
- hr = m_info.v3()->SetEnterLeaveFunctionHooks3(
- EnterNaked3, LeaveNaked3, TailcallNaked3);
- if (FAILED(hr))
+ if (!config.AlternativeSamplingExecutionTraceEnabled)
{
- m_profiler.HandleHresult(
- "ExecutionTrace::ProcessConfig(): "
- "SetEnterLeaveFunctionHooks3()", hr
- );
- goto next_stage;
- }
+ hr = m_info.v3()->SetEnterLeaveFunctionHooks3(
+ EnterNaked3, LeaveNaked3, TailcallNaked3);
+ if (FAILED(hr))
+ {
+ m_profiler.HandleHresult(
+ "ExecutionTrace::ProcessConfig(): "
+ "SetEnterLeaveFunctionHooks3()", hr
+ );
+ goto next_stage;
+ }
- // This events are required for tracing of call stack dynamics.
- events = events
- | COR_PRF_MONITOR_ENTERLEAVE
- | COR_PRF_MONITOR_CODE_TRANSITIONS
- | COR_PRF_MONITOR_EXCEPTIONS;
+ // These events are required for tracing of call stack dynamics.
+ events = events
+ | COR_PRF_MONITOR_ENTERLEAVE
+ | COR_PRF_MONITOR_CODE_TRANSITIONS
+ | COR_PRF_MONITOR_EXCEPTIONS;
+ }
}
next_stage:
ThreadInfo *pThreadInfo = m_profiler.GetCommonTrace().GetThreadInfoR();
if (pThreadInfo != nullptr)
{
- if (m_profiler.GetConfig().LineTraceEnabled)
+ if (!m_profiler.GetConfig().AlternativeSamplingExecutionTraceEnabled)
{
- this->UpdateCallStackPush(*m_pJitFunctionInfo, 0);
+ if (m_profiler.GetConfig().LineTraceEnabled)
+ {
+ this->UpdateCallStackPush(*m_pJitFunctionInfo, 0);
+ }
+ else
+ {
+ this->UpdateCallStackPush(*m_pJitFunctionInfo);
+ }
}
else
{
- this->UpdateCallStackPush(*m_pJitFunctionInfo);
+ pThreadInfo->eventChannel.increaseJitDepth();
}
}
ThreadInfo *pThreadInfo = m_profiler.GetCommonTrace().GetThreadInfoR();
if (pThreadInfo != nullptr)
{
- this->UpdateCallStackPop();
+ if (!m_profiler.GetConfig().AlternativeSamplingExecutionTraceEnabled)
+ {
+ this->UpdateCallStackPop();
+ }
+ else
+ {
+ pThreadInfo->eventChannel.decreaseJitDepth();
+ }
}
__forceinline void AfterSample(
ThreadInfo &thrInfo, SamplingSharedState &state) noexcept;
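+ // Accessors used by CommonTrace::DoSampleManually() to represent unmanaged frames and in-progress JIT compilations in sampled stacks.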
+ FunctionInfo * getUnmanagedFuncInfo() noexcept
+ {
+ return m_pUnmanagedFunctionInfo;
+ }
+
+ FunctionInfo * getJitFuncInfo() noexcept
+ {
+ return m_pJitFunctionInfo;
+ }
+
private:
//
// Various useful instance methods.