From 1f43fc7f18a866301193d3f68911b0f0c37eb2d3 Mon Sep 17 00:00:00 2001
From: Katelyn Gadd
Date: Mon, 6 Feb 2023 17:09:47 -0800
Subject: [PATCH] [wasm] Migrate jiterpreter hit counting to C (#81729)

* Move jiterpreter trace hit counting to C for better performance
---
Review notes (text between the "---" marker and the diffstat is commentary,
not part of the commit message):

* mono_interp_tier_prepare_jiterpreter_fast: the DISABLE_THREADS branch now
  pre-increments hit_count. The post-increment form yielded the old value
  while the mono_atomic_inc_i64 branch yields the new one, so the two builds
  reached mono_opt_jiterpreter_minimum_trace_hit_count one hit apart.
  Pre-increment also matches the TypeScript logic removed below, which
  started hitCount at 1 and compiled when hitCount === minHitCount.
* trace_info_alloc: the slot index is now reserved with mono_atomic_inc_i32
  rather than a plain increment of the volatile counter, so two callers
  racing through it cannot be handed the same TraceInfo slot. This relies on
  mono_atomic_inc_i32 being declared by the same header that provides
  mono_atomic_cas_ptr / mono_atomic_inc_i64 (TODO confirm).
* Both changes replace one added line with one added line, so the hunk
  headers and the diffstat are unchanged. The post-image blob id on the
  jiterpreter.c "index" line no longer describes the patched file.
* The operands of the "#include" lines in the first jiterpreter.c hunk are
  missing from this copy of the patch and have to be restored before it can
  apply; leading whitespace inside hunks was re-created and may need
  whitespace-insensitive application.

 src/mono/mono/mini/interp/interp.c      |  2 +-
 src/mono/mono/mini/interp/jiterpreter.c | 97 ++++++++++++++++++++++++++++++++-
 src/mono/mono/mini/interp/jiterpreter.h | 10 +++-
 src/mono/wasm/runtime/cwraps.ts         |  2 +
 src/mono/wasm/runtime/jiterpreter.ts    | 70 +++++++++++-------------
 5 files changed, 140 insertions(+), 41 deletions(-)

diff --git a/src/mono/mono/mini/interp/interp.c b/src/mono/mono/mini/interp/interp.c
index 81ccb95..ab5b9d7 100644
--- a/src/mono/mono/mini/interp/interp.c
+++ b/src/mono/mono/mini/interp/interp.c
@@ -7609,7 +7609,7 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
 			 * JS workers in order to register them at the appropriate slots in the function pointer
 			 * table. when growing the function pointer table we will also need to synchronize that.
 			 */
-			JiterpreterThunk prepare_result = mono_interp_tier_prepare_jiterpreter(frame, frame->imethod->method, ip, frame->imethod->jinfo->code_start, frame->imethod->jinfo->code_size);
+			JiterpreterThunk prepare_result = mono_interp_tier_prepare_jiterpreter_fast(frame, frame->imethod->method, ip, frame->imethod->jinfo->code_start, frame->imethod->jinfo->code_size);
 			switch ((guint32)(void*)prepare_result) {
 				case JITERPRETER_TRAINING:
 					// jiterpreter still updating hit count before deciding to generate a trace,
diff --git a/src/mono/mono/mini/interp/jiterpreter.c b/src/mono/mono/mini/interp/jiterpreter.c
index 0864322..60664b3 100644
--- a/src/mono/mono/mini/interp/jiterpreter.c
+++ b/src/mono/mono/mini/interp/jiterpreter.c
@@ -42,6 +42,7 @@ void jiterp_preserve_module (void);
 #include
 #include
 #include
+#include
 
 #include "jiterpreter.h"
 
@@ -861,6 +862,57 @@ should_generate_trace_here (InterpBasicBlock *bb) {
 	return FALSE;
 }
 
+typedef struct {
+	// 64-bits because it can get very high if estimate heat is turned on
+	gint64 hit_count;
+	JiterpreterThunk thunk;
+} TraceInfo;
+
+#define MAX_TRACE_SEGMENTS 256
+#define TRACE_SEGMENT_SIZE 1024
+
+static volatile gint32 trace_count = 0;
+static TraceInfo *trace_segments[MAX_TRACE_SEGMENTS] = { NULL };
+
+static TraceInfo *
+trace_info_allocate_segment (gint32 index) {
+	g_assert (index < MAX_TRACE_SEGMENTS);
+
+	volatile gpointer *slot = (volatile gpointer *)&trace_segments[index];
+	gpointer segment = g_malloc0 (sizeof(TraceInfo) * TRACE_SEGMENT_SIZE);
+	gpointer result = mono_atomic_cas_ptr (slot, segment, NULL);
+	if (result != NULL) {
+		g_free (segment);
+		return (TraceInfo *)result;
+	} else {
+		return (TraceInfo *)segment;
+	}
+}
+
+static TraceInfo *
+trace_info_get (gint32 index) {
+	g_assert (index >= 0);
+	int segment_index = index / TRACE_SEGMENT_SIZE,
+		element_index = index % TRACE_SEGMENT_SIZE;
+
+	g_assert (segment_index < MAX_TRACE_SEGMENTS);
+
+	TraceInfo *segment = trace_segments[segment_index];
+	if (!segment)
+		segment = trace_info_allocate_segment (segment_index);
+
+	return &segment[element_index];
+}
+
+static gint32
+trace_info_alloc () {
+	gint32 index = mono_atomic_inc_i32 (&trace_count) - 1; // atomic, so racing callers never get the same slot
+	TraceInfo *info = trace_info_get (index);
+	info->hit_count = 0;
+	info->thunk = NULL;
+	return index;
+}
+
 /*
  * Insert jiterpreter entry points at the correct candidate locations:
  * The first basic block of the function,
@@ -907,8 +959,12 @@ jiterp_insert_entry_points (void *_td)
 			should_generate = TRUE;
 
 		if (enabled && should_generate) {
+			gint32 trace_index = trace_info_alloc ();
+
 			td->cbb = bb;
-			mono_jiterp_insert_ins (td, NULL, MINT_TIER_PREPARE_JITERPRETER);
+			InterpInst *ins = mono_jiterp_insert_ins (td, NULL, MINT_TIER_PREPARE_JITERPRETER);
+			memcpy(ins->data, &trace_index, sizeof (trace_index));
+
 			// Note that we only clear enter_at_next here, after generating a trace.
 			// This means that the flag will stay set intentionally if we keep failing
 			// to generate traces, perhaps due to a string of small basic blocks
@@ -918,6 +974,45 @@ jiterp_insert_entry_points (void *_td)
 	}
 }
 
+EMSCRIPTEN_KEEPALIVE double
+mono_jiterp_get_trace_hit_count (gint32 trace_index) {
+	return trace_info_get (trace_index)->hit_count;
+}
+
+JiterpreterThunk
+mono_interp_tier_prepare_jiterpreter_fast (
+	void *frame, MonoMethod *method, const guint16 *ip,
+	const guint16 *start_of_body, int size_of_body
+) {
+	if (!mono_opt_jiterpreter_traces_enabled)
+		return (JiterpreterThunk)(void*)JITERPRETER_NOT_JITTED;
+
+	guint32 trace_index = READ32 (ip + 1);
+	TraceInfo *trace_info = trace_info_get (trace_index);
+	g_assert (trace_info);
+
+	if (trace_info->thunk)
+		return trace_info->thunk;
+
+#ifdef DISABLE_THREADS
+	gint64 count = ++trace_info->hit_count; // new value, same as the atomic branch below
+#else
+	gint64 count = mono_atomic_inc_i64(&trace_info->hit_count);
+#endif
+
+	if (count == mono_opt_jiterpreter_minimum_trace_hit_count) {
+		JiterpreterThunk result = mono_interp_tier_prepare_jiterpreter(
+			frame, method, ip, (gint32)trace_index,
+			start_of_body, size_of_body
+		);
+		trace_info->thunk = result;
+		return result;
+	} else {
+		// Hit count not reached, or already reached but compilation is not done yet
+		return (JiterpreterThunk)(void*)JITERPRETER_TRAINING;
+	}
+}
+
 // Used to parse runtime options that control the jiterpreter. This is *also* used at runtime
 // by the jiterpreter typescript to reconfigure the jiterpreter, for example if WASM EH is not
 // actually available even though it was enabled (to turn it off).
diff --git a/src/mono/mono/mini/interp/jiterpreter.h b/src/mono/mono/mini/interp/jiterpreter.h
index 4def33e..7552e20 100644
--- a/src/mono/mono/mini/interp/jiterpreter.h
+++ b/src/mono/mono/mini/interp/jiterpreter.h
@@ -52,10 +52,18 @@ mono_interp_jit_wasm_entry_trampoline (
 	int unbox, int has_this, int has_return, const char *name, void *default_implementation
 );
 
+// Fast-path implemented in C
+JiterpreterThunk
+mono_interp_tier_prepare_jiterpreter_fast (
+	void *frame, MonoMethod *method, const guint16 *ip,
+	const guint16 *start_of_body, int size_of_body
+);
+
 // HACK: Pass void* so that this header can include safely in files without definition for InterpFrame
+// Slow-path implemented in TypeScript, actually performs JIT
 extern JiterpreterThunk
 mono_interp_tier_prepare_jiterpreter (
-	void *frame, MonoMethod *method, const guint16 *ip,
+	void *frame, MonoMethod *method, const guint16 *ip, gint32 trace_index,
 	const guint16 *start_of_body, int size_of_body
 );
 
diff --git a/src/mono/wasm/runtime/cwraps.ts b/src/mono/wasm/runtime/cwraps.ts
index 8625c80..c77eae3 100644
--- a/src/mono/wasm/runtime/cwraps.ts
+++ b/src/mono/wasm/runtime/cwraps.ts
@@ -123,6 +123,7 @@ const fn_signatures: SigLine[] = [
     [true, "mono_jiterp_type_to_stind", "number", ["number"]],
     [true, "mono_jiterp_imethod_to_ftnptr", "number", ["number"]],
     [true, "mono_jiterp_debug_count", "number", []],
+    [true, "mono_jiterp_get_trace_hit_count", "number", ["number"]],
 ];
 
 export interface t_Cwraps {
@@ -264,6 +265,7 @@ export interface t_Cwraps {
     mono_jiterp_type_to_stind(type: MonoType): number;
     mono_jiterp_imethod_to_ftnptr(imethod: VoidPtr): VoidPtr;
     mono_jiterp_debug_count(): number;
+    mono_jiterp_get_trace_hit_count(traceIndex: number): number;
 }
 
 const wrapped_c_functions: t_Cwraps = {};
diff --git a/src/mono/wasm/runtime/jiterpreter.ts b/src/mono/wasm/runtime/jiterpreter.ts
index 63bd416..b236e2e 100644
--- a/src/mono/wasm/runtime/jiterpreter.ts
+++ b/src/mono/wasm/runtime/jiterpreter.ts
@@ -78,14 +78,18 @@ export class InstrumentedTraceState {
 
 export class TraceInfo {
     ip: MintOpcodePtr;
-    hitCount: number;
+    index: number; // used to look up hit count
     name: string | undefined;
     abortReason: string | undefined;
     fnPtr: Number | undefined;
 
-    constructor (ip: MintOpcodePtr) {
+    constructor (ip: MintOpcodePtr, index: number) {
         this.ip = ip;
-        this.hitCount = 1;
+        this.index = index;
+    }
+
+    get hitCount () {
+        return cwraps.mono_jiterp_get_trace_hit_count(this.index);
     }
 }
 
@@ -763,7 +767,7 @@ const JITERPRETER_TRAINING = 0;
 const JITERPRETER_NOT_JITTED = 1;
 
 export function mono_interp_tier_prepare_jiterpreter (
-    frame: NativePointer, method: MonoMethod, ip: MintOpcodePtr,
+    frame: NativePointer, method: MonoMethod, ip: MintOpcodePtr, index: number,
     startOfBody: MintOpcodePtr, sizeOfBody: MintOpcodePtr
 ) : number {
     mono_assert(ip, "expected instruction pointer");
@@ -779,40 +783,30 @@ export function mono_interp_tier_prepare_jiterpreter (
     let info = traceInfo[ip];
 
     if (!info)
-        traceInfo[ip] = info = new TraceInfo(ip);
-    else
-        info.hitCount++;
-
-    const minHitCount = mostRecentOptions.minimumTraceHitCount;
-
-    if (info.hitCount < minHitCount)
-        return JITERPRETER_TRAINING;
-    else if (info.hitCount === minHitCount) {
-        counters.traceCandidates++;
-        let methodFullName: string | undefined;
-        if (trapTraceErrors || mostRecentOptions.estimateHeat || (instrumentedMethodNames.length > 0) || useFullNames) {
-            const pMethodName = cwraps.mono_wasm_method_get_full_name(method);
-            methodFullName = Module.UTF8ToString(pMethodName);
-            Module._free(pMethodName);
-        }
-        const methodName = Module.UTF8ToString(cwraps.mono_wasm_method_get_name(method));
-        info.name = methodFullName || methodName;
-        const fnPtr = generate_wasm(
-            frame, methodName, ip, startOfBody, sizeOfBody, methodFullName
-        );
-        if (fnPtr) {
-            counters.tracesCompiled++;
-            // FIXME: These could theoretically be 0 or 1, in which case the trace
-            // will never get invoked. Oh well
-            info.fnPtr = fnPtr;
-            return fnPtr;
-        } else {
-            return mostRecentOptions.estimateHeat ? JITERPRETER_TRAINING : JITERPRETER_NOT_JITTED;
-        }
-    } else if (!mostRecentOptions.estimateHeat)
-        throw new Error("prepare should not be invoked at this point");
-    else
-        return JITERPRETER_TRAINING;
+        traceInfo[ip] = info = new TraceInfo(ip, index);
+
+    counters.traceCandidates++;
+    let methodFullName: string | undefined;
+    if (trapTraceErrors || mostRecentOptions.estimateHeat || (instrumentedMethodNames.length > 0) || useFullNames) {
+        const pMethodName = cwraps.mono_wasm_method_get_full_name(method);
+        methodFullName = Module.UTF8ToString(pMethodName);
+        Module._free(pMethodName);
+    }
+    const methodName = Module.UTF8ToString(cwraps.mono_wasm_method_get_name(method));
+    info.name = methodFullName || methodName;
+    const fnPtr = generate_wasm(
+        frame, methodName, ip, startOfBody, sizeOfBody, methodFullName
+    );
+
+    if (fnPtr) {
+        counters.tracesCompiled++;
+        // FIXME: These could theoretically be 0 or 1, in which case the trace
+        // will never get invoked. Oh well
+        info.fnPtr = fnPtr;
+        return fnPtr;
+    } else {
+        return mostRecentOptions.estimateHeat ? JITERPRETER_TRAINING : JITERPRETER_NOT_JITTED;
+    }
 }
 
 export function jiterpreter_dump_stats (b?: boolean, concise?: boolean) {
-- 
2.7.4