[wasm] Jiterpreter monitoring phase take 2 (#83489)
authorKatelyn Gadd <kg@luminance.org>
Tue, 21 Mar 2023 20:47:40 +0000 (13:47 -0700)
committerGitHub <noreply@github.com>
Tue, 21 Mar 2023 20:47:40 +0000 (13:47 -0700)
* Add a monitoring phase to jiterpreter traces that tracks the approximate number of opcodes executed before specific types of bailouts.
If a trace bails out frequently without executing enough opcodes, it will be rejected and turned into a nop to improve performance.
* Fix assert when running out of TraceInfo space

src/mono/mono/mini/interp/interp.c
src/mono/mono/mini/interp/jiterpreter.c
src/mono/mono/mini/interp/jiterpreter.h
src/mono/mono/mini/interp/mintops.def
src/mono/mono/utils/options-def.h
src/mono/wasm/runtime/cwraps.ts
src/mono/wasm/runtime/jiterpreter-support.ts
src/mono/wasm/runtime/jiterpreter-trace-generator.ts
src/mono/wasm/runtime/jiterpreter.ts

index 45b9243..a8d6df7 100644 (file)
@@ -3798,6 +3798,11 @@ max_d (double lhs, double rhs)
                return fmax (lhs, rhs);
 }
 
+#if HOST_BROWSER
+// Dummy call info passed to trace thunks outside of the monitoring phase. Traces may write to it, but we don't care what's in it
+static JiterpreterCallInfo jiterpreter_call_info = { 0 };
+#endif
+
 /*
  * If CLAUSE_ARGS is non-null, start executing from it.
  * The ERROR argument is used to avoid declaring an error object for every interp frame, its not used
@@ -7782,15 +7787,11 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
                                                 * (note that right now threading doesn't work, but it's worth being correct
                                                 *  here so that implementing thread support will be easier later.)
                                                 */
-                                               *mutable_ip = MINT_TIER_NOP_JITERPRETER;
-                                               mono_memory_barrier ();
-                                               *(volatile JiterpreterThunk*)(ip + 1) = prepare_result;
-                                               mono_memory_barrier ();
-                                               *mutable_ip = MINT_TIER_ENTER_JITERPRETER;
+                                               *mutable_ip = MINT_TIER_MONITOR_JITERPRETER;
                                                // now execute the trace
                                                // this isn't important for performance, but it makes it easier to use the
                                                //  jiterpreter early in automated tests where code only runs once
-                                               offset = prepare_result(frame, locals);
+                                               offset = prepare_result(frame, locals, &jiterpreter_call_info);
                                                ip = (guint16*) (((guint8*)ip) + offset);
                                                break;
                                }
@@ -7801,9 +7802,18 @@ MINT_IN_CASE(MINT_BRTRUE_I8_SP) ZEROP_SP(gint64, !=); MINT_IN_BREAK;
                        MINT_IN_BREAK;
                }
 
+               MINT_IN_CASE(MINT_TIER_MONITOR_JITERPRETER) {
+                       // The trace is in monitoring mode, where we track how far it actually goes
+                       //  each time it is executed for a while. After N more hits, we either
+                       //  turn it into an ENTER or a NOP depending on how well it is working
+                       ptrdiff_t offset = mono_jiterp_monitor_trace (ip, frame, locals);
+                       ip = (guint16*) (((guint8*)ip) + offset);
+                       MINT_IN_BREAK;
+               }
+
                MINT_IN_CASE(MINT_TIER_ENTER_JITERPRETER) {
                        JiterpreterThunk thunk = (void*)READ32(ip + 1);
-                       ptrdiff_t offset = thunk(frame, locals);
+                       ptrdiff_t offset = thunk(frame, locals, &jiterpreter_call_info);
                        ip = (guint16*) (((guint8*)ip) + offset);
                        MINT_IN_BREAK;
                }
index 5dcc92e..31ba66e 100644 (file)
@@ -877,13 +877,17 @@ typedef struct {
        // 64-bits because it can get very high if estimate heat is turned on
        gint64 hit_count;
        JiterpreterThunk thunk;
+       int penalty_total;
 } TraceInfo;
 
-#define MAX_TRACE_SEGMENTS 256
+// The maximum number of trace segments used to store TraceInfo. This limits
+//  the maximum total number of traces to MAX_TRACE_SEGMENTS * TRACE_SEGMENT_SIZE
+#define MAX_TRACE_SEGMENTS 1024
 #define TRACE_SEGMENT_SIZE 1024
 
 static volatile gint32 trace_count = 0;
 static TraceInfo *trace_segments[MAX_TRACE_SEGMENTS] = { NULL };
+static gint32 traces_rejected = 0;
 
 static TraceInfo *
 trace_info_allocate_segment (gint32 index) {
@@ -917,7 +921,14 @@ trace_info_get (gint32 index) {
 
 static gint32
 trace_info_alloc () {
-       gint32 index = trace_count++;
+       gint32 index = trace_count++,
+               limit = (MAX_TRACE_SEGMENTS * TRACE_SEGMENT_SIZE);
+       // Make sure we're not out of space in the trace info table.
+       if (index == limit)
+               g_print ("MONO_WASM: Reached maximum number of jiterpreter trace entry points (%d).\n", limit);
+       if (index >= limit)
+               return -1;
+
        TraceInfo *info = trace_info_get (index);
        info->hit_count = 0;
        info->thunk = NULL;
@@ -984,20 +995,24 @@ jiterp_insert_entry_points (void *_imethod, void *_td)
 
                if (enabled && should_generate) {
                        gint32 trace_index = trace_info_alloc ();
-
-                       td->cbb = bb;
-                       imethod->contains_traces = TRUE;
-                       InterpInst *ins = mono_jiterp_insert_ins (td, NULL, MINT_TIER_PREPARE_JITERPRETER);
-                       memcpy(ins->data, &trace_index, sizeof (trace_index));
-
-                       // Clear the instruction counter
-                       instruction_count = 0;
-
-                       // Note that we only clear enter_at_next here, after generating a trace.
-                       // This means that the flag will stay set intentionally if we keep failing
-                       //  to generate traces, perhaps due to a string of small basic blocks
-                       //  or multiple call instructions.
-                       enter_at_next = bb->contains_call_instruction;
+                       if (trace_index < 0) {
+                               // We're out of space in the TraceInfo table.
+                               return;
+                       } else {
+                               td->cbb = bb;
+                               imethod->contains_traces = TRUE;
+                               InterpInst *ins = mono_jiterp_insert_ins (td, NULL, MINT_TIER_PREPARE_JITERPRETER);
+                               memcpy(ins->data, &trace_index, sizeof (trace_index));
+
+                               // Clear the instruction counter
+                               instruction_count = 0;
+
+                               // Note that we only clear enter_at_next here, after generating a trace.
+                               // This means that the flag will stay set intentionally if we keep failing
+                               //  to generate traces, perhaps due to a string of small basic blocks
+                               //  or multiple call instructions.
+                               enter_at_next = bb->contains_call_instruction;
+                       }
                } else if (is_backwards_branch && enabled && !should_generate) {
                        // We failed to start a trace at a backwards branch target, but that might just mean
                        //  that the loop body starts with one or two unsupported opcodes, so it may be
@@ -1233,7 +1248,7 @@ mono_jiterp_stelem_ref (
 
 EMSCRIPTEN_KEEPALIVE int
 mono_jiterp_trace_transfer (
-       int displacement, JiterpreterThunk trace, void *frame, void *pLocals
+       int displacement, JiterpreterThunk trace, void *frame, void *pLocals, JiterpreterCallInfo *cinfo
 ) {
        // This indicates that we lost a race condition, so there's no trace to call. Just bail out.
        // FIXME: Detect this at trace generation time and spin until the trace is available
@@ -1245,7 +1260,7 @@ mono_jiterp_trace_transfer (
        //  safepoint was already performed by the trace.
        int relative_displacement = 0;
        while (relative_displacement == 0)
-               relative_displacement = trace(frame, pLocals);
+               relative_displacement = trace(frame, pLocals, cinfo);
 
        // We got a relative displacement other than 0, so the trace bailed out somewhere or
        //  branched to another branch target. Time to return (and our caller will return too.)
@@ -1326,6 +1341,80 @@ mono_jiterp_write_number_unaligned (void *dest, double value, int mode) {
        }
 }
 
+#define TRACE_PENALTY_LIMIT 200
+#define TRACE_MONITORING_DETAILED FALSE
+
+/*
+ * Executes the trace behind a MINT_TIER_MONITOR_JITERPRETER opcode and scores how early it bailed out.
+ *
+ * ip:     address of the monitor opcode; the 32-bit value at ip + 1 is the trace's TraceInfo index.
+ * _frame: the current InterpFrame, passed through to the trace thunk.
+ * locals: passed through to the trace thunk unchanged.
+ *
+ * Returns the displacement produced by the thunk, a byte offset that the interpreter adds to ip.
+ *
+ * Once the trace's hit count (less mono_opt_jiterpreter_minimum_trace_hit_count) equals
+ *  mono_opt_jiterpreter_trace_monitoring_period, the opcode at ip is rewritten in place: it becomes
+ *  MINT_TIER_ENTER_JITERPRETER if the average penalty is within the configured threshold, otherwise
+ *  it is left as MINT_TIER_NOP_JITERPRETER and the trace is counted as rejected.
+ */
+ptrdiff_t
+mono_jiterp_monitor_trace (const guint16 *ip, void *_frame, void *locals)
+{
+       gint32 index = READ32 (ip + 1);
+       TraceInfo *info = trace_info_get (index);
+       g_assert (info);
+
+       JiterpreterThunk thunk = info->thunk;
+       // FIXME: This shouldn't be possible
+       g_assert (((guint32)(void *)thunk) > JITERPRETER_NOT_JITTED);
+
+       // -1 means "no early exit recorded"; the trace only overwrites it on a monitored exit
+       JiterpreterCallInfo cinfo;
+       cinfo.backward_branch_taken = 0;
+       cinfo.bailout_opcode_count = -1;
+
+       InterpFrame *frame = _frame;
+
+       ptrdiff_t result = thunk (frame, locals, &cinfo);
+       // If a backward branch was taken, we can treat the trace as if it successfully
+       //  executed at least one time. We don't know how long it actually ran, but back
+       //  branches are almost always going to be loops. It's fine if a bailout happens
+       //  after multiple loop iterations.
+       if (
+               (cinfo.bailout_opcode_count >= 0) &&
+               !cinfo.backward_branch_taken &&
+               (cinfo.bailout_opcode_count < mono_opt_jiterpreter_trace_monitoring_long_distance)
+       ) {
+               // A bailout at or before the short distance costs the full TRACE_PENALTY_LIMIT
+               //  (i.e. 2.0 once divided by 100 below), and the cost falls linearly towards 0
+               //  as the opcode count approaches the long distance.
+               int range = mono_opt_jiterpreter_trace_monitoring_long_distance - mono_opt_jiterpreter_trace_monitoring_short_distance;
+               int penalty = TRACE_PENALTY_LIMIT;
+
+               // If long distance <= short distance there is nothing to interpolate across, and
+               //  dividing by the range would produce inf/NaN or a negative penalty, so every
+               //  early bailout just takes the full penalty.
+               if (range > 0) {
+                       float scaled = (float)(cinfo.bailout_opcode_count - mono_opt_jiterpreter_trace_monitoring_short_distance) / range;
+                       float raw_penalty = (1.0f - scaled) * TRACE_PENALTY_LIMIT;
+                       // Clamp while still in floating point: converting an out-of-range float
+                       //  to int is undefined behavior.
+                       if (raw_penalty < TRACE_PENALTY_LIMIT)
+                               penalty = (int)raw_penalty;
+               }
+               info->penalty_total += penalty;
+
+               // g_print ("trace #%d @%p '%s' bailout recorded at opcode #%d, penalty=%d\n", index, (const void *)ip, frame->imethod->method->name, cinfo.bailout_opcode_count, penalty);
+       }
+
+       gint64 hit_count = info->hit_count++ - mono_opt_jiterpreter_minimum_trace_hit_count;
+       if (hit_count == mono_opt_jiterpreter_trace_monitoring_period) {
+               // Prepare to enable the trace. The opcode is a NOP while its operand is being
+               //  replaced, and stays a NOP if the trace ends up rejected.
+               volatile guint16 *mutable_ip = (volatile guint16*)ip;
+               *mutable_ip = MINT_TIER_NOP_JITERPRETER;
+
+               mono_memory_barrier ();
+               // Both sides are divided by 100 so that they are expressed in whole penalty units
+               float average_penalty = info->penalty_total / (float)hit_count / 100.0f,
+                       threshold = (mono_opt_jiterpreter_trace_monitoring_max_average_penalty / 100.0f);
+
+               if (average_penalty <= threshold) {
+                       // Replace the trace index operand with the thunk itself before
+                       //  switching the opcode over to ENTER
+                       *(volatile JiterpreterThunk*)(ip + 1) = thunk;
+                       mono_memory_barrier ();
+                       *mutable_ip = MINT_TIER_ENTER_JITERPRETER;
+                       if (mono_opt_jiterpreter_stats_enabled && TRACE_MONITORING_DETAILED)
+                               g_print ("trace #%d @%p '%s' accepted; average_penalty %f <= %f\n", index, (const void *)ip, frame->imethod->method->name, average_penalty, threshold);
+               } else {
+                       traces_rejected++;
+                       if (mono_opt_jiterpreter_stats_enabled) {
+                               char * full_name = mono_method_get_full_name (frame->imethod->method);
+                               g_print ("trace #%d @%p '%s' rejected; average_penalty %f > %f\n", index, (const void *)ip, full_name, average_penalty, threshold);
+                               g_free (full_name);
+                       }
+               }
+       }
+
+       return result;
+}
+
+// Returns the number of traces rejected so far by the monitoring phase (the traces_rejected counter).
+EMSCRIPTEN_KEEPALIVE gint32
+mono_jiterp_get_rejected_trace_count (void)
+{
+       return traces_rejected;
+}
+
 // HACK: fix C4206
 EMSCRIPTEN_KEEPALIVE
 #endif // HOST_BROWSER
index db198f4..896387b 100644 (file)
 // NOT_JITTED indicates that the trace was not jitted and it should be turned into a NOP
 #define JITERPRETER_NOT_JITTED 1
 
-typedef const ptrdiff_t (*JiterpreterThunk) (void *frame, void *pLocals);
+typedef struct { // Out-parameter handed to every trace thunk; generated traces store into it at fixed byte offsets, so field order and size must not change
+       gint32 backward_branch_taken; // offset 0: set to 1 by a trace when it takes a backward branch
+       gint32 bailout_opcode_count; // offset 4: approximate opcode count stored by a monitored exit; the monitoring phase presets it to -1
+} JiterpreterCallInfo;
+
+typedef const ptrdiff_t (*JiterpreterThunk) (void *frame, void *pLocals, JiterpreterCallInfo *cinfo);
 typedef void (*WasmJitCallThunk) (void *ret_sp, void *sp, void *ftndesc, gboolean *thrown);
 typedef void (*WasmDoJitCall) (gpointer cb, gpointer arg, gboolean *out_thrown);
 
@@ -139,6 +144,9 @@ mono_jiterp_imethod_to_ftnptr (InterpMethod *imethod);
 void
 mono_jiterp_enum_hasflag (MonoClass *klass, gint32 *dest, stackval *sp1, stackval *sp2);
 
+ptrdiff_t
+mono_jiterp_monitor_trace (const guint16 *ip, void *frame, void *locals);
+
 #endif // __MONO_MINI_INTERPRETER_INTERNALS_H__
 
 extern WasmDoJitCall jiterpreter_do_jit_call;
index a0418ad..28b87e2 100644 (file)
@@ -840,6 +840,7 @@ OPDEF(MINT_METADATA_UPDATE_LDFLDA, "metadata_update.ldflda", 5, 1, 1, MintOpTwoS
 OPDEF(MINT_TIER_PREPARE_JITERPRETER, "tier_prepare_jiterpreter", 3, 0, 0, MintOpInt)
 OPDEF(MINT_TIER_NOP_JITERPRETER, "tier_nop_jiterpreter", 3, 0, 0, MintOpInt)
 OPDEF(MINT_TIER_ENTER_JITERPRETER, "tier_enter_jiterpreter", 3, 0, 0, MintOpInt)
+OPDEF(MINT_TIER_MONITOR_JITERPRETER, "tier_monitor_jiterpreter", 3, 0, 0, MintOpInt)
 #endif // HOST_BROWSER
 
 IROPDEF(MINT_NOP, "nop", 1, 0, 0, MintOpNoArgs)
index 27914b7..9637e05 100644 (file)
@@ -119,6 +119,14 @@ DEFINE_INT(jiterpreter_minimum_trace_length, "jiterpreter-minimum-trace-length",
 DEFINE_INT(jiterpreter_minimum_distance_between_traces, "jiterpreter-minimum-distance-between-traces", 4, "Don't insert entry points closer together than this")
 // once a trace entry point is inserted, we only actually JIT code for it once it's been hit this many times
 DEFINE_INT(jiterpreter_minimum_trace_hit_count, "jiterpreter-minimum-trace-hit-count", 5000, "JIT trace entry points once they are hit this many times")
+// once jitted, a trace's prepare opcode becomes a monitor opcode and stays one for this many hits before being converted to enter or nop
+DEFINE_INT(jiterpreter_trace_monitoring_period, "jiterpreter-trace-monitoring-period", 1000, "Monitor jitted traces for this many calls to determine whether to keep them")
+// traces that process fewer than this many opcodes have a high exit penalty, more than this have a low exit penalty
+DEFINE_INT(jiterpreter_trace_monitoring_short_distance, "jiterpreter-trace-monitoring-short-distance", 4, "Traces that exit after processing this many opcodes have a reduced exit penalty")
+// traces that process this many opcodes have no exit penalty
+DEFINE_INT(jiterpreter_trace_monitoring_long_distance, "jiterpreter-trace-monitoring-long-distance", 10, "Traces that exit after processing this many opcodes have no exit penalty")
+// the average penalty value for a trace is compared against this threshold / 100 to decide whether to discard it
+DEFINE_INT(jiterpreter_trace_monitoring_max_average_penalty, "jiterpreter-trace-monitoring-max-average-penalty", 75, "If the average penalty value for a trace is above this value it will be rejected")
 // After a do_jit_call call site is hit this many times, we will queue it to be jitted
 DEFINE_INT(jiterpreter_jit_call_trampoline_hit_count, "jiterpreter-jit-call-hit-count", 1000, "Queue specialized do_jit_call trampoline for JIT after this many hits")
 // After a do_jit_call call site is hit this many times without being jitted, we will flush the JIT queue
index 0549ef7..ab1cff3 100644 (file)
@@ -121,6 +121,7 @@ const fn_signatures: SigLine[] = [
     [true, "mono_jiterp_debug_count", "number", []],
     [true, "mono_jiterp_get_trace_hit_count", "number", ["number"]],
     [true, "mono_jiterp_get_polling_required_address", "number", []],
+    [true, "mono_jiterp_get_rejected_trace_count", "number", []],
     ...legacy_interop_cwraps
 ];
 
@@ -238,6 +239,7 @@ export interface t_Cwraps {
     mono_jiterp_get_trace_hit_count(traceIndex: number): number;
     mono_jiterp_get_polling_required_address(): Int32Ptr;
     mono_jiterp_write_number_unaligned(destination: VoidPtr, value: number, mode: number): void;
+    mono_jiterp_get_rejected_trace_count(): number;
 }
 
 const wrapped_c_functions: t_Cwraps = <any>{};
index c2d5099..d9ee083 100644 (file)
@@ -1182,6 +1182,17 @@ class Cfg {
                             const disp = this.dispatchTable.get(segment.target)!;
                             if (this.trace)
                                 console.log(`backward br from ${(<any>segment.from).toString(16)} to ${(<any>segment.target).toString(16)}: disp=${disp}`);
+
+                            // set the backward branch taken flag in the cinfo so that the monitoring phase
+                            //  knows we took a backward branch. this is unfortunate but unavoidable overhead
+                            // we just make it a flag instead of an increment to reduce the cost
+                            this.builder.local("cinfo");
+                            // TODO: Store the offset in opcodes instead? Probably not useful information
+                            this.builder.i32_const(1);
+                            this.builder.appendU8(WasmOpcode.i32_store);
+                            this.builder.appendMemarg(0, 0); // JiterpreterCallInfo.backward_branch_taken
+
+                            // set the dispatch index for the br_table
                             this.builder.i32_const(disp);
                             this.builder.local("disp", WasmOpcode.set_local);
                         } else {
@@ -1276,6 +1287,24 @@ export function append_bailout (builder: WasmBuilder, ip: MintOpcodePtr, reason:
     builder.appendU8(WasmOpcode.return_);
 }
 
+// generate a bailout that is recorded for the monitoring phase as a possible early exit. opcodeCounter is the caller's estimate of how many opcodes ran before this exit.
+export function append_exit (builder: WasmBuilder, ip: MintOpcodePtr, opcodeCounter: number, reason: BailoutReason) {
+    if (opcodeCounter <= (builder.options.monitoringLongDistance + 1)) { // exits further in than this emit no store, so cinfo keeps whatever the caller preset
+        builder.local("cinfo"); // presumably pushes the cinfo argument as the store address - confirm against WasmBuilder.local
+        builder.i32_const(opcodeCounter);
+        builder.appendU8(WasmOpcode.i32_store);
+        builder.appendMemarg(4, 0); // bailout_opcode_count (second gint32 of JiterpreterCallInfo, hence 4)
+    }
+
+    builder.ip_const(ip); // presumably the operand consumed by the return_ below - confirm against ip_const
+    if (builder.options.countBailouts) { // optionally report this bailout's trace base and reason through the "bailout" import
+        builder.i32_const(builder.base);
+        builder.i32_const(reason);
+        builder.callImport("bailout");
+    }
+    builder.appendU8(WasmOpcode.return_);
+}
+
 export function copyIntoScratchBuffer (src: NativePointer, size: number) : NativePointer {
     if (!scratchBuffer)
         scratchBuffer = Module._malloc(64);
@@ -1551,6 +1580,10 @@ export type JiterpreterOptions = {
     eliminateNullChecks: boolean;
     minimumTraceLength: number;
     minimumTraceHitCount: number;
+    monitoringPeriod: number;
+    monitoringShortDistance: number;
+    monitoringLongDistance: number;
+    monitoringMaxAveragePenalty: number;
     jitCallHitCount: number;
     jitCallFlushThreshold: number;
     interpEntryHitCount: number;
@@ -1577,6 +1610,10 @@ const optionNames : { [jsName: string] : string } = {
     "directJitCalls": "jiterpreter-direct-jit-calls",
     "minimumTraceLength": "jiterpreter-minimum-trace-length",
     "minimumTraceHitCount": "jiterpreter-minimum-trace-hit-count",
+    "monitoringPeriod": "jiterpreter-trace-monitoring-period",
+    "monitoringShortDistance": "jiterpreter-trace-monitoring-short-distance",
+    "monitoringLongDistance": "jiterpreter-trace-monitoring-long-distance",
+    "monitoringMaxAveragePenalty": "jiterpreter-trace-monitoring-max-average-penalty",
     "jitCallHitCount": "jiterpreter-jit-call-hit-count",
     "jitCallFlushThreshold": "jiterpreter-jit-call-queue-flush-threshold",
     "interpEntryHitCount": "jiterpreter-interp-entry-hit-count",
index 5993107..e588433 100644 (file)
@@ -12,7 +12,7 @@ import { MintOpcode, OpcodeInfo } from "./mintops";
 import cwraps from "./cwraps";
 import {
     MintOpcodePtr, WasmValtype, WasmBuilder,
-    append_memset_dest, append_bailout,
+    append_memset_dest, append_bailout, append_exit,
     append_memmove_dest_src, try_append_memset_fast,
     try_append_memmove_fast, counters,
     getMemberOffset, JiterpMember, BailoutReason,
@@ -147,7 +147,9 @@ export function generate_wasm_body (
     const abort = <MintOpcodePtr><any>0;
     let isFirstInstruction = true, inBranchBlock = false,
         firstOpcodeInBlock = true;
-    let result = 0;
+    let result = 0,
+        prologueOpcodeCounter = 0,
+        conditionalOpcodeCounter = 0;
     const traceIp = ip;
 
     addressTakenLocals.clear();
@@ -196,7 +198,14 @@ export function generate_wasm_body (
             startBranchBlock = isBackBranchTarget || isForwardBranchTarget ||
                 // If a method contains backward branches, we also need to check eip at the first insn
                 //  because a backward branch might target a point in the middle of the trace
-                (isFirstInstruction && backwardBranchTable);
+                (isFirstInstruction && backwardBranchTable),
+            // We want to approximate the number of unconditionally executed instructions along with
+            //  the ones that were probably conditionally executed by the time we reached the exit point
+            // We don't know the exact path that would have taken us to a given point, but it's a reasonable
+            //  guess that methods dense with branches are more likely to take a complex path to reach
+            //  a given exit
+            exitOpcodeCounter = conditionalOpcodeCounter + prologueOpcodeCounter +
+                builder.branchTargets.size;
         let isLowValueOpcode = false,
             skipDregInvalidation = false;
 
@@ -220,6 +229,11 @@ export function generate_wasm_body (
             inBranchBlock = true;
             firstOpcodeInBlock = true;
             eraseInferredState();
+            // Monitoring wants an opcode count that is a measurement of how many opcodes
+            //  we definitely executed, so we want to ignore any opcodes that might
+            //  have been skipped due to forward branching. This gives us an approximation
+            //  of that by only counting how far we are from the most recent branch target
+            conditionalOpcodeCounter = 0;
         }
 
         isFirstInstruction = false;
@@ -387,6 +401,7 @@ export function generate_wasm_body (
                         builder.i32_const(targetTrace);
                         builder.local("frame");
                         builder.local("pLocals");
+                        builder.local("cinfo");
                         builder.callImport("transfer");
                         builder.appendU8(WasmOpcode.return_);
                         ip = abort;
@@ -394,6 +409,7 @@ export function generate_wasm_body (
                 }
                 break;
 
+            case MintOpcode.MINT_TIER_MONITOR_JITERPRETER:
             case MintOpcode.MINT_TIER_PREPARE_JITERPRETER:
             case MintOpcode.MINT_TIER_NOP_JITERPRETER: // FIXME: Should we abort for NOPs like ENTERs?
             case MintOpcode.MINT_NOP:
@@ -826,7 +842,7 @@ export function generate_wasm_body (
                     // We generate a bailout instead of aborting, because we don't want calls
                     //  to abort the entire trace if we have branch support enabled - the call
                     //  might be infrequently hit and as a result it's worth it to keep going.
-                    append_bailout(builder, ip, BailoutReason.Call);
+                    append_exit(builder, ip, exitOpcodeCounter, BailoutReason.Call);
                     isLowValueOpcode = true;
                 } else {
                     // We're in a block that executes unconditionally, and no branches have been
@@ -846,7 +862,7 @@ export function generate_wasm_body (
             case MintOpcode.MINT_CALL_DELEGATE:
                 // See comments for MINT_CALL
                 if (builder.branchTargets.size > 0) {
-                    append_bailout(builder, ip,
+                    append_exit(builder, ip, exitOpcodeCounter,
                         opcode == MintOpcode.MINT_CALL_DELEGATE
                             ? BailoutReason.CallDelegate
                             : BailoutReason.Call
@@ -880,6 +896,8 @@ export function generate_wasm_body (
                 // As above, only abort if this throw happens unconditionally.
                 // Otherwise, it may be in a branch that is unlikely to execute
                 if (builder.branchTargets.size > 0) {
+                    // Not an exit, because throws are by definition unlikely
+                    // We shouldn't make optimization decisions based on them.
                     append_bailout(builder, ip, BailoutReason.Throw);
                     isLowValueOpcode = true;
                 } else {
@@ -1078,6 +1096,9 @@ export function generate_wasm_body (
                     )
                 ) {
                     if ((builder.branchTargets.size > 0) || trapTraceErrors || builder.options.countBailouts) {
+                        // Not an exit, because returns are normal and we don't want to make them more expensive.
+                        // FIXME: Or do we want to record them? Early conditional returns might reduce the value of a trace,
+                        //  but the main problem is more likely to be calls early in traces. Worth testing later.
                         append_bailout(builder, ip, BailoutReason.Return);
                         isLowValueOpcode = true;
                     } else
@@ -1150,7 +1171,8 @@ export function generate_wasm_body (
                     //  to only perform a conditional bailout
                     // complex safepoint branches, just generate a bailout
                     if (builder.branchTargets.size > 0) {
-                        append_bailout(builder, ip, BailoutReason.ComplexBranch);
+                        // FIXME: Try to reduce the number of these
+                        append_exit(builder, ip, exitOpcodeCounter, BailoutReason.ComplexBranch);
                         isLowValueOpcode = true;
                     } else
                         ip = abort;
@@ -1195,8 +1217,13 @@ export function generate_wasm_body (
                 builder.traceBuf.push(stmtText);
             }
 
-            if (!isLowValueOpcode)
+            if (!isLowValueOpcode) {
+                if (inBranchBlock)
+                    conditionalOpcodeCounter++;
+                else
+                    prologueOpcodeCounter++;
                 result++;
+            }
 
             ip += <any>(info[1] * 2);
             if (<any>ip <= (<any>endOfBody))
index dc58c9d..852199a 100644 (file)
@@ -349,7 +349,8 @@ function initialize_builder (builder: WasmBuilder) {
     builder.defineType(
         "trace", {
             "frame": WasmValtype.i32,
-            "pLocals": WasmValtype.i32
+            "pLocals": WasmValtype.i32,
+            "cinfo": WasmValtype.i32,
         }, WasmValtype.i32, true
     );
     builder.defineType(
@@ -593,6 +594,7 @@ function initialize_builder (builder: WasmBuilder) {
             "trace": WasmValtype.i32,
             "frame": WasmValtype.i32,
             "locals": WasmValtype.i32,
+            "cinfo": WasmValtype.i32,
         }, WasmValtype.i32, true
     );
     builder.defineType(
@@ -969,7 +971,8 @@ export function jiterpreter_dump_stats (b?: boolean, concise?: boolean) {
 
     console.log(`// jitted ${counters.bytesGenerated} bytes; ${counters.tracesCompiled} traces (${counters.traceCandidates} candidates, ${(counters.tracesCompiled / counters.traceCandidates * 100).toFixed(1)}%); ${counters.jitCallsCompiled} jit_calls (${(counters.directJitCallsCompiled / counters.jitCallsCompiled * 100).toFixed(1)}% direct); ${counters.entryWrappersCompiled} interp_entries`);
     const backBranchHitRate = (counters.backBranchesEmitted / (counters.backBranchesEmitted + counters.backBranchesNotEmitted)) * 100;
-    console.log(`// time: ${elapsedTimes.generation | 0}ms generating, ${elapsedTimes.compilation | 0}ms compiling wasm. ${counters.nullChecksEliminated} null checks eliminated. ${counters.backBranchesEmitted} back-branches emitted (${counters.backBranchesNotEmitted} failed, ${backBranchHitRate.toFixed(1)}%)`);
+    const tracesRejected = cwraps.mono_jiterp_get_rejected_trace_count();
+    console.log(`// time: ${elapsedTimes.generation | 0}ms generating, ${elapsedTimes.compilation | 0}ms compiling wasm. ${counters.nullChecksEliminated} cknulls removed. ${counters.backBranchesEmitted} back-branches (${counters.backBranchesNotEmitted} failed, ${backBranchHitRate.toFixed(1)}%), ${tracesRejected} traces rejected`);
     if (concise)
         return;