[jiterp] Inline some performance-sensitive operations into traces (#82469)
author Katelyn Gadd <kg@luminance.org>
Wed, 22 Feb 2023 12:48:07 +0000 (04:48 -0800)
committer GitHub <noreply@github.com>
Wed, 22 Feb 2023 12:48:07 +0000 (04:48 -0800)
Inline getchr, strlen, and getitem_span into traces
Use offsetof to compute more offsets used by the jiterpreter instead of hand computing them

src/mono/mono/mini/interp/jiterpreter.c
src/mono/wasm/runtime/cwraps.ts
src/mono/wasm/runtime/jiterpreter-interp-entry.ts
src/mono/wasm/runtime/jiterpreter-support.ts
src/mono/wasm/runtime/jiterpreter-trace-generator.ts
src/mono/wasm/runtime/jiterpreter.ts

index 33f1d6b..abb004c 100644 (file)
@@ -196,29 +196,6 @@ mono_jiterp_value_copy (void *dest, void *src, MonoClass *klass) {
 }
 
 EMSCRIPTEN_KEEPALIVE int
-mono_jiterp_strlen_ref (MonoString **ppString, int *result) {
-       MonoString *pString = *ppString;
-       if (!pString)
-               return 0;
-
-       *result = mono_string_length_internal(pString);
-       return 1;
-}
-
-EMSCRIPTEN_KEEPALIVE int
-mono_jiterp_getchr_ref (MonoString **ppString, int *pIndex, int *result) {
-       int index = *pIndex;
-       MonoString *pString = *ppString;
-       if (!pString)
-               return 0;
-       if ((index < 0) || (index >= mono_string_length_internal(pString)))
-               return 0;
-
-       *result = mono_string_chars_internal(pString)[index];
-       return 1;
-}
-
-EMSCRIPTEN_KEEPALIVE int
 mono_jiterp_try_newobj_inlined (MonoObject **destination, MonoVTable *vtable) {
        *destination = 0;
        if (!vtable->initialized)
@@ -232,22 +209,6 @@ mono_jiterp_try_newobj_inlined (MonoObject **destination, MonoVTable *vtable) {
 }
 
 EMSCRIPTEN_KEEPALIVE int
-mono_jiterp_getitem_span (
-       void **destination, MonoSpanOfVoid *span, int index, size_t element_size
-) {
-       if (!span)
-               return 0;
-
-       const gint32 length = span->_length;
-       if ((index < 0) || (index >= length))
-               return 0;
-
-       unsigned char * pointer = (unsigned char *)span->_reference;
-       *destination = pointer + (index * element_size);
-       return 1;
-}
-
-EMSCRIPTEN_KEEPALIVE int
 mono_jiterp_gettype_ref (
        MonoObject **destination, MonoObject **source
 ) {
@@ -511,15 +472,41 @@ mono_jiterp_relop_fp (double lhs, double rhs, int opcode) {
 
 #undef JITERP_RELOP
 
-// we use these helpers at JIT time to figure out where to do memory loads and stores
-EMSCRIPTEN_KEEPALIVE size_t
-mono_jiterp_get_offset_of_vtable_initialized_flag () {
-       return offsetof(MonoVTable, initialized);
-}
+#define JITERP_MEMBER_VT_INITIALIZED 0
+#define JITERP_MEMBER_ARRAY_DATA 1
+#define JITERP_MEMBER_STRING_LENGTH 2
+#define JITERP_MEMBER_STRING_DATA 3
+#define JITERP_MEMBER_IMETHOD 4
+#define JITERP_MEMBER_DATA_ITEMS 5
+#define JITERP_MEMBER_RMETHOD 6
+#define JITERP_MEMBER_SPAN_LENGTH 7
+#define JITERP_MEMBER_SPAN_DATA 8
 
+// we use these helpers at JIT time to figure out where to do memory loads and stores; this one maps a JITERP_MEMBER_* id (mirrored by the JiterpMember enum in jiterpreter-support.ts) to the byte offset of the corresponding struct field
 EMSCRIPTEN_KEEPALIVE size_t
-mono_jiterp_get_offset_of_array_data () {
-       return MONO_STRUCT_OFFSET (MonoArray, vector);
+mono_jiterp_get_member_offset (int member) {
+       switch (member) {
+               case JITERP_MEMBER_VT_INITIALIZED:
+                       return MONO_STRUCT_OFFSET (MonoVTable, initialized);
+               case JITERP_MEMBER_ARRAY_DATA:
+                       return MONO_STRUCT_OFFSET (MonoArray, vector);
+               case JITERP_MEMBER_STRING_LENGTH:
+                       return MONO_STRUCT_OFFSET (MonoString, length);
+               case JITERP_MEMBER_STRING_DATA:
+                       return MONO_STRUCT_OFFSET (MonoString, chars);
+               case JITERP_MEMBER_IMETHOD:
+                       return offsetof (InterpFrame, imethod);
+               case JITERP_MEMBER_DATA_ITEMS:
+                       return offsetof (InterpMethod, data_items);
+               case JITERP_MEMBER_RMETHOD:
+                       return offsetof (JiterpEntryDataHeader, rmethod);
+               case JITERP_MEMBER_SPAN_LENGTH:
+                       return offsetof (MonoSpanOfVoid, _length);
+               case JITERP_MEMBER_SPAN_DATA:
+                       return offsetof (MonoSpanOfVoid, _reference);
+               default:
+                       g_assert_not_reached(); // no offset is returned for an unrecognized member id
+       }
 }
 
 EMSCRIPTEN_KEEPALIVE size_t
@@ -713,6 +700,7 @@ jiterp_should_abort_trace (InterpInst *ins, gboolean *inside_branch_block)
                case MINT_STRLEN:
                case MINT_GETCHR:
                case MINT_GETITEM_SPAN:
+               case MINT_GETITEM_LOCALSPAN:
                case MINT_INTRINS_SPAN_CTOR:
                case MINT_INTRINS_UNSAFE_BYTE_OFFSET:
                case MINT_INTRINS_GET_TYPE:
index c6096e4..7842d98 100644 (file)
@@ -102,8 +102,7 @@ const fn_signatures: SigLine[] = [
     // jiterpreter
     [true, "mono_jiterp_get_trace_bailout_count", "number", ["number"]],
     [true, "mono_jiterp_value_copy", "void", ["number", "number", "number"]],
-    [true, "mono_jiterp_get_offset_of_vtable_initialized_flag", "number", []],
-    [true, "mono_jiterp_get_offset_of_array_data", "number", []],
+    [true, "mono_jiterp_get_member_offset", "number", ["number"]],
     [false, "mono_jiterp_encode_leb52", "number", ["number", "number", "number"]],
     [false, "mono_jiterp_encode_leb64_ref", "number", ["number", "number", "number"]],
     [false, "mono_jiterp_encode_leb_signed_boundary", "number", ["number", "number", "number"]],
@@ -243,8 +242,7 @@ export interface t_Cwraps {
 
     mono_jiterp_get_trace_bailout_count(reason: number): number;
     mono_jiterp_value_copy(destination: VoidPtr, source: VoidPtr, klass: MonoClass): void;
-    mono_jiterp_get_offset_of_vtable_initialized_flag(): number;
-    mono_jiterp_get_offset_of_array_data(): number;
+    mono_jiterp_get_member_offset(id: number): number;
     // Returns bytes written (or 0 if writing failed)
     mono_jiterp_encode_leb52(destination: VoidPtr, value: number, valueIsSigned: number): number;
     // Returns bytes written (or 0 if writing failed)
index 2dbf50a..bc649e6 100644 (file)
@@ -13,7 +13,8 @@ import {
     WasmValtype, WasmBuilder, addWasmFunctionPointer,
     _now, elapsedTimes, counters, getRawCwrap, importDef,
     getWasmFunctionTable, recordFailure, getOptions,
-    JiterpreterOptions, shortNameBase
+    JiterpreterOptions, shortNameBase,
+    getMemberOffset, JiterpMember
 } from "./jiterpreter-support";
 
 // Controls miscellaneous diagnostic output.
@@ -39,11 +40,10 @@ typedef struct {
 } JiterpEntryDataHeader;
 */
 
-const // offsetOfStack = 12,
+const
     maxInlineArgs = 16,
     // just allocate a bunch of extra space
-    sizeOfJiterpEntryData = 64,
-    offsetOfRMethod = 0;
+    sizeOfJiterpEntryData = 64;
 
 const maxJitQueueLength = 4,
     queueFlushDelayMs = 10;
@@ -541,7 +541,7 @@ function generate_wasm_body (
 
     // Store the cleaned up rmethod value into the data.rmethod field of the scratch buffer
     builder.appendU8(WasmOpcode.i32_store);
-    builder.appendMemarg(offsetOfRMethod, 0); // data.rmethod
+    builder.appendMemarg(getMemberOffset(JiterpMember.Rmethod), 0); // data.rmethod
 
     // prologue takes data->rmethod and initializes data->context, then returns a value for sp_args
     // prologue also performs thread attach
index 133c90d..3e91b3d 100644 (file)
@@ -936,6 +936,28 @@ export function recordFailure () : void {
     }
 }
 
+export const enum JiterpMember { // values are passed to mono_jiterp_get_member_offset and mirror the JITERP_MEMBER_* defines in jiterpreter.c
+    VtableInitialized = 0,
+    ArrayData = 1,
+    StringLength = 2,
+    StringData = 3,
+    Imethod = 4,
+    DataItems = 5,
+    Rmethod = 6,
+    SpanLength = 7,
+    SpanData = 8,
+}
+
+const memberOffsets : { [index: number] : number } = {}; // offsets already fetched from the runtime, keyed by JiterpMember value
+
+export function getMemberOffset (member: JiterpMember) { // returns the field offset for member, calling into the runtime only the first time each member is requested
+    const cached = memberOffsets[member];
+    if (cached === undefined) // compare against undefined rather than falsiness so a stored offset of 0 still counts as cached
+        return memberOffsets[member] = cwraps.mono_jiterp_get_member_offset(<any>member);
+    else
+        return cached;
+}
+
 export function getRawCwrap (name: string): Function {
     const result = (<any>Module)["asm"][name];
     if (typeof (result) !== "function")
index 2218808..11b0956 100644 (file)
@@ -14,10 +14,10 @@ import {
     MintOpcodePtr, WasmValtype, WasmBuilder,
     copyIntoScratchBuffer, append_memset_dest,
     append_memmove_dest_src, try_append_memset_fast,
-    try_append_memmove_fast, counters
+    try_append_memmove_fast, counters,
+    getMemberOffset, JiterpMember
 } from "./jiterpreter-support";
 import {
-    offsetOfDataItems, offsetOfImethod,
     sizeOfDataItem, maxModuleSize,
 
     disabledOpcodes, countCallTargets,
@@ -109,8 +109,8 @@ function getArgF64 (ip: MintOpcodePtr, indexPlusOne: number) {
 
 function get_imethod_data (frame: NativePointer, index: number) {
     // FIXME: Encoding this data directly into the trace will prevent trace reuse
-    const iMethod = getU32(<any>frame + offsetOfImethod);
-    const pData = getU32(iMethod + offsetOfDataItems);
+    const iMethod = getU32(<any>frame + getMemberOffset(JiterpMember.Imethod)); // reads frame->imethod (offset of imethod within InterpFrame)
+    const pData = getU32(iMethod + getMemberOffset(JiterpMember.DataItems)); // reads imethod->data_items (offset of data_items within InterpMethod)
     const dataOffset = pData + (index * sizeOfDataItem);
     return getU32(dataOffset);
 }
@@ -247,7 +247,7 @@ export function generate_wasm_body (
             case MintOpcode.MINT_TIER_PATCHPOINT: {
                 // We need to make sure to notify the interpreter about tiering opcodes
                 //  so that tiering up will still happen
-                const iMethod = getU32(<any>frame + offsetOfImethod);
+                const iMethod = getU32(<any>frame + getMemberOffset(JiterpMember.Imethod));
                 builder.ptr_const(iMethod);
                 // increase_entry_count will return 1 if we can continue, otherwise
                 //  we need to bail out into the interpreter so it can perform tiering
@@ -371,56 +371,107 @@ export function generate_wasm_body (
                 break;
             }
 
-            case MintOpcode.MINT_STRLEN:
-                builder.block();
-                append_ldloca(builder, getArgU16(ip, 2), 0, true);
-                append_ldloca(builder, getArgU16(ip, 1), 4, true);
-                builder.callImport("strlen");
-                builder.appendU8(WasmOpcode.br_if);
-                builder.appendULeb(0);
-                append_bailout(builder, ip, BailoutReason.StringOperationFailed);
-                builder.endBlock();
+            case MintOpcode.MINT_STRLEN: {
+                builder.local("pLocals");
+                append_ldloc_cknull(builder, getArgU16(ip, 2), ip, true);
+                builder.appendU8(WasmOpcode.i32_load);
+                builder.appendMemarg(getMemberOffset(JiterpMember.StringLength), 2);
+                append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
                 break;
-            case MintOpcode.MINT_GETCHR:
+            }
+
+            case MintOpcode.MINT_GETCHR: {
                 builder.block();
-                append_ldloca(builder, getArgU16(ip, 2), 0, true);
-                append_ldloca(builder, getArgU16(ip, 3), 0, true);
-                append_ldloca(builder, getArgU16(ip, 1), 4, true);
-                builder.callImport("getchr");
+                // index
+                append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.i32_load);
+                // stash it, we'll be using it multiple times
+                builder.local("math_lhs32", WasmOpcode.tee_local);
+                // str
+                append_ldloc_cknull(builder, getArgU16(ip, 2), ip, true);
+                // get string length
+                builder.appendU8(WasmOpcode.i32_load);
+                builder.appendMemarg(getMemberOffset(JiterpMember.StringLength), 2);
+                // index < length
+                builder.appendU8(WasmOpcode.i32_lt_s);
+                // index >= 0
+                builder.local("math_lhs32");
+                builder.i32_const(0);
+                builder.appendU8(WasmOpcode.i32_ge_s);
+                // (index >= 0) && (index < length)
+                builder.appendU8(WasmOpcode.i32_and);
+                // If either of the index checks failed we will fall through to the bailout
                 builder.appendU8(WasmOpcode.br_if);
                 builder.appendULeb(0);
                 append_bailout(builder, ip, BailoutReason.StringOperationFailed);
                 builder.endBlock();
+
+                // The null check and range check both passed so we can load the character now
+                // Pre-load destination for the stloc at the end (we can't do this inside the block above)
+                builder.local("pLocals");
+                // (index * 2) + offsetof(MonoString, chars) + pString
+                builder.local("math_lhs32");
+                builder.i32_const(2);
+                builder.appendU8(WasmOpcode.i32_mul);
+                builder.local("cknull_ptr");
+                builder.appendU8(WasmOpcode.i32_add);
+                // Load char
+                builder.appendU8(WasmOpcode.i32_load16_u);
+                builder.appendMemarg(getMemberOffset(JiterpMember.StringData), 1);
+                // Store into result
+                append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
                 break;
+            }
 
-                /*
-                EMSCRIPTEN_KEEPALIVE int mono_jiterp_getitem_span (
-                    void **destination, MonoSpanOfVoid *span, int index, size_t element_size
-                ) {
-                */
             case MintOpcode.MINT_GETITEM_SPAN:
             case MintOpcode.MINT_GETITEM_LOCALSPAN: {
                 const elementSize = getArgI16(ip, 4);
-                // FIXME
                 builder.block();
-                // destination = &locals[1]
-                append_ldloca(builder, getArgU16(ip, 1), elementSize, true);
+                // Load index and stash it in lhs32
+                append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.i32_load);
+                builder.local("math_lhs32", WasmOpcode.tee_local);
+
+                // Load address of the span structure
                 if (opcode === MintOpcode.MINT_GETITEM_SPAN) {
-                    // span = (MonoSpanOfVoid *)locals[2]
-                    append_ldloc(builder, getArgU16(ip, 2), WasmOpcode.i32_load);
+                    // span = *(MonoSpanOfVoid *)locals[2]
+                    append_ldloc_cknull(builder, getArgU16(ip, 2), ip, true);
                 } else {
                     // span = (MonoSpanOfVoid)locals[2]
                     append_ldloca(builder, getArgU16(ip, 2), 0);
+                    builder.local("cknull_ptr", WasmOpcode.tee_local);
+                    cknullOffset = -1;
                 }
-                // index = locals[3]
-                append_ldloc(builder, getArgU16(ip, 3), WasmOpcode.i32_load);
-                // element_size = ip[4]
-                builder.i32_const(elementSize);
-                builder.callImport("getspan");
+
+                // length = span->length
+                builder.appendU8(WasmOpcode.i32_load);
+                builder.appendMemarg(getMemberOffset(JiterpMember.SpanLength), 2);
+                // index < length
+                builder.appendU8(WasmOpcode.i32_lt_u);
+                // index >= 0
+                builder.local("math_lhs32");
+                builder.i32_const(0);
+                builder.appendU8(WasmOpcode.i32_ge_s);
+                // (index >= 0) && (index < length)
+                builder.appendU8(WasmOpcode.i32_and);
                 builder.appendU8(WasmOpcode.br_if);
                 builder.appendULeb(0);
                 append_bailout(builder, ip, BailoutReason.SpanOperationFailed);
                 builder.endBlock();
+
+                // We successfully null checked and bounds checked. Now compute
+                //  the address and store it to the destination
+                builder.local("pLocals");
+
+                // src = span->_reference + (index * element_size);
+                builder.local("cknull_ptr");
+                builder.appendU8(WasmOpcode.i32_load);
+                builder.appendMemarg(getMemberOffset(JiterpMember.SpanData), 2);
+
+                builder.local("math_lhs32");
+                builder.i32_const(elementSize);
+                builder.appendU8(WasmOpcode.i32_mul);
+                builder.appendU8(WasmOpcode.i32_add);
+
+                append_stloc_tail(builder, getArgU16(ip, 1), WasmOpcode.i32_store);
                 break;
             }
 
@@ -450,6 +501,7 @@ export function generate_wasm_body (
                 builder.appendMemarg(4, 0);
                 break;
             }
+
             case MintOpcode.MINT_LD_DELEGATE_METHOD_PTR: {
                 // FIXME: ldloca invalidation size
                 append_ldloca(builder, getArgU16(ip, 1), 8, true);
@@ -493,7 +545,7 @@ export function generate_wasm_body (
                 break;
             }
             case MintOpcode.MINT_INTRINS_MEMORYMARSHAL_GETARRAYDATAREF: {
-                const offset = cwraps.mono_jiterp_get_offset_of_array_data();
+                const offset = getMemberOffset(JiterpMember.ArrayData);
                 builder.local("pLocals");
                 append_ldloc_cknull(builder, getArgU16(ip, 2), ip, true);
                 builder.i32_const(offset);
@@ -1289,17 +1341,6 @@ function emit_mov (builder: WasmBuilder, ip: MintOpcodePtr, opcode: MintOpcode)
     return true;
 }
 
-let _offset_of_vtable_initialized_flag = 0;
-
-function get_offset_of_vtable_initialized_flag () {
-    if (!_offset_of_vtable_initialized_flag) {
-        // Manually calculating this by reading the code did not yield the correct result,
-        //  so we ask the compiler (at runtime)
-        _offset_of_vtable_initialized_flag = cwraps.mono_jiterp_get_offset_of_vtable_initialized_flag();
-    }
-    return _offset_of_vtable_initialized_flag;
-}
-
 function append_vtable_initialize (builder: WasmBuilder, pVtable: NativePointer, ip: MintOpcodePtr) {
     // TODO: Actually initialize the vtable instead of just checking and bailing out?
     builder.block();
@@ -1308,7 +1349,7 @@ function append_vtable_initialize (builder: WasmBuilder, pVtable: NativePointer,
     //  in the trace as a constant visible in the wasm
     builder.ptr_const(<any>pVtable);
     builder.appendU8(WasmOpcode.i32_load8_u);
-    builder.appendMemarg(get_offset_of_vtable_initialized_flag(), 0);
+    builder.appendMemarg(getMemberOffset(JiterpMember.VtableInitialized), 0);
     builder.appendU8(WasmOpcode.br_if);
     builder.appendULeb(0);
     append_bailout(builder, ip, BailoutReason.VtableNotInitialized);
index c6d02f5..d6bdf39 100644 (file)
@@ -114,10 +114,10 @@ export let countLimitedPrintCounter = 10;
 export const abortCounts : { [key: string] : number } = {};
 export const traceInfo : { [key: string] : TraceInfo } = {};
 
-export const // offsetOfStack = 12,
-    offsetOfImethod = 4,
-    offsetOfDataItems = 20,
+export const
     sizeOfDataItem = 4,
+    sizeOfObjectHeader = 8,
+
     // HACK: Typically we generate ~12 bytes of extra gunk after the function body so we are
     //  subtracting 20 from the maximum size to make sure we don't produce too much
     // Also subtract some more size since the wasm we generate for one opcode could be big
@@ -244,9 +244,6 @@ function getTraceImports () {
         importDef("array_address", getRawCwrap("mono_jiterp_array_get_element_address_with_size_ref")),
         importDef("entry", getRawCwrap("mono_jiterp_increase_entry_count")),
         importDef("value_copy", getRawCwrap("mono_jiterp_value_copy")),
-        importDef("strlen", getRawCwrap("mono_jiterp_strlen_ref")),
-        importDef("getchr", getRawCwrap("mono_jiterp_getchr_ref")),
-        importDef("getspan", getRawCwrap("mono_jiterp_getitem_span")),
         importDef("gettype", getRawCwrap("mono_jiterp_gettype_ref")),
         importDef("cast", getRawCwrap("mono_jiterp_cast_ref")),
         importDef("try_unbox", getRawCwrap("mono_jiterp_try_unbox_ref")),