[interp] Switch locals and execution stack order (#37927)
authormonojenkins <jo.shields+jenkins@xamarin.com>
Wed, 17 Jun 2020 18:20:43 +0000 (14:20 -0400)
committerGitHub <noreply@github.com>
Wed, 17 Jun 2020 18:20:43 +0000 (21:20 +0300)
The order before was execution stack, vtstack, locals. The order now is locals, vtstack, execution stack. The reason for this is that in the future we would like to access arguments as locals, so the args pushed by the caller will end up being locals in the new frame. Additionally, this enables us to further compact the stack use in the future since a new frame will be allocated immediately following the current stack top. This also can also make calls up to 10% faster.

Co-authored-by: BrzVlad <BrzVlad@users.noreply.github.com>
src/mono/mono/mini/interp/interp-internals.h
src/mono/mono/mini/interp/interp.c
src/mono/mono/mini/interp/transform.c

index 839222a..2c4fb40 100644 (file)
@@ -218,7 +218,7 @@ struct InterpFrame {
        InterpState state;
 };
 
-#define frame_locals(frame) (((guchar*)((frame)->stack)) + (frame)->imethod->stack_size + (frame)->imethod->vt_stack_size)
+#define frame_locals(frame) ((guchar*)(frame)->stack)
 
 typedef struct {
        /* Lets interpreter know it has to resume execution after EH */
index 7f9de72..4ffa96b 100644 (file)
@@ -3297,12 +3297,6 @@ g_warning_d (const char *format, int d)
        g_warning (format, d);
 }
 
-static void
-g_warning_ds (const char *format, int d, const char *s)
-{
-       g_warning (format, d, s);
-}
-
 #if !USE_COMPUTED_GOTO
 static void
 g_error_xsx (const char *format, int x1, const char *s, int x2)
@@ -3359,16 +3353,16 @@ method_entry (ThreadContext *context, InterpFrame *frame,
        sp = frame->state.sp; \
        vt_sp = frame->state.vt_sp; \
        finally_ips = frame->state.finally_ips; \
-       locals = (unsigned char *)frame->stack + frame->imethod->stack_size + frame->imethod->vt_stack_size; \
+       locals = (unsigned char *)frame->stack; \
        frame->state.ip = NULL; \
        } while (0)
 
 /* Initialize interpreter state for executing FRAME */
 #define INIT_INTERP_STATE(frame, _clause_args) do {     \
        ip = _clause_args ? ((FrameClauseArgs *)_clause_args)->start_with_ip : (frame)->imethod->code; \
-       sp = (frame)->stack; \
-       vt_sp = (unsigned char *) sp + (frame)->imethod->stack_size; \
-       locals = (unsigned char *) vt_sp + (frame)->imethod->vt_stack_size; \
+       locals = (unsigned char *)(frame)->stack; \
+       vt_sp = (unsigned char *) locals + (frame)->imethod->total_locals_size; \
+       sp = (stackval*)(vt_sp + (frame)->imethod->vt_stack_size); \
        finally_ips = NULL; \
        } while (0)
 
@@ -3443,9 +3437,14 @@ main_loop:
        while (1) {
                MintOpcode opcode;
 #ifdef ENABLE_CHECKED_BUILD
-               g_assert (sp >= frame->stack);
-               g_assert (vt_sp >= sp);
-               g_assert (locals >= vt_sp);
+               guchar *vt_start = (guchar*)frame->stack + frame->imethod->total_locals_size;
+               guchar *sp_start = vt_start + frame->imethod->vt_stack_size;
+               guchar *sp_end = sp_start + frame->imethod->stack_size;
+               g_assert (locals == (guchar*)frame->stack);
+               g_assert (vt_sp >= vt_start);
+               g_assert (vt_sp <= sp_start);
+               g_assert ((guchar*)sp >= sp_start);
+               g_assert ((guchar*)sp <= sp_end);
 #endif
                DUMP_INSTR();
                MINT_IN_SWITCH (*ip) {
@@ -3594,7 +3593,7 @@ main_loop:
                        MINT_IN_BREAK;
                }
                MINT_IN_CASE(MINT_JMP) {
-                       g_assert (sp == frame->stack);
+                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size + frame->imethod->vt_stack_size));
                        InterpMethod *new_method = (InterpMethod*)frame->imethod->data_items [ip [1]];
 
                        if (frame->imethod->prof_flags & MONO_PROFILER_CALL_INSTRUMENTATION_TAIL_CALL)
@@ -3619,13 +3618,13 @@ main_loop:
                        if (realloc_frame) {
                                alloc_stack_data (context, frame, frame->imethod->alloca_size);
                                memset (frame->stack, 0, frame->imethod->alloca_size);
-                               sp = frame->stack;
+                               locals = (guchar*)frame->stack;
                        }
-                       vt_sp = (unsigned char *) sp + frame->imethod->stack_size;
+                       vt_sp = locals + frame->imethod->total_locals_size;
 #if DEBUG_INTERP
                        vtalloc = vt_sp;
 #endif
-                       locals = vt_sp + frame->imethod->vt_stack_size;
+                       sp = (stackval*)(vt_sp + frame->imethod->vt_stack_size);
                        ip = frame->imethod->code;
                        MINT_IN_BREAK;
                }
@@ -4021,18 +4020,10 @@ call:
                                // FIXME This can only happen in a few wrappers. Add separate opcode for it
                                *frame->retval = *sp;
                        }
-#ifdef ENABLE_CHECKED_BUILD
-                       /* FIXME Fix these warnings and replace with assertions */
-                       if (sp > frame->stack)
-                               g_warning_d ("ret: more values on stack: %d", sp - frame->stack);
-#endif
+                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size + frame->imethod->vt_stack_size));
                        goto exit_frame;
                MINT_IN_CASE(MINT_RET_VOID)
-#ifdef ENABLE_CHECKED_BUILD
-                       /* FIXME Fix these warnings and replace with assertions */
-                       if (sp > frame->stack)
-                               g_warning_ds ("ret.void: more values on stack: %d %s", sp - frame->stack, mono_method_full_name (frame->imethod->method, TRUE));
-#endif
+                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size + frame->imethod->vt_stack_size));
                        goto exit_frame;
                MINT_IN_CASE(MINT_RET_VT) {
                        gpointer dest_vt;
@@ -4048,11 +4039,7 @@ call:
                                dest_vt = frame->retval->data.p;
                        }
                        memcpy (dest_vt, sp->data.p, i32);
-#ifdef ENABLE_CHECKED_BUILD
-                       /* FIXME Fix these warnings and replace with assertions */
-                       if (sp > frame->stack)
-                               g_warning_d ("ret.vt: more values on stack: %d", sp - frame->stack);
-#endif
+                       g_assert_checked (sp == (stackval*)(locals + frame->imethod->total_locals_size + frame->imethod->vt_stack_size));
                        goto exit_frame;
                }
 
@@ -6386,10 +6373,9 @@ call_newobj:
                        if (clause_args && clause_args->exec_frame == frame && clause_index == clause_args->exit_clause)
                                goto exit_clause;
 
-#if DEBUG_INTERP // This assert causes Linux/amd64/clang to use more stack.
-                       g_assert (sp >= frame->stack);
-#endif
-                       sp = frame->stack;
+                       // endfinally empties the stack
+                       vt_sp = (guchar*)frame->stack + frame->imethod->total_locals_size;
+                       sp = (stackval*)(vt_sp + frame->imethod->vt_stack_size);
 
                        if (finally_ips) {
                                ip = (const guint16*)finally_ips->data;
@@ -6410,8 +6396,9 @@ call_newobj:
                MINT_IN_CASE(MINT_LEAVE_CHECK)
                MINT_IN_CASE(MINT_LEAVE_S_CHECK) {
                        guint32 ip_offset = ip - frame->imethod->code;
-                       g_assert (sp >= frame->stack);
-                       sp = frame->stack; /* spec says stack should be empty at endfinally so it should be at the start too */
+                       // leave empties the stack
+                       vt_sp = (guchar*)frame->stack + frame->imethod->total_locals_size;
+                       sp = (stackval*)(vt_sp + frame->imethod->vt_stack_size);
 
                        int opcode = *ip;
                        gboolean const check = opcode == MINT_LEAVE_CHECK || opcode == MINT_LEAVE_S_CHECK;
@@ -6456,8 +6443,6 @@ call_newobj:
                        if (old_list != finally_ips && finally_ips) {
                                ip = (const guint16*)finally_ips->data;
                                finally_ips = g_slist_remove (finally_ips, ip);
-                               // we set vt_sp later here so we relieve stack pressure
-                               vt_sp = (unsigned char*)sp + frame->imethod->stack_size;
                                // goto main_loop instead of MINT_IN_DISPATCH helps the compiler and therefore conserves stack.
                                // This is a slow/rare path and conserving stack is preferred over its performance otherwise.
                                goto main_loop;
@@ -6976,11 +6961,11 @@ call_newobj:
                }
 
                MINT_IN_CASE(MINT_LOCALLOC) {
-                       if (sp != frame->stack + 1) /*FIX?*/
+                       stackval *sp_start = (stackval*)(locals + frame->imethod->total_locals_size + frame->imethod->vt_stack_size);
+                       if (sp != sp_start + 1) /*FIX?*/
                                goto abort_label;
 
                        int len = sp [-1].data.i;
-                       //sp [-1].data.p = alloca (len);
                        sp [-1].data.p = alloc_extra_stack_data (context, ALIGN_TO (len, 8));
 
                        if (frame->imethod->init_locals)
@@ -7256,8 +7241,9 @@ resume:
                        /* Set the current execution state to the resume state in context */
                        ip = context->handler_ip;
                        /* spec says stack should be empty at endfinally so it should be at the start too */
-                       sp = frame->stack;
-                       vt_sp = (guchar*)sp + frame->imethod->stack_size;
+                       locals = (guchar*)frame->stack;
+                       vt_sp = locals + frame->imethod->total_locals_size;
+                       sp = (stackval*)(vt_sp + frame->imethod->vt_stack_size);
                        g_assert (context->exc_gchandle);
                        sp->data.p = mono_gchandle_get_target_internal (context->exc_gchandle);
                        ++sp;
@@ -7298,7 +7284,7 @@ exit_clause:
                if (clause_args->base_frame) {
                        // We finished executing a filter. The execution stack of the base frame
                        // should remain unmodified, but we need to update the local space.
-                       char *locals_base = (char*)clause_args->base_frame->stack + frame->imethod->stack_size + frame->imethod->vt_stack_size;
+                       char *locals_base = (char*)clause_args->base_frame->stack;
 
                        memcpy (locals_base, locals, frame->imethod->locals_size);
                        pop_frame (context, frame);
index 751d8fa..32c37a7 100644 (file)
@@ -1963,7 +1963,7 @@ interp_icall_op_for_sig (MonoMethodSignature *sig)
 #define INLINE_LENGTH_LIMIT 20
 
 static gboolean
-interp_method_check_inlining (TransformData *td, MonoMethod *method)
+interp_method_check_inlining (TransformData *td, MonoMethod *method, MonoMethodSignature *csignature)
 {
        MonoMethodHeaderSummary header;
 
@@ -1971,6 +1971,9 @@ interp_method_check_inlining (TransformData *td, MonoMethod *method)
                /* Used to mark methods containing StackCrawlMark locals */
                return FALSE;
 
+       if (csignature->call_convention == MONO_CALL_VARARG)
+               return FALSE;
+
        if (!mono_method_get_header_summary (method, &header))
                return FALSE;
 
@@ -2129,7 +2132,7 @@ interp_constrained_box (TransformData *td, MonoDomain *domain, MonoClass *constr
                interp_add_ins (td, MINT_BOX_NULLABLE);
                td->last_ins->data [0] = get_data_item_index (td, constrained_class);
                td->last_ins->data [1] = csignature->param_count;
-               td->last_ins->data [2] = (td->sp - 1 - csignature->param_count)->type != STACK_TYPE_MP ? 0 : 1;
+               td->last_ins->data [2] = 1;
        } else {
                MonoVTable *vtable = mono_class_vtable_checked (domain, constrained_class, error);
                return_if_nok (error);
@@ -2138,7 +2141,7 @@ interp_constrained_box (TransformData *td, MonoDomain *domain, MonoClass *constr
                        interp_add_ins (td, MINT_BOX_VT);
                        td->last_ins->data [0] = get_data_item_index (td, vtable);
                        td->last_ins->data [1] = csignature->param_count;
-                       td->last_ins->data [2] = (td->sp - 1 - csignature->param_count)->type != STACK_TYPE_MP ? 0 : 1;
+                       td->last_ins->data [2] = 1;
                } else {
                        interp_add_ins (td, MINT_BOX);
                        td->last_ins->data [0] = get_data_item_index (td, vtable);
@@ -2381,7 +2384,7 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
        }
 
        g_assert (csignature->call_convention != MONO_CALL_FASTCALL);
-       if ((mono_interp_opt & INTERP_OPT_INLINE) && op == -1 && !is_virtual && target_method && interp_method_check_inlining (td, target_method)) {
+       if ((mono_interp_opt & INTERP_OPT_INLINE) && op == -1 && !is_virtual && target_method && interp_method_check_inlining (td, target_method, csignature)) {
                MonoMethodHeader *mheader = interp_method_get_header (target_method, error);
                return_val_if_nok (error, FALSE);
 
@@ -2431,6 +2434,7 @@ interp_transform_call (TransformData *td, MonoMethod *method, MonoMethod *target
                                vt_res_size = mono_class_native_size (klass, NULL);
                        else
                                vt_res_size = mono_class_value_size (klass, NULL);
+                       vt_res_size = ALIGN_TO (vt_res_size, MINT_VT_ALIGNMENT);
                        if (mono_class_has_failure (klass)) {
                                mono_error_set_for_class_failure (error, klass);
                                return FALSE;
@@ -4660,7 +4664,7 @@ generate_code (TransformData *td, MonoMethod *method, MonoMethodHeader *header,
                                        // We don't support inlining ctors of MINT_TYPE_VT which also receive a MINT_TYPE_VT
                                        // as an argument. The reason is that we would need to push this on the vtstack before
                                        // the argument, which is very awkward for uncommon scenario.
-                                       if ((mono_interp_opt & INTERP_OPT_INLINE) && interp_method_check_inlining (td, m) &&
+                                       if ((mono_interp_opt & INTERP_OPT_INLINE) && interp_method_check_inlining (td, m, csignature) &&
                                                        (!is_vtst || !signature_has_vt_params (csignature))) {
                                                MonoMethodHeader *mheader = interp_method_get_header (m, error);
                                                goto_if_nok (error, exit);
@@ -7672,7 +7676,7 @@ generate (MonoMethod *method, MonoMethodHeader *header, InterpMethod *rtm, MonoG
        rtm->stack_size = (sizeof (stackval)) * (td->max_stack_height + 2); /* + 1 for returns of called functions  + 1 for 0-ing in trace*/
        rtm->stack_size = ALIGN_TO (rtm->stack_size, MINT_VT_ALIGNMENT);
        rtm->vt_stack_size = td->max_vt_sp;
-       rtm->total_locals_size = td->total_locals_size;
+       rtm->total_locals_size = ALIGN_TO (td->total_locals_size, MINT_VT_ALIGNMENT);
        rtm->alloca_size = ALIGN_TO (rtm->total_locals_size + rtm->vt_stack_size + rtm->stack_size, 8);
        rtm->data_items = (gpointer*)mono_domain_alloc0 (domain, td->n_data_items * sizeof (td->data_items [0]));
        memcpy (rtm->data_items, td->data_items, td->n_data_items * sizeof (td->data_items [0]));